]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - fs/shiftfs.c
UBUNTU: SAUCE: ACPI: video: Use native backlight on Lenovo E41-25/45
[mirror_ubuntu-focal-kernel.git] / fs / shiftfs.c
CommitLineData
f7dfaa67 1#include <linux/btrfs.h>
2b77b5c4 2#include <linux/capability.h>
8ef17b62
JB
3#include <linux/cred.h>
4#include <linux/mount.h>
f7dfaa67 5#include <linux/fdtable.h>
8ef17b62
JB
6#include <linux/file.h>
7#include <linux/fs.h>
8#include <linux/namei.h>
9#include <linux/module.h>
10#include <linux/kernel.h>
11#include <linux/magic.h>
12#include <linux/parser.h>
2b77b5c4 13#include <linux/security.h>
8ef17b62
JB
14#include <linux/seq_file.h>
15#include <linux/statfs.h>
16#include <linux/slab.h>
17#include <linux/user_namespace.h>
18#include <linux/uidgid.h>
19#include <linux/xattr.h>
2b77b5c4
CB
20#include <linux/posix_acl.h>
21#include <linux/posix_acl_xattr.h>
22#include <linux/uio.h>
8ef17b62
JB
23
24struct shiftfs_super_info {
25 struct vfsmount *mnt;
26 struct user_namespace *userns;
2b77b5c4
CB
27 /* creds of process who created the super block */
28 const struct cred *creator_cred;
8ef17b62 29 bool mark;
2b77b5c4 30 unsigned int passthrough;
b4c9cb0c 31 unsigned int passthrough_mark;
8ef17b62
JB
32};
33
2b77b5c4
CB
34static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
35 umode_t mode, dev_t dev, struct dentry *dentry);
36
/* Bit values accepted by the "passthrough=" mount option. */
#define SHIFTFS_PASSTHROUGH_NONE	0
#define SHIFTFS_PASSTHROUGH_STAT	1
#define SHIFTFS_PASSTHROUGH_IOCTL	2
/* Every bit that may legally be set; anything else is rejected at parse time. */
#define SHIFTFS_PASSTHROUGH_ALL \
	(SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
42
43static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
44{
45 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
46 return false;
47
f7dfaa67
CB
48 return true;
49}
2b77b5c4
CB
50
51static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
52{
53 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
54 return false;
55
2b77b5c4
CB
56 return true;
57}
8ef17b62
JB
58
/* Token ids used by the mount option table. */
enum {
	OPT_MARK,		/* "mark" */
	OPT_PASSTHROUGH,	/* "passthrough=%u" */
	OPT_LAST,		/* id of the terminating table entry */
};
64
65/* global filesystem options */
66static const match_table_t tokens = {
67 { OPT_MARK, "mark" },
2b77b5c4 68 { OPT_PASSTHROUGH, "passthrough=%u" },
8ef17b62
JB
69 { OPT_LAST, NULL }
70};
71
2b77b5c4 72static const struct cred *shiftfs_override_creds(const struct super_block *sb)
8ef17b62 73{
2b77b5c4 74 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
8ef17b62 75
2b77b5c4
CB
76 return override_creds(sbinfo->creator_cred);
77}
78
/*
 * Counterpart of shiftfs_override_object_creds(): reinstate @oldcred on the
 * current task, then drop the reference held on @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	revert_creds(oldcred);
	put_cred(newcred);
}
85
b674a8b8
SF
86static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
87 kuid_t kuid)
88{
89 uid_t uid = from_kuid(from, kuid);
90 return make_kuid(to, uid);
91}
92
93static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
94 kgid_t kgid)
95{
96 gid_t gid = from_kgid(from, kgid);
97 return make_kgid(to, gid);
98}
99
2b77b5c4
CB
100static int shiftfs_override_object_creds(const struct super_block *sb,
101 const struct cred **oldcred,
102 struct cred **newcred,
103 struct dentry *dentry, umode_t mode,
104 bool hardlink)
105{
b674a8b8 106 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
2b77b5c4
CB
107 kuid_t fsuid = current_fsuid();
108 kgid_t fsgid = current_fsgid();
109
110 *oldcred = shiftfs_override_creds(sb);
111
112 *newcred = prepare_creds();
113 if (!*newcred) {
114 revert_creds(*oldcred);
115 return -ENOMEM;
116 }
117
b674a8b8
SF
118 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
119 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
2b77b5c4
CB
120
121 if (!hardlink) {
122 int err = security_dentry_create_files_as(dentry, mode,
123 &dentry->d_name,
124 *oldcred, *newcred);
125 if (err) {
126 shiftfs_revert_object_creds(*oldcred, *newcred);
127 return err;
128 }
129 }
8ef17b62 130
2b77b5c4
CB
131 put_cred(override_creds(*newcred));
132 return 0;
133}
8ef17b62 134
2b77b5c4
CB
135static void shiftfs_copyattr(struct inode *from, struct inode *to)
136{
137 struct user_namespace *from_ns = from->i_sb->s_user_ns;
138 struct user_namespace *to_ns = to->i_sb->s_user_ns;
139
140 to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
141 to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
142 to->i_mode = from->i_mode;
143 to->i_atime = from->i_atime;
144 to->i_mtime = from->i_mtime;
145 to->i_ctime = from->i_ctime;
146 i_size_write(to, i_size_read(from));
147}
8ef17b62 148
2b77b5c4
CB
149static void shiftfs_copyflags(struct inode *from, struct inode *to)
150{
151 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
8ef17b62 152
2b77b5c4 153 inode_set_flags(to, from->i_flags & mask, mask);
8ef17b62
JB
154}
155
2b77b5c4 156static void shiftfs_file_accessed(struct file *file)
8ef17b62 157{
2b77b5c4
CB
158 struct inode *upperi, *loweri;
159
160 if (file->f_flags & O_NOATIME)
8ef17b62
JB
161 return;
162
2b77b5c4
CB
163 upperi = file_inode(file);
164 loweri = upperi->i_private;
165
166 if (!loweri)
167 return;
168
169 upperi->i_mtime = loweri->i_mtime;
170 upperi->i_ctime = loweri->i_ctime;
171
172 touch_atime(&file->f_path);
8ef17b62
JB
173}
174
2b77b5c4
CB
175static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
176 char *options)
8ef17b62
JB
177{
178 char *p;
179 substring_t args[MAX_OPT_ARGS];
180
2b77b5c4
CB
181 sbinfo->mark = false;
182 sbinfo->passthrough = 0;
8ef17b62
JB
183
184 while ((p = strsep(&options, ",")) != NULL) {
2b77b5c4 185 int err, intarg, token;
8ef17b62
JB
186
187 if (!*p)
188 continue;
189
190 token = match_token(p, tokens, args);
191 switch (token) {
192 case OPT_MARK:
2b77b5c4
CB
193 sbinfo->mark = true;
194 break;
195 case OPT_PASSTHROUGH:
196 err = match_int(&args[0], &intarg);
197 if (err)
198 return err;
199
200 if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
201 return -EINVAL;
202
203 sbinfo->passthrough = intarg;
8ef17b62
JB
204 break;
205 default:
206 return -EINVAL;
207 }
208 }
2b77b5c4 209
8ef17b62
JB
210 return 0;
211}
212
213static void shiftfs_d_release(struct dentry *dentry)
214{
2b77b5c4 215 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 216
2b77b5c4
CB
217 if (lowerd)
218 dput(lowerd);
8ef17b62
JB
219}
220
221static struct dentry *shiftfs_d_real(struct dentry *dentry,
222 const struct inode *inode)
223{
2b77b5c4
CB
224 struct dentry *lowerd = dentry->d_fsdata;
225
226 if (inode && d_inode(dentry) == inode)
227 return dentry;
8ef17b62 228
2b77b5c4
CB
229 lowerd = d_real(lowerd, inode);
230 if (lowerd && (!inode || inode == d_inode(lowerd)))
231 return lowerd;
8ef17b62 232
2b77b5c4
CB
233 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
234 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
235 return dentry;
8ef17b62
JB
236}
237
238static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
239{
2b77b5c4
CB
240 int err = 1;
241 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 242
2b77b5c4 243 if (d_is_negative(lowerd) != d_is_negative(dentry))
8ef17b62
JB
244 return 0;
245
2b77b5c4
CB
246 if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
247 err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
8ef17b62 248
2b77b5c4
CB
249 if (d_really_is_positive(dentry)) {
250 struct inode *inode = d_inode(dentry);
251 struct inode *loweri = d_inode(lowerd);
252
253 shiftfs_copyattr(loweri, inode);
254 if (!inode->i_nlink)
255 err = 0;
256 }
257
258 return err;
8ef17b62
JB
259}
260
261static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
262{
2b77b5c4
CB
263 int err = 1;
264 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 265
2b77b5c4
CB
266 if (d_unhashed(lowerd) ||
267 ((d_is_negative(lowerd) != d_is_negative(dentry))))
8ef17b62
JB
268 return 0;
269
2b77b5c4
CB
270 if (flags & LOOKUP_RCU)
271 return -ECHILD;
8ef17b62 272
2b77b5c4
CB
273 if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
274 err = lowerd->d_op->d_revalidate(lowerd, flags);
8ef17b62 275
2b77b5c4
CB
276 if (d_really_is_positive(dentry)) {
277 struct inode *inode = d_inode(dentry);
278 struct inode *loweri = d_inode(lowerd);
8ef17b62 279
2b77b5c4
CB
280 shiftfs_copyattr(loweri, inode);
281 if (!inode->i_nlink)
282 err = 0;
283 }
8ef17b62 284
2b77b5c4 285 return err;
8ef17b62
JB
286}
287
288static const struct dentry_operations shiftfs_dentry_ops = {
2b77b5c4
CB
289 .d_release = shiftfs_d_release,
290 .d_real = shiftfs_d_real,
291 .d_revalidate = shiftfs_d_revalidate,
8ef17b62
JB
292 .d_weak_revalidate = shiftfs_d_weak_revalidate,
293};
294
8ef17b62
JB
295static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
296 struct delayed_call *done)
297{
2b77b5c4
CB
298 const char *p;
299 const struct cred *oldcred;
300 struct dentry *lowerd;
8ef17b62 301
2b77b5c4
CB
302 /* RCU lookup not supported */
303 if (!dentry)
8ef17b62 304 return ERR_PTR(-ECHILD);
2b77b5c4
CB
305
306 lowerd = dentry->d_fsdata;
307 oldcred = shiftfs_override_creds(dentry->d_sb);
308 p = vfs_get_link(lowerd, done);
309 revert_creds(oldcred);
310
311 return p;
8ef17b62
JB
312}
313
314static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
315 const char *name, const void *value,
316 size_t size, int flags)
317{
2b77b5c4
CB
318 struct dentry *lowerd = dentry->d_fsdata;
319 int err;
320 const struct cred *oldcred;
321
322 oldcred = shiftfs_override_creds(dentry->d_sb);
323 err = vfs_setxattr(lowerd, name, value, size, flags);
324 revert_creds(oldcred);
8ef17b62 325
2b77b5c4 326 shiftfs_copyattr(lowerd->d_inode, inode);
8ef17b62
JB
327
328 return err;
329}
330
331static int shiftfs_xattr_get(const struct xattr_handler *handler,
332 struct dentry *dentry, struct inode *inode,
333 const char *name, void *value, size_t size)
334{
2b77b5c4 335 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 336 int err;
2b77b5c4 337 const struct cred *oldcred;
8ef17b62 338
2b77b5c4
CB
339 oldcred = shiftfs_override_creds(dentry->d_sb);
340 err = vfs_getxattr(lowerd, name, value, size);
341 revert_creds(oldcred);
8ef17b62
JB
342
343 return err;
344}
345
346static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
347 size_t size)
348{
2b77b5c4 349 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 350 int err;
2b77b5c4 351 const struct cred *oldcred;
8ef17b62 352
2b77b5c4
CB
353 oldcred = shiftfs_override_creds(dentry->d_sb);
354 err = vfs_listxattr(lowerd, list, size);
355 revert_creds(oldcred);
8ef17b62
JB
356
357 return err;
358}
359
360static int shiftfs_removexattr(struct dentry *dentry, const char *name)
361{
2b77b5c4 362 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 363 int err;
2b77b5c4
CB
364 const struct cred *oldcred;
365
366 oldcred = shiftfs_override_creds(dentry->d_sb);
367 err = vfs_removexattr(lowerd, name);
368 revert_creds(oldcred);
8ef17b62 369
2b77b5c4
CB
370 /* update c/mtime */
371 shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
8ef17b62
JB
372
373 return err;
374}
375
376static int shiftfs_xattr_set(const struct xattr_handler *handler,
377 struct dentry *dentry, struct inode *inode,
378 const char *name, const void *value, size_t size,
379 int flags)
380{
381 if (!value)
382 return shiftfs_removexattr(dentry, name);
383 return shiftfs_setxattr(dentry, inode, name, value, size, flags);
384}
385
2b77b5c4 386static int shiftfs_inode_test(struct inode *inode, void *data)
8ef17b62 387{
2b77b5c4
CB
388 return inode->i_private == data;
389}
8ef17b62 390
2b77b5c4
CB
391static int shiftfs_inode_set(struct inode *inode, void *data)
392{
393 inode->i_private = data;
394 return 0;
8ef17b62
JB
395}
396
2b77b5c4
CB
397static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
398 umode_t mode, const char *symlink,
399 struct dentry *hardlink, bool excl)
8ef17b62 400{
8ef17b62 401 int err;
2b77b5c4
CB
402 const struct cred *oldcred;
403 struct cred *newcred;
404 void *loweri_iop_ptr = NULL;
405 umode_t modei = mode;
406 struct super_block *dir_sb = diri->i_sb;
407 struct dentry *lowerd_new = dentry->d_fsdata;
408 struct inode *inode = NULL, *loweri_dir = diri->i_private;
409 const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
410 struct dentry *lowerd_link = NULL;
8ef17b62
JB
411
412 if (hardlink) {
2b77b5c4 413 loweri_iop_ptr = loweri_dir_iop->link;
8ef17b62
JB
414 } else {
415 switch (mode & S_IFMT) {
416 case S_IFDIR:
2b77b5c4 417 loweri_iop_ptr = loweri_dir_iop->mkdir;
8ef17b62
JB
418 break;
419 case S_IFREG:
2b77b5c4 420 loweri_iop_ptr = loweri_dir_iop->create;
8ef17b62
JB
421 break;
422 case S_IFLNK:
2b77b5c4
CB
423 loweri_iop_ptr = loweri_dir_iop->symlink;
424 break;
425 case S_IFSOCK:
426 /* fall through */
427 case S_IFIFO:
428 loweri_iop_ptr = loweri_dir_iop->mknod;
429 break;
8ef17b62
JB
430 }
431 }
2b77b5c4
CB
432 if (!loweri_iop_ptr) {
433 err = -EINVAL;
434 goto out_iput;
435 }
8ef17b62 436
2b77b5c4 437 inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
8ef17b62 438
2b77b5c4
CB
439 if (!hardlink) {
440 inode = new_inode(dir_sb);
441 if (!inode) {
442 err = -ENOMEM;
443 goto out_iput;
444 }
445
446 /*
447 * new_inode() will have added the new inode to the super
448 * block's list of inodes. Further below we will call
449 * inode_insert5() Which would perform the same operation again
450 * thereby corrupting the list. To avoid this raise I_CREATING
451 * in i_state which will cause inode_insert5() to skip this
452 * step. I_CREATING will be cleared by d_instantiate_new()
453 * below.
454 */
455 spin_lock(&inode->i_lock);
456 inode->i_state |= I_CREATING;
457 spin_unlock(&inode->i_lock);
8ef17b62 458
2b77b5c4
CB
459 inode_init_owner(inode, diri, mode);
460 modei = inode->i_mode;
461 }
8ef17b62 462
2b77b5c4
CB
463 err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
464 dentry, modei, hardlink != NULL);
465 if (err)
466 goto out_iput;
8ef17b62 467
8ef17b62 468 if (hardlink) {
2b77b5c4
CB
469 lowerd_link = hardlink->d_fsdata;
470 err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
8ef17b62 471 } else {
2b77b5c4 472 switch (modei & S_IFMT) {
8ef17b62 473 case S_IFDIR:
2b77b5c4 474 err = vfs_mkdir(loweri_dir, lowerd_new, modei);
8ef17b62
JB
475 break;
476 case S_IFREG:
2b77b5c4 477 err = vfs_create(loweri_dir, lowerd_new, modei, excl);
8ef17b62
JB
478 break;
479 case S_IFLNK:
2b77b5c4
CB
480 err = vfs_symlink(loweri_dir, lowerd_new, symlink);
481 break;
482 case S_IFSOCK:
483 /* fall through */
484 case S_IFIFO:
485 err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
486 break;
487 default:
488 err = -EINVAL;
489 break;
8ef17b62
JB
490 }
491 }
492
2b77b5c4 493 shiftfs_revert_object_creds(oldcred, newcred);
8ef17b62 494
2b77b5c4
CB
495 if (!err && WARN_ON(!lowerd_new->d_inode))
496 err = -EIO;
8ef17b62 497 if (err)
2b77b5c4
CB
498 goto out_iput;
499
500 if (hardlink) {
501 inode = d_inode(hardlink);
502 ihold(inode);
503
504 /* copy up times from lower inode */
505 shiftfs_copyattr(d_inode(lowerd_link), inode);
506 set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
507 d_instantiate(dentry, inode);
508 } else {
509 struct inode *inode_tmp;
510 struct inode *loweri_new = d_inode(lowerd_new);
511
512 inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
513 shiftfs_inode_test, shiftfs_inode_set,
514 loweri_new);
515 if (unlikely(inode_tmp != inode)) {
516 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
517 iput(inode_tmp);
518 err = -EINVAL;
519 goto out_iput;
520 }
8ef17b62 521
2b77b5c4
CB
522 ihold(loweri_new);
523 shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
524 0, lowerd_new);
525 d_instantiate_new(dentry, inode);
526 }
8ef17b62 527
2b77b5c4
CB
528 shiftfs_copyattr(loweri_dir, diri);
529 if (loweri_iop_ptr == loweri_dir_iop->mkdir)
530 set_nlink(diri, loweri_dir->i_nlink);
8ef17b62 531
2b77b5c4 532 inode = NULL;
8ef17b62 533
2b77b5c4
CB
534out_iput:
535 iput(inode);
536 inode_unlock(loweri_dir);
8ef17b62
JB
537
538 return err;
539}
540
541static int shiftfs_create(struct inode *dir, struct dentry *dentry,
542 umode_t mode, bool excl)
543{
544 mode |= S_IFREG;
545
2b77b5c4 546 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
8ef17b62
JB
547}
548
549static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
550 umode_t mode)
551{
552 mode |= S_IFDIR;
553
2b77b5c4 554 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
8ef17b62
JB
555}
556
557static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
558 struct dentry *dentry)
559{
2b77b5c4
CB
560 return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
561}
562
563static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
564 dev_t rdev)
565{
566 if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
567 return -EPERM;
568
569 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
8ef17b62
JB
570}
571
572static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
573 const char *symlink)
574{
2b77b5c4 575 return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
8ef17b62
JB
576}
577
578static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
579{
2b77b5c4
CB
580 struct dentry *lowerd = dentry->d_fsdata;
581 struct inode *loweri = dir->i_private;
3773abfa 582 struct inode *inode = d_inode(dentry);
8ef17b62 583 int err;
2b77b5c4 584 const struct cred *oldcred;
8ef17b62 585
2b77b5c4
CB
586 oldcred = shiftfs_override_creds(dentry->d_sb);
587 inode_lock_nested(loweri, I_MUTEX_PARENT);
8ef17b62 588 if (rmdir)
2b77b5c4 589 err = vfs_rmdir(loweri, lowerd);
8ef17b62 590 else
2b77b5c4 591 err = vfs_unlink(loweri, lowerd, NULL);
2b77b5c4 592 revert_creds(oldcred);
8ef17b62 593
3773abfa 594 if (!err) {
2b77b5c4
CB
595 d_drop(dentry);
596
3773abfa
CB
597 if (rmdir)
598 clear_nlink(inode);
599 else
600 drop_nlink(inode);
601 }
602 inode_unlock(loweri);
603
604 shiftfs_copyattr(loweri, dir);
8ef17b62
JB
605
606 return err;
607}
608
609static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
610{
611 return shiftfs_rm(dir, dentry, false);
612}
613
614static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
615{
616 return shiftfs_rm(dir, dentry, true);
617}
618
619static int shiftfs_rename(struct inode *olddir, struct dentry *old,
620 struct inode *newdir, struct dentry *new,
621 unsigned int flags)
622{
2b77b5c4
CB
623 struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
624 *lowerd_dir_new = new->d_parent->d_fsdata,
625 *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
626 *trapd;
627 struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
628 *loweri_dir_new = lowerd_dir_new->d_inode;
8ef17b62 629 int err = -EINVAL;
2b77b5c4 630 const struct cred *oldcred;
8ef17b62 631
2b77b5c4 632 trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
8ef17b62 633
2b77b5c4 634 if (trapd == lowerd_old || trapd == lowerd_new)
8ef17b62
JB
635 goto out_unlock;
636
2b77b5c4
CB
637 oldcred = shiftfs_override_creds(old->d_sb);
638 err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
639 NULL, flags);
640 revert_creds(oldcred);
8ef17b62 641
2b77b5c4
CB
642 shiftfs_copyattr(loweri_dir_old, olddir);
643 shiftfs_copyattr(loweri_dir_new, newdir);
8ef17b62 644
2b77b5c4
CB
645out_unlock:
646 unlock_rename(lowerd_dir_new, lowerd_dir_old);
8ef17b62
JB
647
648 return err;
649}
650
651static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
652 unsigned int flags)
653{
2b77b5c4
CB
654 struct dentry *new;
655 struct inode *newi;
656 const struct cred *oldcred;
657 struct dentry *lowerd = dentry->d_parent->d_fsdata;
658 struct inode *inode = NULL, *loweri = lowerd->d_inode;
659
660 inode_lock(loweri);
661 oldcred = shiftfs_override_creds(dentry->d_sb);
662 new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
663 revert_creds(oldcred);
664 inode_unlock(loweri);
8ef17b62
JB
665
666 if (IS_ERR(new))
667 return new;
668
669 dentry->d_fsdata = new;
670
2b77b5c4
CB
671 newi = new->d_inode;
672 if (!newi)
8ef17b62
JB
673 goto out;
674
2b77b5c4
CB
675 inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
676 shiftfs_inode_test, shiftfs_inode_set, newi);
677 if (!inode) {
8ef17b62
JB
678 dput(new);
679 return ERR_PTR(-ENOMEM);
680 }
2b77b5c4
CB
681 if (inode->i_state & I_NEW) {
682 /*
683 * inode->i_private set by shiftfs_inode_set(), but we still
684 * need to take a reference
685 */
686 ihold(newi);
687 shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
688 unlock_new_inode(inode);
689 }
8ef17b62 690
2b77b5c4
CB
691out:
692 return d_splice_alias(inode, dentry);
8ef17b62
JB
693}
694
695static int shiftfs_permission(struct inode *inode, int mask)
696{
8ef17b62 697 int err;
2b77b5c4
CB
698 const struct cred *oldcred;
699 struct inode *loweri = inode->i_private;
8ef17b62 700
2b77b5c4
CB
701 if (!loweri) {
702 WARN_ON(!(mask & MAY_NOT_BLOCK));
8ef17b62 703 return -ECHILD;
2b77b5c4 704 }
8ef17b62 705
2b77b5c4
CB
706 err = generic_permission(inode, mask);
707 if (err)
708 return err;
709
710 oldcred = shiftfs_override_creds(inode->i_sb);
711 err = inode_permission(loweri, mask);
712 revert_creds(oldcred);
713
714 return err;
715}
716
717static int shiftfs_fiemap(struct inode *inode,
718 struct fiemap_extent_info *fieinfo, u64 start,
719 u64 len)
720{
721 int err;
722 const struct cred *oldcred;
723 struct inode *loweri = inode->i_private;
724
725 if (!loweri->i_op->fiemap)
726 return -EOPNOTSUPP;
727
728 oldcred = shiftfs_override_creds(inode->i_sb);
729 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
730 filemap_write_and_wait(loweri->i_mapping);
731 err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
732 revert_creds(oldcred);
733
734 return err;
735}
736
737static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
738 umode_t mode)
739{
740 int err;
741 const struct cred *oldcred;
742 struct dentry *lowerd = dentry->d_fsdata;
743 struct inode *loweri = dir->i_private;
744
745 if (!loweri->i_op->tmpfile)
746 return -EOPNOTSUPP;
747
748 oldcred = shiftfs_override_creds(dir->i_sb);
749 err = loweri->i_op->tmpfile(loweri, lowerd, mode);
750 revert_creds(oldcred);
8ef17b62
JB
751
752 return err;
753}
754
755static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
756{
2b77b5c4
CB
757 struct dentry *lowerd = dentry->d_fsdata;
758 struct inode *loweri = lowerd->d_inode;
fe8eb7df 759 struct iattr newattr;
2b77b5c4 760 const struct cred *oldcred;
8ef17b62 761 struct super_block *sb = dentry->d_sb;
b674a8b8 762 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
8ef17b62
JB
763 int err;
764
2b77b5c4
CB
765 err = setattr_prepare(dentry, attr);
766 if (err)
767 return err;
768
fe8eb7df 769 newattr = *attr;
b674a8b8
SF
770 newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
771 newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
8ef17b62 772
fe8eb7df
SF
773 /*
774 * mode change is for clearing setuid/setgid bits. Allow lower fs
775 * to interpret this in its own way.
776 */
777 if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
778 newattr.ia_valid &= ~ATTR_MODE;
779
2b77b5c4
CB
780 inode_lock(loweri);
781 oldcred = shiftfs_override_creds(dentry->d_sb);
70e662da 782 err = notify_change(lowerd, &newattr, NULL);
2b77b5c4
CB
783 revert_creds(oldcred);
784 inode_unlock(loweri);
8ef17b62 785
2b77b5c4 786 shiftfs_copyattr(loweri, d_inode(dentry));
8ef17b62 787
2b77b5c4 788 return err;
8ef17b62
JB
789}
790
791static int shiftfs_getattr(const struct path *path, struct kstat *stat,
792 u32 request_mask, unsigned int query_flags)
793{
794 struct inode *inode = path->dentry->d_inode;
2b77b5c4
CB
795 struct dentry *lowerd = path->dentry->d_fsdata;
796 struct inode *loweri = lowerd->d_inode;
797 struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
798 struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
799 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
800 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
801 const struct cred *oldcred;
802 int err;
803
804 oldcred = shiftfs_override_creds(inode->i_sb);
805 err = vfs_getattr(&newpath, stat, request_mask, query_flags);
806 revert_creds(oldcred);
8ef17b62
JB
807
808 if (err)
809 return err;
810
811 /* transform the underlying id */
2b77b5c4
CB
812 stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
813 stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
8ef17b62
JB
814 return 0;
815}
816
2b77b5c4 817#ifdef CONFIG_SHIFT_FS_POSIX_ACL
8ef17b62 818
2b77b5c4
CB
819static int
820shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
821 struct posix_acl *acl)
8ef17b62 822{
2b77b5c4
CB
823 int i;
824
825 for (i = 0; i < acl->a_count; i++) {
826 struct posix_acl_entry *e = &acl->a_entries[i];
827 switch(e->e_tag) {
828 case ACL_USER:
829 e->e_uid = shift_kuid(from, to, e->e_uid);
830 if (!uid_valid(e->e_uid))
831 return -EOVERFLOW;
832 break;
833 case ACL_GROUP:
834 e->e_gid = shift_kgid(from, to, e->e_gid);
835 if (!gid_valid(e->e_gid))
836 return -EOVERFLOW;
837 break;
838 }
839 }
840 return 0;
841}
8ef17b62 842
2b77b5c4
CB
843static void
844shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
845 void *value, size_t size)
846{
847 struct posix_acl_xattr_header *header = value;
848 struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
849 int count;
850 kuid_t kuid;
851 kgid_t kgid;
8ef17b62 852
2b77b5c4
CB
853 if (!value)
854 return;
855 if (size < sizeof(struct posix_acl_xattr_header))
856 return;
857 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
858 return;
8ef17b62 859
2b77b5c4
CB
860 count = posix_acl_xattr_count(size);
861 if (count < 0)
862 return;
863 if (count == 0)
864 return;
8ef17b62 865
2b77b5c4
CB
866 for (end = entry + count; entry != end; entry++) {
867 switch(le16_to_cpu(entry->e_tag)) {
868 case ACL_USER:
869 kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
870 kuid = shift_kuid(from, to, kuid);
871 entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
872 break;
873 case ACL_GROUP:
874 kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
875 kgid = shift_kgid(from, to, kgid);
876 entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
877 break;
878 default:
879 break;
880 }
881 }
8ef17b62
JB
882}
883
2b77b5c4 884static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
8ef17b62 885{
2b77b5c4
CB
886 struct inode *loweri = inode->i_private;
887 const struct cred *oldcred;
888 struct posix_acl *lower_acl, *acl = NULL;
889 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
890 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
891 int size;
892 int err;
8ef17b62 893
2b77b5c4
CB
894 if (!IS_POSIXACL(loweri))
895 return NULL;
8ef17b62 896
2b77b5c4
CB
897 oldcred = shiftfs_override_creds(inode->i_sb);
898 lower_acl = get_acl(loweri, type);
899 revert_creds(oldcred);
8ef17b62 900
2b77b5c4
CB
901 if (lower_acl && !IS_ERR(lower_acl)) {
902 /* XXX: export posix_acl_clone? */
903 size = sizeof(struct posix_acl) +
904 lower_acl->a_count * sizeof(struct posix_acl_entry);
905 acl = kmemdup(lower_acl, size, GFP_KERNEL);
906 posix_acl_release(lower_acl);
8ef17b62 907
2b77b5c4
CB
908 if (!acl)
909 return ERR_PTR(-ENOMEM);
8ef17b62 910
2b77b5c4 911 refcount_set(&acl->a_refcount, 1);
8ef17b62 912
2b77b5c4
CB
913 err = shift_acl_ids(from_ns, to_ns, acl);
914 if (err) {
915 kfree(acl);
916 return ERR_PTR(err);
917 }
918 }
919
920 return acl;
8ef17b62
JB
921}
922
2b77b5c4
CB
923static int
924shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
925 struct dentry *dentry, struct inode *inode,
926 const char *name, void *buffer, size_t size)
8ef17b62 927{
2b77b5c4
CB
928 struct inode *loweri = inode->i_private;
929 int ret;
930
931 ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
932 buffer, size);
933 if (ret < 0)
934 return ret;
8ef17b62 935
2b77b5c4
CB
936 inode_lock(loweri);
937 shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
938 buffer, size);
939 inode_unlock(loweri);
940 return ret;
8ef17b62
JB
941}
942
2b77b5c4
CB
943static int
944shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
945 struct dentry *dentry, struct inode *inode,
946 const char *name, const void *value,
947 size_t size, int flags)
948{
949 struct inode *loweri = inode->i_private;
950 int err;
8ef17b62 951
2b77b5c4
CB
952 if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
953 return -EOPNOTSUPP;
954 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
955 return value ? -EACCES : 0;
956 if (!inode_owner_or_capable(inode))
957 return -EPERM;
958
959 if (value) {
960 shift_acl_xattr_ids(inode->i_sb->s_user_ns,
961 loweri->i_sb->s_user_ns,
962 (void *)value, size);
963 err = shiftfs_setxattr(dentry, inode, handler->name, value,
964 size, flags);
965 } else {
966 err = shiftfs_removexattr(dentry, handler->name);
967 }
8ef17b62 968
2b77b5c4
CB
969 if (!err)
970 shiftfs_copyattr(loweri, inode);
971
972 return err;
973}
974
975static const struct xattr_handler
976shiftfs_posix_acl_access_xattr_handler = {
977 .name = XATTR_NAME_POSIX_ACL_ACCESS,
978 .flags = ACL_TYPE_ACCESS,
979 .get = shiftfs_posix_acl_xattr_get,
980 .set = shiftfs_posix_acl_xattr_set,
8ef17b62
JB
981};
982
2b77b5c4
CB
983static const struct xattr_handler
984shiftfs_posix_acl_default_xattr_handler = {
985 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
986 .flags = ACL_TYPE_DEFAULT,
987 .get = shiftfs_posix_acl_xattr_get,
988 .set = shiftfs_posix_acl_xattr_set,
8ef17b62
JB
989};
990
2b77b5c4 991#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
8ef17b62 992
2b77b5c4 993#define shiftfs_get_acl NULL
8ef17b62 994
2b77b5c4 995#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
8ef17b62 996
2b77b5c4
CB
997static const struct inode_operations shiftfs_dir_inode_operations = {
998 .lookup = shiftfs_lookup,
999 .mkdir = shiftfs_mkdir,
1000 .symlink = shiftfs_symlink,
1001 .unlink = shiftfs_unlink,
1002 .rmdir = shiftfs_rmdir,
1003 .rename = shiftfs_rename,
1004 .link = shiftfs_link,
1005 .setattr = shiftfs_setattr,
1006 .create = shiftfs_create,
1007 .mknod = shiftfs_mknod,
1008 .permission = shiftfs_permission,
1009 .getattr = shiftfs_getattr,
1010 .listxattr = shiftfs_listxattr,
1011 .get_acl = shiftfs_get_acl,
1012};
1013
1014static const struct inode_operations shiftfs_file_inode_operations = {
1015 .fiemap = shiftfs_fiemap,
1016 .getattr = shiftfs_getattr,
1017 .get_acl = shiftfs_get_acl,
1018 .listxattr = shiftfs_listxattr,
1019 .permission = shiftfs_permission,
1020 .setattr = shiftfs_setattr,
1021 .tmpfile = shiftfs_tmpfile,
1022};
1023
1024static const struct inode_operations shiftfs_special_inode_operations = {
1025 .getattr = shiftfs_getattr,
1026 .get_acl = shiftfs_get_acl,
1027 .listxattr = shiftfs_listxattr,
1028 .permission = shiftfs_permission,
1029 .setattr = shiftfs_setattr,
1030};
1031
1032static const struct inode_operations shiftfs_symlink_inode_operations = {
1033 .getattr = shiftfs_getattr,
1034 .get_link = shiftfs_get_link,
1035 .listxattr = shiftfs_listxattr,
1036 .setattr = shiftfs_setattr,
1037};
1038
1039static struct file *shiftfs_open_realfile(const struct file *file,
d73a8b3d 1040 struct inode *realinode)
2b77b5c4 1041{
d73a8b3d
CB
1042 struct file *realfile;
1043 const struct cred *old_cred;
2b77b5c4 1044 struct inode *inode = file_inode(file);
d73a8b3d 1045 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
2b77b5c4 1046 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
d73a8b3d 1047 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
2b77b5c4 1048
d73a8b3d
CB
1049 old_cred = shiftfs_override_creds(inode->i_sb);
1050 realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1051 info->creator_cred);
1052 revert_creds(old_cred);
2b77b5c4 1053
d73a8b3d 1054 return realfile;
2b77b5c4
CB
1055}
1056
1057#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1058
1059static int shiftfs_change_flags(struct file *file, unsigned int flags)
1060{
1061 struct inode *inode = file_inode(file);
1062 int err;
1063
1064 /* if some flag changed that cannot be changed then something's amiss */
1065 if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1066 return -EIO;
1067
1068 flags &= SHIFTFS_SETFL_MASK;
1069
1070 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1071 return -EPERM;
1072
1073 if (flags & O_DIRECT) {
1074 if (!file->f_mapping->a_ops ||
1075 !file->f_mapping->a_ops->direct_IO)
1076 return -EINVAL;
1077 }
1078
1079 if (file->f_op->check_flags) {
1080 err = file->f_op->check_flags(flags);
1081 if (err)
1082 return err;
1083 }
1084
1085 spin_lock(&file->f_lock);
1086 file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1087 spin_unlock(&file->f_lock);
1088
1089 return 0;
1090}
1091
2b77b5c4
CB
1092static int shiftfs_open(struct inode *inode, struct file *file)
1093{
2b77b5c4 1094 struct file *realfile;
2b77b5c4 1095
d73a8b3d
CB
1096 realfile = shiftfs_open_realfile(file, inode->i_private);
1097 if (IS_ERR(realfile))
2b77b5c4 1098 return PTR_ERR(realfile);
2b77b5c4 1099
d73a8b3d 1100 file->private_data = realfile;
1c4d2a96
CB
1101 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1102 file->f_mapping = realfile->f_mapping;
1103
2b77b5c4
CB
1104 return 0;
1105}
1106
d73a8b3d 1107static int shiftfs_dir_open(struct inode *inode, struct file *file)
2b77b5c4 1108{
d73a8b3d
CB
1109 struct file *realfile;
1110 const struct cred *oldcred;
1111 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1112 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1113 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1114
1115 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1116 realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1117 info->creator_cred);
1118 revert_creds(oldcred);
1119 if (IS_ERR(realfile))
1120 return PTR_ERR(realfile);
2b77b5c4 1121
d73a8b3d 1122 file->private_data = realfile;
2b77b5c4 1123
d73a8b3d
CB
1124 return 0;
1125}
1126
1127static int shiftfs_release(struct inode *inode, struct file *file)
1128{
1129 struct file *realfile = file->private_data;
1130
1131 if (realfile)
1132 fput(realfile);
2b77b5c4
CB
1133
1134 return 0;
1135}
1136
d73a8b3d
CB
/* ->release() for directories; identical to the regular-file release. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	int ret = shiftfs_release(inode, file);

	return ret;
}
1141
c106770a
CB
1142static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1143{
d73a8b3d 1144 struct file *realfile = file->private_data;
c106770a
CB
1145
1146 return vfs_llseek(realfile, offset, whence);
1147}
1148
1149static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
2b77b5c4
CB
1150{
1151 struct inode *realinode = file_inode(file)->i_private;
1152
1153 return generic_file_llseek_size(file, offset, whence,
1154 realinode->i_sb->s_maxbytes,
1155 i_size_read(realinode));
1156}
1157
/* XXX: Need to figure out what to do about atime updates, maybe other
 * timestamps too ... ref. ovl_file_accessed() */
1160
1161static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1162{
1163 int ifl = iocb->ki_flags;
1164 rwf_t flags = 0;
1165
1166 if (ifl & IOCB_NOWAIT)
1167 flags |= RWF_NOWAIT;
1168 if (ifl & IOCB_HIPRI)
1169 flags |= RWF_HIPRI;
1170 if (ifl & IOCB_DSYNC)
1171 flags |= RWF_DSYNC;
1172 if (ifl & IOCB_SYNC)
1173 flags |= RWF_SYNC;
1174
1175 return flags;
1176}
1177
65081024
CB
1178static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1179{
1180 struct file *realfile;
1181
1182 if (file->f_op->open != shiftfs_open &&
1183 file->f_op->open != shiftfs_dir_open)
1184 return -EINVAL;
1185
1186 realfile = file->private_data;
1187 lowerfd->flags = 0;
1188 lowerfd->file = realfile;
1189
1190 /* Did the flags change since open? */
1191 if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1192 return shiftfs_change_flags(lowerfd->file, file->f_flags);
1193
1194 return 0;
1195}
1196
2b77b5c4
CB
1197static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1198{
1199 struct file *file = iocb->ki_filp;
1200 struct fd lowerfd;
1201 const struct cred *oldcred;
1202 ssize_t ret;
1203
1204 if (!iov_iter_count(iter))
1205 return 0;
1206
1207 ret = shiftfs_real_fdget(file, &lowerfd);
1208 if (ret)
1209 return ret;
1210
1211 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1212 ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1213 shiftfs_iocb_to_rwf(iocb));
1214 revert_creds(oldcred);
1215
1216 shiftfs_file_accessed(file);
1217
1218 fdput(lowerfd);
1219 return ret;
1220}
1221
1222static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1223{
1224 struct file *file = iocb->ki_filp;
1225 struct inode *inode = file_inode(file);
1226 struct fd lowerfd;
1227 const struct cred *oldcred;
1228 ssize_t ret;
1229
1230 if (!iov_iter_count(iter))
1231 return 0;
1232
1233 inode_lock(inode);
1234 /* Update mode */
1235 shiftfs_copyattr(inode->i_private, inode);
1236 ret = file_remove_privs(file);
1237 if (ret)
1238 goto out_unlock;
1239
1240 ret = shiftfs_real_fdget(file, &lowerfd);
1241 if (ret)
1242 goto out_unlock;
1243
1244 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1245 file_start_write(lowerfd.file);
1246 ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1247 shiftfs_iocb_to_rwf(iocb));
1248 file_end_write(lowerfd.file);
1249 revert_creds(oldcred);
1250
1251 /* Update size */
1252 shiftfs_copyattr(inode->i_private, inode);
1253
1254 fdput(lowerfd);
1255
1256out_unlock:
1257 inode_unlock(inode);
1258 return ret;
1259}
1260
1261static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1262 int datasync)
1263{
1264 struct fd lowerfd;
1265 const struct cred *oldcred;
1266 int ret;
1267
1268 ret = shiftfs_real_fdget(file, &lowerfd);
1269 if (ret)
1270 return ret;
1271
1272 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1273 ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1274 revert_creds(oldcred);
1275
1276 fdput(lowerfd);
1277 return ret;
1278}
1279
1280static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1281{
d73a8b3d 1282 struct file *realfile = file->private_data;
2b77b5c4
CB
1283 const struct cred *oldcred;
1284 int ret;
1285
1286 if (!realfile->f_op->mmap)
1287 return -ENODEV;
1288
1289 if (WARN_ON(file != vma->vm_file))
1290 return -EIO;
1291
1292 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1293 vma->vm_file = get_file(realfile);
1294 ret = call_mmap(vma->vm_file, vma);
1295 revert_creds(oldcred);
1296
1297 shiftfs_file_accessed(file);
1298
616d0ac0
SF
1299 if (ret) {
1300 /*
1301 * Drop refcount from new vm_file value and restore original
1302 * vm_file value
1303 */
1304 vma->vm_file = file;
1305 fput(realfile);
1306 } else {
1307 /* Drop refcount from previous vm_file value */
1308 fput(file);
1309 }
2b77b5c4
CB
1310
1311 return ret;
1312}
1313
1314static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1315 loff_t len)
1316{
1317 struct inode *inode = file_inode(file);
1318 struct inode *loweri = inode->i_private;
1319 struct fd lowerfd;
1320 const struct cred *oldcred;
1321 int ret;
1322
1323 ret = shiftfs_real_fdget(file, &lowerfd);
1324 if (ret)
1325 return ret;
1326
1327 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1328 ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1329 revert_creds(oldcred);
1330
1331 /* Update size */
1332 shiftfs_copyattr(loweri, inode);
1333
1334 fdput(lowerfd);
1335 return ret;
1336}
1337
1338static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1339 int advice)
1340{
1341 struct fd lowerfd;
1342 const struct cred *oldcred;
1343 int ret;
1344
1345 ret = shiftfs_real_fdget(file, &lowerfd);
1346 if (ret)
1347 return ret;
1348
1349 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1350 ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1351 revert_creds(oldcred);
1352
1353 fdput(lowerfd);
1354 return ret;
1355}
1356
1357static int shiftfs_override_ioctl_creds(const struct super_block *sb,
1358 const struct cred **oldcred,
1359 struct cred **newcred)
1360{
b674a8b8 1361 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
2b77b5c4
CB
1362 kuid_t fsuid = current_fsuid();
1363 kgid_t fsgid = current_fsgid();
1364
1365 *oldcred = shiftfs_override_creds(sb);
1366
1367 *newcred = prepare_creds();
1368 if (!*newcred) {
1369 revert_creds(*oldcred);
1370 return -ENOMEM;
1371 }
1372
b674a8b8
SF
1373 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1374 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
2b77b5c4
CB
1375
1376 /* clear all caps to prevent bypassing capable() checks */
1377 cap_clear((*newcred)->cap_bset);
1378 cap_clear((*newcred)->cap_effective);
1379 cap_clear((*newcred)->cap_inheritable);
1380 cap_clear((*newcred)->cap_permitted);
1381
1382 put_cred(override_creds(*newcred));
1383 return 0;
1384}
1385
/* Undo shiftfs_override_ioctl_creds(); same as reverting object creds. */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1391
f7dfaa67
CB
1392static inline bool is_btrfs_snap_ioctl(int cmd)
1393{
1394 if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1395 return true;
1396
1397 return false;
1398}
1399
25d42169 1400static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
f7dfaa67
CB
1401 struct btrfs_ioctl_vol_args *v1,
1402 struct btrfs_ioctl_vol_args_v2 *v2)
1403{
1404 int ret;
1405
1406 if (!is_btrfs_snap_ioctl(cmd))
1407 return 0;
1408
1409 if (cmd == BTRFS_IOC_SNAP_CREATE)
1410 ret = copy_to_user(arg, v1, sizeof(*v1));
1411 else
1412 ret = copy_to_user(arg, v2, sizeof(*v2));
1413
f7dfaa67
CB
1414 __close_fd(current->files, fd);
1415 kfree(v1);
1416 kfree(v2);
1417
1418 return ret;
1419}
1420
1421static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1422 struct btrfs_ioctl_vol_args **b1,
1423 struct btrfs_ioctl_vol_args_v2 **b2,
f7dfaa67
CB
1424 int *newfd)
1425{
1426 int oldfd, ret;
1427 struct fd src;
25d42169 1428 struct fd lfd = {};
f7dfaa67
CB
1429 struct btrfs_ioctl_vol_args *v1 = NULL;
1430 struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1431
1432 if (!is_btrfs_snap_ioctl(cmd))
1433 return 0;
1434
1435 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1436 v1 = memdup_user(arg, sizeof(*v1));
1437 if (IS_ERR(v1))
1438 return PTR_ERR(v1);
1439 oldfd = v1->fd;
1440 *b1 = v1;
1441 } else {
1442 v2 = memdup_user(arg, sizeof(*v2));
1443 if (IS_ERR(v2))
1444 return PTR_ERR(v2);
1445 oldfd = v2->fd;
1446 *b2 = v2;
1447 }
1448
1449 src = fdget(oldfd);
1450 if (!src.file)
1451 return -EINVAL;
1452
25d42169
SF
1453 ret = shiftfs_real_fdget(src.file, &lfd);
1454 if (ret) {
1455 fdput(src);
f7dfaa67 1456 return ret;
25d42169
SF
1457 }
1458
1459 /*
1460 * shiftfs_real_fdget() does not take a reference to lfd.file, so
1461 * take a reference here to offset the one which will be put by
1462 * __close_fd(), and make sure that reference is put on fdput(lfd).
1463 */
1464 get_file(lfd.file);
1465 lfd.flags |= FDPUT_FPUT;
1466 fdput(src);
f7dfaa67 1467
25d42169 1468 *newfd = get_unused_fd_flags(lfd.file->f_flags);
f7dfaa67 1469 if (*newfd < 0) {
25d42169 1470 fdput(lfd);
f7dfaa67
CB
1471 return *newfd;
1472 }
1473
25d42169 1474 fd_install(*newfd, lfd.file);
f7dfaa67
CB
1475
1476 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1477 v1->fd = *newfd;
1478 ret = copy_to_user(arg, v1, sizeof(*v1));
1479 v1->fd = oldfd;
1480 } else {
1481 v2->fd = *newfd;
1482 ret = copy_to_user(arg, v2, sizeof(*v2));
1483 v2->fd = oldfd;
1484 }
1485
1486 if (ret)
25d42169 1487 shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
f7dfaa67
CB
1488
1489 return ret;
1490}
1491
2b77b5c4
CB
1492static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1493 unsigned long arg)
1494{
2b77b5c4
CB
1495 struct fd lowerfd;
1496 struct cred *newcred;
1497 const struct cred *oldcred;
f7dfaa67
CB
1498 int newfd = -EBADF;
1499 long err = 0, ret = 0;
1500 void __user *argp = (void __user *)arg;
2b77b5c4 1501 struct super_block *sb = file->f_path.dentry->d_sb;
f7dfaa67
CB
1502 struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1503 struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1504
1505 ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
25d42169 1506 &newfd);
f7dfaa67
CB
1507 if (ret < 0)
1508 return ret;
2b77b5c4
CB
1509
1510 ret = shiftfs_real_fdget(file, &lowerfd);
1511 if (ret)
f7dfaa67 1512 goto out_restore;
2b77b5c4
CB
1513
1514 ret = shiftfs_override_ioctl_creds(sb, &oldcred, &newcred);
1515 if (ret)
1516 goto out_fdput;
1517
1518 ret = vfs_ioctl(lowerfd.file, cmd, arg);
1519
1520 shiftfs_revert_ioctl_creds(oldcred, newcred);
1521
1522 shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1523 shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1524
1525out_fdput:
1526 fdput(lowerfd);
1527
f7dfaa67 1528out_restore:
25d42169 1529 err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
f7dfaa67
CB
1530 btrfs_v1, btrfs_v2);
1531 if (!ret)
1532 ret = err;
1533
2b77b5c4
CB
1534 return ret;
1535}
1536
43e11811 1537static bool in_ioctl_whitelist(int flag, unsigned long arg)
f7dfaa67 1538{
43e11811
CB
1539 void __user *argp = (void __user *)arg;
1540 u64 flags = 0;
1541
f7dfaa67 1542 switch (flag) {
43e11811
CB
1543 case BTRFS_IOC_FS_INFO:
1544 return true;
f7dfaa67
CB
1545 case BTRFS_IOC_SNAP_CREATE:
1546 return true;
1547 case BTRFS_IOC_SNAP_CREATE_V2:
1548 return true;
1549 case BTRFS_IOC_SUBVOL_CREATE:
1550 return true;
1551 case BTRFS_IOC_SUBVOL_CREATE_V2:
43e11811
CB
1552 return true;
1553 case BTRFS_IOC_SUBVOL_GETFLAGS:
1554 return true;
1555 case BTRFS_IOC_SUBVOL_SETFLAGS:
786af19d 1556 if (copy_from_user(&flags, argp, sizeof(flags)))
43e11811
CB
1557 return false;
1558
1559 if (flags & ~BTRFS_SUBVOL_RDONLY)
1560 return false;
1561
f7dfaa67
CB
1562 return true;
1563 case BTRFS_IOC_SNAP_DESTROY:
1564 return true;
1565 }
1566
1567 return false;
1568}
1569
2b77b5c4
CB
1570static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1571 unsigned long arg)
1572{
1573 switch (cmd) {
1574 case FS_IOC_GETVERSION:
1575 /* fall through */
1576 case FS_IOC_GETFLAGS:
1577 /* fall through */
1578 case FS_IOC_SETFLAGS:
1579 break;
1580 default:
43e11811 1581 if (!in_ioctl_whitelist(cmd, arg) ||
f7dfaa67
CB
1582 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1583 return -ENOTTY;
2b77b5c4
CB
1584 }
1585
1586 return shiftfs_real_ioctl(file, cmd, arg);
1587}
1588
1589static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1590 unsigned long arg)
1591{
1592 switch (cmd) {
1593 case FS_IOC32_GETVERSION:
1594 /* fall through */
1595 case FS_IOC32_GETFLAGS:
1596 /* fall through */
1597 case FS_IOC32_SETFLAGS:
1598 break;
1599 default:
43e11811 1600 if (!in_ioctl_whitelist(cmd, arg) ||
f7dfaa67
CB
1601 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1602 return -ENOIOCTLCMD;
2b77b5c4
CB
1603 }
1604
1605 return shiftfs_real_ioctl(file, cmd, arg);
1606}
1607
/* Which lower-level range operation shiftfs_copyfile() should perform. */
enum shiftfs_copyop {
	SHIFTFS_COPY,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
1613
1614static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1615 struct file *file_out, loff_t pos_out, u64 len,
1616 unsigned int flags, enum shiftfs_copyop op)
1617{
1618 ssize_t ret;
1619 struct fd real_in, real_out;
1620 const struct cred *oldcred;
1621 struct inode *inode_out = file_inode(file_out);
1622 struct inode *loweri = inode_out->i_private;
1623
1624 ret = shiftfs_real_fdget(file_out, &real_out);
1625 if (ret)
1626 return ret;
1627
1628 ret = shiftfs_real_fdget(file_in, &real_in);
1629 if (ret) {
1630 fdput(real_out);
1631 return ret;
1632 }
1633
1634 oldcred = shiftfs_override_creds(inode_out->i_sb);
1635 switch (op) {
1636 case SHIFTFS_COPY:
1637 ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1638 pos_out, len, flags);
1639 break;
1640
1641 case SHIFTFS_CLONE:
1642 ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1643 pos_out, len, flags);
1644 break;
1645
1646 case SHIFTFS_DEDUPE:
1647 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1648 real_out.file, pos_out, len,
1649 flags);
1650 break;
1651 }
1652 revert_creds(oldcred);
1653
1654 /* Update size */
1655 shiftfs_copyattr(loweri, inode_out);
1656
1657 fdput(real_in);
1658 fdput(real_out);
1659
1660 return ret;
1661}
1662
1663static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1664 struct file *file_out, loff_t pos_out,
1665 size_t len, unsigned int flags)
1666{
1667 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1668 SHIFTFS_COPY);
1669}
1670
1671static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1672 struct file *file_out, loff_t pos_out,
1673 loff_t len, unsigned int remap_flags)
1674{
1675 enum shiftfs_copyop op;
1676
1677 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1678 return -EINVAL;
1679
1680 if (remap_flags & REMAP_FILE_DEDUP)
1681 op = SHIFTFS_DEDUPE;
1682 else
1683 op = SHIFTFS_CLONE;
1684
1685 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1686 remap_flags, op);
1687}
1688
1689static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1690{
1691 const struct cred *oldcred;
1692 int err = -ENOTDIR;
d73a8b3d 1693 struct file *realfile = file->private_data;
2b77b5c4
CB
1694
1695 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1696 err = iterate_dir(realfile, ctx);
1697 revert_creds(oldcred);
1698
1699 return err;
1700}
1701
1702const struct file_operations shiftfs_file_operations = {
1703 .open = shiftfs_open,
1704 .release = shiftfs_release,
c106770a 1705 .llseek = shiftfs_file_llseek,
2b77b5c4
CB
1706 .read_iter = shiftfs_read_iter,
1707 .write_iter = shiftfs_write_iter,
1708 .fsync = shiftfs_fsync,
1709 .mmap = shiftfs_mmap,
1710 .fallocate = shiftfs_fallocate,
1711 .fadvise = shiftfs_fadvise,
1712 .unlocked_ioctl = shiftfs_ioctl,
1713 .compat_ioctl = shiftfs_compat_ioctl,
1714 .copy_file_range = shiftfs_copy_file_range,
1715 .remap_file_range = shiftfs_remap_file_range,
1716};
1717
1718const struct file_operations shiftfs_dir_operations = {
d73a8b3d
CB
1719 .open = shiftfs_dir_open,
1720 .release = shiftfs_dir_release,
2b77b5c4
CB
1721 .compat_ioctl = shiftfs_compat_ioctl,
1722 .fsync = shiftfs_fsync,
1723 .iterate_shared = shiftfs_iterate_shared,
c106770a 1724 .llseek = shiftfs_dir_llseek,
2b77b5c4 1725 .read = generic_read_dir,
2b77b5c4
CB
1726 .unlocked_ioctl = shiftfs_ioctl,
1727};
1728
1729static const struct address_space_operations shiftfs_aops = {
1730 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1731 .direct_IO = noop_direct_IO,
1732};
1733
1734static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1735 umode_t mode, dev_t dev, struct dentry *dentry)
1736{
1737 struct inode *loweri;
1738
1739 inode->i_ino = ino;
1740 inode->i_flags |= S_NOCMTIME;
1741
1742 mode &= S_IFMT;
1743 inode->i_mode = mode;
1744 switch (mode & S_IFMT) {
1745 case S_IFDIR:
1746 inode->i_op = &shiftfs_dir_inode_operations;
1747 inode->i_fop = &shiftfs_dir_operations;
1748 break;
1749 case S_IFLNK:
1750 inode->i_op = &shiftfs_symlink_inode_operations;
1751 break;
1752 case S_IFREG:
1753 inode->i_op = &shiftfs_file_inode_operations;
1754 inode->i_fop = &shiftfs_file_operations;
1755 inode->i_mapping->a_ops = &shiftfs_aops;
1756 break;
1757 default:
1758 inode->i_op = &shiftfs_special_inode_operations;
1759 init_special_inode(inode, mode, dev);
1760 break;
1761 }
1762
1763 if (!dentry)
1764 return;
1765
1766 loweri = dentry->d_inode;
1767 if (!loweri->i_op->get_link)
1768 inode->i_opflags |= IOP_NOFOLLOW;
1769
1770 shiftfs_copyattr(loweri, inode);
1771 shiftfs_copyflags(loweri, inode);
1772 set_nlink(inode, loweri->i_nlink);
1773}
1774
1775static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1776{
1777 struct super_block *sb = dentry->d_sb;
1778 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1779
1780 if (sbinfo->mark)
1781 seq_show_option(m, "mark", NULL);
1782
1783 if (sbinfo->passthrough)
1784 seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1785
1786 return 0;
1787}
1788
1789static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1790{
1791 struct super_block *sb = dentry->d_sb;
1792 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1793 struct dentry *root = sb->s_root;
1794 struct dentry *realroot = root->d_fsdata;
1795 struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1796 int err;
1797
1798 err = vfs_statfs(&realpath, buf);
8ef17b62 1799 if (err)
2b77b5c4 1800 return err;
8ef17b62 1801
2b77b5c4
CB
1802 if (!shiftfs_passthrough_statfs(sbinfo))
1803 buf->f_type = sb->s_magic;
8ef17b62 1804
2b77b5c4
CB
1805 return 0;
1806}
8ef17b62 1807
2b77b5c4
CB
1808static void shiftfs_evict_inode(struct inode *inode)
1809{
1810 struct inode *loweri = inode->i_private;
1811
1812 clear_inode(inode);
1813
1814 if (loweri)
1815 iput(loweri);
1816}
1817
1818static void shiftfs_put_super(struct super_block *sb)
1819{
1820 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1821
1822 if (sbinfo) {
1823 mntput(sbinfo->mnt);
1824 put_cred(sbinfo->creator_cred);
1825 kfree(sbinfo);
1826 }
1827}
1828
1829static const struct xattr_handler shiftfs_xattr_handler = {
1830 .prefix = "",
1831 .get = shiftfs_xattr_get,
1832 .set = shiftfs_xattr_set,
1833};
1834
1835const struct xattr_handler *shiftfs_xattr_handlers[] = {
1836#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1837 &shiftfs_posix_acl_access_xattr_handler,
1838 &shiftfs_posix_acl_default_xattr_handler,
1839#endif
1840 &shiftfs_xattr_handler,
1841 NULL
1842};
1843
/* True when every bit set in @new_flags is also set in @old_flags. */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (new_flags & ~old_flags) == 0;
}
1851
63d6c068
CB
1852static int shiftfs_super_check_flags(unsigned long old_flags,
1853 unsigned long new_flags)
1854{
1855 if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1856 return -EPERM;
1857
1858 if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1859 return -EPERM;
1860
1861 if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1862 return -EPERM;
1863
1864 if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1865 return -EPERM;
1866
1867 if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1868 return -EPERM;
1869
1870 if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1871 return -EPERM;
1872
1873 if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1874 return -EPERM;
1875
1876 return 0;
1877}
1878
2b77b5c4
CB
1879static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1880{
1881 int err;
1882 struct shiftfs_super_info new = {};
1883 struct shiftfs_super_info *info = sb->s_fs_info;
1884
1885 err = shiftfs_parse_mount_options(&new, data);
8ef17b62 1886 if (err)
2b77b5c4
CB
1887 return err;
1888
63d6c068
CB
1889 err = shiftfs_super_check_flags(sb->s_flags, *flags);
1890 if (err)
1891 return err;
1892
2b77b5c4
CB
1893 /* Mark mount option cannot be changed. */
1894 if (info->mark || (info->mark != new.mark))
1895 return -EPERM;
1896
1897 if (info->passthrough != new.passthrough) {
1898 /* Don't allow exceeding passthrough options of mark mount. */
b4c9cb0c 1899 if (!passthrough_is_subset(info->passthrough_mark,
2b77b5c4
CB
1900 info->passthrough))
1901 return -EPERM;
1902
1903 info->passthrough = new.passthrough;
1904 }
1905
1906 return 0;
1907}
8ef17b62 1908
2b77b5c4
CB
1909static const struct super_operations shiftfs_super_ops = {
1910 .put_super = shiftfs_put_super,
1911 .show_options = shiftfs_show_options,
1912 .statfs = shiftfs_statfs,
1913 .remount_fs = shiftfs_remount,
1914 .evict_inode = shiftfs_evict_inode,
1915};
1916
/* Arguments bundled by shiftfs_mount() for shiftfs_fill_super(). */
struct shiftfs_data {
	void *data;		/* raw mount option data */
	const char *path;	/* dev_name: path of the directory to shift */
};
1921
63d6c068
CB
1922static void shiftfs_super_force_flags(struct super_block *sb,
1923 unsigned long lower_flags)
1924{
1925 sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1926 SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1927
1928 if (!(lower_flags & SB_POSIXACL))
1929 sb->s_flags &= ~SB_POSIXACL;
1930}
1931
2b77b5c4
CB
1932static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1933 int silent)
1934{
1935 int err;
1936 struct path path = {};
1937 struct shiftfs_super_info *sbinfo_mp;
1938 char *name = NULL;
1939 struct inode *inode = NULL;
1940 struct dentry *dentry = NULL;
1941 struct shiftfs_data *data = raw_data;
1942 struct shiftfs_super_info *sbinfo = NULL;
1943
1944 if (!data->path)
1945 return -EINVAL;
1946
1947 sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1948 if (!sb->s_fs_info)
1949 return -ENOMEM;
1950 sbinfo = sb->s_fs_info;
1951
1952 err = shiftfs_parse_mount_options(sbinfo, data->data);
1953 if (err)
1954 return err;
1955
1956 /* to mount a mark, must be userns admin */
1957 if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
1958 return -EPERM;
1959
1960 name = kstrdup(data->path, GFP_KERNEL);
1961 if (!name)
1962 return -ENOMEM;
1963
1964 err = kern_path(name, LOOKUP_FOLLOW, &path);
1965 if (err)
1966 goto out_free_name;
8ef17b62
JB
1967
1968 if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
1969 err = -ENOTDIR;
2b77b5c4 1970 goto out_put_path;
8ef17b62
JB
1971 }
1972
63d6c068
CB
1973 sb->s_flags |= SB_POSIXACL;
1974
2b77b5c4 1975 if (sbinfo->mark) {
700720b3 1976 struct cred *cred_tmp;
2b77b5c4
CB
1977 struct super_block *lower_sb = path.mnt->mnt_sb;
1978
1979 /* to mark a mount point, must root wrt lower s_user_ns */
1980 if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
1981 err = -EPERM;
1982 goto out_put_path;
1983 }
8ef17b62 1984
8ef17b62
JB
1985 /*
1986 * this part is visible unshifted, so make sure no
1987 * executables that could be used to give suid
1988 * privileges
1989 */
1990 sb->s_iflags = SB_I_NOEXEC;
8ef17b62 1991
63d6c068
CB
1992 shiftfs_super_force_flags(sb, lower_sb->s_flags);
1993
8ef17b62 1994 /*
2b77b5c4
CB
1995 * Handle nesting of shiftfs mounts by referring this mark
1996 * mount back to the original mark mount. This is more
1997 * efficient and alleviates concerns about stack depth.
8ef17b62 1998 */
2b77b5c4
CB
1999 if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2000 sbinfo_mp = lower_sb->s_fs_info;
2001
2002 /* Doesn't make sense to mark a mark mount */
2003 if (sbinfo_mp->mark) {
2004 err = -EINVAL;
2005 goto out_put_path;
2006 }
2007
2008 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2009 sbinfo->passthrough)) {
2010 err = -EPERM;
2011 goto out_put_path;
2012 }
2013
2014 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2015 dentry = dget(path.dentry->d_fsdata);
b4c9cb0c
CB
2016 /*
2017 * Copy up the passthrough mount options from the
2018 * parent mark mountpoint.
2019 */
2020 sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
2b77b5c4
CB
2021 } else {
2022 sbinfo->mnt = mntget(path.mnt);
2023 dentry = dget(path.dentry);
b4c9cb0c
CB
2024 /*
2025 * For a new mark passthrough_mark and passthrough
2026 * are identical.
2027 */
2028 sbinfo->passthrough_mark = sbinfo->passthrough;
2b77b5c4
CB
2029 }
2030
700720b3
CB
2031 cred_tmp = prepare_creds();
2032 if (!cred_tmp) {
2b77b5c4
CB
2033 err = -ENOMEM;
2034 goto out_put_path;
2035 }
700720b3
CB
2036 /* Don't override disk quota limits or use reserved space. */
2037 cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2038 sbinfo->creator_cred = cred_tmp;
2b77b5c4
CB
2039 } else {
2040 /*
2041 * This leg executes if we're admin capable in the namespace,
2042 * so be very careful.
2043 */
2044 err = -EPERM;
8ef17b62 2045 if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2b77b5c4
CB
2046 goto out_put_path;
2047
2048 sbinfo_mp = path.dentry->d_sb->s_fs_info;
2049 if (!sbinfo_mp->mark)
2050 goto out_put_path;
2051
2052 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2053 sbinfo->passthrough))
2054 goto out_put_path;
2055
2056 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2057 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
8ef17b62 2058 dentry = dget(path.dentry->d_fsdata);
b4c9cb0c
CB
2059 /*
2060 * Copy up passthrough settings from mark mountpoint so we can
2061 * verify when the overlay wants to remount with different
2062 * passthrough settings.
2063 */
2064 sbinfo->passthrough_mark = sbinfo_mp->passthrough;
63d6c068 2065 shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2b77b5c4
CB
2066 }
2067
2068 sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2069 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2070 printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2071 err = -EINVAL;
2072 goto out_put_path;
2073 }
2074
2075 inode = new_inode(sb);
2076 if (!inode) {
2077 err = -ENOMEM;
2078 goto out_put_path;
8ef17b62 2079 }
2b77b5c4
CB
2080 shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2081
2082 ihold(dentry->d_inode);
2083 inode->i_private = dentry->d_inode;
2084
8ef17b62 2085 sb->s_magic = SHIFTFS_MAGIC;
3340be2b 2086 sb->s_maxbytes = MAX_LFS_FILESIZE;
8ef17b62
JB
2087 sb->s_op = &shiftfs_super_ops;
2088 sb->s_xattr = shiftfs_xattr_handlers;
2089 sb->s_d_op = &shiftfs_dentry_ops;
2b77b5c4
CB
2090 sb->s_root = d_make_root(inode);
2091 if (!sb->s_root) {
2092 err = -ENOMEM;
2093 goto out_put_path;
2094 }
2095
8ef17b62 2096 sb->s_root->d_fsdata = dentry;
2b77b5c4
CB
2097 sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2098 shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
8ef17b62 2099
2b77b5c4
CB
2100 dentry = NULL;
2101 err = 0;
8ef17b62 2102
2b77b5c4 2103out_put_path:
8ef17b62 2104 path_put(&path);
2b77b5c4
CB
2105
2106out_free_name:
8ef17b62 2107 kfree(name);
2b77b5c4
CB
2108
2109 dput(dentry);
2110
8ef17b62
JB
2111 return err;
2112}
2113
2114static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2115 int flags, const char *dev_name, void *data)
2116{
2117 struct shiftfs_data d = { data, dev_name };
2118
2119 return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2120}
2121
2122static struct file_system_type shiftfs_type = {
2123 .owner = THIS_MODULE,
2124 .name = "shiftfs",
2125 .mount = shiftfs_mount,
2126 .kill_sb = kill_anon_super,
2127 .fs_flags = FS_USERNS_MOUNT,
2128};
2129
2130static int __init shiftfs_init(void)
2131{
2132 return register_filesystem(&shiftfs_type);
2133}
2134
2135static void __exit shiftfs_exit(void)
2136{
2137 unregister_filesystem(&shiftfs_type);
2138}
2139
2140MODULE_ALIAS_FS("shiftfs");
2141MODULE_AUTHOR("James Bottomley");
2b77b5c4
CB
2142MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2143MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2144MODULE_DESCRIPTION("id shifting filesystem");
8ef17b62
JB
2145MODULE_LICENSE("GPL v2");
2146module_init(shiftfs_init)
2147module_exit(shiftfs_exit)