]> git.proxmox.com Git - mirror_ubuntu-kernels.git/blame - fs/shiftfs.c
scsi: core: raid_class: Remove raid_component_add()
[mirror_ubuntu-kernels.git] / fs / shiftfs.c
CommitLineData
1f1d0142 1#include <linux/btrfs.h>
bc97019b 2#include <linux/capability.h>
065efa1d
JB
3#include <linux/cred.h>
4#include <linux/mount.h>
1f1d0142 5#include <linux/fdtable.h>
065efa1d
JB
6#include <linux/file.h>
7#include <linux/fs.h>
8#include <linux/namei.h>
9#include <linux/module.h>
10#include <linux/kernel.h>
11#include <linux/magic.h>
12#include <linux/parser.h>
bc97019b 13#include <linux/security.h>
065efa1d
JB
14#include <linux/seq_file.h>
15#include <linux/statfs.h>
16#include <linux/slab.h>
17#include <linux/user_namespace.h>
18#include <linux/uidgid.h>
19#include <linux/xattr.h>
bc97019b
CB
20#include <linux/posix_acl.h>
21#include <linux/posix_acl_xattr.h>
22#include <linux/uio.h>
78398fd5 23#include <linux/fiemap.h>
d00e5271 24#include <linux/pagemap.h>
065efa1d
JB
25
26struct shiftfs_super_info {
27 struct vfsmount *mnt;
28 struct user_namespace *userns;
bc97019b
CB
29 /* creds of process who created the super block */
30 const struct cred *creator_cred;
065efa1d 31 bool mark;
bc97019b 32 unsigned int passthrough;
353409ee 33 unsigned int passthrough_mark;
065efa1d
JB
34};
35
bc97019b
CB
36static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
37 umode_t mode, dev_t dev, struct dentry *dentry);
38
39#define SHIFTFS_PASSTHROUGH_NONE 0
40#define SHIFTFS_PASSTHROUGH_STAT 1
1f1d0142
CB
41#define SHIFTFS_PASSTHROUGH_IOCTL 2
42#define SHIFTFS_PASSTHROUGH_ALL \
43 (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
44
45static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
46{
47 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
48 return false;
49
1f1d0142
CB
50 return true;
51}
bc97019b
CB
52
53static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
54{
55 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
56 return false;
57
bc97019b
CB
58 return true;
59}
065efa1d
JB
60
/* Token ids produced by match_token() for the mount option table. */
enum {
	OPT_MARK,		/* "mark" */
	OPT_PASSTHROUGH,	/* "passthrough=%u" */
	OPT_LAST,		/* table terminator */
};
66
67/* global filesystem options */
68static const match_table_t tokens = {
69 { OPT_MARK, "mark" },
bc97019b 70 { OPT_PASSTHROUGH, "passthrough=%u" },
065efa1d
JB
71 { OPT_LAST, NULL }
72};
73
bc97019b 74static const struct cred *shiftfs_override_creds(const struct super_block *sb)
065efa1d 75{
bc97019b 76 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
065efa1d 77
bc97019b
CB
78 return override_creds(sbinfo->creator_cred);
79}
80
/*
 * Counterpart of shiftfs_override_object_creds(): reinstate @oldcred and
 * drop the reference held on @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	revert_creds(oldcred);
	put_cred(newcred);
}
87
ff3521da
SF
88static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
89 kuid_t kuid)
90{
91 uid_t uid = from_kuid(from, kuid);
92 return make_kuid(to, uid);
93}
94
95static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
96 kgid_t kgid)
97{
98 gid_t gid = from_kgid(from, kgid);
99 return make_kgid(to, gid);
100}
101
bc97019b
CB
102static int shiftfs_override_object_creds(const struct super_block *sb,
103 const struct cred **oldcred,
104 struct cred **newcred,
105 struct dentry *dentry, umode_t mode,
106 bool hardlink)
107{
ff3521da 108 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
bc97019b
CB
109 kuid_t fsuid = current_fsuid();
110 kgid_t fsgid = current_fsgid();
111
112 *oldcred = shiftfs_override_creds(sb);
113
114 *newcred = prepare_creds();
115 if (!*newcred) {
116 revert_creds(*oldcred);
117 return -ENOMEM;
118 }
119
ff3521da
SF
120 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
121 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
bc97019b
CB
122
123 if (!hardlink) {
124 int err = security_dentry_create_files_as(dentry, mode,
125 &dentry->d_name,
126 *oldcred, *newcred);
127 if (err) {
128 shiftfs_revert_object_creds(*oldcred, *newcred);
129 return err;
130 }
131 }
065efa1d 132
bc97019b
CB
133 put_cred(override_creds(*newcred));
134 return 0;
135}
065efa1d 136
bc97019b
CB
137static void shiftfs_copyattr(struct inode *from, struct inode *to)
138{
139 struct user_namespace *from_ns = from->i_sb->s_user_ns;
140 struct user_namespace *to_ns = to->i_sb->s_user_ns;
141
142 to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
143 to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
144 to->i_mode = from->i_mode;
145 to->i_atime = from->i_atime;
146 to->i_mtime = from->i_mtime;
147 to->i_ctime = from->i_ctime;
148 i_size_write(to, i_size_read(from));
149}
065efa1d 150
bc97019b
CB
151static void shiftfs_copyflags(struct inode *from, struct inode *to)
152{
153 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
065efa1d 154
bc97019b 155 inode_set_flags(to, from->i_flags & mask, mask);
065efa1d
JB
156}
157
bc97019b 158static void shiftfs_file_accessed(struct file *file)
065efa1d 159{
bc97019b
CB
160 struct inode *upperi, *loweri;
161
162 if (file->f_flags & O_NOATIME)
065efa1d
JB
163 return;
164
bc97019b
CB
165 upperi = file_inode(file);
166 loweri = upperi->i_private;
167
168 if (!loweri)
169 return;
170
171 upperi->i_mtime = loweri->i_mtime;
172 upperi->i_ctime = loweri->i_ctime;
173
174 touch_atime(&file->f_path);
065efa1d
JB
175}
176
bc97019b
CB
177static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
178 char *options)
065efa1d
JB
179{
180 char *p;
181 substring_t args[MAX_OPT_ARGS];
182
bc97019b
CB
183 sbinfo->mark = false;
184 sbinfo->passthrough = 0;
065efa1d
JB
185
186 while ((p = strsep(&options, ",")) != NULL) {
bc97019b 187 int err, intarg, token;
065efa1d
JB
188
189 if (!*p)
190 continue;
191
192 token = match_token(p, tokens, args);
193 switch (token) {
194 case OPT_MARK:
bc97019b
CB
195 sbinfo->mark = true;
196 break;
197 case OPT_PASSTHROUGH:
198 err = match_int(&args[0], &intarg);
199 if (err)
200 return err;
201
202 if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
203 return -EINVAL;
204
205 sbinfo->passthrough = intarg;
065efa1d
JB
206 break;
207 default:
208 return -EINVAL;
209 }
210 }
bc97019b 211
065efa1d
JB
212 return 0;
213}
214
215static void shiftfs_d_release(struct dentry *dentry)
216{
bc97019b 217 struct dentry *lowerd = dentry->d_fsdata;
065efa1d 218
bc97019b
CB
219 if (lowerd)
220 dput(lowerd);
065efa1d
JB
221}
222
223static struct dentry *shiftfs_d_real(struct dentry *dentry,
224 const struct inode *inode)
225{
bc97019b
CB
226 struct dentry *lowerd = dentry->d_fsdata;
227
228 if (inode && d_inode(dentry) == inode)
229 return dentry;
065efa1d 230
bc97019b
CB
231 lowerd = d_real(lowerd, inode);
232 if (lowerd && (!inode || inode == d_inode(lowerd)))
233 return lowerd;
065efa1d 234
bc97019b
CB
235 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
236 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
237 return dentry;
065efa1d
JB
238}
239
240static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
241{
bc97019b
CB
242 int err = 1;
243 struct dentry *lowerd = dentry->d_fsdata;
065efa1d 244
bc97019b 245 if (d_is_negative(lowerd) != d_is_negative(dentry))
065efa1d
JB
246 return 0;
247
bc97019b
CB
248 if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
249 err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
065efa1d 250
bc97019b
CB
251 if (d_really_is_positive(dentry)) {
252 struct inode *inode = d_inode(dentry);
253 struct inode *loweri = d_inode(lowerd);
254
255 shiftfs_copyattr(loweri, inode);
bc97019b
CB
256 }
257
258 return err;
065efa1d
JB
259}
260
261static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
262{
bc97019b
CB
263 int err = 1;
264 struct dentry *lowerd = dentry->d_fsdata;
065efa1d 265
bc97019b
CB
266 if (d_unhashed(lowerd) ||
267 ((d_is_negative(lowerd) != d_is_negative(dentry))))
065efa1d
JB
268 return 0;
269
bc97019b
CB
270 if (flags & LOOKUP_RCU)
271 return -ECHILD;
065efa1d 272
bc97019b
CB
273 if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
274 err = lowerd->d_op->d_revalidate(lowerd, flags);
065efa1d 275
bc97019b
CB
276 if (d_really_is_positive(dentry)) {
277 struct inode *inode = d_inode(dentry);
278 struct inode *loweri = d_inode(lowerd);
065efa1d 279
bc97019b 280 shiftfs_copyattr(loweri, inode);
bc97019b 281 }
065efa1d 282
bc97019b 283 return err;
065efa1d
JB
284}
285
286static const struct dentry_operations shiftfs_dentry_ops = {
bc97019b
CB
287 .d_release = shiftfs_d_release,
288 .d_real = shiftfs_d_real,
289 .d_revalidate = shiftfs_d_revalidate,
065efa1d
JB
290 .d_weak_revalidate = shiftfs_d_weak_revalidate,
291};
292
065efa1d
JB
293static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
294 struct delayed_call *done)
295{
bc97019b
CB
296 const char *p;
297 const struct cred *oldcred;
298 struct dentry *lowerd;
065efa1d 299
bc97019b
CB
300 /* RCU lookup not supported */
301 if (!dentry)
065efa1d 302 return ERR_PTR(-ECHILD);
bc97019b
CB
303
304 lowerd = dentry->d_fsdata;
305 oldcred = shiftfs_override_creds(dentry->d_sb);
306 p = vfs_get_link(lowerd, done);
307 revert_creds(oldcred);
308
309 return p;
065efa1d
JB
310}
311
930f5211 312static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
065efa1d
JB
313 const char *name, const void *value,
314 size_t size, int flags)
315{
bc97019b
CB
316 struct dentry *lowerd = dentry->d_fsdata;
317 int err;
318 const struct cred *oldcred;
319
320 oldcred = shiftfs_override_creds(dentry->d_sb);
930f5211 321 err = vfs_setxattr(&init_user_ns, lowerd, name, value, size, flags);
bc97019b 322 revert_creds(oldcred);
065efa1d 323
bc97019b 324 shiftfs_copyattr(lowerd->d_inode, inode);
065efa1d
JB
325
326 return err;
327}
328
329static int shiftfs_xattr_get(const struct xattr_handler *handler,
330 struct dentry *dentry, struct inode *inode,
331 const char *name, void *value, size_t size)
332{
bc97019b 333 struct dentry *lowerd = dentry->d_fsdata;
065efa1d 334 int err;
bc97019b 335 const struct cred *oldcred;
065efa1d 336
bc97019b 337 oldcred = shiftfs_override_creds(dentry->d_sb);
96670e34 338 err = vfs_getxattr(&init_user_ns, lowerd, name, value, size);
bc97019b 339 revert_creds(oldcred);
065efa1d
JB
340
341 return err;
342}
343
344static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
345 size_t size)
346{
bc97019b 347 struct dentry *lowerd = dentry->d_fsdata;
065efa1d 348 int err;
bc97019b 349 const struct cred *oldcred;
065efa1d 350
bc97019b
CB
351 oldcred = shiftfs_override_creds(dentry->d_sb);
352 err = vfs_listxattr(lowerd, list, size);
353 revert_creds(oldcred);
065efa1d
JB
354
355 return err;
356}
357
96670e34
AR
358static int shiftfs_removexattr(struct user_namespace *ns,
359 struct dentry *dentry, const char *name)
065efa1d 360{
bc97019b 361 struct dentry *lowerd = dentry->d_fsdata;
065efa1d 362 int err;
bc97019b
CB
363 const struct cred *oldcred;
364
365 oldcred = shiftfs_override_creds(dentry->d_sb);
930f5211 366 err = vfs_removexattr(&init_user_ns, lowerd, name);
bc97019b 367 revert_creds(oldcred);
065efa1d 368
bc97019b
CB
369 /* update c/mtime */
370 shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
065efa1d
JB
371
372 return err;
373}
374
375static int shiftfs_xattr_set(const struct xattr_handler *handler,
96670e34 376 struct user_namespace *ns,
065efa1d
JB
377 struct dentry *dentry, struct inode *inode,
378 const char *name, const void *value, size_t size,
379 int flags)
380{
381 if (!value)
930f5211
AR
382 return shiftfs_removexattr(&init_user_ns, dentry, name);
383 return shiftfs_setxattr(dentry, inode, name, value, size, flags);
065efa1d
JB
384}
385
bc97019b 386static int shiftfs_inode_test(struct inode *inode, void *data)
065efa1d 387{
bc97019b
CB
388 return inode->i_private == data;
389}
065efa1d 390
bc97019b
CB
391static int shiftfs_inode_set(struct inode *inode, void *data)
392{
393 inode->i_private = data;
394 return 0;
065efa1d
JB
395}
396
930f5211 397static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
bc97019b
CB
398 umode_t mode, const char *symlink,
399 struct dentry *hardlink, bool excl)
065efa1d 400{
065efa1d 401 int err;
bc97019b
CB
402 const struct cred *oldcred;
403 struct cred *newcred;
404 void *loweri_iop_ptr = NULL;
405 umode_t modei = mode;
406 struct super_block *dir_sb = diri->i_sb;
407 struct dentry *lowerd_new = dentry->d_fsdata;
408 struct inode *inode = NULL, *loweri_dir = diri->i_private;
409 const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
410 struct dentry *lowerd_link = NULL;
065efa1d 411
a209452b
AR
412 inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
413
065efa1d 414 if (hardlink) {
bc97019b 415 loweri_iop_ptr = loweri_dir_iop->link;
065efa1d
JB
416 } else {
417 switch (mode & S_IFMT) {
418 case S_IFDIR:
bc97019b 419 loweri_iop_ptr = loweri_dir_iop->mkdir;
065efa1d
JB
420 break;
421 case S_IFREG:
bc97019b 422 loweri_iop_ptr = loweri_dir_iop->create;
065efa1d
JB
423 break;
424 case S_IFLNK:
bc97019b
CB
425 loweri_iop_ptr = loweri_dir_iop->symlink;
426 break;
427 case S_IFSOCK:
428 /* fall through */
429 case S_IFIFO:
430 loweri_iop_ptr = loweri_dir_iop->mknod;
431 break;
065efa1d
JB
432 }
433 }
bc97019b
CB
434 if (!loweri_iop_ptr) {
435 err = -EINVAL;
436 goto out_iput;
437 }
065efa1d 438
bc97019b
CB
439 if (!hardlink) {
440 inode = new_inode(dir_sb);
441 if (!inode) {
442 err = -ENOMEM;
443 goto out_iput;
444 }
445
446 /*
447 * new_inode() will have added the new inode to the super
448 * block's list of inodes. Further below we will call
449 * inode_insert5() Which would perform the same operation again
450 * thereby corrupting the list. To avoid this raise I_CREATING
451 * in i_state which will cause inode_insert5() to skip this
452 * step. I_CREATING will be cleared by d_instantiate_new()
453 * below.
454 */
455 spin_lock(&inode->i_lock);
456 inode->i_state |= I_CREATING;
457 spin_unlock(&inode->i_lock);
065efa1d 458
930f5211 459 inode_init_owner(&init_user_ns, inode, diri, mode);
bc97019b
CB
460 modei = inode->i_mode;
461 }
065efa1d 462
bc97019b
CB
463 err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
464 dentry, modei, hardlink != NULL);
465 if (err)
466 goto out_iput;
065efa1d 467
065efa1d 468 if (hardlink) {
bc97019b 469 lowerd_link = hardlink->d_fsdata;
930f5211 470 err = vfs_link(lowerd_link, &init_user_ns, loweri_dir, lowerd_new, NULL);
065efa1d 471 } else {
bc97019b 472 switch (modei & S_IFMT) {
065efa1d 473 case S_IFDIR:
930f5211 474 err = vfs_mkdir(&init_user_ns, loweri_dir, lowerd_new, modei);
065efa1d
JB
475 break;
476 case S_IFREG:
930f5211 477 err = vfs_create(&init_user_ns, loweri_dir, lowerd_new, modei, excl);
065efa1d
JB
478 break;
479 case S_IFLNK:
930f5211 480 err = vfs_symlink(&init_user_ns, loweri_dir, lowerd_new, symlink);
bc97019b
CB
481 break;
482 case S_IFSOCK:
483 /* fall through */
484 case S_IFIFO:
930f5211 485 err = vfs_mknod(&init_user_ns, loweri_dir, lowerd_new, modei, 0);
bc97019b
CB
486 break;
487 default:
488 err = -EINVAL;
489 break;
065efa1d
JB
490 }
491 }
492
bc97019b 493 shiftfs_revert_object_creds(oldcred, newcred);
065efa1d 494
bc97019b
CB
495 if (!err && WARN_ON(!lowerd_new->d_inode))
496 err = -EIO;
065efa1d 497 if (err)
bc97019b
CB
498 goto out_iput;
499
500 if (hardlink) {
501 inode = d_inode(hardlink);
502 ihold(inode);
503
504 /* copy up times from lower inode */
505 shiftfs_copyattr(d_inode(lowerd_link), inode);
506 set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
507 d_instantiate(dentry, inode);
508 } else {
509 struct inode *inode_tmp;
510 struct inode *loweri_new = d_inode(lowerd_new);
511
512 inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
513 shiftfs_inode_test, shiftfs_inode_set,
514 loweri_new);
515 if (unlikely(inode_tmp != inode)) {
516 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
517 iput(inode_tmp);
518 err = -EINVAL;
519 goto out_iput;
520 }
065efa1d 521
bc97019b
CB
522 ihold(loweri_new);
523 shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
524 0, lowerd_new);
525 d_instantiate_new(dentry, inode);
526 }
065efa1d 527
bc97019b
CB
528 shiftfs_copyattr(loweri_dir, diri);
529 if (loweri_iop_ptr == loweri_dir_iop->mkdir)
530 set_nlink(diri, loweri_dir->i_nlink);
065efa1d 531
bc97019b 532 inode = NULL;
065efa1d 533
bc97019b
CB
534out_iput:
535 iput(inode);
536 inode_unlock(loweri_dir);
065efa1d
JB
537
538 return err;
539}
540
96670e34
AR
541static int shiftfs_create(struct user_namespace *ns,
542 struct inode *dir, struct dentry *dentry,
065efa1d
JB
543 umode_t mode, bool excl)
544{
545 mode |= S_IFREG;
546
930f5211 547 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
065efa1d
JB
548}
549
96670e34 550static int shiftfs_mkdir(struct user_namespace *ns, struct inode *dir, struct dentry *dentry,
065efa1d
JB
551 umode_t mode)
552{
553 mode |= S_IFDIR;
554
930f5211 555 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
065efa1d
JB
556}
557
558static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
559 struct dentry *dentry)
560{
930f5211 561 return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
bc97019b
CB
562}
563
96670e34
AR
564static int shiftfs_mknod(struct user_namespace *ns,
565 struct inode *dir, struct dentry *dentry, umode_t mode,
bc97019b
CB
566 dev_t rdev)
567{
568 if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
569 return -EPERM;
570
930f5211 571 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
065efa1d
JB
572}
573
96670e34 574static int shiftfs_symlink(struct user_namespace *ns, struct inode *dir, struct dentry *dentry,
065efa1d
JB
575 const char *symlink)
576{
930f5211 577 return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
065efa1d
JB
578}
579
580static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
581{
bc97019b
CB
582 struct dentry *lowerd = dentry->d_fsdata;
583 struct inode *loweri = dir->i_private;
fd72b31c 584 struct inode *inode = d_inode(dentry);
065efa1d 585 int err;
bc97019b 586 const struct cred *oldcred;
065efa1d 587
0402d484 588 dget(lowerd);
bc97019b
CB
589 oldcred = shiftfs_override_creds(dentry->d_sb);
590 inode_lock_nested(loweri, I_MUTEX_PARENT);
065efa1d 591 if (rmdir)
96670e34 592 err = vfs_rmdir(&init_user_ns, loweri, lowerd);
065efa1d 593 else
96670e34 594 err = vfs_unlink(&init_user_ns, loweri, lowerd, NULL);
bc97019b 595 revert_creds(oldcred);
065efa1d 596
fd72b31c 597 if (!err) {
bc97019b
CB
598 d_drop(dentry);
599
fd72b31c
CB
600 if (rmdir)
601 clear_nlink(inode);
602 else
603 drop_nlink(inode);
604 }
605 inode_unlock(loweri);
606
607 shiftfs_copyattr(loweri, dir);
0402d484 608 dput(lowerd);
065efa1d
JB
609
610 return err;
611}
612
613static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
614{
615 return shiftfs_rm(dir, dentry, false);
616}
617
618static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
619{
620 return shiftfs_rm(dir, dentry, true);
621}
622
96670e34
AR
623static int shiftfs_rename(struct user_namespace *ns,
624 struct inode *olddir, struct dentry *old,
065efa1d
JB
625 struct inode *newdir, struct dentry *new,
626 unsigned int flags)
627{
bc97019b
CB
628 struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
629 *lowerd_dir_new = new->d_parent->d_fsdata,
630 *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
631 *trapd;
632 struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
633 *loweri_dir_new = lowerd_dir_new->d_inode;
96670e34 634 struct renamedata rd = {
e3b65b02 635 .old_mnt_userns = &init_user_ns,
96670e34
AR
636 .old_dir = loweri_dir_old,
637 .old_dentry = lowerd_old,
e3b65b02 638 .new_mnt_userns = &init_user_ns,
96670e34
AR
639 .new_dir = loweri_dir_new,
640 .new_dentry = lowerd_new,
641 };
065efa1d 642 int err = -EINVAL;
bc97019b 643 const struct cred *oldcred;
065efa1d 644
bc97019b 645 trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
065efa1d 646
bc97019b 647 if (trapd == lowerd_old || trapd == lowerd_new)
065efa1d
JB
648 goto out_unlock;
649
bc97019b 650 oldcred = shiftfs_override_creds(old->d_sb);
96670e34 651 err = vfs_rename(&rd);
bc97019b 652 revert_creds(oldcred);
065efa1d 653
bc97019b
CB
654 shiftfs_copyattr(loweri_dir_old, olddir);
655 shiftfs_copyattr(loweri_dir_new, newdir);
065efa1d 656
bc97019b
CB
657out_unlock:
658 unlock_rename(lowerd_dir_new, lowerd_dir_old);
065efa1d
JB
659
660 return err;
661}
662
663static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
664 unsigned int flags)
665{
bc97019b
CB
666 struct dentry *new;
667 struct inode *newi;
668 const struct cred *oldcred;
669 struct dentry *lowerd = dentry->d_parent->d_fsdata;
670 struct inode *inode = NULL, *loweri = lowerd->d_inode;
671
672 inode_lock(loweri);
673 oldcred = shiftfs_override_creds(dentry->d_sb);
674 new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
675 revert_creds(oldcred);
676 inode_unlock(loweri);
065efa1d
JB
677
678 if (IS_ERR(new))
679 return new;
680
681 dentry->d_fsdata = new;
682
bc97019b
CB
683 newi = new->d_inode;
684 if (!newi)
065efa1d
JB
685 goto out;
686
bc97019b
CB
687 inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
688 shiftfs_inode_test, shiftfs_inode_set, newi);
689 if (!inode) {
065efa1d
JB
690 dput(new);
691 return ERR_PTR(-ENOMEM);
692 }
bc97019b
CB
693 if (inode->i_state & I_NEW) {
694 /*
695 * inode->i_private set by shiftfs_inode_set(), but we still
696 * need to take a reference
697 */
698 ihold(newi);
699 shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
700 unlock_new_inode(inode);
701 }
065efa1d 702
bc97019b
CB
703out:
704 return d_splice_alias(inode, dentry);
065efa1d
JB
705}
706
96670e34 707static int shiftfs_permission(struct user_namespace *ns, struct inode *inode, int mask)
065efa1d 708{
065efa1d 709 int err;
bc97019b
CB
710 const struct cred *oldcred;
711 struct inode *loweri = inode->i_private;
065efa1d 712
bc97019b
CB
713 if (!loweri) {
714 WARN_ON(!(mask & MAY_NOT_BLOCK));
065efa1d 715 return -ECHILD;
bc97019b 716 }
065efa1d 717
930f5211 718 err = generic_permission(&init_user_ns, inode, mask);
bc97019b
CB
719 if (err)
720 return err;
721
722 oldcred = shiftfs_override_creds(inode->i_sb);
930f5211 723 err = inode_permission(&init_user_ns, loweri, mask);
bc97019b
CB
724 revert_creds(oldcred);
725
726 return err;
727}
728
729static int shiftfs_fiemap(struct inode *inode,
730 struct fiemap_extent_info *fieinfo, u64 start,
731 u64 len)
732{
733 int err;
734 const struct cred *oldcred;
735 struct inode *loweri = inode->i_private;
736
737 if (!loweri->i_op->fiemap)
738 return -EOPNOTSUPP;
739
740 oldcred = shiftfs_override_creds(inode->i_sb);
741 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
742 filemap_write_and_wait(loweri->i_mapping);
743 err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
744 revert_creds(oldcred);
745
746 return err;
747}
748
96670e34 749static int shiftfs_tmpfile(struct user_namespace *ns,
eb1fa37b 750 struct inode *dir, struct file *file,
bc97019b
CB
751 umode_t mode)
752{
753 int err;
754 const struct cred *oldcred;
bc97019b
CB
755 struct inode *loweri = dir->i_private;
756
757 if (!loweri->i_op->tmpfile)
758 return -EOPNOTSUPP;
759
760 oldcred = shiftfs_override_creds(dir->i_sb);
eb1fa37b 761 err = loweri->i_op->tmpfile(&init_user_ns, loweri, file, mode);
bc97019b 762 revert_creds(oldcred);
065efa1d
JB
763
764 return err;
765}
766
96670e34 767static int shiftfs_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *attr)
065efa1d 768{
bc97019b
CB
769 struct dentry *lowerd = dentry->d_fsdata;
770 struct inode *loweri = lowerd->d_inode;
c1d58583 771 struct iattr newattr;
bc97019b 772 const struct cred *oldcred;
065efa1d 773 struct super_block *sb = dentry->d_sb;
ff3521da 774 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
065efa1d
JB
775 int err;
776
930f5211 777 err = setattr_prepare(&init_user_ns, dentry, attr);
bc97019b
CB
778 if (err)
779 return err;
780
c1d58583 781 newattr = *attr;
ff3521da
SF
782 newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
783 newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
065efa1d 784
c1d58583
SF
785 /*
786 * mode change is for clearing setuid/setgid bits. Allow lower fs
787 * to interpret this in its own way.
788 */
789 if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
790 newattr.ia_valid &= ~ATTR_MODE;
791
bc97019b
CB
792 inode_lock(loweri);
793 oldcred = shiftfs_override_creds(dentry->d_sb);
930f5211 794 err = notify_change(&init_user_ns, lowerd, &newattr, NULL);
bc97019b
CB
795 revert_creds(oldcred);
796 inode_unlock(loweri);
065efa1d 797
bc97019b 798 shiftfs_copyattr(loweri, d_inode(dentry));
065efa1d 799
bc97019b 800 return err;
065efa1d
JB
801}
802
96670e34
AR
803static int shiftfs_getattr(struct user_namespace *ns,
804 const struct path *path, struct kstat *stat,
065efa1d
JB
805 u32 request_mask, unsigned int query_flags)
806{
807 struct inode *inode = path->dentry->d_inode;
bc97019b
CB
808 struct dentry *lowerd = path->dentry->d_fsdata;
809 struct inode *loweri = lowerd->d_inode;
810 struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
811 struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
812 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
813 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
814 const struct cred *oldcred;
815 int err;
816
817 oldcred = shiftfs_override_creds(inode->i_sb);
818 err = vfs_getattr(&newpath, stat, request_mask, query_flags);
819 revert_creds(oldcred);
065efa1d
JB
820
821 if (err)
822 return err;
823
824 /* transform the underlying id */
bc97019b
CB
825 stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
826 stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
065efa1d
JB
827 return 0;
828}
829
bc97019b 830#ifdef CONFIG_SHIFT_FS_POSIX_ACL
065efa1d 831
bc97019b
CB
832static int
833shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
834 struct posix_acl *acl)
065efa1d 835{
bc97019b
CB
836 int i;
837
838 for (i = 0; i < acl->a_count; i++) {
839 struct posix_acl_entry *e = &acl->a_entries[i];
840 switch(e->e_tag) {
841 case ACL_USER:
842 e->e_uid = shift_kuid(from, to, e->e_uid);
843 if (!uid_valid(e->e_uid))
844 return -EOVERFLOW;
845 break;
846 case ACL_GROUP:
847 e->e_gid = shift_kgid(from, to, e->e_gid);
848 if (!gid_valid(e->e_gid))
849 return -EOVERFLOW;
850 break;
851 }
852 }
853 return 0;
854}
065efa1d 855
bc97019b
CB
856static void
857shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
858 void *value, size_t size)
859{
860 struct posix_acl_xattr_header *header = value;
861 struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
862 int count;
863 kuid_t kuid;
864 kgid_t kgid;
065efa1d 865
bc97019b
CB
866 if (!value)
867 return;
868 if (size < sizeof(struct posix_acl_xattr_header))
869 return;
870 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
871 return;
065efa1d 872
bc97019b
CB
873 count = posix_acl_xattr_count(size);
874 if (count < 0)
875 return;
876 if (count == 0)
877 return;
065efa1d 878
bc97019b
CB
879 for (end = entry + count; entry != end; entry++) {
880 switch(le16_to_cpu(entry->e_tag)) {
881 case ACL_USER:
882 kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
883 kuid = shift_kuid(from, to, kuid);
884 entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
885 break;
886 case ACL_GROUP:
96670e34 887 kgid = make_kgid(from, le32_to_cpu(entry->e_id));
bc97019b 888 kgid = shift_kgid(from, to, kgid);
96670e34 889 entry->e_id = cpu_to_le32(from_kgid(from, kgid));
bc97019b
CB
890 break;
891 default:
892 break;
893 }
894 }
065efa1d
JB
895}
896
96670e34
AR
897static struct posix_acl *
898shiftfs_get_acl(struct inode *inode, int type, bool rcu)
065efa1d 899{
bc97019b
CB
900 struct inode *loweri = inode->i_private;
901 const struct cred *oldcred;
902 struct posix_acl *lower_acl, *acl = NULL;
903 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
904 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
905 int size;
906 int err;
065efa1d 907
96670e34
AR
908 if (rcu)
909 return ERR_PTR(-ECHILD);
910
bc97019b
CB
911 if (!IS_POSIXACL(loweri))
912 return NULL;
065efa1d 913
bc97019b 914 oldcred = shiftfs_override_creds(inode->i_sb);
f9cf053b 915 lower_acl = get_inode_acl(loweri, type);
bc97019b 916 revert_creds(oldcred);
065efa1d 917
bc97019b
CB
918 if (lower_acl && !IS_ERR(lower_acl)) {
919 /* XXX: export posix_acl_clone? */
920 size = sizeof(struct posix_acl) +
921 lower_acl->a_count * sizeof(struct posix_acl_entry);
922 acl = kmemdup(lower_acl, size, GFP_KERNEL);
923 posix_acl_release(lower_acl);
065efa1d 924
bc97019b
CB
925 if (!acl)
926 return ERR_PTR(-ENOMEM);
065efa1d 927
bc97019b 928 refcount_set(&acl->a_refcount, 1);
065efa1d 929
bc97019b
CB
930 err = shift_acl_ids(from_ns, to_ns, acl);
931 if (err) {
932 kfree(acl);
933 return ERR_PTR(err);
934 }
935 }
936
937 return acl;
065efa1d
JB
938}
939
bc97019b
CB
940static int
941shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
942 struct dentry *dentry, struct inode *inode,
943 const char *name, void *buffer, size_t size)
065efa1d 944{
bc97019b
CB
945 struct inode *loweri = inode->i_private;
946 int ret;
947
948 ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
949 buffer, size);
950 if (ret < 0)
951 return ret;
065efa1d 952
bc97019b
CB
953 inode_lock(loweri);
954 shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
955 buffer, size);
956 inode_unlock(loweri);
957 return ret;
065efa1d
JB
958}
959
bc97019b
CB
960static int
961shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
96670e34 962 struct user_namespace *ns,
bc97019b
CB
963 struct dentry *dentry, struct inode *inode,
964 const char *name, const void *value,
965 size_t size, int flags)
966{
967 struct inode *loweri = inode->i_private;
968 int err;
065efa1d 969
bc97019b
CB
970 if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
971 return -EOPNOTSUPP;
972 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
973 return value ? -EACCES : 0;
e3b65b02 974 if (!inode_owner_or_capable(&init_user_ns, inode))
bc97019b
CB
975 return -EPERM;
976
977 if (value) {
978 shift_acl_xattr_ids(inode->i_sb->s_user_ns,
979 loweri->i_sb->s_user_ns,
980 (void *)value, size);
930f5211 981 err = shiftfs_setxattr(dentry, inode, handler->name, value,
bc97019b
CB
982 size, flags);
983 } else {
930f5211 984 err = shiftfs_removexattr(&init_user_ns, dentry, handler->name);
bc97019b 985 }
065efa1d 986
bc97019b
CB
987 if (!err)
988 shiftfs_copyattr(loweri, inode);
989
990 return err;
991}
992
993static const struct xattr_handler
994shiftfs_posix_acl_access_xattr_handler = {
995 .name = XATTR_NAME_POSIX_ACL_ACCESS,
996 .flags = ACL_TYPE_ACCESS,
997 .get = shiftfs_posix_acl_xattr_get,
998 .set = shiftfs_posix_acl_xattr_set,
065efa1d
JB
999};
1000
bc97019b
CB
1001static const struct xattr_handler
1002shiftfs_posix_acl_default_xattr_handler = {
1003 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1004 .flags = ACL_TYPE_DEFAULT,
1005 .get = shiftfs_posix_acl_xattr_get,
1006 .set = shiftfs_posix_acl_xattr_set,
065efa1d
JB
1007};
1008
bc97019b 1009#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
065efa1d 1010
bc97019b 1011#define shiftfs_get_acl NULL
065efa1d 1012
bc97019b 1013#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
065efa1d 1014
bc97019b
CB
1015static const struct inode_operations shiftfs_dir_inode_operations = {
1016 .lookup = shiftfs_lookup,
1017 .mkdir = shiftfs_mkdir,
1018 .symlink = shiftfs_symlink,
1019 .unlink = shiftfs_unlink,
1020 .rmdir = shiftfs_rmdir,
1021 .rename = shiftfs_rename,
1022 .link = shiftfs_link,
1023 .setattr = shiftfs_setattr,
1024 .create = shiftfs_create,
1025 .mknod = shiftfs_mknod,
1026 .permission = shiftfs_permission,
1027 .getattr = shiftfs_getattr,
1028 .listxattr = shiftfs_listxattr,
f9cf053b 1029 .get_inode_acl = shiftfs_get_acl,
bc97019b
CB
1030};
1031
1032static const struct inode_operations shiftfs_file_inode_operations = {
1033 .fiemap = shiftfs_fiemap,
1034 .getattr = shiftfs_getattr,
f9cf053b 1035 .get_inode_acl = shiftfs_get_acl,
bc97019b
CB
1036 .listxattr = shiftfs_listxattr,
1037 .permission = shiftfs_permission,
1038 .setattr = shiftfs_setattr,
1039 .tmpfile = shiftfs_tmpfile,
1040};
1041
1042static const struct inode_operations shiftfs_special_inode_operations = {
1043 .getattr = shiftfs_getattr,
f9cf053b 1044 .get_inode_acl = shiftfs_get_acl,
bc97019b
CB
1045 .listxattr = shiftfs_listxattr,
1046 .permission = shiftfs_permission,
1047 .setattr = shiftfs_setattr,
1048};
1049
1050static const struct inode_operations shiftfs_symlink_inode_operations = {
1051 .getattr = shiftfs_getattr,
1052 .get_link = shiftfs_get_link,
1053 .listxattr = shiftfs_listxattr,
1054 .setattr = shiftfs_setattr,
1055};
1056
1057static struct file *shiftfs_open_realfile(const struct file *file,
e4d81d10 1058 struct inode *realinode)
bc97019b 1059{
e4d81d10
CB
1060 struct file *realfile;
1061 const struct cred *old_cred;
bc97019b 1062 struct inode *inode = file_inode(file);
e4d81d10 1063 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
bc97019b 1064 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
e4d81d10 1065 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
bc97019b 1066
e4d81d10
CB
1067 old_cred = shiftfs_override_creds(inode->i_sb);
1068 realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1069 info->creator_cred);
1070 revert_creds(old_cred);
bc97019b 1071
e4d81d10 1072 return realfile;
bc97019b
CB
1073}
1074
1075#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1076
1077static int shiftfs_change_flags(struct file *file, unsigned int flags)
1078{
1079 struct inode *inode = file_inode(file);
1080 int err;
1081
1082 /* if some flag changed that cannot be changed then something's amiss */
1083 if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1084 return -EIO;
1085
1086 flags &= SHIFTFS_SETFL_MASK;
1087
1088 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1089 return -EPERM;
1090
1091 if (flags & O_DIRECT) {
1092 if (!file->f_mapping->a_ops ||
1093 !file->f_mapping->a_ops->direct_IO)
1094 return -EINVAL;
1095 }
1096
1097 if (file->f_op->check_flags) {
1098 err = file->f_op->check_flags(flags);
1099 if (err)
1100 return err;
1101 }
1102
1103 spin_lock(&file->f_lock);
1104 file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1105 spin_unlock(&file->f_lock);
1106
1107 return 0;
1108}
1109
bc97019b
CB
1110static int shiftfs_open(struct inode *inode, struct file *file)
1111{
bc97019b 1112 struct file *realfile;
bc97019b 1113
e4d81d10
CB
1114 realfile = shiftfs_open_realfile(file, inode->i_private);
1115 if (IS_ERR(realfile))
bc97019b 1116 return PTR_ERR(realfile);
bc97019b 1117
e4d81d10 1118 file->private_data = realfile;
8afc6600
CB
1119 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1120 file->f_mapping = realfile->f_mapping;
1121
bc97019b
CB
1122 return 0;
1123}
1124
e4d81d10 1125static int shiftfs_dir_open(struct inode *inode, struct file *file)
bc97019b 1126{
e4d81d10
CB
1127 struct file *realfile;
1128 const struct cred *oldcred;
1129 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1130 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1131 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1132
1133 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1134 realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1135 info->creator_cred);
1136 revert_creds(oldcred);
1137 if (IS_ERR(realfile))
1138 return PTR_ERR(realfile);
bc97019b 1139
e4d81d10 1140 file->private_data = realfile;
bc97019b 1141
e4d81d10
CB
1142 return 0;
1143}
1144
1145static int shiftfs_release(struct inode *inode, struct file *file)
1146{
1147 struct file *realfile = file->private_data;
1148
1149 if (realfile)
1150 fput(realfile);
bc97019b
CB
1151
1152 return 0;
1153}
1154
e4d81d10
CB
/* ->release() for directories; identical to the regular-file release. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	return shiftfs_release(inode, file);
}
1159
b4b8af8e
CB
1160static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1161{
e4d81d10 1162 struct file *realfile = file->private_data;
b4b8af8e
CB
1163
1164 return vfs_llseek(realfile, offset, whence);
1165}
1166
1167static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
bc97019b
CB
1168{
1169 struct inode *realinode = file_inode(file)->i_private;
1170
1171 return generic_file_llseek_size(file, offset, whence,
1172 realinode->i_sb->s_maxbytes,
1173 i_size_read(realinode));
1174}
1175
1176/* XXX: Need to figure out what to do about atime updates, maybe other
1177 * timestamps too ... ref. ovl_file_accessed() */
1178
1179static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1180{
1181 int ifl = iocb->ki_flags;
1182 rwf_t flags = 0;
1183
1184 if (ifl & IOCB_NOWAIT)
1185 flags |= RWF_NOWAIT;
1186 if (ifl & IOCB_HIPRI)
1187 flags |= RWF_HIPRI;
1188 if (ifl & IOCB_DSYNC)
1189 flags |= RWF_DSYNC;
1190 if (ifl & IOCB_SYNC)
1191 flags |= RWF_SYNC;
1192
1193 return flags;
1194}
1195
4811c23a
CB
1196static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1197{
1198 struct file *realfile;
1199
1200 if (file->f_op->open != shiftfs_open &&
1201 file->f_op->open != shiftfs_dir_open)
1202 return -EINVAL;
1203
1204 realfile = file->private_data;
1205 lowerfd->flags = 0;
1206 lowerfd->file = realfile;
1207
1208 /* Did the flags change since open? */
1209 if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1210 return shiftfs_change_flags(lowerfd->file, file->f_flags);
1211
1212 return 0;
1213}
1214
bc97019b
CB
1215static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1216{
1217 struct file *file = iocb->ki_filp;
1218 struct fd lowerfd;
1219 const struct cred *oldcred;
1220 ssize_t ret;
1221
1222 if (!iov_iter_count(iter))
1223 return 0;
1224
1225 ret = shiftfs_real_fdget(file, &lowerfd);
1226 if (ret)
1227 return ret;
1228
1229 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1230 ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1231 shiftfs_iocb_to_rwf(iocb));
1232 revert_creds(oldcred);
1233
1234 shiftfs_file_accessed(file);
1235
1236 fdput(lowerfd);
1237 return ret;
1238}
1239
1240static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1241{
1242 struct file *file = iocb->ki_filp;
1243 struct inode *inode = file_inode(file);
1244 struct fd lowerfd;
1245 const struct cred *oldcred;
1246 ssize_t ret;
1247
1248 if (!iov_iter_count(iter))
1249 return 0;
1250
1251 inode_lock(inode);
1252 /* Update mode */
1253 shiftfs_copyattr(inode->i_private, inode);
1254 ret = file_remove_privs(file);
1255 if (ret)
1256 goto out_unlock;
1257
1258 ret = shiftfs_real_fdget(file, &lowerfd);
1259 if (ret)
1260 goto out_unlock;
1261
1262 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1263 file_start_write(lowerfd.file);
1264 ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1265 shiftfs_iocb_to_rwf(iocb));
1266 file_end_write(lowerfd.file);
1267 revert_creds(oldcred);
1268
1269 /* Update size */
1270 shiftfs_copyattr(inode->i_private, inode);
1271
1272 fdput(lowerfd);
1273
1274out_unlock:
1275 inode_unlock(inode);
1276 return ret;
1277}
1278
1279static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1280 int datasync)
1281{
1282 struct fd lowerfd;
1283 const struct cred *oldcred;
1284 int ret;
1285
1286 ret = shiftfs_real_fdget(file, &lowerfd);
1287 if (ret)
1288 return ret;
1289
1290 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1291 ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1292 revert_creds(oldcred);
1293
1294 fdput(lowerfd);
1295 return ret;
1296}
1297
1298static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1299{
e4d81d10 1300 struct file *realfile = file->private_data;
bc97019b
CB
1301 const struct cred *oldcred;
1302 int ret;
1303
1304 if (!realfile->f_op->mmap)
1305 return -ENODEV;
1306
1307 if (WARN_ON(file != vma->vm_file))
1308 return -EIO;
1309
1310 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1311 vma->vm_file = get_file(realfile);
1312 ret = call_mmap(vma->vm_file, vma);
1313 revert_creds(oldcred);
1314
1315 shiftfs_file_accessed(file);
1316
1bce1c2d
SF
1317 if (ret) {
1318 /*
1319 * Drop refcount from new vm_file value and restore original
1320 * vm_file value
1321 */
1322 vma->vm_file = file;
1323 fput(realfile);
1324 } else {
1325 /* Drop refcount from previous vm_file value */
1326 fput(file);
1327 }
bc97019b
CB
1328
1329 return ret;
1330}
1331
1332static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1333 loff_t len)
1334{
1335 struct inode *inode = file_inode(file);
1336 struct inode *loweri = inode->i_private;
1337 struct fd lowerfd;
1338 const struct cred *oldcred;
1339 int ret;
1340
1341 ret = shiftfs_real_fdget(file, &lowerfd);
1342 if (ret)
1343 return ret;
1344
1345 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1346 ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1347 revert_creds(oldcred);
1348
1349 /* Update size */
1350 shiftfs_copyattr(loweri, inode);
1351
1352 fdput(lowerfd);
1353 return ret;
1354}
1355
1356static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1357 int advice)
1358{
1359 struct fd lowerfd;
1360 const struct cred *oldcred;
1361 int ret;
1362
1363 ret = shiftfs_real_fdget(file, &lowerfd);
1364 if (ret)
1365 return ret;
1366
1367 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1368 ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1369 revert_creds(oldcred);
1370
1371 fdput(lowerfd);
1372 return ret;
1373}
1374
f7237743 1375static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
bc97019b
CB
1376 const struct cred **oldcred,
1377 struct cred **newcred)
1378{
ff3521da 1379 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
bc97019b
CB
1380 kuid_t fsuid = current_fsuid();
1381 kgid_t fsgid = current_fsgid();
1382
1383 *oldcred = shiftfs_override_creds(sb);
1384
1385 *newcred = prepare_creds();
1386 if (!*newcred) {
1387 revert_creds(*oldcred);
1388 return -ENOMEM;
1389 }
1390
ff3521da
SF
1391 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1392 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
bc97019b
CB
1393
1394 /* clear all caps to prevent bypassing capable() checks */
1395 cap_clear((*newcred)->cap_bset);
1396 cap_clear((*newcred)->cap_effective);
1397 cap_clear((*newcred)->cap_inheritable);
1398 cap_clear((*newcred)->cap_permitted);
1399
f7237743
CB
1400 if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1401 kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1402 /*
1403 * Allow the root user in the container to remove subvolumes
1404 * from other users.
1405 */
1406 if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1407 cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1408 }
1409
bc97019b
CB
1410 put_cred(override_creds(*newcred));
1411 return 0;
1412}
1413
/* Undo shiftfs_override_ioctl_creds(); delegates to shiftfs_revert_object_creds(). */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1419
1f1d0142
CB
1420static inline bool is_btrfs_snap_ioctl(int cmd)
1421{
1422 if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1423 return true;
1424
1425 return false;
1426}
1427
62af3299 1428static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
1f1d0142
CB
1429 struct btrfs_ioctl_vol_args *v1,
1430 struct btrfs_ioctl_vol_args_v2 *v2)
1431{
1432 int ret;
1433
1434 if (!is_btrfs_snap_ioctl(cmd))
1435 return 0;
1436
1437 if (cmd == BTRFS_IOC_SNAP_CREATE)
1438 ret = copy_to_user(arg, v1, sizeof(*v1));
1439 else
1440 ret = copy_to_user(arg, v2, sizeof(*v2));
1441
5319c5b3 1442 close_fd(fd);
1f1d0142
CB
1443 kfree(v1);
1444 kfree(v2);
1445
63fbf814 1446 return ret ? -EFAULT: 0;
1f1d0142
CB
1447}
1448
1449static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1450 struct btrfs_ioctl_vol_args **b1,
1451 struct btrfs_ioctl_vol_args_v2 **b2,
1f1d0142
CB
1452 int *newfd)
1453{
1454 int oldfd, ret;
1455 struct fd src;
62af3299 1456 struct fd lfd = {};
1f1d0142
CB
1457 struct btrfs_ioctl_vol_args *v1 = NULL;
1458 struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1459
c2c8cffe
SF
1460 *b1 = NULL;
1461 *b2 = NULL;
1462
1f1d0142
CB
1463 if (!is_btrfs_snap_ioctl(cmd))
1464 return 0;
1465
1466 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1467 v1 = memdup_user(arg, sizeof(*v1));
1468 if (IS_ERR(v1))
1469 return PTR_ERR(v1);
1470 oldfd = v1->fd;
1f1d0142
CB
1471 } else {
1472 v2 = memdup_user(arg, sizeof(*v2));
1473 if (IS_ERR(v2))
1474 return PTR_ERR(v2);
1475 oldfd = v2->fd;
1f1d0142
CB
1476 }
1477
1478 src = fdget(oldfd);
c2c8cffe
SF
1479 if (!src.file) {
1480 ret = -EINVAL;
1481 goto err_free;
1482 }
1f1d0142 1483
62af3299
SF
1484 ret = shiftfs_real_fdget(src.file, &lfd);
1485 if (ret) {
1486 fdput(src);
c2c8cffe 1487 goto err_free;
62af3299
SF
1488 }
1489
1490 /*
1491 * shiftfs_real_fdget() does not take a reference to lfd.file, so
1492 * take a reference here to offset the one which will be put by
5319c5b3 1493 * close_fd(), and make sure that reference is put on fdput(lfd).
62af3299
SF
1494 */
1495 get_file(lfd.file);
1496 lfd.flags |= FDPUT_FPUT;
1497 fdput(src);
1f1d0142 1498
62af3299 1499 *newfd = get_unused_fd_flags(lfd.file->f_flags);
1f1d0142 1500 if (*newfd < 0) {
62af3299 1501 fdput(lfd);
c2c8cffe
SF
1502 ret = *newfd;
1503 goto err_free;
1f1d0142
CB
1504 }
1505
62af3299 1506 fd_install(*newfd, lfd.file);
1f1d0142
CB
1507
1508 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1509 v1->fd = *newfd;
1510 ret = copy_to_user(arg, v1, sizeof(*v1));
1511 v1->fd = oldfd;
1512 } else {
1513 v2->fd = *newfd;
1514 ret = copy_to_user(arg, v2, sizeof(*v2));
1515 v2->fd = oldfd;
1516 }
1517
c2c8cffe
SF
1518 if (!ret) {
1519 *b1 = v1;
1520 *b2 = v2;
1521 } else {
62af3299 1522 shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
63fbf814 1523 ret = -EFAULT;
c2c8cffe
SF
1524 }
1525
1526 return ret;
1527
1528err_free:
1529 kfree(v1);
1530 kfree(v2);
1f1d0142
CB
1531
1532 return ret;
1533}
1534
bc97019b
CB
1535static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1536 unsigned long arg)
1537{
bc97019b
CB
1538 struct fd lowerfd;
1539 struct cred *newcred;
1540 const struct cred *oldcred;
1f1d0142
CB
1541 int newfd = -EBADF;
1542 long err = 0, ret = 0;
1543 void __user *argp = (void __user *)arg;
bc97019b 1544 struct super_block *sb = file->f_path.dentry->d_sb;
1f1d0142
CB
1545 struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1546 struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1547
1548 ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
62af3299 1549 &newfd);
1f1d0142
CB
1550 if (ret < 0)
1551 return ret;
bc97019b
CB
1552
1553 ret = shiftfs_real_fdget(file, &lowerfd);
1554 if (ret)
1f1d0142 1555 goto out_restore;
bc97019b 1556
f7237743 1557 ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
bc97019b
CB
1558 if (ret)
1559 goto out_fdput;
1560
1561 ret = vfs_ioctl(lowerfd.file, cmd, arg);
1562
1563 shiftfs_revert_ioctl_creds(oldcred, newcred);
1564
1565 shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1566 shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1567
1568out_fdput:
1569 fdput(lowerfd);
1570
1f1d0142 1571out_restore:
62af3299 1572 err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
1f1d0142
CB
1573 btrfs_v1, btrfs_v2);
1574 if (!ret)
1575 ret = err;
1576
bc97019b
CB
1577 return ret;
1578}
1579
5752ee16 1580static bool in_ioctl_whitelist(int flag, unsigned long arg)
1f1d0142 1581{
5752ee16
CB
1582 void __user *argp = (void __user *)arg;
1583 u64 flags = 0;
1584
1f1d0142 1585 switch (flag) {
5752ee16
CB
1586 case BTRFS_IOC_FS_INFO:
1587 return true;
1f1d0142
CB
1588 case BTRFS_IOC_SNAP_CREATE:
1589 return true;
1590 case BTRFS_IOC_SNAP_CREATE_V2:
1591 return true;
1592 case BTRFS_IOC_SUBVOL_CREATE:
1593 return true;
1594 case BTRFS_IOC_SUBVOL_CREATE_V2:
5752ee16
CB
1595 return true;
1596 case BTRFS_IOC_SUBVOL_GETFLAGS:
1597 return true;
1598 case BTRFS_IOC_SUBVOL_SETFLAGS:
6ce3e462 1599 if (copy_from_user(&flags, argp, sizeof(flags)))
5752ee16
CB
1600 return false;
1601
1602 if (flags & ~BTRFS_SUBVOL_RDONLY)
1603 return false;
1604
1f1d0142
CB
1605 return true;
1606 case BTRFS_IOC_SNAP_DESTROY:
1607 return true;
1608 }
1609
1610 return false;
1611}
1612
bc97019b
CB
1613static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1614 unsigned long arg)
1615{
1616 switch (cmd) {
1617 case FS_IOC_GETVERSION:
1618 /* fall through */
1619 case FS_IOC_GETFLAGS:
1620 /* fall through */
1621 case FS_IOC_SETFLAGS:
1622 break;
1623 default:
5752ee16 1624 if (!in_ioctl_whitelist(cmd, arg) ||
1f1d0142
CB
1625 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1626 return -ENOTTY;
bc97019b
CB
1627 }
1628
1629 return shiftfs_real_ioctl(file, cmd, arg);
1630}
1631
1632static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1633 unsigned long arg)
1634{
1635 switch (cmd) {
1636 case FS_IOC32_GETVERSION:
1637 /* fall through */
1638 case FS_IOC32_GETFLAGS:
1639 /* fall through */
1640 case FS_IOC32_SETFLAGS:
1641 break;
1642 default:
5752ee16 1643 if (!in_ioctl_whitelist(cmd, arg) ||
1f1d0142
CB
1644 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1645 return -ENOIOCTLCMD;
bc97019b
CB
1646 }
1647
1648 return shiftfs_real_ioctl(file, cmd, arg);
1649}
1650
1651enum shiftfs_copyop {
1652 SHIFTFS_COPY,
1653 SHIFTFS_CLONE,
1654 SHIFTFS_DEDUPE,
1655};
1656
1657static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1658 struct file *file_out, loff_t pos_out, u64 len,
1659 unsigned int flags, enum shiftfs_copyop op)
1660{
1661 ssize_t ret;
1662 struct fd real_in, real_out;
1663 const struct cred *oldcred;
1664 struct inode *inode_out = file_inode(file_out);
1665 struct inode *loweri = inode_out->i_private;
1666
1667 ret = shiftfs_real_fdget(file_out, &real_out);
1668 if (ret)
1669 return ret;
1670
1671 ret = shiftfs_real_fdget(file_in, &real_in);
1672 if (ret) {
1673 fdput(real_out);
1674 return ret;
1675 }
1676
1677 oldcred = shiftfs_override_creds(inode_out->i_sb);
1678 switch (op) {
1679 case SHIFTFS_COPY:
1680 ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1681 pos_out, len, flags);
1682 break;
1683
1684 case SHIFTFS_CLONE:
1685 ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1686 pos_out, len, flags);
1687 break;
1688
1689 case SHIFTFS_DEDUPE:
1690 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1691 real_out.file, pos_out, len,
1692 flags);
1693 break;
1694 }
1695 revert_creds(oldcred);
1696
1697 /* Update size */
1698 shiftfs_copyattr(loweri, inode_out);
1699
1700 fdput(real_in);
1701 fdput(real_out);
1702
1703 return ret;
1704}
1705
1706static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1707 struct file *file_out, loff_t pos_out,
1708 size_t len, unsigned int flags)
1709{
1710 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1711 SHIFTFS_COPY);
1712}
1713
1714static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1715 struct file *file_out, loff_t pos_out,
1716 loff_t len, unsigned int remap_flags)
1717{
1718 enum shiftfs_copyop op;
1719
1720 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1721 return -EINVAL;
1722
1723 if (remap_flags & REMAP_FILE_DEDUP)
1724 op = SHIFTFS_DEDUPE;
1725 else
1726 op = SHIFTFS_CLONE;
1727
1728 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1729 remap_flags, op);
1730}
1731
1732static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1733{
1734 const struct cred *oldcred;
1735 int err = -ENOTDIR;
e4d81d10 1736 struct file *realfile = file->private_data;
bc97019b
CB
1737
1738 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1739 err = iterate_dir(realfile, ctx);
1740 revert_creds(oldcred);
1741
1742 return err;
1743}
1744
1745const struct file_operations shiftfs_file_operations = {
1746 .open = shiftfs_open,
1747 .release = shiftfs_release,
b4b8af8e 1748 .llseek = shiftfs_file_llseek,
bc97019b
CB
1749 .read_iter = shiftfs_read_iter,
1750 .write_iter = shiftfs_write_iter,
1751 .fsync = shiftfs_fsync,
1752 .mmap = shiftfs_mmap,
1753 .fallocate = shiftfs_fallocate,
1754 .fadvise = shiftfs_fadvise,
1755 .unlocked_ioctl = shiftfs_ioctl,
1756 .compat_ioctl = shiftfs_compat_ioctl,
1757 .copy_file_range = shiftfs_copy_file_range,
1758 .remap_file_range = shiftfs_remap_file_range,
c01adb61
CB
1759 .splice_read = generic_file_splice_read,
1760 .splice_write = iter_file_splice_write,
bc97019b
CB
1761};
1762
1763const struct file_operations shiftfs_dir_operations = {
e4d81d10
CB
1764 .open = shiftfs_dir_open,
1765 .release = shiftfs_dir_release,
bc97019b
CB
1766 .compat_ioctl = shiftfs_compat_ioctl,
1767 .fsync = shiftfs_fsync,
1768 .iterate_shared = shiftfs_iterate_shared,
b4b8af8e 1769 .llseek = shiftfs_dir_llseek,
bc97019b 1770 .read = generic_read_dir,
bc97019b
CB
1771 .unlocked_ioctl = shiftfs_ioctl,
1772};
1773
1774static const struct address_space_operations shiftfs_aops = {
1775 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1776 .direct_IO = noop_direct_IO,
1777};
1778
1779static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1780 umode_t mode, dev_t dev, struct dentry *dentry)
1781{
1782 struct inode *loweri;
1783
1784 inode->i_ino = ino;
1785 inode->i_flags |= S_NOCMTIME;
1786
1787 mode &= S_IFMT;
1788 inode->i_mode = mode;
1789 switch (mode & S_IFMT) {
1790 case S_IFDIR:
1791 inode->i_op = &shiftfs_dir_inode_operations;
1792 inode->i_fop = &shiftfs_dir_operations;
1793 break;
1794 case S_IFLNK:
1795 inode->i_op = &shiftfs_symlink_inode_operations;
1796 break;
1797 case S_IFREG:
1798 inode->i_op = &shiftfs_file_inode_operations;
1799 inode->i_fop = &shiftfs_file_operations;
1800 inode->i_mapping->a_ops = &shiftfs_aops;
1801 break;
1802 default:
1803 inode->i_op = &shiftfs_special_inode_operations;
1804 init_special_inode(inode, mode, dev);
1805 break;
1806 }
1807
1808 if (!dentry)
1809 return;
1810
1811 loweri = dentry->d_inode;
1812 if (!loweri->i_op->get_link)
1813 inode->i_opflags |= IOP_NOFOLLOW;
1814
1815 shiftfs_copyattr(loweri, inode);
1816 shiftfs_copyflags(loweri, inode);
1817 set_nlink(inode, loweri->i_nlink);
1818}
1819
1820static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1821{
1822 struct super_block *sb = dentry->d_sb;
1823 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1824
1825 if (sbinfo->mark)
1826 seq_show_option(m, "mark", NULL);
1827
1828 if (sbinfo->passthrough)
1829 seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1830
1831 return 0;
1832}
1833
1834static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1835{
1836 struct super_block *sb = dentry->d_sb;
1837 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1838 struct dentry *root = sb->s_root;
1839 struct dentry *realroot = root->d_fsdata;
1840 struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1841 int err;
1842
1843 err = vfs_statfs(&realpath, buf);
065efa1d 1844 if (err)
bc97019b 1845 return err;
065efa1d 1846
bc97019b
CB
1847 if (!shiftfs_passthrough_statfs(sbinfo))
1848 buf->f_type = sb->s_magic;
065efa1d 1849
bc97019b
CB
1850 return 0;
1851}
065efa1d 1852
bc97019b
CB
1853static void shiftfs_evict_inode(struct inode *inode)
1854{
1855 struct inode *loweri = inode->i_private;
1856
1857 clear_inode(inode);
1858
1859 if (loweri)
1860 iput(loweri);
1861}
1862
1863static void shiftfs_put_super(struct super_block *sb)
1864{
1865 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1866
1867 if (sbinfo) {
1868 mntput(sbinfo->mnt);
1869 put_cred(sbinfo->creator_cred);
1870 kfree(sbinfo);
1871 }
1872}
1873
1874static const struct xattr_handler shiftfs_xattr_handler = {
1875 .prefix = "",
1876 .get = shiftfs_xattr_get,
1877 .set = shiftfs_xattr_set,
1878};
1879
1880const struct xattr_handler *shiftfs_xattr_handlers[] = {
1881#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1882 &shiftfs_posix_acl_access_xattr_handler,
1883 &shiftfs_posix_acl_default_xattr_handler,
1884#endif
1885 &shiftfs_xattr_handler,
1886 NULL
1887};
1888
/* True when every bit set in @new_flags is also set in @old_flags. */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (new_flags & old_flags) == new_flags;
}
1896
855e06ea
CB
1897static int shiftfs_super_check_flags(unsigned long old_flags,
1898 unsigned long new_flags)
1899{
1900 if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1901 return -EPERM;
1902
1903 if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1904 return -EPERM;
1905
1906 if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1907 return -EPERM;
1908
1909 if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1910 return -EPERM;
1911
1912 if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1913 return -EPERM;
1914
1915 if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1916 return -EPERM;
1917
1918 if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1919 return -EPERM;
1920
1921 return 0;
1922}
1923
bc97019b
CB
1924static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1925{
1926 int err;
1927 struct shiftfs_super_info new = {};
1928 struct shiftfs_super_info *info = sb->s_fs_info;
1929
1930 err = shiftfs_parse_mount_options(&new, data);
065efa1d 1931 if (err)
bc97019b
CB
1932 return err;
1933
855e06ea
CB
1934 err = shiftfs_super_check_flags(sb->s_flags, *flags);
1935 if (err)
1936 return err;
1937
bc97019b
CB
1938 /* Mark mount option cannot be changed. */
1939 if (info->mark || (info->mark != new.mark))
1940 return -EPERM;
1941
1942 if (info->passthrough != new.passthrough) {
1943 /* Don't allow exceeding passthrough options of mark mount. */
353409ee 1944 if (!passthrough_is_subset(info->passthrough_mark,
bc97019b
CB
1945 info->passthrough))
1946 return -EPERM;
1947
1948 info->passthrough = new.passthrough;
1949 }
1950
1951 return 0;
1952}
065efa1d 1953
bc97019b
CB
1954static const struct super_operations shiftfs_super_ops = {
1955 .put_super = shiftfs_put_super,
1956 .show_options = shiftfs_show_options,
1957 .statfs = shiftfs_statfs,
1958 .remount_fs = shiftfs_remount,
1959 .evict_inode = shiftfs_evict_inode,
1960};
1961
/* Arguments handed from shiftfs_mount() to shiftfs_fill_super(). */
struct shiftfs_data {
	void *data;		/* raw mount option data */
	const char *path;	/* mount source (dev_name): path to the lower directory */
};
1966
855e06ea
CB
1967static void shiftfs_super_force_flags(struct super_block *sb,
1968 unsigned long lower_flags)
1969{
1970 sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1971 SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1972
1973 if (!(lower_flags & SB_POSIXACL))
1974 sb->s_flags &= ~SB_POSIXACL;
1975}
1976
bc97019b
CB
1977static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1978 int silent)
1979{
1980 int err;
1981 struct path path = {};
1982 struct shiftfs_super_info *sbinfo_mp;
1983 char *name = NULL;
1984 struct inode *inode = NULL;
1985 struct dentry *dentry = NULL;
1986 struct shiftfs_data *data = raw_data;
1987 struct shiftfs_super_info *sbinfo = NULL;
1988
1989 if (!data->path)
1990 return -EINVAL;
1991
1992 sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1993 if (!sb->s_fs_info)
1994 return -ENOMEM;
1995 sbinfo = sb->s_fs_info;
1996
1997 err = shiftfs_parse_mount_options(sbinfo, data->data);
1998 if (err)
1999 return err;
2000
2001 /* to mount a mark, must be userns admin */
2002 if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
2003 return -EPERM;
2004
2005 name = kstrdup(data->path, GFP_KERNEL);
2006 if (!name)
2007 return -ENOMEM;
2008
2009 err = kern_path(name, LOOKUP_FOLLOW, &path);
2010 if (err)
2011 goto out_free_name;
065efa1d
JB
2012
2013 if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
2014 err = -ENOTDIR;
bc97019b 2015 goto out_put_path;
065efa1d
JB
2016 }
2017
e3b65b02
AM
2018 /*
2019 * It makes no sense to handle idmapped layers from shiftfs.
2020 * And we didn't support it properly anyway.
2021 */
2022 if (is_idmapped_mnt(path.mnt)) {
2023 err = -EINVAL;
2024 pr_err("idmapped layers are currently not supported\n");
2025 goto out_put_path;
2026 }
2027
855e06ea
CB
2028 sb->s_flags |= SB_POSIXACL;
2029
bc97019b 2030 if (sbinfo->mark) {
92732237 2031 struct cred *cred_tmp;
bc97019b
CB
2032 struct super_block *lower_sb = path.mnt->mnt_sb;
2033
2034 /* to mark a mount point, must root wrt lower s_user_ns */
2035 if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
2036 err = -EPERM;
2037 goto out_put_path;
2038 }
065efa1d 2039
065efa1d
JB
2040 /*
2041 * this part is visible unshifted, so make sure no
2042 * executables that could be used to give suid
2043 * privileges
2044 */
2045 sb->s_iflags = SB_I_NOEXEC;
065efa1d 2046
855e06ea
CB
2047 shiftfs_super_force_flags(sb, lower_sb->s_flags);
2048
065efa1d 2049 /*
bc97019b
CB
2050 * Handle nesting of shiftfs mounts by referring this mark
2051 * mount back to the original mark mount. This is more
2052 * efficient and alleviates concerns about stack depth.
065efa1d 2053 */
bc97019b
CB
2054 if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2055 sbinfo_mp = lower_sb->s_fs_info;
2056
2057 /* Doesn't make sense to mark a mark mount */
2058 if (sbinfo_mp->mark) {
2059 err = -EINVAL;
2060 goto out_put_path;
2061 }
2062
2063 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2064 sbinfo->passthrough)) {
2065 err = -EPERM;
2066 goto out_put_path;
2067 }
2068
2069 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2070 dentry = dget(path.dentry->d_fsdata);
353409ee
CB
2071 /*
2072 * Copy up the passthrough mount options from the
2073 * parent mark mountpoint.
2074 */
2075 sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
e2c2d543 2076 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
bc97019b
CB
2077 } else {
2078 sbinfo->mnt = mntget(path.mnt);
2079 dentry = dget(path.dentry);
353409ee
CB
2080 /*
2081 * For a new mark passthrough_mark and passthrough
2082 * are identical.
2083 */
2084 sbinfo->passthrough_mark = sbinfo->passthrough;
bc97019b 2085
e2c2d543
CB
2086 cred_tmp = prepare_creds();
2087 if (!cred_tmp) {
2088 err = -ENOMEM;
2089 goto out_put_path;
2090 }
2091 /* Don't override disk quota limits or use reserved space. */
2092 cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2093 sbinfo->creator_cred = cred_tmp;
bc97019b
CB
2094 }
2095 } else {
2096 /*
2097 * This leg executes if we're admin capable in the namespace,
2098 * so be very careful.
2099 */
2100 err = -EPERM;
065efa1d 2101 if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
bc97019b
CB
2102 goto out_put_path;
2103
2104 sbinfo_mp = path.dentry->d_sb->s_fs_info;
2105 if (!sbinfo_mp->mark)
2106 goto out_put_path;
2107
2108 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2109 sbinfo->passthrough))
2110 goto out_put_path;
2111
2112 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2113 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
065efa1d 2114 dentry = dget(path.dentry->d_fsdata);
353409ee
CB
2115 /*
2116 * Copy up passthrough settings from mark mountpoint so we can
2117 * verify when the overlay wants to remount with different
2118 * passthrough settings.
2119 */
2120 sbinfo->passthrough_mark = sbinfo_mp->passthrough;
855e06ea 2121 shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
bc97019b
CB
2122 }
2123
2124 sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2125 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2126 printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2127 err = -EINVAL;
2128 goto out_put_path;
2129 }
2130
2131 inode = new_inode(sb);
2132 if (!inode) {
2133 err = -ENOMEM;
2134 goto out_put_path;
065efa1d 2135 }
bc97019b
CB
2136 shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2137
2138 ihold(dentry->d_inode);
2139 inode->i_private = dentry->d_inode;
2140
065efa1d 2141 sb->s_magic = SHIFTFS_MAGIC;
875846af 2142 sb->s_maxbytes = MAX_LFS_FILESIZE;
065efa1d
JB
2143 sb->s_op = &shiftfs_super_ops;
2144 sb->s_xattr = shiftfs_xattr_handlers;
2145 sb->s_d_op = &shiftfs_dentry_ops;
bc97019b
CB
2146 sb->s_root = d_make_root(inode);
2147 if (!sb->s_root) {
2148 err = -ENOMEM;
2149 goto out_put_path;
2150 }
2151
065efa1d 2152 sb->s_root->d_fsdata = dentry;
bc97019b
CB
2153 sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2154 shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
065efa1d 2155
bc97019b
CB
2156 dentry = NULL;
2157 err = 0;
065efa1d 2158
bc97019b 2159out_put_path:
065efa1d 2160 path_put(&path);
bc97019b
CB
2161
2162out_free_name:
065efa1d 2163 kfree(name);
bc97019b
CB
2164
2165 dput(dentry);
2166
065efa1d
JB
2167 return err;
2168}
2169
2170static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2171 int flags, const char *dev_name, void *data)
2172{
2173 struct shiftfs_data d = { data, dev_name };
2174
2175 return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2176}
2177
2178static struct file_system_type shiftfs_type = {
2179 .owner = THIS_MODULE,
2180 .name = "shiftfs",
2181 .mount = shiftfs_mount,
2182 .kill_sb = kill_anon_super,
2183 .fs_flags = FS_USERNS_MOUNT,
2184};
2185
2186static int __init shiftfs_init(void)
2187{
2188 return register_filesystem(&shiftfs_type);
2189}
2190
2191static void __exit shiftfs_exit(void)
2192{
2193 unregister_filesystem(&shiftfs_type);
2194}
2195
2196MODULE_ALIAS_FS("shiftfs");
2197MODULE_AUTHOR("James Bottomley");
bc97019b
CB
2198MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2199MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2200MODULE_DESCRIPTION("id shifting filesystem");
065efa1d
JB
2201MODULE_LICENSE("GPL v2");
2202module_init(shiftfs_init)
2203module_exit(shiftfs_exit)