]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - fs/shiftfs.c
i2c: octeon: check correct size of maximum RECV_LEN packet
[mirror_ubuntu-focal-kernel.git] / fs / shiftfs.c
CommitLineData
f7dfaa67 1#include <linux/btrfs.h>
2b77b5c4 2#include <linux/capability.h>
8ef17b62
JB
3#include <linux/cred.h>
4#include <linux/mount.h>
f7dfaa67 5#include <linux/fdtable.h>
8ef17b62
JB
6#include <linux/file.h>
7#include <linux/fs.h>
8#include <linux/namei.h>
9#include <linux/module.h>
10#include <linux/kernel.h>
11#include <linux/magic.h>
12#include <linux/parser.h>
2b77b5c4 13#include <linux/security.h>
8ef17b62
JB
14#include <linux/seq_file.h>
15#include <linux/statfs.h>
16#include <linux/slab.h>
17#include <linux/user_namespace.h>
18#include <linux/uidgid.h>
19#include <linux/xattr.h>
2b77b5c4
CB
20#include <linux/posix_acl.h>
21#include <linux/posix_acl_xattr.h>
22#include <linux/uio.h>
8ef17b62
JB
23
/* Per-superblock private data of a shiftfs mount, reached via sb->s_fs_info. */
struct shiftfs_super_info {
	/* mount used to build paths on the lower filesystem */
	struct vfsmount *mnt;
	/* user namespace that fs ids are shifted into for lower operations */
	struct user_namespace *userns;
	/* creds of process who created the super block */
	const struct cred *creator_cred;
	/* set when the "mark" mount option was given */
	bool mark;
	/* SHIFTFS_PASSTHROUGH_* bits from the "passthrough=" mount option */
	unsigned int passthrough;
	/* NOTE(review): presumably the passthrough bits of the marked mount */
	unsigned int passthrough_mark;
};
33
2b77b5c4
CB
34static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
35 umode_t mode, dev_t dev, struct dentry *dentry);
36
/* Bits accepted by the "passthrough=" mount option. */
#define SHIFTFS_PASSTHROUGH_NONE	0
#define SHIFTFS_PASSTHROUGH_STAT	1
#define SHIFTFS_PASSTHROUGH_IOCTL	2
/* Every passthrough bit that is currently defined. */
#define SHIFTFS_PASSTHROUGH_ALL						\
	(SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
42
43static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
44{
45 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
46 return false;
47
f7dfaa67
CB
48 return true;
49}
2b77b5c4
CB
50
51static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
52{
53 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
54 return false;
55
2b77b5c4
CB
56 return true;
57}
8ef17b62
JB
58
/* Token ids for the mount option parser. */
enum {
	OPT_MARK,		/* "mark" */
	OPT_PASSTHROUGH,	/* "passthrough=%u" */
	OPT_LAST,		/* table terminator */
};
64
65/* global filesystem options */
66static const match_table_t tokens = {
67 { OPT_MARK, "mark" },
2b77b5c4 68 { OPT_PASSTHROUGH, "passthrough=%u" },
8ef17b62
JB
69 { OPT_LAST, NULL }
70};
71
2b77b5c4 72static const struct cred *shiftfs_override_creds(const struct super_block *sb)
8ef17b62 73{
2b77b5c4 74 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
8ef17b62 75
2b77b5c4
CB
76 return override_creds(sbinfo->creator_cred);
77}
78
/*
 * Undo shiftfs_override_object_creds(): reinstate @oldcred and drop the
 * reference held on @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	revert_creds(oldcred);
	put_cred(newcred);
}
85
b674a8b8
SF
86static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
87 kuid_t kuid)
88{
89 uid_t uid = from_kuid(from, kuid);
90 return make_kuid(to, uid);
91}
92
93static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
94 kgid_t kgid)
95{
96 gid_t gid = from_kgid(from, kgid);
97 return make_kgid(to, gid);
98}
99
2b77b5c4
CB
100static int shiftfs_override_object_creds(const struct super_block *sb,
101 const struct cred **oldcred,
102 struct cred **newcred,
103 struct dentry *dentry, umode_t mode,
104 bool hardlink)
105{
b674a8b8 106 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
2b77b5c4
CB
107 kuid_t fsuid = current_fsuid();
108 kgid_t fsgid = current_fsgid();
109
110 *oldcred = shiftfs_override_creds(sb);
111
112 *newcred = prepare_creds();
113 if (!*newcred) {
114 revert_creds(*oldcred);
115 return -ENOMEM;
116 }
117
b674a8b8
SF
118 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
119 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
2b77b5c4
CB
120
121 if (!hardlink) {
122 int err = security_dentry_create_files_as(dentry, mode,
123 &dentry->d_name,
124 *oldcred, *newcred);
125 if (err) {
126 shiftfs_revert_object_creds(*oldcred, *newcred);
127 return err;
128 }
129 }
8ef17b62 130
2b77b5c4
CB
131 put_cred(override_creds(*newcred));
132 return 0;
133}
8ef17b62 134
2b77b5c4
CB
135static void shiftfs_copyattr(struct inode *from, struct inode *to)
136{
137 struct user_namespace *from_ns = from->i_sb->s_user_ns;
138 struct user_namespace *to_ns = to->i_sb->s_user_ns;
139
140 to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
141 to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
142 to->i_mode = from->i_mode;
143 to->i_atime = from->i_atime;
144 to->i_mtime = from->i_mtime;
145 to->i_ctime = from->i_ctime;
146 i_size_write(to, i_size_read(from));
147}
8ef17b62 148
2b77b5c4
CB
149static void shiftfs_copyflags(struct inode *from, struct inode *to)
150{
151 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
8ef17b62 152
2b77b5c4 153 inode_set_flags(to, from->i_flags & mask, mask);
8ef17b62
JB
154}
155
2b77b5c4 156static void shiftfs_file_accessed(struct file *file)
8ef17b62 157{
2b77b5c4
CB
158 struct inode *upperi, *loweri;
159
160 if (file->f_flags & O_NOATIME)
8ef17b62
JB
161 return;
162
2b77b5c4
CB
163 upperi = file_inode(file);
164 loweri = upperi->i_private;
165
166 if (!loweri)
167 return;
168
169 upperi->i_mtime = loweri->i_mtime;
170 upperi->i_ctime = loweri->i_ctime;
171
172 touch_atime(&file->f_path);
8ef17b62
JB
173}
174
2b77b5c4
CB
175static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
176 char *options)
8ef17b62
JB
177{
178 char *p;
179 substring_t args[MAX_OPT_ARGS];
180
2b77b5c4
CB
181 sbinfo->mark = false;
182 sbinfo->passthrough = 0;
8ef17b62
JB
183
184 while ((p = strsep(&options, ",")) != NULL) {
2b77b5c4 185 int err, intarg, token;
8ef17b62
JB
186
187 if (!*p)
188 continue;
189
190 token = match_token(p, tokens, args);
191 switch (token) {
192 case OPT_MARK:
2b77b5c4
CB
193 sbinfo->mark = true;
194 break;
195 case OPT_PASSTHROUGH:
196 err = match_int(&args[0], &intarg);
197 if (err)
198 return err;
199
200 if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
201 return -EINVAL;
202
203 sbinfo->passthrough = intarg;
8ef17b62
JB
204 break;
205 default:
206 return -EINVAL;
207 }
208 }
2b77b5c4 209
8ef17b62
JB
210 return 0;
211}
212
213static void shiftfs_d_release(struct dentry *dentry)
214{
2b77b5c4 215 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 216
2b77b5c4
CB
217 if (lowerd)
218 dput(lowerd);
8ef17b62
JB
219}
220
221static struct dentry *shiftfs_d_real(struct dentry *dentry,
222 const struct inode *inode)
223{
2b77b5c4
CB
224 struct dentry *lowerd = dentry->d_fsdata;
225
226 if (inode && d_inode(dentry) == inode)
227 return dentry;
8ef17b62 228
2b77b5c4
CB
229 lowerd = d_real(lowerd, inode);
230 if (lowerd && (!inode || inode == d_inode(lowerd)))
231 return lowerd;
8ef17b62 232
2b77b5c4
CB
233 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
234 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
235 return dentry;
8ef17b62
JB
236}
237
238static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
239{
2b77b5c4
CB
240 int err = 1;
241 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 242
5b86d13e
CB
243 if (d_is_negative(lowerd) != d_is_negative(dentry))
244 return 0;
245
246 if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
2b77b5c4 247 err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
8ef17b62 248
2b77b5c4
CB
249 if (d_really_is_positive(dentry)) {
250 struct inode *inode = d_inode(dentry);
251 struct inode *loweri = d_inode(lowerd);
252
253 shiftfs_copyattr(loweri, inode);
2b77b5c4
CB
254 }
255
256 return err;
8ef17b62
JB
257}
258
259static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
260{
2b77b5c4
CB
261 int err = 1;
262 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 263
5b86d13e
CB
264 if (d_unhashed(lowerd) ||
265 ((d_is_negative(lowerd) != d_is_negative(dentry))))
266 return 0;
267
2b77b5c4
CB
268 if (flags & LOOKUP_RCU)
269 return -ECHILD;
8ef17b62 270
5b86d13e 271 if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
2b77b5c4 272 err = lowerd->d_op->d_revalidate(lowerd, flags);
8ef17b62 273
2b77b5c4
CB
274 if (d_really_is_positive(dentry)) {
275 struct inode *inode = d_inode(dentry);
276 struct inode *loweri = d_inode(lowerd);
8ef17b62 277
2b77b5c4 278 shiftfs_copyattr(loweri, inode);
2b77b5c4 279 }
8ef17b62 280
2b77b5c4 281 return err;
8ef17b62
JB
282}
283
284static const struct dentry_operations shiftfs_dentry_ops = {
2b77b5c4
CB
285 .d_release = shiftfs_d_release,
286 .d_real = shiftfs_d_real,
287 .d_revalidate = shiftfs_d_revalidate,
8ef17b62
JB
288 .d_weak_revalidate = shiftfs_d_weak_revalidate,
289};
290
8ef17b62
JB
291static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
292 struct delayed_call *done)
293{
2b77b5c4
CB
294 const char *p;
295 const struct cred *oldcred;
296 struct dentry *lowerd;
8ef17b62 297
2b77b5c4
CB
298 /* RCU lookup not supported */
299 if (!dentry)
8ef17b62 300 return ERR_PTR(-ECHILD);
2b77b5c4
CB
301
302 lowerd = dentry->d_fsdata;
303 oldcred = shiftfs_override_creds(dentry->d_sb);
304 p = vfs_get_link(lowerd, done);
305 revert_creds(oldcred);
306
307 return p;
8ef17b62
JB
308}
309
310static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
311 const char *name, const void *value,
312 size_t size, int flags)
313{
2b77b5c4
CB
314 struct dentry *lowerd = dentry->d_fsdata;
315 int err;
316 const struct cred *oldcred;
317
318 oldcred = shiftfs_override_creds(dentry->d_sb);
319 err = vfs_setxattr(lowerd, name, value, size, flags);
320 revert_creds(oldcred);
8ef17b62 321
2b77b5c4 322 shiftfs_copyattr(lowerd->d_inode, inode);
8ef17b62
JB
323
324 return err;
325}
326
327static int shiftfs_xattr_get(const struct xattr_handler *handler,
328 struct dentry *dentry, struct inode *inode,
329 const char *name, void *value, size_t size)
330{
2b77b5c4 331 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 332 int err;
2b77b5c4 333 const struct cred *oldcred;
8ef17b62 334
2b77b5c4
CB
335 oldcred = shiftfs_override_creds(dentry->d_sb);
336 err = vfs_getxattr(lowerd, name, value, size);
337 revert_creds(oldcred);
8ef17b62
JB
338
339 return err;
340}
341
342static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
343 size_t size)
344{
2b77b5c4 345 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 346 int err;
2b77b5c4 347 const struct cred *oldcred;
8ef17b62 348
2b77b5c4
CB
349 oldcred = shiftfs_override_creds(dentry->d_sb);
350 err = vfs_listxattr(lowerd, list, size);
351 revert_creds(oldcred);
8ef17b62
JB
352
353 return err;
354}
355
356static int shiftfs_removexattr(struct dentry *dentry, const char *name)
357{
2b77b5c4 358 struct dentry *lowerd = dentry->d_fsdata;
8ef17b62 359 int err;
2b77b5c4
CB
360 const struct cred *oldcred;
361
362 oldcred = shiftfs_override_creds(dentry->d_sb);
363 err = vfs_removexattr(lowerd, name);
364 revert_creds(oldcred);
8ef17b62 365
2b77b5c4
CB
366 /* update c/mtime */
367 shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
8ef17b62
JB
368
369 return err;
370}
371
372static int shiftfs_xattr_set(const struct xattr_handler *handler,
373 struct dentry *dentry, struct inode *inode,
374 const char *name, const void *value, size_t size,
375 int flags)
376{
377 if (!value)
378 return shiftfs_removexattr(dentry, name);
379 return shiftfs_setxattr(dentry, inode, name, value, size, flags);
380}
381
2b77b5c4 382static int shiftfs_inode_test(struct inode *inode, void *data)
8ef17b62 383{
2b77b5c4
CB
384 return inode->i_private == data;
385}
8ef17b62 386
2b77b5c4
CB
387static int shiftfs_inode_set(struct inode *inode, void *data)
388{
389 inode->i_private = data;
390 return 0;
8ef17b62
JB
391}
392
2b77b5c4
CB
393static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
394 umode_t mode, const char *symlink,
395 struct dentry *hardlink, bool excl)
8ef17b62 396{
8ef17b62 397 int err;
2b77b5c4
CB
398 const struct cred *oldcred;
399 struct cred *newcred;
400 void *loweri_iop_ptr = NULL;
401 umode_t modei = mode;
402 struct super_block *dir_sb = diri->i_sb;
403 struct dentry *lowerd_new = dentry->d_fsdata;
404 struct inode *inode = NULL, *loweri_dir = diri->i_private;
405 const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
406 struct dentry *lowerd_link = NULL;
8ef17b62
JB
407
408 if (hardlink) {
2b77b5c4 409 loweri_iop_ptr = loweri_dir_iop->link;
8ef17b62
JB
410 } else {
411 switch (mode & S_IFMT) {
412 case S_IFDIR:
2b77b5c4 413 loweri_iop_ptr = loweri_dir_iop->mkdir;
8ef17b62
JB
414 break;
415 case S_IFREG:
2b77b5c4 416 loweri_iop_ptr = loweri_dir_iop->create;
8ef17b62
JB
417 break;
418 case S_IFLNK:
2b77b5c4
CB
419 loweri_iop_ptr = loweri_dir_iop->symlink;
420 break;
421 case S_IFSOCK:
422 /* fall through */
423 case S_IFIFO:
424 loweri_iop_ptr = loweri_dir_iop->mknod;
425 break;
8ef17b62
JB
426 }
427 }
2b77b5c4
CB
428 if (!loweri_iop_ptr) {
429 err = -EINVAL;
430 goto out_iput;
431 }
8ef17b62 432
2b77b5c4 433 inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
8ef17b62 434
2b77b5c4
CB
435 if (!hardlink) {
436 inode = new_inode(dir_sb);
437 if (!inode) {
438 err = -ENOMEM;
439 goto out_iput;
440 }
441
442 /*
443 * new_inode() will have added the new inode to the super
444 * block's list of inodes. Further below we will call
445 * inode_insert5() Which would perform the same operation again
446 * thereby corrupting the list. To avoid this raise I_CREATING
447 * in i_state which will cause inode_insert5() to skip this
448 * step. I_CREATING will be cleared by d_instantiate_new()
449 * below.
450 */
451 spin_lock(&inode->i_lock);
452 inode->i_state |= I_CREATING;
453 spin_unlock(&inode->i_lock);
8ef17b62 454
2b77b5c4
CB
455 inode_init_owner(inode, diri, mode);
456 modei = inode->i_mode;
457 }
8ef17b62 458
2b77b5c4
CB
459 err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
460 dentry, modei, hardlink != NULL);
461 if (err)
462 goto out_iput;
8ef17b62 463
8ef17b62 464 if (hardlink) {
2b77b5c4
CB
465 lowerd_link = hardlink->d_fsdata;
466 err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
8ef17b62 467 } else {
2b77b5c4 468 switch (modei & S_IFMT) {
8ef17b62 469 case S_IFDIR:
2b77b5c4 470 err = vfs_mkdir(loweri_dir, lowerd_new, modei);
8ef17b62
JB
471 break;
472 case S_IFREG:
2b77b5c4 473 err = vfs_create(loweri_dir, lowerd_new, modei, excl);
8ef17b62
JB
474 break;
475 case S_IFLNK:
2b77b5c4
CB
476 err = vfs_symlink(loweri_dir, lowerd_new, symlink);
477 break;
478 case S_IFSOCK:
479 /* fall through */
480 case S_IFIFO:
481 err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
482 break;
483 default:
484 err = -EINVAL;
485 break;
8ef17b62
JB
486 }
487 }
488
2b77b5c4 489 shiftfs_revert_object_creds(oldcred, newcred);
8ef17b62 490
2b77b5c4
CB
491 if (!err && WARN_ON(!lowerd_new->d_inode))
492 err = -EIO;
8ef17b62 493 if (err)
2b77b5c4
CB
494 goto out_iput;
495
496 if (hardlink) {
497 inode = d_inode(hardlink);
498 ihold(inode);
499
500 /* copy up times from lower inode */
501 shiftfs_copyattr(d_inode(lowerd_link), inode);
502 set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
503 d_instantiate(dentry, inode);
504 } else {
505 struct inode *inode_tmp;
506 struct inode *loweri_new = d_inode(lowerd_new);
507
508 inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
509 shiftfs_inode_test, shiftfs_inode_set,
510 loweri_new);
511 if (unlikely(inode_tmp != inode)) {
512 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
513 iput(inode_tmp);
514 err = -EINVAL;
515 goto out_iput;
516 }
8ef17b62 517
2b77b5c4
CB
518 ihold(loweri_new);
519 shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
520 0, lowerd_new);
521 d_instantiate_new(dentry, inode);
522 }
8ef17b62 523
2b77b5c4
CB
524 shiftfs_copyattr(loweri_dir, diri);
525 if (loweri_iop_ptr == loweri_dir_iop->mkdir)
526 set_nlink(diri, loweri_dir->i_nlink);
8ef17b62 527
2b77b5c4 528 inode = NULL;
8ef17b62 529
2b77b5c4
CB
530out_iput:
531 iput(inode);
532 inode_unlock(loweri_dir);
8ef17b62
JB
533
534 return err;
535}
536
537static int shiftfs_create(struct inode *dir, struct dentry *dentry,
538 umode_t mode, bool excl)
539{
540 mode |= S_IFREG;
541
2b77b5c4 542 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
8ef17b62
JB
543}
544
545static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
546 umode_t mode)
547{
548 mode |= S_IFDIR;
549
2b77b5c4 550 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
8ef17b62
JB
551}
552
553static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
554 struct dentry *dentry)
555{
2b77b5c4
CB
556 return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
557}
558
559static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
560 dev_t rdev)
561{
562 if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
563 return -EPERM;
564
565 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
8ef17b62
JB
566}
567
568static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
569 const char *symlink)
570{
2b77b5c4 571 return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
8ef17b62
JB
572}
573
574static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
575{
2b77b5c4
CB
576 struct dentry *lowerd = dentry->d_fsdata;
577 struct inode *loweri = dir->i_private;
3773abfa 578 struct inode *inode = d_inode(dentry);
8ef17b62 579 int err;
2b77b5c4 580 const struct cred *oldcred;
8ef17b62 581
72c36499 582 dget(lowerd);
2b77b5c4
CB
583 oldcred = shiftfs_override_creds(dentry->d_sb);
584 inode_lock_nested(loweri, I_MUTEX_PARENT);
8ef17b62 585 if (rmdir)
2b77b5c4 586 err = vfs_rmdir(loweri, lowerd);
8ef17b62 587 else
2b77b5c4 588 err = vfs_unlink(loweri, lowerd, NULL);
2b77b5c4 589 revert_creds(oldcred);
8ef17b62 590
3773abfa 591 if (!err) {
2b77b5c4
CB
592 d_drop(dentry);
593
3773abfa
CB
594 if (rmdir)
595 clear_nlink(inode);
596 else
597 drop_nlink(inode);
598 }
599 inode_unlock(loweri);
600
601 shiftfs_copyattr(loweri, dir);
72c36499 602 dput(lowerd);
8ef17b62
JB
603
604 return err;
605}
606
607static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
608{
609 return shiftfs_rm(dir, dentry, false);
610}
611
612static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
613{
614 return shiftfs_rm(dir, dentry, true);
615}
616
617static int shiftfs_rename(struct inode *olddir, struct dentry *old,
618 struct inode *newdir, struct dentry *new,
619 unsigned int flags)
620{
2b77b5c4
CB
621 struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
622 *lowerd_dir_new = new->d_parent->d_fsdata,
623 *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
624 *trapd;
625 struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
626 *loweri_dir_new = lowerd_dir_new->d_inode;
8ef17b62 627 int err = -EINVAL;
2b77b5c4 628 const struct cred *oldcred;
8ef17b62 629
2b77b5c4 630 trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
8ef17b62 631
2b77b5c4 632 if (trapd == lowerd_old || trapd == lowerd_new)
8ef17b62
JB
633 goto out_unlock;
634
2b77b5c4
CB
635 oldcred = shiftfs_override_creds(old->d_sb);
636 err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
637 NULL, flags);
638 revert_creds(oldcred);
8ef17b62 639
2b77b5c4
CB
640 shiftfs_copyattr(loweri_dir_old, olddir);
641 shiftfs_copyattr(loweri_dir_new, newdir);
8ef17b62 642
2b77b5c4
CB
643out_unlock:
644 unlock_rename(lowerd_dir_new, lowerd_dir_old);
8ef17b62
JB
645
646 return err;
647}
648
649static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
650 unsigned int flags)
651{
2b77b5c4
CB
652 struct dentry *new;
653 struct inode *newi;
654 const struct cred *oldcred;
655 struct dentry *lowerd = dentry->d_parent->d_fsdata;
656 struct inode *inode = NULL, *loweri = lowerd->d_inode;
657
658 inode_lock(loweri);
659 oldcred = shiftfs_override_creds(dentry->d_sb);
660 new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
661 revert_creds(oldcred);
662 inode_unlock(loweri);
8ef17b62
JB
663
664 if (IS_ERR(new))
665 return new;
666
667 dentry->d_fsdata = new;
668
2b77b5c4
CB
669 newi = new->d_inode;
670 if (!newi)
8ef17b62
JB
671 goto out;
672
2b77b5c4
CB
673 inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
674 shiftfs_inode_test, shiftfs_inode_set, newi);
675 if (!inode) {
8ef17b62
JB
676 dput(new);
677 return ERR_PTR(-ENOMEM);
678 }
2b77b5c4
CB
679 if (inode->i_state & I_NEW) {
680 /*
681 * inode->i_private set by shiftfs_inode_set(), but we still
682 * need to take a reference
683 */
684 ihold(newi);
685 shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
686 unlock_new_inode(inode);
687 }
8ef17b62 688
2b77b5c4
CB
689out:
690 return d_splice_alias(inode, dentry);
8ef17b62
JB
691}
692
693static int shiftfs_permission(struct inode *inode, int mask)
694{
8ef17b62 695 int err;
2b77b5c4
CB
696 const struct cred *oldcred;
697 struct inode *loweri = inode->i_private;
8ef17b62 698
2b77b5c4
CB
699 if (!loweri) {
700 WARN_ON(!(mask & MAY_NOT_BLOCK));
8ef17b62 701 return -ECHILD;
2b77b5c4 702 }
8ef17b62 703
2b77b5c4
CB
704 err = generic_permission(inode, mask);
705 if (err)
706 return err;
707
708 oldcred = shiftfs_override_creds(inode->i_sb);
709 err = inode_permission(loweri, mask);
710 revert_creds(oldcred);
711
712 return err;
713}
714
715static int shiftfs_fiemap(struct inode *inode,
716 struct fiemap_extent_info *fieinfo, u64 start,
717 u64 len)
718{
719 int err;
720 const struct cred *oldcred;
721 struct inode *loweri = inode->i_private;
722
723 if (!loweri->i_op->fiemap)
724 return -EOPNOTSUPP;
725
726 oldcred = shiftfs_override_creds(inode->i_sb);
727 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
728 filemap_write_and_wait(loweri->i_mapping);
729 err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
730 revert_creds(oldcred);
731
732 return err;
733}
734
5b86d13e
CB
735static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
736 umode_t mode)
737{
738 int err;
739 const struct cred *oldcred;
740 struct dentry *lowerd = dentry->d_fsdata;
741 struct inode *loweri = dir->i_private;
742
743 if (!loweri->i_op->tmpfile)
744 return -EOPNOTSUPP;
745
746 oldcred = shiftfs_override_creds(dir->i_sb);
747 err = loweri->i_op->tmpfile(loweri, lowerd, mode);
748 revert_creds(oldcred);
749
750 return err;
751}
752
8ef17b62
JB
753static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
754{
2b77b5c4
CB
755 struct dentry *lowerd = dentry->d_fsdata;
756 struct inode *loweri = lowerd->d_inode;
fe8eb7df 757 struct iattr newattr;
2b77b5c4 758 const struct cred *oldcred;
8ef17b62 759 struct super_block *sb = dentry->d_sb;
b674a8b8 760 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
8ef17b62
JB
761 int err;
762
2b77b5c4
CB
763 err = setattr_prepare(dentry, attr);
764 if (err)
765 return err;
766
fe8eb7df 767 newattr = *attr;
b674a8b8
SF
768 newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
769 newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
8ef17b62 770
fe8eb7df
SF
771 /*
772 * mode change is for clearing setuid/setgid bits. Allow lower fs
773 * to interpret this in its own way.
774 */
775 if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
776 newattr.ia_valid &= ~ATTR_MODE;
777
2b77b5c4
CB
778 inode_lock(loweri);
779 oldcred = shiftfs_override_creds(dentry->d_sb);
70e662da 780 err = notify_change(lowerd, &newattr, NULL);
2b77b5c4
CB
781 revert_creds(oldcred);
782 inode_unlock(loweri);
8ef17b62 783
2b77b5c4 784 shiftfs_copyattr(loweri, d_inode(dentry));
8ef17b62 785
2b77b5c4 786 return err;
8ef17b62
JB
787}
788
789static int shiftfs_getattr(const struct path *path, struct kstat *stat,
790 u32 request_mask, unsigned int query_flags)
791{
792 struct inode *inode = path->dentry->d_inode;
2b77b5c4
CB
793 struct dentry *lowerd = path->dentry->d_fsdata;
794 struct inode *loweri = lowerd->d_inode;
795 struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
796 struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
797 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
798 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
799 const struct cred *oldcred;
800 int err;
801
802 oldcred = shiftfs_override_creds(inode->i_sb);
803 err = vfs_getattr(&newpath, stat, request_mask, query_flags);
804 revert_creds(oldcred);
8ef17b62
JB
805
806 if (err)
807 return err;
808
809 /* transform the underlying id */
2b77b5c4
CB
810 stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
811 stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
8ef17b62
JB
812 return 0;
813}
814
2b77b5c4 815#ifdef CONFIG_SHIFT_FS_POSIX_ACL
8ef17b62 816
2b77b5c4
CB
817static int
818shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
819 struct posix_acl *acl)
8ef17b62 820{
2b77b5c4
CB
821 int i;
822
823 for (i = 0; i < acl->a_count; i++) {
824 struct posix_acl_entry *e = &acl->a_entries[i];
825 switch(e->e_tag) {
826 case ACL_USER:
827 e->e_uid = shift_kuid(from, to, e->e_uid);
828 if (!uid_valid(e->e_uid))
829 return -EOVERFLOW;
830 break;
831 case ACL_GROUP:
832 e->e_gid = shift_kgid(from, to, e->e_gid);
833 if (!gid_valid(e->e_gid))
834 return -EOVERFLOW;
835 break;
836 }
837 }
838 return 0;
839}
8ef17b62 840
2b77b5c4
CB
841static void
842shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
843 void *value, size_t size)
844{
845 struct posix_acl_xattr_header *header = value;
846 struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
847 int count;
848 kuid_t kuid;
849 kgid_t kgid;
8ef17b62 850
2b77b5c4
CB
851 if (!value)
852 return;
853 if (size < sizeof(struct posix_acl_xattr_header))
854 return;
855 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
856 return;
8ef17b62 857
2b77b5c4
CB
858 count = posix_acl_xattr_count(size);
859 if (count < 0)
860 return;
861 if (count == 0)
862 return;
8ef17b62 863
2b77b5c4
CB
864 for (end = entry + count; entry != end; entry++) {
865 switch(le16_to_cpu(entry->e_tag)) {
866 case ACL_USER:
867 kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
868 kuid = shift_kuid(from, to, kuid);
869 entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
870 break;
871 case ACL_GROUP:
872 kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
873 kgid = shift_kgid(from, to, kgid);
874 entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
875 break;
876 default:
877 break;
878 }
879 }
8ef17b62
JB
880}
881
2b77b5c4 882static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
8ef17b62 883{
2b77b5c4
CB
884 struct inode *loweri = inode->i_private;
885 const struct cred *oldcred;
886 struct posix_acl *lower_acl, *acl = NULL;
887 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
888 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
889 int size;
890 int err;
8ef17b62 891
2b77b5c4
CB
892 if (!IS_POSIXACL(loweri))
893 return NULL;
8ef17b62 894
2b77b5c4
CB
895 oldcred = shiftfs_override_creds(inode->i_sb);
896 lower_acl = get_acl(loweri, type);
897 revert_creds(oldcred);
8ef17b62 898
2b77b5c4
CB
899 if (lower_acl && !IS_ERR(lower_acl)) {
900 /* XXX: export posix_acl_clone? */
901 size = sizeof(struct posix_acl) +
902 lower_acl->a_count * sizeof(struct posix_acl_entry);
903 acl = kmemdup(lower_acl, size, GFP_KERNEL);
904 posix_acl_release(lower_acl);
8ef17b62 905
2b77b5c4
CB
906 if (!acl)
907 return ERR_PTR(-ENOMEM);
8ef17b62 908
2b77b5c4 909 refcount_set(&acl->a_refcount, 1);
8ef17b62 910
2b77b5c4
CB
911 err = shift_acl_ids(from_ns, to_ns, acl);
912 if (err) {
913 kfree(acl);
914 return ERR_PTR(err);
915 }
916 }
917
918 return acl;
8ef17b62
JB
919}
920
2b77b5c4
CB
921static int
922shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
923 struct dentry *dentry, struct inode *inode,
924 const char *name, void *buffer, size_t size)
8ef17b62 925{
2b77b5c4
CB
926 struct inode *loweri = inode->i_private;
927 int ret;
928
929 ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
930 buffer, size);
931 if (ret < 0)
932 return ret;
8ef17b62 933
2b77b5c4
CB
934 inode_lock(loweri);
935 shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
936 buffer, size);
937 inode_unlock(loweri);
938 return ret;
8ef17b62
JB
939}
940
2b77b5c4
CB
941static int
942shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
943 struct dentry *dentry, struct inode *inode,
944 const char *name, const void *value,
945 size_t size, int flags)
946{
947 struct inode *loweri = inode->i_private;
948 int err;
8ef17b62 949
2b77b5c4
CB
950 if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
951 return -EOPNOTSUPP;
952 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
953 return value ? -EACCES : 0;
954 if (!inode_owner_or_capable(inode))
955 return -EPERM;
956
957 if (value) {
958 shift_acl_xattr_ids(inode->i_sb->s_user_ns,
959 loweri->i_sb->s_user_ns,
960 (void *)value, size);
961 err = shiftfs_setxattr(dentry, inode, handler->name, value,
962 size, flags);
963 } else {
964 err = shiftfs_removexattr(dentry, handler->name);
965 }
8ef17b62 966
2b77b5c4
CB
967 if (!err)
968 shiftfs_copyattr(loweri, inode);
969
970 return err;
971}
972
973static const struct xattr_handler
974shiftfs_posix_acl_access_xattr_handler = {
975 .name = XATTR_NAME_POSIX_ACL_ACCESS,
976 .flags = ACL_TYPE_ACCESS,
977 .get = shiftfs_posix_acl_xattr_get,
978 .set = shiftfs_posix_acl_xattr_set,
8ef17b62
JB
979};
980
2b77b5c4
CB
981static const struct xattr_handler
982shiftfs_posix_acl_default_xattr_handler = {
983 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
984 .flags = ACL_TYPE_DEFAULT,
985 .get = shiftfs_posix_acl_xattr_get,
986 .set = shiftfs_posix_acl_xattr_set,
8ef17b62
JB
987};
988
2b77b5c4 989#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
8ef17b62 990
2b77b5c4 991#define shiftfs_get_acl NULL
8ef17b62 992
2b77b5c4 993#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
8ef17b62 994
2b77b5c4
CB
995static const struct inode_operations shiftfs_dir_inode_operations = {
996 .lookup = shiftfs_lookup,
997 .mkdir = shiftfs_mkdir,
998 .symlink = shiftfs_symlink,
999 .unlink = shiftfs_unlink,
1000 .rmdir = shiftfs_rmdir,
1001 .rename = shiftfs_rename,
1002 .link = shiftfs_link,
1003 .setattr = shiftfs_setattr,
1004 .create = shiftfs_create,
1005 .mknod = shiftfs_mknod,
1006 .permission = shiftfs_permission,
1007 .getattr = shiftfs_getattr,
1008 .listxattr = shiftfs_listxattr,
1009 .get_acl = shiftfs_get_acl,
1010};
1011
1012static const struct inode_operations shiftfs_file_inode_operations = {
1013 .fiemap = shiftfs_fiemap,
1014 .getattr = shiftfs_getattr,
1015 .get_acl = shiftfs_get_acl,
1016 .listxattr = shiftfs_listxattr,
1017 .permission = shiftfs_permission,
1018 .setattr = shiftfs_setattr,
5b86d13e 1019 .tmpfile = shiftfs_tmpfile,
2b77b5c4
CB
1020};
1021
1022static const struct inode_operations shiftfs_special_inode_operations = {
1023 .getattr = shiftfs_getattr,
1024 .get_acl = shiftfs_get_acl,
1025 .listxattr = shiftfs_listxattr,
1026 .permission = shiftfs_permission,
1027 .setattr = shiftfs_setattr,
1028};
1029
1030static const struct inode_operations shiftfs_symlink_inode_operations = {
1031 .getattr = shiftfs_getattr,
1032 .get_link = shiftfs_get_link,
1033 .listxattr = shiftfs_listxattr,
1034 .setattr = shiftfs_setattr,
1035};
1036
1037static struct file *shiftfs_open_realfile(const struct file *file,
d73a8b3d 1038 struct inode *realinode)
2b77b5c4 1039{
d73a8b3d
CB
1040 struct file *realfile;
1041 const struct cred *old_cred;
2b77b5c4 1042 struct inode *inode = file_inode(file);
d73a8b3d 1043 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
2b77b5c4 1044 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
d73a8b3d 1045 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
2b77b5c4 1046
d73a8b3d
CB
1047 old_cred = shiftfs_override_creds(inode->i_sb);
1048 realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1049 info->creator_cred);
1050 revert_creds(old_cred);
2b77b5c4 1051
d73a8b3d 1052 return realfile;
2b77b5c4
CB
1053}
1054
1055#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1056
1057static int shiftfs_change_flags(struct file *file, unsigned int flags)
1058{
1059 struct inode *inode = file_inode(file);
1060 int err;
1061
1062 /* if some flag changed that cannot be changed then something's amiss */
1063 if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1064 return -EIO;
1065
1066 flags &= SHIFTFS_SETFL_MASK;
1067
1068 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1069 return -EPERM;
1070
1071 if (flags & O_DIRECT) {
1072 if (!file->f_mapping->a_ops ||
1073 !file->f_mapping->a_ops->direct_IO)
1074 return -EINVAL;
1075 }
1076
1077 if (file->f_op->check_flags) {
1078 err = file->f_op->check_flags(flags);
1079 if (err)
1080 return err;
1081 }
1082
1083 spin_lock(&file->f_lock);
1084 file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1085 spin_unlock(&file->f_lock);
1086
1087 return 0;
1088}
1089
2b77b5c4
CB
1090static int shiftfs_open(struct inode *inode, struct file *file)
1091{
2b77b5c4 1092 struct file *realfile;
2b77b5c4 1093
d73a8b3d
CB
1094 realfile = shiftfs_open_realfile(file, inode->i_private);
1095 if (IS_ERR(realfile))
2b77b5c4 1096 return PTR_ERR(realfile);
2b77b5c4 1097
d73a8b3d 1098 file->private_data = realfile;
1c4d2a96
CB
1099 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1100 file->f_mapping = realfile->f_mapping;
1101
2b77b5c4
CB
1102 return 0;
1103}
1104
d73a8b3d 1105static int shiftfs_dir_open(struct inode *inode, struct file *file)
2b77b5c4 1106{
d73a8b3d
CB
1107 struct file *realfile;
1108 const struct cred *oldcred;
1109 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1110 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1111 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1112
1113 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1114 realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1115 info->creator_cred);
1116 revert_creds(oldcred);
1117 if (IS_ERR(realfile))
1118 return PTR_ERR(realfile);
2b77b5c4 1119
d73a8b3d 1120 file->private_data = realfile;
2b77b5c4 1121
d73a8b3d
CB
1122 return 0;
1123}
1124
1125static int shiftfs_release(struct inode *inode, struct file *file)
1126{
1127 struct file *realfile = file->private_data;
1128
1129 if (realfile)
1130 fput(realfile);
2b77b5c4
CB
1131
1132 return 0;
1133}
1134
d73a8b3d
CB
/* ->release() for directories; identical to the regular file variant. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	int err = shiftfs_release(inode, file);

	return err;
}
1139
c106770a
CB
1140static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1141{
d73a8b3d 1142 struct file *realfile = file->private_data;
c106770a
CB
1143
1144 return vfs_llseek(realfile, offset, whence);
1145}
1146
1147static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
2b77b5c4
CB
1148{
1149 struct inode *realinode = file_inode(file)->i_private;
1150
1151 return generic_file_llseek_size(file, offset, whence,
1152 realinode->i_sb->s_maxbytes,
1153 i_size_read(realinode));
1154}
1155
/* XXX: Need to figure out what to do about atime updates, maybe other
 * timestamps too ... ref. ovl_file_accessed() */
1158
1159static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1160{
1161 int ifl = iocb->ki_flags;
1162 rwf_t flags = 0;
1163
1164 if (ifl & IOCB_NOWAIT)
1165 flags |= RWF_NOWAIT;
1166 if (ifl & IOCB_HIPRI)
1167 flags |= RWF_HIPRI;
1168 if (ifl & IOCB_DSYNC)
1169 flags |= RWF_DSYNC;
1170 if (ifl & IOCB_SYNC)
1171 flags |= RWF_SYNC;
1172
1173 return flags;
1174}
1175
65081024
CB
1176static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1177{
1178 struct file *realfile;
1179
1180 if (file->f_op->open != shiftfs_open &&
1181 file->f_op->open != shiftfs_dir_open)
1182 return -EINVAL;
1183
1184 realfile = file->private_data;
1185 lowerfd->flags = 0;
1186 lowerfd->file = realfile;
1187
1188 /* Did the flags change since open? */
1189 if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1190 return shiftfs_change_flags(lowerfd->file, file->f_flags);
1191
1192 return 0;
1193}
1194
2b77b5c4
CB
1195static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1196{
1197 struct file *file = iocb->ki_filp;
1198 struct fd lowerfd;
1199 const struct cred *oldcred;
1200 ssize_t ret;
1201
1202 if (!iov_iter_count(iter))
1203 return 0;
1204
1205 ret = shiftfs_real_fdget(file, &lowerfd);
1206 if (ret)
1207 return ret;
1208
1209 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1210 ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1211 shiftfs_iocb_to_rwf(iocb));
1212 revert_creds(oldcred);
1213
1214 shiftfs_file_accessed(file);
1215
1216 fdput(lowerfd);
1217 return ret;
1218}
1219
1220static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1221{
1222 struct file *file = iocb->ki_filp;
1223 struct inode *inode = file_inode(file);
1224 struct fd lowerfd;
1225 const struct cred *oldcred;
1226 ssize_t ret;
1227
1228 if (!iov_iter_count(iter))
1229 return 0;
1230
1231 inode_lock(inode);
1232 /* Update mode */
1233 shiftfs_copyattr(inode->i_private, inode);
1234 ret = file_remove_privs(file);
1235 if (ret)
1236 goto out_unlock;
1237
1238 ret = shiftfs_real_fdget(file, &lowerfd);
1239 if (ret)
1240 goto out_unlock;
1241
1242 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1243 file_start_write(lowerfd.file);
1244 ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1245 shiftfs_iocb_to_rwf(iocb));
1246 file_end_write(lowerfd.file);
1247 revert_creds(oldcred);
1248
1249 /* Update size */
1250 shiftfs_copyattr(inode->i_private, inode);
1251
1252 fdput(lowerfd);
1253
1254out_unlock:
1255 inode_unlock(inode);
1256 return ret;
1257}
1258
1259static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1260 int datasync)
1261{
1262 struct fd lowerfd;
1263 const struct cred *oldcred;
1264 int ret;
1265
1266 ret = shiftfs_real_fdget(file, &lowerfd);
1267 if (ret)
1268 return ret;
1269
1270 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1271 ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1272 revert_creds(oldcred);
1273
1274 fdput(lowerfd);
1275 return ret;
1276}
1277
1278static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1279{
d73a8b3d 1280 struct file *realfile = file->private_data;
2b77b5c4
CB
1281 const struct cred *oldcred;
1282 int ret;
1283
1284 if (!realfile->f_op->mmap)
1285 return -ENODEV;
1286
1287 if (WARN_ON(file != vma->vm_file))
1288 return -EIO;
1289
1290 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1291 vma->vm_file = get_file(realfile);
1292 ret = call_mmap(vma->vm_file, vma);
1293 revert_creds(oldcred);
1294
1295 shiftfs_file_accessed(file);
1296
616d0ac0
SF
1297 if (ret) {
1298 /*
1299 * Drop refcount from new vm_file value and restore original
1300 * vm_file value
1301 */
1302 vma->vm_file = file;
1303 fput(realfile);
1304 } else {
1305 /* Drop refcount from previous vm_file value */
1306 fput(file);
1307 }
2b77b5c4
CB
1308
1309 return ret;
1310}
1311
1312static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1313 loff_t len)
1314{
1315 struct inode *inode = file_inode(file);
1316 struct inode *loweri = inode->i_private;
1317 struct fd lowerfd;
1318 const struct cred *oldcred;
1319 int ret;
1320
1321 ret = shiftfs_real_fdget(file, &lowerfd);
1322 if (ret)
1323 return ret;
1324
1325 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1326 ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1327 revert_creds(oldcred);
1328
1329 /* Update size */
1330 shiftfs_copyattr(loweri, inode);
1331
1332 fdput(lowerfd);
1333 return ret;
1334}
1335
1336static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1337 int advice)
1338{
1339 struct fd lowerfd;
1340 const struct cred *oldcred;
1341 int ret;
1342
1343 ret = shiftfs_real_fdget(file, &lowerfd);
1344 if (ret)
1345 return ret;
1346
1347 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1348 ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1349 revert_creds(oldcred);
1350
1351 fdput(lowerfd);
1352 return ret;
1353}
1354
ff75469d 1355static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
2b77b5c4
CB
1356 const struct cred **oldcred,
1357 struct cred **newcred)
1358{
b674a8b8 1359 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
2b77b5c4
CB
1360 kuid_t fsuid = current_fsuid();
1361 kgid_t fsgid = current_fsgid();
1362
1363 *oldcred = shiftfs_override_creds(sb);
1364
1365 *newcred = prepare_creds();
1366 if (!*newcred) {
1367 revert_creds(*oldcred);
1368 return -ENOMEM;
1369 }
1370
b674a8b8
SF
1371 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1372 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
2b77b5c4
CB
1373
1374 /* clear all caps to prevent bypassing capable() checks */
1375 cap_clear((*newcred)->cap_bset);
1376 cap_clear((*newcred)->cap_effective);
1377 cap_clear((*newcred)->cap_inheritable);
1378 cap_clear((*newcred)->cap_permitted);
1379
ff75469d
CB
1380 if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1381 kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1382 /*
1383 * Allow the root user in the container to remove subvolumes
1384 * from other users.
1385 */
1386 if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1387 cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1388 }
1389
2b77b5c4
CB
1390 put_cred(override_creds(*newcred));
1391 return 0;
1392}
1393
/* Undo shiftfs_override_ioctl_creds(); thin alias for the object variant. */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1399
f7dfaa67
CB
1400static inline bool is_btrfs_snap_ioctl(int cmd)
1401{
1402 if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1403 return true;
1404
1405 return false;
1406}
1407
25d42169 1408static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
f7dfaa67
CB
1409 struct btrfs_ioctl_vol_args *v1,
1410 struct btrfs_ioctl_vol_args_v2 *v2)
1411{
1412 int ret;
1413
1414 if (!is_btrfs_snap_ioctl(cmd))
1415 return 0;
1416
1417 if (cmd == BTRFS_IOC_SNAP_CREATE)
1418 ret = copy_to_user(arg, v1, sizeof(*v1));
1419 else
1420 ret = copy_to_user(arg, v2, sizeof(*v2));
1421
f7dfaa67
CB
1422 __close_fd(current->files, fd);
1423 kfree(v1);
1424 kfree(v2);
1425
1426 return ret;
1427}
1428
1429static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1430 struct btrfs_ioctl_vol_args **b1,
1431 struct btrfs_ioctl_vol_args_v2 **b2,
f7dfaa67
CB
1432 int *newfd)
1433{
1434 int oldfd, ret;
1435 struct fd src;
25d42169 1436 struct fd lfd = {};
f7dfaa67
CB
1437 struct btrfs_ioctl_vol_args *v1 = NULL;
1438 struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1439
1440 if (!is_btrfs_snap_ioctl(cmd))
1441 return 0;
1442
1443 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1444 v1 = memdup_user(arg, sizeof(*v1));
1445 if (IS_ERR(v1))
1446 return PTR_ERR(v1);
1447 oldfd = v1->fd;
1448 *b1 = v1;
1449 } else {
1450 v2 = memdup_user(arg, sizeof(*v2));
1451 if (IS_ERR(v2))
1452 return PTR_ERR(v2);
1453 oldfd = v2->fd;
1454 *b2 = v2;
1455 }
1456
1457 src = fdget(oldfd);
1458 if (!src.file)
1459 return -EINVAL;
1460
25d42169
SF
1461 ret = shiftfs_real_fdget(src.file, &lfd);
1462 if (ret) {
1463 fdput(src);
f7dfaa67 1464 return ret;
25d42169
SF
1465 }
1466
1467 /*
1468 * shiftfs_real_fdget() does not take a reference to lfd.file, so
1469 * take a reference here to offset the one which will be put by
1470 * __close_fd(), and make sure that reference is put on fdput(lfd).
1471 */
1472 get_file(lfd.file);
1473 lfd.flags |= FDPUT_FPUT;
1474 fdput(src);
f7dfaa67 1475
25d42169 1476 *newfd = get_unused_fd_flags(lfd.file->f_flags);
f7dfaa67 1477 if (*newfd < 0) {
25d42169 1478 fdput(lfd);
f7dfaa67
CB
1479 return *newfd;
1480 }
1481
25d42169 1482 fd_install(*newfd, lfd.file);
f7dfaa67
CB
1483
1484 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1485 v1->fd = *newfd;
1486 ret = copy_to_user(arg, v1, sizeof(*v1));
1487 v1->fd = oldfd;
1488 } else {
1489 v2->fd = *newfd;
1490 ret = copy_to_user(arg, v2, sizeof(*v2));
1491 v2->fd = oldfd;
1492 }
1493
1494 if (ret)
25d42169 1495 shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
f7dfaa67
CB
1496
1497 return ret;
1498}
1499
2b77b5c4
CB
1500static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1501 unsigned long arg)
1502{
2b77b5c4
CB
1503 struct fd lowerfd;
1504 struct cred *newcred;
1505 const struct cred *oldcred;
f7dfaa67
CB
1506 int newfd = -EBADF;
1507 long err = 0, ret = 0;
1508 void __user *argp = (void __user *)arg;
2b77b5c4 1509 struct super_block *sb = file->f_path.dentry->d_sb;
f7dfaa67
CB
1510 struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1511 struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1512
1513 ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
25d42169 1514 &newfd);
f7dfaa67
CB
1515 if (ret < 0)
1516 return ret;
2b77b5c4
CB
1517
1518 ret = shiftfs_real_fdget(file, &lowerfd);
1519 if (ret)
f7dfaa67 1520 goto out_restore;
2b77b5c4 1521
ff75469d 1522 ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
2b77b5c4
CB
1523 if (ret)
1524 goto out_fdput;
1525
1526 ret = vfs_ioctl(lowerfd.file, cmd, arg);
1527
1528 shiftfs_revert_ioctl_creds(oldcred, newcred);
1529
1530 shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1531 shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1532
1533out_fdput:
1534 fdput(lowerfd);
1535
f7dfaa67 1536out_restore:
25d42169 1537 err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
f7dfaa67
CB
1538 btrfs_v1, btrfs_v2);
1539 if (!ret)
1540 ret = err;
1541
2b77b5c4
CB
1542 return ret;
1543}
1544
43e11811 1545static bool in_ioctl_whitelist(int flag, unsigned long arg)
f7dfaa67 1546{
43e11811
CB
1547 void __user *argp = (void __user *)arg;
1548 u64 flags = 0;
1549
f7dfaa67 1550 switch (flag) {
43e11811
CB
1551 case BTRFS_IOC_FS_INFO:
1552 return true;
f7dfaa67
CB
1553 case BTRFS_IOC_SNAP_CREATE:
1554 return true;
1555 case BTRFS_IOC_SNAP_CREATE_V2:
1556 return true;
1557 case BTRFS_IOC_SUBVOL_CREATE:
1558 return true;
1559 case BTRFS_IOC_SUBVOL_CREATE_V2:
43e11811
CB
1560 return true;
1561 case BTRFS_IOC_SUBVOL_GETFLAGS:
1562 return true;
1563 case BTRFS_IOC_SUBVOL_SETFLAGS:
786af19d 1564 if (copy_from_user(&flags, argp, sizeof(flags)))
43e11811
CB
1565 return false;
1566
1567 if (flags & ~BTRFS_SUBVOL_RDONLY)
1568 return false;
1569
f7dfaa67
CB
1570 return true;
1571 case BTRFS_IOC_SNAP_DESTROY:
1572 return true;
1573 }
1574
1575 return false;
1576}
1577
2b77b5c4
CB
1578static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1579 unsigned long arg)
1580{
1581 switch (cmd) {
1582 case FS_IOC_GETVERSION:
1583 /* fall through */
1584 case FS_IOC_GETFLAGS:
1585 /* fall through */
1586 case FS_IOC_SETFLAGS:
1587 break;
1588 default:
43e11811 1589 if (!in_ioctl_whitelist(cmd, arg) ||
f7dfaa67
CB
1590 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1591 return -ENOTTY;
2b77b5c4
CB
1592 }
1593
1594 return shiftfs_real_ioctl(file, cmd, arg);
1595}
1596
1597static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1598 unsigned long arg)
1599{
1600 switch (cmd) {
1601 case FS_IOC32_GETVERSION:
1602 /* fall through */
1603 case FS_IOC32_GETFLAGS:
1604 /* fall through */
1605 case FS_IOC32_SETFLAGS:
1606 break;
1607 default:
43e11811 1608 if (!in_ioctl_whitelist(cmd, arg) ||
f7dfaa67
CB
1609 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1610 return -ENOIOCTLCMD;
2b77b5c4
CB
1611 }
1612
1613 return shiftfs_real_ioctl(file, cmd, arg);
1614}
1615
/* Selects which VFS range operation shiftfs_copyfile() performs. */
enum shiftfs_copyop {
	SHIFTFS_COPY = 0,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE = 1,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE = 2,	/* vfs_dedupe_file_range_one() */
};
1621
1622static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1623 struct file *file_out, loff_t pos_out, u64 len,
1624 unsigned int flags, enum shiftfs_copyop op)
1625{
1626 ssize_t ret;
1627 struct fd real_in, real_out;
1628 const struct cred *oldcred;
1629 struct inode *inode_out = file_inode(file_out);
1630 struct inode *loweri = inode_out->i_private;
1631
1632 ret = shiftfs_real_fdget(file_out, &real_out);
1633 if (ret)
1634 return ret;
1635
1636 ret = shiftfs_real_fdget(file_in, &real_in);
1637 if (ret) {
1638 fdput(real_out);
1639 return ret;
1640 }
1641
1642 oldcred = shiftfs_override_creds(inode_out->i_sb);
1643 switch (op) {
1644 case SHIFTFS_COPY:
1645 ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1646 pos_out, len, flags);
1647 break;
1648
1649 case SHIFTFS_CLONE:
1650 ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1651 pos_out, len, flags);
1652 break;
1653
1654 case SHIFTFS_DEDUPE:
1655 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1656 real_out.file, pos_out, len,
1657 flags);
1658 break;
1659 }
1660 revert_creds(oldcred);
1661
1662 /* Update size */
1663 shiftfs_copyattr(loweri, inode_out);
1664
1665 fdput(real_in);
1666 fdput(real_out);
1667
1668 return ret;
1669}
1670
1671static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1672 struct file *file_out, loff_t pos_out,
1673 size_t len, unsigned int flags)
1674{
1675 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1676 SHIFTFS_COPY);
1677}
1678
1679static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1680 struct file *file_out, loff_t pos_out,
1681 loff_t len, unsigned int remap_flags)
1682{
1683 enum shiftfs_copyop op;
1684
1685 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1686 return -EINVAL;
1687
1688 if (remap_flags & REMAP_FILE_DEDUP)
1689 op = SHIFTFS_DEDUPE;
1690 else
1691 op = SHIFTFS_CLONE;
1692
1693 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1694 remap_flags, op);
1695}
1696
1697static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1698{
1699 const struct cred *oldcred;
1700 int err = -ENOTDIR;
d73a8b3d 1701 struct file *realfile = file->private_data;
2b77b5c4
CB
1702
1703 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1704 err = iterate_dir(realfile, ctx);
1705 revert_creds(oldcred);
1706
1707 return err;
1708}
1709
1710const struct file_operations shiftfs_file_operations = {
1711 .open = shiftfs_open,
1712 .release = shiftfs_release,
c106770a 1713 .llseek = shiftfs_file_llseek,
2b77b5c4
CB
1714 .read_iter = shiftfs_read_iter,
1715 .write_iter = shiftfs_write_iter,
1716 .fsync = shiftfs_fsync,
1717 .mmap = shiftfs_mmap,
1718 .fallocate = shiftfs_fallocate,
1719 .fadvise = shiftfs_fadvise,
1720 .unlocked_ioctl = shiftfs_ioctl,
1721 .compat_ioctl = shiftfs_compat_ioctl,
1722 .copy_file_range = shiftfs_copy_file_range,
1723 .remap_file_range = shiftfs_remap_file_range,
1724};
1725
1726const struct file_operations shiftfs_dir_operations = {
d73a8b3d
CB
1727 .open = shiftfs_dir_open,
1728 .release = shiftfs_dir_release,
2b77b5c4
CB
1729 .compat_ioctl = shiftfs_compat_ioctl,
1730 .fsync = shiftfs_fsync,
1731 .iterate_shared = shiftfs_iterate_shared,
c106770a 1732 .llseek = shiftfs_dir_llseek,
2b77b5c4 1733 .read = generic_read_dir,
2b77b5c4
CB
1734 .unlocked_ioctl = shiftfs_ioctl,
1735};
1736
1737static const struct address_space_operations shiftfs_aops = {
1738 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1739 .direct_IO = noop_direct_IO,
1740};
1741
1742static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1743 umode_t mode, dev_t dev, struct dentry *dentry)
1744{
1745 struct inode *loweri;
1746
1747 inode->i_ino = ino;
1748 inode->i_flags |= S_NOCMTIME;
1749
1750 mode &= S_IFMT;
1751 inode->i_mode = mode;
1752 switch (mode & S_IFMT) {
1753 case S_IFDIR:
1754 inode->i_op = &shiftfs_dir_inode_operations;
1755 inode->i_fop = &shiftfs_dir_operations;
1756 break;
1757 case S_IFLNK:
1758 inode->i_op = &shiftfs_symlink_inode_operations;
1759 break;
1760 case S_IFREG:
1761 inode->i_op = &shiftfs_file_inode_operations;
1762 inode->i_fop = &shiftfs_file_operations;
1763 inode->i_mapping->a_ops = &shiftfs_aops;
1764 break;
1765 default:
1766 inode->i_op = &shiftfs_special_inode_operations;
1767 init_special_inode(inode, mode, dev);
1768 break;
1769 }
1770
1771 if (!dentry)
1772 return;
1773
1774 loweri = dentry->d_inode;
1775 if (!loweri->i_op->get_link)
1776 inode->i_opflags |= IOP_NOFOLLOW;
1777
1778 shiftfs_copyattr(loweri, inode);
1779 shiftfs_copyflags(loweri, inode);
1780 set_nlink(inode, loweri->i_nlink);
1781}
1782
1783static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1784{
1785 struct super_block *sb = dentry->d_sb;
1786 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1787
1788 if (sbinfo->mark)
1789 seq_show_option(m, "mark", NULL);
1790
1791 if (sbinfo->passthrough)
1792 seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1793
1794 return 0;
1795}
1796
1797static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1798{
1799 struct super_block *sb = dentry->d_sb;
1800 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1801 struct dentry *root = sb->s_root;
1802 struct dentry *realroot = root->d_fsdata;
1803 struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1804 int err;
1805
1806 err = vfs_statfs(&realpath, buf);
8ef17b62 1807 if (err)
2b77b5c4 1808 return err;
8ef17b62 1809
2b77b5c4
CB
1810 if (!shiftfs_passthrough_statfs(sbinfo))
1811 buf->f_type = sb->s_magic;
8ef17b62 1812
2b77b5c4
CB
1813 return 0;
1814}
8ef17b62 1815
2b77b5c4
CB
1816static void shiftfs_evict_inode(struct inode *inode)
1817{
1818 struct inode *loweri = inode->i_private;
1819
1820 clear_inode(inode);
1821
1822 if (loweri)
1823 iput(loweri);
1824}
1825
1826static void shiftfs_put_super(struct super_block *sb)
1827{
1828 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1829
1830 if (sbinfo) {
1831 mntput(sbinfo->mnt);
1832 put_cred(sbinfo->creator_cred);
1833 kfree(sbinfo);
1834 }
1835}
1836
1837static const struct xattr_handler shiftfs_xattr_handler = {
1838 .prefix = "",
1839 .get = shiftfs_xattr_get,
1840 .set = shiftfs_xattr_set,
1841};
1842
1843const struct xattr_handler *shiftfs_xattr_handlers[] = {
1844#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1845 &shiftfs_posix_acl_access_xattr_handler,
1846 &shiftfs_posix_acl_default_xattr_handler,
1847#endif
1848 &shiftfs_xattr_handler,
1849 NULL
1850};
1851
/*
 * Return true when every bit set in @new_flags is also set in @old_flags,
 * i.e. @new_flags is a subset of @old_flags.
 */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (new_flags & old_flags) == new_flags;
}
1859
63d6c068
CB
1860static int shiftfs_super_check_flags(unsigned long old_flags,
1861 unsigned long new_flags)
1862{
1863 if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1864 return -EPERM;
1865
1866 if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1867 return -EPERM;
1868
1869 if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1870 return -EPERM;
1871
1872 if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1873 return -EPERM;
1874
1875 if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1876 return -EPERM;
1877
1878 if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1879 return -EPERM;
1880
1881 if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1882 return -EPERM;
1883
1884 return 0;
1885}
1886
2b77b5c4
CB
1887static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1888{
1889 int err;
1890 struct shiftfs_super_info new = {};
1891 struct shiftfs_super_info *info = sb->s_fs_info;
1892
1893 err = shiftfs_parse_mount_options(&new, data);
8ef17b62 1894 if (err)
2b77b5c4
CB
1895 return err;
1896
63d6c068
CB
1897 err = shiftfs_super_check_flags(sb->s_flags, *flags);
1898 if (err)
1899 return err;
1900
2b77b5c4
CB
1901 /* Mark mount option cannot be changed. */
1902 if (info->mark || (info->mark != new.mark))
1903 return -EPERM;
1904
1905 if (info->passthrough != new.passthrough) {
1906 /* Don't allow exceeding passthrough options of mark mount. */
b4c9cb0c 1907 if (!passthrough_is_subset(info->passthrough_mark,
2b77b5c4
CB
1908 info->passthrough))
1909 return -EPERM;
1910
1911 info->passthrough = new.passthrough;
1912 }
1913
1914 return 0;
1915}
8ef17b62 1916
2b77b5c4
CB
1917static const struct super_operations shiftfs_super_ops = {
1918 .put_super = shiftfs_put_super,
1919 .show_options = shiftfs_show_options,
1920 .statfs = shiftfs_statfs,
1921 .remount_fs = shiftfs_remount,
1922 .evict_inode = shiftfs_evict_inode,
1923};
1924
/*
 * Argument bundle handed from shiftfs_mount() to shiftfs_fill_super()
 * through mount_nodev().
 */
struct shiftfs_data {
	void *data;		/* raw mount option data */
	const char *path;	/* mount source (dev_name): path to shift */
};
1929
63d6c068
CB
1930static void shiftfs_super_force_flags(struct super_block *sb,
1931 unsigned long lower_flags)
1932{
1933 sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1934 SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1935
1936 if (!(lower_flags & SB_POSIXACL))
1937 sb->s_flags &= ~SB_POSIXACL;
1938}
1939
2b77b5c4
CB
1940static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1941 int silent)
1942{
1943 int err;
1944 struct path path = {};
1945 struct shiftfs_super_info *sbinfo_mp;
1946 char *name = NULL;
1947 struct inode *inode = NULL;
1948 struct dentry *dentry = NULL;
1949 struct shiftfs_data *data = raw_data;
1950 struct shiftfs_super_info *sbinfo = NULL;
1951
1952 if (!data->path)
1953 return -EINVAL;
1954
1955 sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1956 if (!sb->s_fs_info)
1957 return -ENOMEM;
1958 sbinfo = sb->s_fs_info;
1959
1960 err = shiftfs_parse_mount_options(sbinfo, data->data);
1961 if (err)
1962 return err;
1963
1964 /* to mount a mark, must be userns admin */
1965 if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
1966 return -EPERM;
1967
1968 name = kstrdup(data->path, GFP_KERNEL);
1969 if (!name)
1970 return -ENOMEM;
1971
1972 err = kern_path(name, LOOKUP_FOLLOW, &path);
1973 if (err)
1974 goto out_free_name;
8ef17b62
JB
1975
1976 if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
1977 err = -ENOTDIR;
2b77b5c4 1978 goto out_put_path;
8ef17b62
JB
1979 }
1980
63d6c068
CB
1981 sb->s_flags |= SB_POSIXACL;
1982
2b77b5c4 1983 if (sbinfo->mark) {
700720b3 1984 struct cred *cred_tmp;
2b77b5c4
CB
1985 struct super_block *lower_sb = path.mnt->mnt_sb;
1986
1987 /* to mark a mount point, must root wrt lower s_user_ns */
1988 if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
1989 err = -EPERM;
1990 goto out_put_path;
1991 }
8ef17b62 1992
8ef17b62
JB
1993 /*
1994 * this part is visible unshifted, so make sure no
1995 * executables that could be used to give suid
1996 * privileges
1997 */
1998 sb->s_iflags = SB_I_NOEXEC;
8ef17b62 1999
63d6c068
CB
2000 shiftfs_super_force_flags(sb, lower_sb->s_flags);
2001
8ef17b62 2002 /*
2b77b5c4
CB
2003 * Handle nesting of shiftfs mounts by referring this mark
2004 * mount back to the original mark mount. This is more
2005 * efficient and alleviates concerns about stack depth.
8ef17b62 2006 */
2b77b5c4
CB
2007 if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2008 sbinfo_mp = lower_sb->s_fs_info;
2009
2010 /* Doesn't make sense to mark a mark mount */
2011 if (sbinfo_mp->mark) {
2012 err = -EINVAL;
2013 goto out_put_path;
2014 }
2015
2016 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2017 sbinfo->passthrough)) {
2018 err = -EPERM;
2019 goto out_put_path;
2020 }
2021
2022 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2023 dentry = dget(path.dentry->d_fsdata);
b4c9cb0c
CB
2024 /*
2025 * Copy up the passthrough mount options from the
2026 * parent mark mountpoint.
2027 */
2028 sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
629edd70 2029 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2b77b5c4
CB
2030 } else {
2031 sbinfo->mnt = mntget(path.mnt);
2032 dentry = dget(path.dentry);
b4c9cb0c
CB
2033 /*
2034 * For a new mark passthrough_mark and passthrough
2035 * are identical.
2036 */
2037 sbinfo->passthrough_mark = sbinfo->passthrough;
2b77b5c4 2038
629edd70
CB
2039 cred_tmp = prepare_creds();
2040 if (!cred_tmp) {
2041 err = -ENOMEM;
2042 goto out_put_path;
2043 }
2044 /* Don't override disk quota limits or use reserved space. */
2045 cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2046 sbinfo->creator_cred = cred_tmp;
2b77b5c4
CB
2047 }
2048 } else {
2049 /*
2050 * This leg executes if we're admin capable in the namespace,
2051 * so be very careful.
2052 */
2053 err = -EPERM;
8ef17b62 2054 if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2b77b5c4
CB
2055 goto out_put_path;
2056
2057 sbinfo_mp = path.dentry->d_sb->s_fs_info;
2058 if (!sbinfo_mp->mark)
2059 goto out_put_path;
2060
2061 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2062 sbinfo->passthrough))
2063 goto out_put_path;
2064
2065 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2066 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
8ef17b62 2067 dentry = dget(path.dentry->d_fsdata);
b4c9cb0c
CB
2068 /*
2069 * Copy up passthrough settings from mark mountpoint so we can
2070 * verify when the overlay wants to remount with different
2071 * passthrough settings.
2072 */
2073 sbinfo->passthrough_mark = sbinfo_mp->passthrough;
63d6c068 2074 shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2b77b5c4
CB
2075 }
2076
2077 sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2078 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2079 printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2080 err = -EINVAL;
2081 goto out_put_path;
2082 }
2083
2084 inode = new_inode(sb);
2085 if (!inode) {
2086 err = -ENOMEM;
2087 goto out_put_path;
8ef17b62 2088 }
2b77b5c4
CB
2089 shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2090
2091 ihold(dentry->d_inode);
2092 inode->i_private = dentry->d_inode;
2093
8ef17b62 2094 sb->s_magic = SHIFTFS_MAGIC;
3340be2b 2095 sb->s_maxbytes = MAX_LFS_FILESIZE;
8ef17b62
JB
2096 sb->s_op = &shiftfs_super_ops;
2097 sb->s_xattr = shiftfs_xattr_handlers;
2098 sb->s_d_op = &shiftfs_dentry_ops;
2b77b5c4
CB
2099 sb->s_root = d_make_root(inode);
2100 if (!sb->s_root) {
2101 err = -ENOMEM;
2102 goto out_put_path;
2103 }
2104
8ef17b62 2105 sb->s_root->d_fsdata = dentry;
2b77b5c4
CB
2106 sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2107 shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
8ef17b62 2108
2b77b5c4
CB
2109 dentry = NULL;
2110 err = 0;
8ef17b62 2111
2b77b5c4 2112out_put_path:
8ef17b62 2113 path_put(&path);
2b77b5c4
CB
2114
2115out_free_name:
8ef17b62 2116 kfree(name);
2b77b5c4
CB
2117
2118 dput(dentry);
2119
8ef17b62
JB
2120 return err;
2121}
2122
2123static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2124 int flags, const char *dev_name, void *data)
2125{
2126 struct shiftfs_data d = { data, dev_name };
2127
2128 return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2129}
2130
2131static struct file_system_type shiftfs_type = {
2132 .owner = THIS_MODULE,
2133 .name = "shiftfs",
2134 .mount = shiftfs_mount,
2135 .kill_sb = kill_anon_super,
2136 .fs_flags = FS_USERNS_MOUNT,
2137};
2138
2139static int __init shiftfs_init(void)
2140{
2141 return register_filesystem(&shiftfs_type);
2142}
2143
2144static void __exit shiftfs_exit(void)
2145{
2146 unregister_filesystem(&shiftfs_type);
2147}
2148
2149MODULE_ALIAS_FS("shiftfs");
2150MODULE_AUTHOR("James Bottomley");
2b77b5c4
CB
2151MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2152MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2153MODULE_DESCRIPTION("id shifting filesystem");
8ef17b62
JB
2154MODULE_LICENSE("GPL v2");
2155module_init(shiftfs_init)
2156module_exit(shiftfs_exit)