]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - fs/overlayfs/copy_up.c
UBUNTU: [Config] arm64: snapdragon: SND*=m
[mirror_ubuntu-bionic-kernel.git] / fs / overlayfs / copy_up.c
1 /*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10 #include <linux/module.h>
11 #include <linux/fs.h>
12 #include <linux/slab.h>
13 #include <linux/file.h>
14 #include <linux/splice.h>
15 #include <linux/xattr.h>
16 #include <linux/security.h>
17 #include <linux/uaccess.h>
18 #include <linux/sched/signal.h>
19 #include <linux/cred.h>
20 #include <linux/namei.h>
21 #include <linux/fdtable.h>
22 #include <linux/ratelimit.h>
23 #include <linux/exportfs.h>
24 #include "overlayfs.h"
25
26 #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
27
28 static bool __read_mostly ovl_check_copy_up;
29 module_param_named(check_copy_up, ovl_check_copy_up, bool,
30 S_IWUSR | S_IRUGO);
31 MODULE_PARM_DESC(ovl_check_copy_up,
32 "Warn on copy-up when causing process also has a R/O fd open");
33
34 static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
35 {
36 const struct dentry *dentry = data;
37
38 if (file_inode(f) == d_inode(dentry))
39 pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
40 f, fd, current->pid, current->comm);
41 return 0;
42 }
43
44 /*
45 * Check the fds open by this process and warn if something like the following
46 * scenario is about to occur:
47 *
48 * fd1 = open("foo", O_RDONLY);
49 * fd2 = open("foo", O_RDWR);
50 */
51 static void ovl_do_check_copy_up(struct dentry *dentry)
52 {
53 if (ovl_check_copy_up)
54 iterate_fd(current->files, 0, ovl_check_fd, dentry);
55 }
56
57 int ovl_copy_xattr(struct dentry *old, struct dentry *new)
58 {
59 ssize_t list_size, size, value_size = 0;
60 char *buf, *name, *value = NULL;
61 int uninitialized_var(error);
62 size_t slen;
63
64 if (!(old->d_inode->i_opflags & IOP_XATTR) ||
65 !(new->d_inode->i_opflags & IOP_XATTR))
66 return 0;
67
68 list_size = vfs_listxattr(old, NULL, 0);
69 if (list_size <= 0) {
70 if (list_size == -EOPNOTSUPP)
71 return 0;
72 return list_size;
73 }
74
75 buf = kzalloc(list_size, GFP_KERNEL);
76 if (!buf)
77 return -ENOMEM;
78
79 list_size = vfs_listxattr(old, buf, list_size);
80 if (list_size <= 0) {
81 error = list_size;
82 goto out;
83 }
84
85 for (name = buf; list_size; name += slen) {
86 slen = strnlen(name, list_size) + 1;
87
88 /* underlying fs providing us with an broken xattr list? */
89 if (WARN_ON(slen > list_size)) {
90 error = -EIO;
91 break;
92 }
93 list_size -= slen;
94
95 if (ovl_is_private_xattr(name))
96 continue;
97 retry:
98 size = vfs_getxattr(old, name, value, value_size);
99 if (size == -ERANGE)
100 size = vfs_getxattr(old, name, NULL, 0);
101
102 if (size < 0) {
103 error = size;
104 break;
105 }
106
107 if (size > value_size) {
108 void *new;
109
110 new = krealloc(value, size, GFP_KERNEL);
111 if (!new) {
112 error = -ENOMEM;
113 break;
114 }
115 value = new;
116 value_size = size;
117 goto retry;
118 }
119
120 error = security_inode_copy_up_xattr(name);
121 if (error < 0 && error != -EOPNOTSUPP)
122 break;
123 if (error == 1) {
124 error = 0;
125 continue; /* Discard */
126 }
127 error = vfs_setxattr(new, name, value, size, 0);
128 if (error)
129 break;
130 }
131 kfree(value);
132 out:
133 kfree(buf);
134 return error;
135 }
136
137 static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
138 {
139 struct file *old_file;
140 struct file *new_file;
141 loff_t old_pos = 0;
142 loff_t new_pos = 0;
143 int error = 0;
144
145 if (len == 0)
146 return 0;
147
148 old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
149 if (IS_ERR(old_file))
150 return PTR_ERR(old_file);
151
152 new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
153 if (IS_ERR(new_file)) {
154 error = PTR_ERR(new_file);
155 goto out_fput;
156 }
157
158 /* Try to use clone_file_range to clone up within the same fs */
159 error = vfs_clone_file_range(old_file, 0, new_file, 0, len);
160 if (!error)
161 goto out;
162 /* Couldn't clone, so now we try to copy the data */
163 error = 0;
164
165 /* FIXME: copy up sparse files efficiently */
166 while (len) {
167 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
168 long bytes;
169
170 if (len < this_len)
171 this_len = len;
172
173 if (signal_pending_state(TASK_KILLABLE, current)) {
174 error = -EINTR;
175 break;
176 }
177
178 bytes = do_splice_direct(old_file, &old_pos,
179 new_file, &new_pos,
180 this_len, SPLICE_F_MOVE);
181 if (bytes <= 0) {
182 error = bytes;
183 break;
184 }
185 WARN_ON(old_pos != new_pos);
186
187 len -= bytes;
188 }
189 out:
190 if (!error)
191 error = vfs_fsync(new_file, 0);
192 fput(new_file);
193 out_fput:
194 fput(old_file);
195 return error;
196 }
197
198 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
199 {
200 struct iattr attr = {
201 .ia_valid =
202 ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
203 .ia_atime = stat->atime,
204 .ia_mtime = stat->mtime,
205 };
206
207 return notify_change(upperdentry, &attr, NULL);
208 }
209
210 int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
211 {
212 int err = 0;
213
214 /*
215 * For the most part we want to set the mode bits before setting
216 * the user, otherwise the current context might lack permission
217 * for setting the mode. However for sxid/sticky bits we want
218 * the operation to fail if the current user isn't privileged
219 * towards the resulting inode. So we first set the mode but
220 * exclude the sxid/sticky bits, then set the user, then set the
221 * mode again if any of the sxid/sticky bits are set.
222 */
223 if (!S_ISLNK(stat->mode)) {
224 struct iattr attr = {
225 .ia_valid = ATTR_MODE,
226 .ia_mode = stat->mode & ~(S_ISUID|S_ISGID|S_ISVTX),
227 };
228 err = notify_change(upperdentry, &attr, NULL);
229 }
230 if (!err) {
231 struct iattr attr = {
232 .ia_valid = ATTR_UID | ATTR_GID,
233 .ia_uid = stat->uid,
234 .ia_gid = stat->gid,
235 };
236 err = notify_change(upperdentry, &attr, NULL);
237 }
238 if (!err && !S_ISLNK(stat->mode) &&
239 (stat->mode & (S_ISUID|S_ISGID|S_ISVTX))) {
240 struct iattr attr = {
241 .ia_valid = ATTR_MODE,
242 .ia_mode = stat->mode,
243 };
244 err = notify_change(upperdentry, &attr, NULL);
245 }
246 if (!err)
247 ovl_set_timestamps(upperdentry, stat);
248
249 return err;
250 }
251
252 struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
253 {
254 struct ovl_fh *fh;
255 int fh_type, fh_len, dwords;
256 void *buf;
257 int buflen = MAX_HANDLE_SZ;
258 uuid_t *uuid = &lower->d_sb->s_uuid;
259
260 buf = kmalloc(buflen, GFP_KERNEL);
261 if (!buf)
262 return ERR_PTR(-ENOMEM);
263
264 /*
265 * We encode a non-connectable file handle for non-dir, because we
266 * only need to find the lower inode number and we don't want to pay
267 * the price or reconnecting the dentry.
268 */
269 dwords = buflen >> 2;
270 fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
271 buflen = (dwords << 2);
272
273 fh = ERR_PTR(-EIO);
274 if (WARN_ON(fh_type < 0) ||
275 WARN_ON(buflen > MAX_HANDLE_SZ) ||
276 WARN_ON(fh_type == FILEID_INVALID))
277 goto out;
278
279 BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
280 fh_len = offsetof(struct ovl_fh, fid) + buflen;
281 fh = kmalloc(fh_len, GFP_KERNEL);
282 if (!fh) {
283 fh = ERR_PTR(-ENOMEM);
284 goto out;
285 }
286
287 fh->version = OVL_FH_VERSION;
288 fh->magic = OVL_FH_MAGIC;
289 fh->type = fh_type;
290 fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
291 /*
292 * When we will want to decode an overlay dentry from this handle
293 * and all layers are on the same fs, if we get a disconncted real
294 * dentry when we decode fid, the only way to tell if we should assign
295 * it to upperdentry or to lowerstack is by checking this flag.
296 */
297 if (is_upper)
298 fh->flags |= OVL_FH_FLAG_PATH_UPPER;
299 fh->len = fh_len;
300 fh->uuid = *uuid;
301 memcpy(fh->fid, buf, buflen);
302
303 out:
304 kfree(buf);
305 return fh;
306 }
307
308 static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
309 struct dentry *upper)
310 {
311 const struct ovl_fh *fh = NULL;
312 int err;
313
314 /*
315 * When lower layer doesn't support export operations store a 'null' fh,
316 * so we can use the overlay.origin xattr to distignuish between a copy
317 * up and a pure upper inode.
318 */
319 if (ovl_can_decode_fh(lower->d_sb)) {
320 fh = ovl_encode_fh(lower, false);
321 if (IS_ERR(fh))
322 return PTR_ERR(fh);
323 }
324
325 /*
326 * Do not fail when upper doesn't support xattrs.
327 */
328 err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
329 fh ? fh->len : 0, 0);
330 kfree(fh);
331
332 return err;
333 }
334
335 struct ovl_copy_up_ctx {
336 struct dentry *parent;
337 struct dentry *dentry;
338 struct path lowerpath;
339 struct kstat stat;
340 struct kstat pstat;
341 const char *link;
342 struct dentry *destdir;
343 struct qstr destname;
344 struct dentry *workdir;
345 bool tmpfile;
346 bool origin;
347 };
348
349 static int ovl_link_up(struct ovl_copy_up_ctx *c)
350 {
351 int err;
352 struct dentry *upper;
353 struct dentry *upperdir = ovl_dentry_upper(c->parent);
354 struct inode *udir = d_inode(upperdir);
355
356 /* Mark parent "impure" because it may now contain non-pure upper */
357 err = ovl_set_impure(c->parent, upperdir);
358 if (err)
359 return err;
360
361 err = ovl_set_nlink_lower(c->dentry);
362 if (err)
363 return err;
364
365 inode_lock_nested(udir, I_MUTEX_PARENT);
366 upper = lookup_one_len(c->dentry->d_name.name, upperdir,
367 c->dentry->d_name.len);
368 err = PTR_ERR(upper);
369 if (!IS_ERR(upper)) {
370 err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
371 true);
372 dput(upper);
373
374 if (!err) {
375 /* Restore timestamps on parent (best effort) */
376 ovl_set_timestamps(upperdir, &c->pstat);
377 ovl_dentry_set_upper_alias(c->dentry);
378 }
379 }
380 inode_unlock(udir);
381 ovl_set_nlink_upper(c->dentry);
382
383 return err;
384 }
385
386 static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
387 struct dentry **newdentry)
388 {
389 int err;
390 struct dentry *upper;
391 struct inode *udir = d_inode(c->destdir);
392
393 upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
394 if (IS_ERR(upper))
395 return PTR_ERR(upper);
396
397 if (c->tmpfile)
398 err = ovl_do_link(temp, udir, upper, true);
399 else
400 err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
401
402 if (!err)
403 *newdentry = dget(c->tmpfile ? upper : temp);
404 dput(upper);
405
406 return err;
407 }
408
409 static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
410 {
411 int err;
412 struct dentry *temp;
413 const struct cred *old_creds = NULL;
414 struct cred *new_creds = NULL;
415 struct cattr cattr = {
416 /* Can't properly set mode on creation because of the umask */
417 .mode = c->stat.mode & S_IFMT,
418 .rdev = c->stat.rdev,
419 .link = c->link
420 };
421
422 err = security_inode_copy_up(c->dentry, &new_creds);
423 if (err < 0)
424 goto out;
425
426 if (new_creds)
427 old_creds = override_creds(new_creds);
428
429 if (c->tmpfile) {
430 temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
431 if (IS_ERR(temp))
432 goto temp_err;
433 } else {
434 temp = ovl_lookup_temp(c->workdir);
435 if (IS_ERR(temp))
436 goto temp_err;
437
438 err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
439 NULL, true);
440 if (err) {
441 dput(temp);
442 goto out;
443 }
444 }
445 err = 0;
446 *tempp = temp;
447 out:
448 if (new_creds) {
449 revert_creds(old_creds);
450 put_cred(new_creds);
451 }
452
453 return err;
454
455 temp_err:
456 err = PTR_ERR(temp);
457 goto out;
458 }
459
460 static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
461 {
462 int err;
463
464 if (S_ISREG(c->stat.mode)) {
465 struct path upperpath;
466
467 ovl_path_upper(c->dentry, &upperpath);
468 BUG_ON(upperpath.dentry != NULL);
469 upperpath.dentry = temp;
470
471 err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
472 if (err)
473 return err;
474 }
475
476 err = ovl_copy_xattr(c->lowerpath.dentry, temp);
477 if (err)
478 return err;
479
480 inode_lock(temp->d_inode);
481 err = ovl_set_attr(temp, &c->stat);
482 inode_unlock(temp->d_inode);
483 if (err)
484 return err;
485
486 /*
487 * Store identifier of lower inode in upper inode xattr to
488 * allow lookup of the copy up origin inode.
489 *
490 * Don't set origin when we are breaking the association with a lower
491 * hard link.
492 */
493 if (c->origin) {
494 err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
495 if (err)
496 return err;
497 }
498
499 return 0;
500 }
501
502 static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
503 {
504 struct inode *udir = c->destdir->d_inode;
505 struct inode *inode;
506 struct dentry *newdentry = NULL;
507 struct dentry *temp = NULL;
508 int err;
509
510 err = ovl_get_tmpfile(c, &temp);
511 if (err)
512 goto out;
513
514 err = ovl_copy_up_inode(c, temp);
515 if (err)
516 goto out_cleanup;
517
518 if (c->tmpfile) {
519 inode_lock_nested(udir, I_MUTEX_PARENT);
520 err = ovl_install_temp(c, temp, &newdentry);
521 inode_unlock(udir);
522 } else {
523 err = ovl_install_temp(c, temp, &newdentry);
524 }
525 if (err)
526 goto out_cleanup;
527
528 inode = d_inode(c->dentry);
529 ovl_inode_update(inode, newdentry);
530 if (S_ISDIR(inode->i_mode))
531 ovl_set_flag(OVL_WHITEOUTS, inode);
532
533 out:
534 dput(temp);
535 return err;
536
537 out_cleanup:
538 if (!c->tmpfile)
539 ovl_cleanup(d_inode(c->workdir), temp);
540 goto out;
541 }
542
543 /*
544 * Copy up a single dentry
545 *
546 * All renames start with copy up of source if necessary. The actual
547 * rename will only proceed once the copy up was successful. Copy up uses
548 * upper parent i_mutex for exclusion. Since rename can change d_parent it
549 * is possible that the copy up will lock the old parent. At that point
550 * the file will have already been copied up anyway.
551 */
552 static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
553 {
554 int err;
555 struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
556 bool indexed = false;
557
558 if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
559 c->stat.nlink > 1)
560 indexed = true;
561
562 if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
563 c->origin = true;
564
565 if (indexed) {
566 c->destdir = ovl_indexdir(c->dentry->d_sb);
567 err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
568 if (err)
569 return err;
570 } else {
571 /*
572 * Mark parent "impure" because it may now contain non-pure
573 * upper
574 */
575 err = ovl_set_impure(c->parent, c->destdir);
576 if (err)
577 return err;
578 }
579
580 /* Should we copyup with O_TMPFILE or with workdir? */
581 if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
582 c->tmpfile = true;
583 err = ovl_copy_up_locked(c);
584 } else {
585 err = ovl_lock_rename_workdir(c->workdir, c->destdir);
586 if (!err) {
587 err = ovl_copy_up_locked(c);
588 unlock_rename(c->workdir, c->destdir);
589 }
590 }
591
592 if (indexed) {
593 if (!err)
594 ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
595 kfree(c->destname.name);
596 } else if (!err) {
597 struct inode *udir = d_inode(c->destdir);
598
599 /* Restore timestamps on parent (best effort) */
600 inode_lock(udir);
601 ovl_set_timestamps(c->destdir, &c->pstat);
602 inode_unlock(udir);
603
604 ovl_dentry_set_upper_alias(c->dentry);
605 }
606
607 return err;
608 }
609
610 static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
611 int flags)
612 {
613 int err;
614 DEFINE_DELAYED_CALL(done);
615 struct path parentpath;
616 struct ovl_copy_up_ctx ctx = {
617 .parent = parent,
618 .dentry = dentry,
619 .workdir = ovl_workdir(dentry),
620 };
621
622 if (WARN_ON(!ctx.workdir))
623 return -EROFS;
624
625 ovl_path_lower(dentry, &ctx.lowerpath);
626 err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
627 STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
628 if (err)
629 return err;
630
631 ovl_path_upper(parent, &parentpath);
632 ctx.destdir = parentpath.dentry;
633 ctx.destname = dentry->d_name;
634
635 err = vfs_getattr(&parentpath, &ctx.pstat,
636 STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
637 if (err)
638 return err;
639
640 /* maybe truncate regular file. this has no effect on dirs */
641 if (flags & O_TRUNC)
642 ctx.stat.size = 0;
643
644 if (S_ISLNK(ctx.stat.mode)) {
645 ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
646 if (IS_ERR(ctx.link))
647 return PTR_ERR(ctx.link);
648 }
649 ovl_do_check_copy_up(ctx.lowerpath.dentry);
650
651 err = ovl_copy_up_start(dentry);
652 /* err < 0: interrupted, err > 0: raced with another copy-up */
653 if (unlikely(err)) {
654 if (err > 0)
655 err = 0;
656 } else {
657 if (!ovl_dentry_upper(dentry))
658 err = ovl_do_copy_up(&ctx);
659 if (!err && !ovl_dentry_has_upper_alias(dentry))
660 err = ovl_link_up(&ctx);
661 ovl_copy_up_end(dentry);
662 }
663 do_delayed_call(&done);
664
665 return err;
666 }
667
668 int ovl_copy_up_flags(struct dentry *dentry, int flags)
669 {
670 int err = 0;
671 const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
672
673 while (!err) {
674 struct dentry *next;
675 struct dentry *parent;
676
677 /*
678 * Check if copy-up has happened as well as for upper alias (in
679 * case of hard links) is there.
680 *
681 * Both checks are lockless:
682 * - false negatives: will recheck under oi->lock
683 * - false positives:
684 * + ovl_dentry_upper() uses memory barriers to ensure the
685 * upper dentry is up-to-date
686 * + ovl_dentry_has_upper_alias() relies on locking of
687 * upper parent i_rwsem to prevent reordering copy-up
688 * with rename.
689 */
690 if (ovl_dentry_upper(dentry) &&
691 ovl_dentry_has_upper_alias(dentry))
692 break;
693
694 next = dget(dentry);
695 /* find the topmost dentry not yet copied up */
696 for (;;) {
697 parent = dget_parent(next);
698
699 if (ovl_dentry_upper(parent))
700 break;
701
702 dput(next);
703 next = parent;
704 }
705
706 err = ovl_copy_up_one(parent, next, flags);
707
708 dput(parent);
709 dput(next);
710 }
711 revert_creds(old_cred);
712
713 return err;
714 }
715
/* Copy up @dentry with no open flags; see ovl_copy_up_flags() */
int ovl_copy_up(struct dentry *dentry)
{
	const int no_flags = 0;

	return ovl_copy_up_flags(dentry, no_flags);
}