]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - fs/aufs/xino.c
UBUNTU: SAUCE: Update aufs to 5.4.3 20200302
[mirror_ubuntu-focal-kernel.git] / fs / aufs / xino.c
CommitLineData
a3a49a17
SF
1// SPDX-License-Identifier: GPL-2.0
2/*
e4a3f096 3 * Copyright (C) 2005-2020 Junjiro R. Okajima
a3a49a17
SF
4 *
5 * This program, aufs is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19/*
20 * external inode number translation table and bitmap
21 *
22 * things to consider
23 * - the lifetime
24 * + au_xino object
25 * + XINO files (xino, xib, xigen)
26 * + dynamic debugfs entries (xiN)
27 * + static debugfs entries (xib, xigen)
28 * + static sysfs entry (xi_path)
29 * - several entry points to handle them.
30 * + mount(2) without xino option (default)
31 * + mount(2) with xino option
32 * + mount(2) with noxino option
33 * + umount(2)
34 * + remount with add/del branches
35 * + remount with xino/noxino options
36 */
37
38#include <linux/seq_file.h>
39#include <linux/statfs.h>
40#include "aufs.h"
41
42static aufs_bindex_t sbr_find_shared(struct super_block *sb, aufs_bindex_t btop,
43 aufs_bindex_t bbot,
44 struct super_block *h_sb)
45{
46 /* todo: try binary-search if the branches are many */
47 for (; btop <= bbot; btop++)
48 if (h_sb == au_sbr_sb(sb, btop))
49 return btop;
50 return -1;
51}
52
53/*
54 * find another branch who is on the same filesystem of the specified
55 * branch{@btgt}. search until @bbot.
56 */
57static aufs_bindex_t is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
58 aufs_bindex_t bbot)
59{
60 aufs_bindex_t bindex;
61 struct super_block *tgt_sb;
62
63 tgt_sb = au_sbr_sb(sb, btgt);
64 bindex = sbr_find_shared(sb, /*btop*/0, btgt - 1, tgt_sb);
65 if (bindex < 0)
66 bindex = sbr_find_shared(sb, btgt + 1, bbot, tgt_sb);
67
68 return bindex;
69}
70
71/* ---------------------------------------------------------------------- */
72
73/*
74 * stop unnecessary notify events at creating xino files
75 */
76
77aufs_bindex_t au_xi_root(struct super_block *sb, struct dentry *dentry)
78{
79 aufs_bindex_t bfound, bindex, bbot;
80 struct dentry *parent;
81 struct au_branch *br;
82
83 bfound = -1;
84 parent = dentry->d_parent; /* safe d_parent access */
85 bbot = au_sbbot(sb);
86 for (bindex = 0; bindex <= bbot; bindex++) {
87 br = au_sbr(sb, bindex);
88 if (au_br_dentry(br) == parent) {
89 bfound = bindex;
90 break;
91 }
92 }
93
94 AuDbg("bfound b%d\n", bfound);
95 return bfound;
96}
97
/*
 * records what au_xino_lock_dir() has locked, so that au_xino_unlock_dir()
 * can release it.
 */
struct au_xino_lock_dir {
	/* set when the parent dir is a branch root, otherwise NULL */
	struct au_hinode *hdir;
	/* valid only when @hdir is NULL: the referenced parent dentry */
	struct dentry *parent;
	/* valid only when @hdir is NULL: the locked inode of @parent */
	struct inode *dir;
};
103
/*
 * get a reference to the parent of @dentry and lock the parent's inode with
 * the lock subclass @lsc.
 * the caller is responsible to unlock the inode and dput() the returned
 * dentry.
 */
static struct dentry *au_dget_parent_lock(struct dentry *dentry,
					  unsigned int lsc)
{
	struct dentry *h_parent;

	h_parent = dget_parent(dentry);
	inode_lock_nested(d_inode(h_parent), lsc);
#if 0 /* it should not happen */
	spin_lock(&dentry->d_lock);
	if (unlikely(dentry->d_parent != h_parent)) {
		spin_unlock(&dentry->d_lock);
		inode_unlock(d_inode(h_parent));
		dput(h_parent);
		h_parent = NULL;
		goto out;
	}
	spin_unlock(&dentry->d_lock);

out:
#endif
	return h_parent;
}
128
129static void au_xino_lock_dir(struct super_block *sb, struct path *xipath,
130 struct au_xino_lock_dir *ldir)
131{
132 aufs_bindex_t bindex;
133
134 ldir->hdir = NULL;
135 bindex = au_xi_root(sb, xipath->dentry);
136 if (bindex >= 0) {
137 /* rw branch root */
138 ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
139 au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
140 } else {
141 /* other */
142 ldir->parent = au_dget_parent_lock(xipath->dentry,
143 AuLsc_I_PARENT);
144 ldir->dir = d_inode(ldir->parent);
145 }
146}
147
148static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
149{
150 if (ldir->hdir)
151 au_hn_inode_unlock(ldir->hdir);
152 else {
153 inode_unlock(ldir->dir);
154 dput(ldir->parent);
155 }
156}
157
158/* ---------------------------------------------------------------------- */
159
160/*
161 * create and set a new xino file
162 */
163struct file *au_xino_create(struct super_block *sb, char *fpath, int silent,
164 int wbrtop)
165{
166 struct file *file;
167 struct dentry *h_parent, *d;
168 struct inode *h_dir, *inode;
169 int err;
170 static DEFINE_MUTEX(mtx);
171
172 /*
173 * at mount-time, and the xino file is the default path,
174 * hnotify is disabled so we have no notify events to ignore.
175 * when a user specified the xino, we cannot get au_hdir to be ignored.
176 */
177 if (!wbrtop)
178 mutex_lock(&mtx);
179 file = vfsub_filp_open(fpath, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
180 /* | __FMODE_NONOTIFY */,
181 0666);
182 if (IS_ERR(file)) {
183 if (!wbrtop)
184 mutex_unlock(&mtx);
185 if (!silent)
186 pr_err("open %s(%ld)\n", fpath, PTR_ERR(file));
187 return file;
188 }
189
190 /* keep file count */
191 err = 0;
192 d = file->f_path.dentry;
193 h_parent = au_dget_parent_lock(d, AuLsc_I_PARENT);
194 if (!wbrtop)
195 mutex_unlock(&mtx);
196 /* mnt_want_write() is unnecessary here */
197 h_dir = d_inode(h_parent);
198 inode = file_inode(file);
199 /* no delegation since it is just created */
200 if (inode->i_nlink)
201 err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
202 /*force*/0);
203 inode_unlock(h_dir);
204 dput(h_parent);
205 if (unlikely(err)) {
206 if (!silent)
207 pr_err("unlink %s(%d)\n", fpath, err);
208 goto out;
209 }
210
211 err = -EINVAL;
212 if (unlikely(sb == d->d_sb)) {
213 if (!silent)
214 pr_err("%s must be outside\n", fpath);
215 goto out;
216 }
217 if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
218 if (!silent)
219 pr_err("xino doesn't support %s(%s)\n",
220 fpath, au_sbtype(d->d_sb));
221 goto out;
222 }
223 return file; /* success */
224
225out:
226 fput(file);
227 file = ERR_PTR(err);
228 return file;
229}
230
231/*
232 * create a new xinofile at the same place/path as @base.
233 */
234struct file *au_xino_create2(struct super_block *sb, struct path *base,
235 struct file *copy_src)
236{
237 struct file *file;
238 struct dentry *dentry, *parent;
239 struct inode *dir, *delegated;
240 struct qstr *name;
241 struct path path;
242 int err, do_unlock;
243 struct au_xino_lock_dir ldir;
244
245 do_unlock = 1;
246 au_xino_lock_dir(sb, base, &ldir);
247 dentry = base->dentry;
248 parent = dentry->d_parent; /* dir inode is locked */
249 dir = d_inode(parent);
250 IMustLock(dir);
251
252 name = &dentry->d_name;
253 path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
254 if (IS_ERR(path.dentry)) {
255 file = (void *)path.dentry;
256 pr_err("%pd lookup err %ld\n", dentry, PTR_ERR(path.dentry));
257 goto out;
258 }
259
260 /* no need to mnt_want_write() since we call dentry_open() later */
261 err = vfs_create(dir, path.dentry, 0666, NULL);
262 if (unlikely(err)) {
263 file = ERR_PTR(err);
264 pr_err("%pd create err %d\n", dentry, err);
265 goto out_dput;
266 }
267
268 path.mnt = base->mnt;
269 file = vfsub_dentry_open(&path,
270 O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
271 /* | __FMODE_NONOTIFY */);
272 if (IS_ERR(file)) {
273 pr_err("%pd open err %ld\n", dentry, PTR_ERR(file));
274 goto out_dput;
275 }
276
277 delegated = NULL;
278 err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
279 au_xino_unlock_dir(&ldir);
280 do_unlock = 0;
281 if (unlikely(err == -EWOULDBLOCK)) {
282 pr_warn("cannot retry for NFSv4 delegation"
283 " for an internal unlink\n");
284 iput(delegated);
285 }
286 if (unlikely(err)) {
287 pr_err("%pd unlink err %d\n", dentry, err);
288 goto out_fput;
289 }
290
291 if (copy_src) {
292 /* no one can touch copy_src xino */
293 err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
294 if (unlikely(err)) {
295 pr_err("%pd copy err %d\n", dentry, err);
296 goto out_fput;
297 }
298 }
299 goto out_dput; /* success */
300
301out_fput:
302 fput(file);
303 file = ERR_PTR(err);
304out_dput:
305 dput(path.dentry);
306out:
307 if (do_unlock)
308 au_xino_unlock_dir(&ldir);
309 return file;
310}
311
312struct file *au_xino_file1(struct au_xino *xi)
313{
314 struct file *file;
315 unsigned int u, nfile;
316
317 file = NULL;
318 nfile = xi->xi_nfile;
319 for (u = 0; u < nfile; u++) {
320 file = xi->xi_file[u];
321 if (file)
322 break;
323 }
324
325 return file;
326}
327
328static int au_xino_file_set(struct au_xino *xi, int idx, struct file *file)
329{
330 int err;
331 struct file *f;
332 void *p;
333
334 if (file)
335 get_file(file);
336
337 err = 0;
338 f = NULL;
339 if (idx < xi->xi_nfile) {
340 f = xi->xi_file[idx];
341 if (f)
342 fput(f);
343 } else {
344 p = au_kzrealloc(xi->xi_file,
345 sizeof(*xi->xi_file) * xi->xi_nfile,
346 sizeof(*xi->xi_file) * (idx + 1),
347 GFP_NOFS, /*may_shrink*/0);
348 if (p) {
349 MtxMustLock(&xi->xi_mtx);
350 xi->xi_file = p;
351 xi->xi_nfile = idx + 1;
352 } else {
353 err = -ENOMEM;
354 if (file)
355 fput(file);
356 goto out;
357 }
358 }
359 xi->xi_file[idx] = file;
360
361out:
362 return err;
363}
364
365/*
366 * if @xinew->xi is not set, then create new xigen file.
367 */
368struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew)
369{
370 struct file *file;
371 int err;
372
373 SiMustAnyLock(sb);
374
375 file = au_xino_create2(sb, xinew->base, xinew->copy_src);
376 if (IS_ERR(file)) {
377 err = PTR_ERR(file);
378 pr_err("%s[%d], err %d\n",
379 xinew->xi ? "xino" : "xigen",
380 xinew->idx, err);
381 goto out;
382 }
383
384 if (xinew->xi)
385 err = au_xino_file_set(xinew->xi, xinew->idx, file);
386 else {
387 BUG();
388 /* todo: make xigen file an array */
389 /* err = au_xigen_file_set(sb, xinew->idx, file); */
390 }
391 fput(file);
392 if (unlikely(err))
393 file = ERR_PTR(err);
394
395out:
396 return file;
397}
398
399/* ---------------------------------------------------------------------- */
400
401/*
402 * truncate xino files
403 */
404static int au_xino_do_trunc(struct super_block *sb, aufs_bindex_t bindex,
405 int idx, struct kstatfs *st)
406{
407 int err;
408 blkcnt_t blocks;
409 struct file *file, *new_xino;
410 struct au_xi_new xinew = {
411 .idx = idx
412 };
413
414 err = 0;
415 xinew.xi = au_sbr(sb, bindex)->br_xino;
416 file = au_xino_file(xinew.xi, idx);
417 if (!file)
418 goto out;
419
420 xinew.base = &file->f_path;
421 err = vfs_statfs(xinew.base, st);
422 if (unlikely(err)) {
423 AuErr1("statfs err %d, ignored\n", err);
424 err = 0;
425 goto out;
426 }
427
428 blocks = file_inode(file)->i_blocks;
429 pr_info("begin truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
430 bindex, idx, (u64)blocks, st->f_bfree, st->f_blocks);
431
432 xinew.copy_src = file;
433 new_xino = au_xi_new(sb, &xinew);
434 if (IS_ERR(new_xino)) {
435 err = PTR_ERR(new_xino);
436 pr_err("xino(b%d-%d), err %d, ignored\n", bindex, idx, err);
437 goto out;
438 }
439
440 err = vfs_statfs(&new_xino->f_path, st);
441 if (!err)
442 pr_info("end truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
443 bindex, idx, (u64)file_inode(new_xino)->i_blocks,
444 st->f_bfree, st->f_blocks);
445 else {
446 AuErr1("statfs err %d, ignored\n", err);
447 err = 0;
448 }
449
450out:
451 return err;
452}
453
454int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin)
455{
456 int err, i;
457 unsigned long jiffy;
458 aufs_bindex_t bbot;
459 struct kstatfs *st;
460 struct au_branch *br;
461 struct au_xino *xi;
462
463 err = -ENOMEM;
464 st = kmalloc(sizeof(*st), GFP_NOFS);
465 if (unlikely(!st))
466 goto out;
467
468 err = -EINVAL;
469 bbot = au_sbbot(sb);
470 if (unlikely(bindex < 0 || bbot < bindex))
471 goto out_st;
472
473 err = 0;
474 jiffy = jiffies;
475 br = au_sbr(sb, bindex);
476 xi = br->br_xino;
477 for (i = idx_begin; !err && i < xi->xi_nfile; i++)
478 err = au_xino_do_trunc(sb, bindex, i, st);
479 if (!err)
480 au_sbi(sb)->si_xino_jiffy = jiffy;
481
482out_st:
483 au_kfree_rcu(st);
484out:
485 return err;
486}
487
/* the arguments handed from xino_try_trunc() to xino_do_trunc() */
struct xino_do_trunc_args {
	struct super_block *sb;
	/* the branch whose xino files are truncated */
	struct au_branch *br;
	/* the index in the xi_file array to begin with */
	int idx;
};
493
494static void xino_do_trunc(void *_args)
495{
496 struct xino_do_trunc_args *args = _args;
497 struct super_block *sb;
498 struct au_branch *br;
499 struct inode *dir;
500 int err, idx;
501 aufs_bindex_t bindex;
502
503 err = 0;
504 sb = args->sb;
505 dir = d_inode(sb->s_root);
506 br = args->br;
507 idx = args->idx;
508
509 si_noflush_write_lock(sb);
510 ii_read_lock_parent(dir);
511 bindex = au_br_index(sb, br->br_id);
512 err = au_xino_trunc(sb, bindex, idx);
513 ii_read_unlock(dir);
514 if (unlikely(err))
515 pr_warn("err b%d, (%d)\n", bindex, err);
516 atomic_dec(&br->br_xino->xi_truncating);
517 au_lcnt_dec(&br->br_count);
518 si_write_unlock(sb);
519 au_nwt_done(&au_sbi(sb)->si_nowait);
520 au_kfree_rcu(args);
521}
522
523/*
524 * returns the index in the xi_file array whose corresponding file is necessary
525 * to truncate, or -1 which means no need to truncate.
526 */
527static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
528{
529 int err;
530 unsigned int u;
531 struct kstatfs st;
532 struct au_sbinfo *sbinfo;
533 struct au_xino *xi;
534 struct file *file;
535
536 /* todo: si_xino_expire and the ratio should be customizable */
537 sbinfo = au_sbi(sb);
538 if (time_before(jiffies,
539 sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
540 return -1;
541
542 /* truncation border */
543 xi = br->br_xino;
544 for (u = 0; u < xi->xi_nfile; u++) {
545 file = au_xino_file(xi, u);
546 if (!file)
547 continue;
548
549 err = vfs_statfs(&file->f_path, &st);
550 if (unlikely(err)) {
551 AuErr1("statfs err %d, ignored\n", err);
552 return -1;
553 }
554 if (div64_u64(st.f_bfree * 100, st.f_blocks)
555 >= AUFS_XINO_DEF_TRUNC)
556 return u;
557 }
558
559 return -1;
560}
561
562static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
563{
564 int idx;
565 struct xino_do_trunc_args *args;
566 int wkq_err;
567
568 idx = xino_trunc_test(sb, br);
569 if (idx < 0)
570 return;
571
572 if (atomic_inc_return(&br->br_xino->xi_truncating) > 1)
573 goto out;
574
575 /* lock and kfree() will be called in trunc_xino() */
576 args = kmalloc(sizeof(*args), GFP_NOFS);
577 if (unlikely(!args)) {
578 AuErr1("no memory\n");
579 goto out;
580 }
581
582 au_lcnt_inc(&br->br_count);
583 args->sb = sb;
584 args->br = br;
585 args->idx = idx;
586 wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
587 if (!wkq_err)
588 return; /* success */
589
590 pr_err("wkq %d\n", wkq_err);
591 au_lcnt_dec(&br->br_count);
592 au_kfree_rcu(args);
593
594out:
595 atomic_dec(&br->br_xino->xi_truncating);
596}
597
598/* ---------------------------------------------------------------------- */
599
600struct au_xi_calc {
601 int idx;
602 loff_t pos;
603};
604
605static void au_xi_calc(struct super_block *sb, ino_t h_ino,
606 struct au_xi_calc *calc)
607{
608 loff_t maxent;
609
610 maxent = au_xi_maxent(sb);
611 calc->idx = div64_u64_rem(h_ino, maxent, &calc->pos);
612 calc->pos *= sizeof(ino_t);
613}
614
615static int au_xino_do_new_async(struct super_block *sb, struct au_branch *br,
616 struct au_xi_calc *calc)
617{
618 int err;
619 struct file *file;
620 struct au_xino *xi = br->br_xino;
621 struct au_xi_new xinew = {
622 .xi = xi
623 };
624
625 SiMustAnyLock(sb);
626
627 err = 0;
628 if (!xi)
629 goto out;
630
631 mutex_lock(&xi->xi_mtx);
632 file = au_xino_file(xi, calc->idx);
633 if (file)
634 goto out_mtx;
635
636 file = au_xino_file(xi, /*idx*/-1);
637 AuDebugOn(!file);
638 xinew.idx = calc->idx;
639 xinew.base = &file->f_path;
640 /* xinew.copy_src = NULL; */
641 file = au_xi_new(sb, &xinew);
642 if (IS_ERR(file))
643 err = PTR_ERR(file);
644
645out_mtx:
646 mutex_unlock(&xi->xi_mtx);
647out:
648 return err;
649}
650
651struct au_xino_do_new_async_args {
652 struct super_block *sb;
653 struct au_branch *br;
654 struct au_xi_calc calc;
655 ino_t ino;
656};
657
658struct au_xi_writing {
659 struct hlist_bl_node node;
660 ino_t h_ino, ino;
661};
662
663static int au_xino_do_write(vfs_writef_t write, struct file *file,
664 struct au_xi_calc *calc, ino_t ino);
665
666static void au_xino_call_do_new_async(void *args)
667{
668 struct au_xino_do_new_async_args *a = args;
669 struct au_branch *br;
670 struct super_block *sb;
671 struct au_sbinfo *sbi;
672 struct inode *root;
673 struct file *file;
674 struct au_xi_writing *del, *p;
675 struct hlist_bl_head *hbl;
676 struct hlist_bl_node *pos;
677 int err;
678
679 br = a->br;
680 sb = a->sb;
681 sbi = au_sbi(sb);
682 si_noflush_read_lock(sb);
683 root = d_inode(sb->s_root);
684 ii_read_lock_child(root);
685 err = au_xino_do_new_async(sb, br, &a->calc);
686 if (unlikely(err)) {
687 AuIOErr("err %d\n", err);
688 goto out;
689 }
690
691 file = au_xino_file(br->br_xino, a->calc.idx);
692 AuDebugOn(!file);
693 err = au_xino_do_write(sbi->si_xwrite, file, &a->calc, a->ino);
694 if (unlikely(err)) {
695 AuIOErr("err %d\n", err);
696 goto out;
697 }
698
699 del = NULL;
700 hbl = &br->br_xino->xi_writing;
701 hlist_bl_lock(hbl);
702 au_hbl_for_each(pos, hbl) {
703 p = container_of(pos, struct au_xi_writing, node);
704 if (p->ino == a->ino) {
705 del = p;
706 hlist_bl_del(&p->node);
707 break;
708 }
709 }
710 hlist_bl_unlock(hbl);
711 au_kfree_rcu(del);
712
713out:
714 au_lcnt_dec(&br->br_count);
715 ii_read_unlock(root);
716 si_read_unlock(sb);
717 au_nwt_done(&sbi->si_nowait);
718 au_kfree_rcu(a);
719}
720
721/*
722 * create a new xino file asynchronously
723 */
724static int au_xino_new_async(struct super_block *sb, struct au_branch *br,
725 struct au_xi_calc *calc, ino_t ino)
726{
727 int err;
728 struct au_xino_do_new_async_args *arg;
729
730 err = -ENOMEM;
731 arg = kmalloc(sizeof(*arg), GFP_NOFS);
732 if (unlikely(!arg))
733 goto out;
734
735 arg->sb = sb;
736 arg->br = br;
737 arg->calc = *calc;
738 arg->ino = ino;
739 au_lcnt_inc(&br->br_count);
740 err = au_wkq_nowait(au_xino_call_do_new_async, arg, sb, AuWkq_NEST);
741 if (unlikely(err)) {
742 pr_err("wkq %d\n", err);
743 au_lcnt_dec(&br->br_count);
744 au_kfree_rcu(arg);
745 }
746
747out:
748 return err;
749}
750
751/*
752 * read @ino from xinofile for the specified branch{@sb, @bindex}
753 * at the position of @h_ino.
754 */
755int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
756 ino_t *ino)
757{
758 int err;
759 ssize_t sz;
760 struct au_xi_calc calc;
761 struct au_sbinfo *sbinfo;
762 struct file *file;
763 struct au_xino *xi;
764 struct hlist_bl_head *hbl;
765 struct hlist_bl_node *pos;
766 struct au_xi_writing *p;
767
768 *ino = 0;
769 if (!au_opt_test(au_mntflags(sb), XINO))
770 return 0; /* no xino */
771
772 err = 0;
773 au_xi_calc(sb, h_ino, &calc);
774 xi = au_sbr(sb, bindex)->br_xino;
775 file = au_xino_file(xi, calc.idx);
776 if (!file) {
777 hbl = &xi->xi_writing;
778 hlist_bl_lock(hbl);
779 au_hbl_for_each(pos, hbl) {
780 p = container_of(pos, struct au_xi_writing, node);
781 if (p->h_ino == h_ino) {
782 AuDbg("hi%llu, i%llu, found\n",
783 (u64)p->h_ino, (u64)p->ino);
784 *ino = p->ino;
785 break;
786 }
787 }
788 hlist_bl_unlock(hbl);
789 return 0;
790 } else if (vfsub_f_size_read(file) < calc.pos + sizeof(*ino))
791 return 0; /* no xino */
792
793 sbinfo = au_sbi(sb);
794 sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &calc.pos);
795 if (sz == sizeof(*ino))
796 return 0; /* success */
797
798 err = sz;
799 if (unlikely(sz >= 0)) {
800 err = -EIO;
801 AuIOErr("xino read error (%zd)\n", sz);
802 }
803 return err;
804}
805
806static int au_xino_do_write(vfs_writef_t write, struct file *file,
807 struct au_xi_calc *calc, ino_t ino)
808{
809 ssize_t sz;
810
811 sz = xino_fwrite(write, file, &ino, sizeof(ino), &calc->pos);
812 if (sz == sizeof(ino))
813 return 0; /* success */
814
815 AuIOErr("write failed (%zd)\n", sz);
816 return -EIO;
817}
818
819/*
820 * write @ino to the xinofile for the specified branch{@sb, @bindex}
821 * at the position of @h_ino.
822 * even if @ino is zero, it is written to the xinofile and means no entry.
823 * if the size of the xino file on a specific filesystem exceeds the watermark,
824 * try truncating it.
825 */
826int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
827 ino_t ino)
828{
829 int err;
830 unsigned int mnt_flags;
831 struct au_xi_calc calc;
832 struct file *file;
833 struct au_branch *br;
834 struct au_xino *xi;
835 struct au_xi_writing *p;
836
837 SiMustAnyLock(sb);
838
839 mnt_flags = au_mntflags(sb);
840 if (!au_opt_test(mnt_flags, XINO))
841 return 0;
842
843 au_xi_calc(sb, h_ino, &calc);
844 br = au_sbr(sb, bindex);
845 xi = br->br_xino;
846 file = au_xino_file(xi, calc.idx);
847 if (!file) {
848 /* store the inum pair into the list */
849 p = kmalloc(sizeof(*p), GFP_NOFS | __GFP_NOFAIL);
850 p->h_ino = h_ino;
851 p->ino = ino;
852 au_hbl_add(&p->node, &xi->xi_writing);
853
854 /* create and write a new xino file asynchronously */
855 err = au_xino_new_async(sb, br, &calc, ino);
856 if (!err)
857 return 0; /* success */
858 goto out;
859 }
860
861 err = au_xino_do_write(au_sbi(sb)->si_xwrite, file, &calc, ino);
862 if (!err) {
863 br = au_sbr(sb, bindex);
864 if (au_opt_test(mnt_flags, TRUNC_XINO)
865 && au_test_fs_trunc_xino(au_br_sb(br)))
866 xino_try_trunc(sb, br);
867 return 0; /* success */
868 }
869
870out:
871 AuIOErr("write failed (%d)\n", err);
872 return -EIO;
873}
874
875static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
876 size_t size, loff_t *pos);
877
878/* todo: unnecessary to support mmap_sem since kernel-space? */
879ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
880 loff_t *pos)
881{
882 ssize_t err;
883 mm_segment_t oldfs;
884 union {
885 void *k;
886 char __user *u;
887 } buf;
888 int i;
889 const int prevent_endless = 10;
890
891 i = 0;
892 buf.k = kbuf;
893 oldfs = get_fs();
894 set_fs(KERNEL_DS);
895 do {
896 err = func(file, buf.u, size, pos);
897 if (err == -EINTR
898 && !au_wkq_test()
899 && fatal_signal_pending(current)) {
900 set_fs(oldfs);
901 err = xino_fread_wkq(func, file, kbuf, size, pos);
902 BUG_ON(err == -EINTR);
903 oldfs = get_fs();
904 set_fs(KERNEL_DS);
905 }
906 } while (i++ < prevent_endless
907 && (err == -EAGAIN || err == -EINTR));
908 set_fs(oldfs);
909
910#if 0 /* reserved for future use */
911 if (err > 0)
912 fsnotify_access(file->f_path.dentry);
913#endif
914
915 return err;
916}
917
918struct xino_fread_args {
919 ssize_t *errp;
920 vfs_readf_t func;
921 struct file *file;
922 void *buf;
923 size_t size;
924 loff_t *pos;
925};
926
927static void call_xino_fread(void *args)
928{
929 struct xino_fread_args *a = args;
930 *a->errp = xino_fread(a->func, a->file, a->buf, a->size, a->pos);
931}
932
933static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
934 size_t size, loff_t *pos)
935{
936 ssize_t err;
937 int wkq_err;
938 struct xino_fread_args args = {
939 .errp = &err,
940 .func = func,
941 .file = file,
942 .buf = buf,
943 .size = size,
944 .pos = pos
945 };
946
947 wkq_err = au_wkq_wait(call_xino_fread, &args);
948 if (unlikely(wkq_err))
949 err = wkq_err;
950
951 return err;
952}
953
954static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
955 size_t size, loff_t *pos);
956
957static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
958 size_t size, loff_t *pos)
959{
960 ssize_t err;
961 mm_segment_t oldfs;
962 union {
963 void *k;
964 const char __user *u;
965 } buf;
966 int i;
967 const int prevent_endless = 10;
968
969 i = 0;
970 buf.k = kbuf;
971 oldfs = get_fs();
972 set_fs(KERNEL_DS);
973 do {
974 err = func(file, buf.u, size, pos);
975 if (err == -EINTR
976 && !au_wkq_test()
977 && fatal_signal_pending(current)) {
978 set_fs(oldfs);
979 err = xino_fwrite_wkq(func, file, kbuf, size, pos);
980 BUG_ON(err == -EINTR);
981 oldfs = get_fs();
982 set_fs(KERNEL_DS);
983 }
984 } while (i++ < prevent_endless
985 && (err == -EAGAIN || err == -EINTR));
986 set_fs(oldfs);
987
988#if 0 /* reserved for future use */
989 if (err > 0)
990 fsnotify_modify(file->f_path.dentry);
991#endif
992
993 return err;
994}
995
996struct do_xino_fwrite_args {
997 ssize_t *errp;
998 vfs_writef_t func;
999 struct file *file;
1000 void *buf;
1001 size_t size;
1002 loff_t *pos;
1003};
1004
1005static void call_do_xino_fwrite(void *args)
1006{
1007 struct do_xino_fwrite_args *a = args;
1008 *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
1009}
1010
1011static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
1012 size_t size, loff_t *pos)
1013{
1014 ssize_t err;
1015 int wkq_err;
1016 struct do_xino_fwrite_args args = {
1017 .errp = &err,
1018 .func = func,
1019 .file = file,
1020 .buf = buf,
1021 .size = size,
1022 .pos = pos
1023 };
1024
1025 /*
1026 * it breaks RLIMIT_FSIZE and normal user's limit,
1027 * users should care about quota and real 'filesystem full.'
1028 */
1029 wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
1030 if (unlikely(wkq_err))
1031 err = wkq_err;
1032
1033 return err;
1034}
1035
1036ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
1037 size_t size, loff_t *pos)
1038{
1039 ssize_t err;
1040
1041 if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
1042 lockdep_off();
1043 err = do_xino_fwrite(func, file, buf, size, pos);
1044 lockdep_on();
1045 } else {
1046 lockdep_off();
1047 err = xino_fwrite_wkq(func, file, buf, size, pos);
1048 lockdep_on();
1049 }
1050
1051 return err;
1052}
1053
1054/* ---------------------------------------------------------------------- */
1055
1056/*
1057 * inode number bitmap
1058 */
1059static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
1060static ino_t xib_calc_ino(unsigned long pindex, int bit)
1061{
1062 ino_t ino;
1063
1064 AuDebugOn(bit < 0 || page_bits <= bit);
1065 ino = AUFS_FIRST_INO + pindex * page_bits + bit;
1066 return ino;
1067}
1068
1069static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
1070{
1071 AuDebugOn(ino < AUFS_FIRST_INO);
1072 ino -= AUFS_FIRST_INO;
1073 *pindex = ino / page_bits;
1074 *bit = ino % page_bits;
1075}
1076
1077static int xib_pindex(struct super_block *sb, unsigned long pindex)
1078{
1079 int err;
1080 loff_t pos;
1081 ssize_t sz;
1082 struct au_sbinfo *sbinfo;
1083 struct file *xib;
1084 unsigned long *p;
1085
1086 sbinfo = au_sbi(sb);
1087 MtxMustLock(&sbinfo->si_xib_mtx);
1088 AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
1089 || !au_opt_test(sbinfo->si_mntflags, XINO));
1090
1091 if (pindex == sbinfo->si_xib_last_pindex)
1092 return 0;
1093
1094 xib = sbinfo->si_xib;
1095 p = sbinfo->si_xib_buf;
1096 pos = sbinfo->si_xib_last_pindex;
1097 pos *= PAGE_SIZE;
1098 sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
1099 if (unlikely(sz != PAGE_SIZE))
1100 goto out;
1101
1102 pos = pindex;
1103 pos *= PAGE_SIZE;
1104 if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1105 sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
1106 else {
1107 memset(p, 0, PAGE_SIZE);
1108 sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
1109 }
1110 if (sz == PAGE_SIZE) {
1111 sbinfo->si_xib_last_pindex = pindex;
1112 return 0; /* success */
1113 }
1114
1115out:
1116 AuIOErr1("write failed (%zd)\n", sz);
1117 err = sz;
1118 if (sz >= 0)
1119 err = -EIO;
1120 return err;
1121}
1122
1123static void au_xib_clear_bit(struct inode *inode)
1124{
1125 int err, bit;
1126 unsigned long pindex;
1127 struct super_block *sb;
1128 struct au_sbinfo *sbinfo;
1129
1130 AuDebugOn(inode->i_nlink);
1131
1132 sb = inode->i_sb;
1133 xib_calc_bit(inode->i_ino, &pindex, &bit);
1134 AuDebugOn(page_bits <= bit);
1135 sbinfo = au_sbi(sb);
1136 mutex_lock(&sbinfo->si_xib_mtx);
1137 err = xib_pindex(sb, pindex);
1138 if (!err) {
1139 clear_bit(bit, sbinfo->si_xib_buf);
1140 sbinfo->si_xib_next_bit = bit;
1141 }
1142 mutex_unlock(&sbinfo->si_xib_mtx);
1143}
1144
1145/* ---------------------------------------------------------------------- */
1146
1147/*
1148 * truncate a xino bitmap file
1149 */
1150
1151/* todo: slow */
1152static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
1153{
1154 int err, bit;
1155 ssize_t sz;
1156 unsigned long pindex;
1157 loff_t pos, pend;
1158 struct au_sbinfo *sbinfo;
1159 vfs_readf_t func;
1160 ino_t *ino;
1161 unsigned long *p;
1162
1163 err = 0;
1164 sbinfo = au_sbi(sb);
1165 MtxMustLock(&sbinfo->si_xib_mtx);
1166 p = sbinfo->si_xib_buf;
1167 func = sbinfo->si_xread;
1168 pend = vfsub_f_size_read(file);
1169 pos = 0;
1170 while (pos < pend) {
1171 sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
1172 err = sz;
1173 if (unlikely(sz <= 0))
1174 goto out;
1175
1176 err = 0;
1177 for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
1178 if (unlikely(*ino < AUFS_FIRST_INO))
1179 continue;
1180
1181 xib_calc_bit(*ino, &pindex, &bit);
1182 AuDebugOn(page_bits <= bit);
1183 err = xib_pindex(sb, pindex);
1184 if (!err)
1185 set_bit(bit, p);
1186 else
1187 goto out;
1188 }
1189 }
1190
1191out:
1192 return err;
1193}
1194
1195static int xib_restore(struct super_block *sb)
1196{
1197 int err, i;
1198 unsigned int nfile;
1199 aufs_bindex_t bindex, bbot;
1200 void *page;
1201 struct au_branch *br;
1202 struct au_xino *xi;
1203 struct file *file;
1204
1205 err = -ENOMEM;
1206 page = (void *)__get_free_page(GFP_NOFS);
1207 if (unlikely(!page))
1208 goto out;
1209
1210 err = 0;
1211 bbot = au_sbbot(sb);
1212 for (bindex = 0; !err && bindex <= bbot; bindex++)
1213 if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) {
1214 br = au_sbr(sb, bindex);
1215 xi = br->br_xino;
1216 nfile = xi->xi_nfile;
1217 for (i = 0; i < nfile; i++) {
1218 file = au_xino_file(xi, i);
1219 if (file)
1220 err = do_xib_restore(sb, file, page);
1221 }
1222 } else
1223 AuDbg("skip shared b%d\n", bindex);
1224 free_page((unsigned long)page);
1225
1226out:
1227 return err;
1228}
1229
1230int au_xib_trunc(struct super_block *sb)
1231{
1232 int err;
1233 ssize_t sz;
1234 loff_t pos;
1235 struct au_sbinfo *sbinfo;
1236 unsigned long *p;
1237 struct file *file;
1238
1239 SiMustWriteLock(sb);
1240
1241 err = 0;
1242 sbinfo = au_sbi(sb);
1243 if (!au_opt_test(sbinfo->si_mntflags, XINO))
1244 goto out;
1245
1246 file = sbinfo->si_xib;
1247 if (vfsub_f_size_read(file) <= PAGE_SIZE)
1248 goto out;
1249
1250 file = au_xino_create2(sb, &sbinfo->si_xib->f_path, NULL);
1251 err = PTR_ERR(file);
1252 if (IS_ERR(file))
1253 goto out;
1254 fput(sbinfo->si_xib);
1255 sbinfo->si_xib = file;
1256
1257 p = sbinfo->si_xib_buf;
1258 memset(p, 0, PAGE_SIZE);
1259 pos = 0;
1260 sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
1261 if (unlikely(sz != PAGE_SIZE)) {
1262 err = sz;
1263 AuIOErr("err %d\n", err);
1264 if (sz >= 0)
1265 err = -EIO;
1266 goto out;
1267 }
1268
1269 mutex_lock(&sbinfo->si_xib_mtx);
1270 /* mnt_want_write() is unnecessary here */
1271 err = xib_restore(sb);
1272 mutex_unlock(&sbinfo->si_xib_mtx);
1273
1274out:
1275 return err;
1276}
1277
1278/* ---------------------------------------------------------------------- */
1279
1280struct au_xino *au_xino_alloc(unsigned int nfile)
1281{
1282 struct au_xino *xi;
1283
1284 xi = kzalloc(sizeof(*xi), GFP_NOFS);
1285 if (unlikely(!xi))
1286 goto out;
1287 xi->xi_nfile = nfile;
1288 xi->xi_file = kcalloc(nfile, sizeof(*xi->xi_file), GFP_NOFS);
1289 if (unlikely(!xi->xi_file))
1290 goto out_free;
1291
1292 xi->xi_nondir.total = 8; /* initial size */
1293 xi->xi_nondir.array = kcalloc(xi->xi_nondir.total, sizeof(ino_t),
1294 GFP_NOFS);
1295 if (unlikely(!xi->xi_nondir.array))
1296 goto out_file;
1297
1298 spin_lock_init(&xi->xi_nondir.spin);
1299 init_waitqueue_head(&xi->xi_nondir.wqh);
1300 mutex_init(&xi->xi_mtx);
1301 INIT_HLIST_BL_HEAD(&xi->xi_writing);
1302 atomic_set(&xi->xi_truncating, 0);
1303 kref_init(&xi->xi_kref);
1304 goto out; /* success */
1305
1306out_file:
1307 au_kfree_try_rcu(xi->xi_file);
1308out_free:
1309 au_kfree_rcu(xi);
1310 xi = NULL;
1311out:
1312 return xi;
1313}
1314
1315static int au_xino_init(struct au_branch *br, int idx, struct file *file)
1316{
1317 int err;
1318 struct au_xino *xi;
1319
1320 err = 0;
1321 xi = au_xino_alloc(idx + 1);
1322 if (unlikely(!xi)) {
1323 err = -ENOMEM;
1324 goto out;
1325 }
1326
1327 if (file)
1328 get_file(file);
1329 xi->xi_file[idx] = file;
1330 AuDebugOn(br->br_xino);
1331 br->br_xino = xi;
1332
1333out:
1334 return err;
1335}
1336
1337static void au_xino_release(struct kref *kref)
1338{
1339 struct au_xino *xi;
1340 int i;
1341 unsigned long ul;
1342 struct hlist_bl_head *hbl;
1343 struct hlist_bl_node *pos, *n;
1344 struct au_xi_writing *p;
1345
1346 xi = container_of(kref, struct au_xino, xi_kref);
1347 for (i = 0; i < xi->xi_nfile; i++)
1348 if (xi->xi_file[i])
1349 fput(xi->xi_file[i]);
1350 for (i = xi->xi_nondir.total - 1; i >= 0; i--)
1351 AuDebugOn(xi->xi_nondir.array[i]);
1352 mutex_destroy(&xi->xi_mtx);
1353 hbl = &xi->xi_writing;
1354 ul = au_hbl_count(hbl);
1355 if (unlikely(ul)) {
1356 pr_warn("xi_writing %lu\n", ul);
1357 hlist_bl_lock(hbl);
e4a3f096 1358 hlist_bl_for_each_entry_safe(p, pos, n, hbl, node) {
a3a49a17 1359 hlist_bl_del(&p->node);
e4a3f096
SF
1360 /* kmemleak reported au_kfree_rcu() doesn't free it */
1361 kfree(p);
a3a49a17
SF
1362 }
1363 hlist_bl_unlock(hbl);
1364 }
1365 au_kfree_try_rcu(xi->xi_file);
1366 au_kfree_try_rcu(xi->xi_nondir.array);
1367 au_kfree_rcu(xi);
1368}
1369
1370int au_xino_put(struct au_branch *br)
1371{
1372 int ret;
1373 struct au_xino *xi;
1374
1375 ret = 0;
1376 xi = br->br_xino;
1377 if (xi) {
1378 br->br_xino = NULL;
1379 ret = kref_put(&xi->xi_kref, au_xino_release);
1380 }
1381
1382 return ret;
1383}
1384
1385/* ---------------------------------------------------------------------- */
1386
1387/*
1388 * xino mount option handlers
1389 */
1390
1391/* xino bitmap */
1392static void xino_clear_xib(struct super_block *sb)
1393{
1394 struct au_sbinfo *sbinfo;
1395
1396 SiMustWriteLock(sb);
1397
1398 sbinfo = au_sbi(sb);
1399 /* unnecessary to clear sbinfo->si_xread and ->si_xwrite */
1400 if (sbinfo->si_xib)
1401 fput(sbinfo->si_xib);
1402 sbinfo->si_xib = NULL;
1403 if (sbinfo->si_xib_buf)
1404 free_page((unsigned long)sbinfo->si_xib_buf);
1405 sbinfo->si_xib_buf = NULL;
1406}
1407
1408static int au_xino_set_xib(struct super_block *sb, struct path *path)
1409{
1410 int err;
1411 loff_t pos;
1412 struct au_sbinfo *sbinfo;
1413 struct file *file;
1414 struct super_block *xi_sb;
1415
1416 SiMustWriteLock(sb);
1417
1418 sbinfo = au_sbi(sb);
1419 file = au_xino_create2(sb, path, sbinfo->si_xib);
1420 err = PTR_ERR(file);
1421 if (IS_ERR(file))
1422 goto out;
1423 if (sbinfo->si_xib)
1424 fput(sbinfo->si_xib);
1425 sbinfo->si_xib = file;
1426 sbinfo->si_xread = vfs_readf(file);
1427 sbinfo->si_xwrite = vfs_writef(file);
1428 xi_sb = file_inode(file)->i_sb;
1429 sbinfo->si_ximaxent = xi_sb->s_maxbytes;
1430 if (unlikely(sbinfo->si_ximaxent < PAGE_SIZE)) {
1431 err = -EIO;
1432 pr_err("s_maxbytes(%llu) on %s is too small\n",
1433 (u64)sbinfo->si_ximaxent, au_sbtype(xi_sb));
1434 goto out_unset;
1435 }
1436 sbinfo->si_ximaxent /= sizeof(ino_t);
1437
1438 err = -ENOMEM;
1439 if (!sbinfo->si_xib_buf)
1440 sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
1441 if (unlikely(!sbinfo->si_xib_buf))
1442 goto out_unset;
1443
1444 sbinfo->si_xib_last_pindex = 0;
1445 sbinfo->si_xib_next_bit = 0;
1446 if (vfsub_f_size_read(file) < PAGE_SIZE) {
1447 pos = 0;
1448 err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
1449 PAGE_SIZE, &pos);
1450 if (unlikely(err != PAGE_SIZE))
1451 goto out_free;
1452 }
1453 err = 0;
1454 goto out; /* success */
1455
1456out_free:
1457 if (sbinfo->si_xib_buf)
1458 free_page((unsigned long)sbinfo->si_xib_buf);
1459 sbinfo->si_xib_buf = NULL;
1460 if (err >= 0)
1461 err = -EIO;
1462out_unset:
1463 fput(sbinfo->si_xib);
1464 sbinfo->si_xib = NULL;
1465out:
1466 AuTraceErr(err);
1467 return err;
1468}
1469
1470/* xino for each branch */
1471static void xino_clear_br(struct super_block *sb)
1472{
1473 aufs_bindex_t bindex, bbot;
1474 struct au_branch *br;
1475
1476 bbot = au_sbbot(sb);
1477 for (bindex = 0; bindex <= bbot; bindex++) {
1478 br = au_sbr(sb, bindex);
1479 AuDebugOn(!br);
1480 au_xino_put(br);
1481 }
1482}
1483
1484static void au_xino_set_br_shared(struct super_block *sb, struct au_branch *br,
1485 aufs_bindex_t bshared)
1486{
1487 struct au_branch *brshared;
1488
1489 brshared = au_sbr(sb, bshared);
1490 AuDebugOn(!brshared->br_xino);
1491 AuDebugOn(!brshared->br_xino->xi_file);
1492 if (br->br_xino != brshared->br_xino) {
1493 au_xino_get(brshared);
1494 au_xino_put(br);
1495 br->br_xino = brshared->br_xino;
1496 }
1497}
1498
1499struct au_xino_do_set_br {
1500 vfs_writef_t writef;
1501 struct au_branch *br;
1502 ino_t h_ino;
1503 aufs_bindex_t bshared;
1504};
1505
1506static int au_xino_do_set_br(struct super_block *sb, struct path *path,
1507 struct au_xino_do_set_br *args)
1508{
1509 int err;
1510 struct au_xi_calc calc;
1511 struct file *file;
1512 struct au_branch *br;
1513 struct au_xi_new xinew = {
1514 .base = path
1515 };
1516
1517 br = args->br;
1518 xinew.xi = br->br_xino;
1519 au_xi_calc(sb, args->h_ino, &calc);
1520 xinew.copy_src = au_xino_file(xinew.xi, calc.idx);
1521 if (args->bshared >= 0)
1522 /* shared xino */
1523 au_xino_set_br_shared(sb, br, args->bshared);
1524 else if (!xinew.xi) {
1525 /* new xino */
1526 err = au_xino_init(br, calc.idx, xinew.copy_src);
1527 if (unlikely(err))
1528 goto out;
1529 }
1530
1531 /* force re-creating */
1532 xinew.xi = br->br_xino;
1533 xinew.idx = calc.idx;
1534 mutex_lock(&xinew.xi->xi_mtx);
1535 file = au_xi_new(sb, &xinew);
1536 mutex_unlock(&xinew.xi->xi_mtx);
1537 err = PTR_ERR(file);
1538 if (IS_ERR(file))
1539 goto out;
1540 AuDebugOn(!file);
1541
1542 err = au_xino_do_write(args->writef, file, &calc, AUFS_ROOT_INO);
1543 if (unlikely(err))
1544 au_xino_put(br);
1545
1546out:
1547 AuTraceErr(err);
1548 return err;
1549}
1550
1551static int au_xino_set_br(struct super_block *sb, struct path *path)
1552{
1553 int err;
1554 aufs_bindex_t bindex, bbot;
1555 struct au_xino_do_set_br args;
1556 struct inode *inode;
1557
1558 SiMustWriteLock(sb);
1559
1560 bbot = au_sbbot(sb);
1561 inode = d_inode(sb->s_root);
1562 args.writef = au_sbi(sb)->si_xwrite;
1563 for (bindex = 0; bindex <= bbot; bindex++) {
1564 args.h_ino = au_h_iptr(inode, bindex)->i_ino;
1565 args.br = au_sbr(sb, bindex);
1566 args.bshared = is_sb_shared(sb, bindex, bindex - 1);
1567 err = au_xino_do_set_br(sb, path, &args);
1568 if (unlikely(err))
1569 break;
1570 }
1571
1572 AuTraceErr(err);
1573 return err;
1574}
1575
1576void au_xino_clr(struct super_block *sb)
1577{
1578 struct au_sbinfo *sbinfo;
1579
1580 au_xigen_clr(sb);
1581 xino_clear_xib(sb);
1582 xino_clear_br(sb);
1583 dbgaufs_brs_del(sb, 0);
1584 sbinfo = au_sbi(sb);
1585 /* lvalue, do not call au_mntflags() */
1586 au_opt_clr(sbinfo->si_mntflags, XINO);
1587}
1588
1589int au_xino_set(struct super_block *sb, struct au_opt_xino *xiopt, int remount)
1590{
1591 int err, skip;
1592 struct dentry *dentry, *parent, *cur_dentry, *cur_parent;
1593 struct qstr *dname, *cur_name;
1594 struct file *cur_xino;
1595 struct au_sbinfo *sbinfo;
1596 struct path *path, *cur_path;
1597
1598 SiMustWriteLock(sb);
1599
1600 err = 0;
1601 sbinfo = au_sbi(sb);
1602 path = &xiopt->file->f_path;
1603 dentry = path->dentry;
1604 parent = dget_parent(dentry);
1605 if (remount) {
1606 skip = 0;
1607 cur_xino = sbinfo->si_xib;
1608 if (cur_xino) {
1609 cur_path = &cur_xino->f_path;
1610 cur_dentry = cur_path->dentry;
1611 cur_parent = dget_parent(cur_dentry);
1612 cur_name = &cur_dentry->d_name;
1613 dname = &dentry->d_name;
1614 skip = (cur_parent == parent
1615 && au_qstreq(dname, cur_name));
1616 dput(cur_parent);
1617 }
1618 if (skip)
1619 goto out;
1620 }
1621
1622 au_opt_set(sbinfo->si_mntflags, XINO);
1623 err = au_xino_set_xib(sb, path);
1624 /* si_x{read,write} are set */
1625 if (!err)
1626 err = au_xigen_set(sb, path);
1627 if (!err)
1628 err = au_xino_set_br(sb, path);
1629 if (!err) {
1630 dbgaufs_brs_add(sb, 0, /*topdown*/1);
1631 goto out; /* success */
1632 }
1633
1634 /* reset all */
1635 AuIOErr("failed setting xino(%d).\n", err);
1636 au_xino_clr(sb);
1637
1638out:
1639 dput(parent);
1640 return err;
1641}
1642
1643/*
1644 * create a xinofile at the default place/path.
1645 */
1646struct file *au_xino_def(struct super_block *sb)
1647{
1648 struct file *file;
1649 char *page, *p;
1650 struct au_branch *br;
1651 struct super_block *h_sb;
1652 struct path path;
1653 aufs_bindex_t bbot, bindex, bwr;
1654
1655 br = NULL;
1656 bbot = au_sbbot(sb);
1657 bwr = -1;
1658 for (bindex = 0; bindex <= bbot; bindex++) {
1659 br = au_sbr(sb, bindex);
1660 if (au_br_writable(br->br_perm)
1661 && !au_test_fs_bad_xino(au_br_sb(br))) {
1662 bwr = bindex;
1663 break;
1664 }
1665 }
1666
1667 if (bwr >= 0) {
1668 file = ERR_PTR(-ENOMEM);
1669 page = (void *)__get_free_page(GFP_NOFS);
1670 if (unlikely(!page))
1671 goto out;
1672 path.mnt = au_br_mnt(br);
1673 path.dentry = au_h_dptr(sb->s_root, bwr);
1674 p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
1675 file = (void *)p;
1676 if (!IS_ERR(p)) {
1677 strcat(p, "/" AUFS_XINO_FNAME);
1678 AuDbg("%s\n", p);
1679 file = au_xino_create(sb, p, /*silent*/0, /*wbrtop*/1);
1680 }
1681 free_page((unsigned long)page);
1682 } else {
1683 file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0,
1684 /*wbrtop*/0);
1685 if (IS_ERR(file))
1686 goto out;
1687 h_sb = file->f_path.dentry->d_sb;
1688 if (unlikely(au_test_fs_bad_xino(h_sb))) {
1689 pr_err("xino doesn't support %s(%s)\n",
1690 AUFS_XINO_DEFPATH, au_sbtype(h_sb));
1691 fput(file);
1692 file = ERR_PTR(-EINVAL);
1693 }
1694 }
1695
1696out:
1697 return file;
1698}
1699
1700/* ---------------------------------------------------------------------- */
1701
1702/*
1703 * initialize the xinofile for the specified branch @br
1704 * at the place/path where @base_file indicates.
1705 * test whether another branch is on the same filesystem or not,
1706 * if found then share the xinofile with another branch.
1707 */
1708int au_xino_init_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
1709 struct path *base)
1710{
1711 int err;
1712 struct au_xino_do_set_br args = {
1713 .h_ino = h_ino,
1714 .br = br
1715 };
1716
1717 args.writef = au_sbi(sb)->si_xwrite;
1718 args.bshared = sbr_find_shared(sb, /*btop*/0, au_sbbot(sb),
1719 au_br_sb(br));
1720 err = au_xino_do_set_br(sb, base, &args);
1721 if (unlikely(err))
1722 au_xino_put(br);
1723
1724 return err;
1725}
1726
1727/* ---------------------------------------------------------------------- */
1728
1729/*
1730 * get an unused inode number from bitmap
1731 */
1732ino_t au_xino_new_ino(struct super_block *sb)
1733{
1734 ino_t ino;
1735 unsigned long *p, pindex, ul, pend;
1736 struct au_sbinfo *sbinfo;
1737 struct file *file;
1738 int free_bit, err;
1739
1740 if (!au_opt_test(au_mntflags(sb), XINO))
1741 return iunique(sb, AUFS_FIRST_INO);
1742
1743 sbinfo = au_sbi(sb);
1744 mutex_lock(&sbinfo->si_xib_mtx);
1745 p = sbinfo->si_xib_buf;
1746 free_bit = sbinfo->si_xib_next_bit;
1747 if (free_bit < page_bits && !test_bit(free_bit, p))
1748 goto out; /* success */
1749 free_bit = find_first_zero_bit(p, page_bits);
1750 if (free_bit < page_bits)
1751 goto out; /* success */
1752
1753 pindex = sbinfo->si_xib_last_pindex;
1754 for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
1755 err = xib_pindex(sb, ul);
1756 if (unlikely(err))
1757 goto out_err;
1758 free_bit = find_first_zero_bit(p, page_bits);
1759 if (free_bit < page_bits)
1760 goto out; /* success */
1761 }
1762
1763 file = sbinfo->si_xib;
1764 pend = vfsub_f_size_read(file) / PAGE_SIZE;
1765 for (ul = pindex + 1; ul <= pend; ul++) {
1766 err = xib_pindex(sb, ul);
1767 if (unlikely(err))
1768 goto out_err;
1769 free_bit = find_first_zero_bit(p, page_bits);
1770 if (free_bit < page_bits)
1771 goto out; /* success */
1772 }
1773 BUG();
1774
1775out:
1776 set_bit(free_bit, p);
1777 sbinfo->si_xib_next_bit = free_bit + 1;
1778 pindex = sbinfo->si_xib_last_pindex;
1779 mutex_unlock(&sbinfo->si_xib_mtx);
1780 ino = xib_calc_ino(pindex, free_bit);
1781 AuDbg("i%lu\n", (unsigned long)ino);
1782 return ino;
1783out_err:
1784 mutex_unlock(&sbinfo->si_xib_mtx);
1785 AuDbg("i0\n");
1786 return 0;
1787}
1788
1789/* for s_op->delete_inode() */
1790void au_xino_delete_inode(struct inode *inode, const int unlinked)
1791{
1792 int err;
1793 unsigned int mnt_flags;
1794 aufs_bindex_t bindex, bbot, bi;
1795 unsigned char try_trunc;
1796 struct au_iinfo *iinfo;
1797 struct super_block *sb;
1798 struct au_hinode *hi;
1799 struct inode *h_inode;
1800 struct au_branch *br;
1801 vfs_writef_t xwrite;
1802 struct au_xi_calc calc;
1803 struct file *file;
1804
1805 AuDebugOn(au_is_bad_inode(inode));
1806
1807 sb = inode->i_sb;
1808 mnt_flags = au_mntflags(sb);
1809 if (!au_opt_test(mnt_flags, XINO)
1810 || inode->i_ino == AUFS_ROOT_INO)
1811 return;
1812
1813 if (unlinked) {
1814 au_xigen_inc(inode);
1815 au_xib_clear_bit(inode);
1816 }
1817
1818 iinfo = au_ii(inode);
1819 bindex = iinfo->ii_btop;
1820 if (bindex < 0)
1821 return;
1822
1823 xwrite = au_sbi(sb)->si_xwrite;
1824 try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
1825 hi = au_hinode(iinfo, bindex);
1826 bbot = iinfo->ii_bbot;
1827 for (; bindex <= bbot; bindex++, hi++) {
1828 h_inode = hi->hi_inode;
1829 if (!h_inode
1830 || (!unlinked && h_inode->i_nlink))
1831 continue;
1832
1833 /* inode may not be revalidated */
1834 bi = au_br_index(sb, hi->hi_id);
1835 if (bi < 0)
1836 continue;
1837
1838 br = au_sbr(sb, bi);
1839 au_xi_calc(sb, h_inode->i_ino, &calc);
1840 file = au_xino_file(br->br_xino, calc.idx);
1841 if (IS_ERR_OR_NULL(file))
1842 continue;
1843
1844 err = au_xino_do_write(xwrite, file, &calc, /*ino*/0);
1845 if (!err && try_trunc
1846 && au_test_fs_trunc_xino(au_br_sb(br)))
1847 xino_try_trunc(sb, br);
1848 }
1849}
1850
1851/* ---------------------------------------------------------------------- */
1852
1853static int au_xinondir_find(struct au_xino *xi, ino_t h_ino)
1854{
1855 int found, total, i;
1856
1857 found = -1;
1858 total = xi->xi_nondir.total;
1859 for (i = 0; i < total; i++) {
1860 if (xi->xi_nondir.array[i] != h_ino)
1861 continue;
1862 found = i;
1863 break;
1864 }
1865
1866 return found;
1867}
1868
1869static int au_xinondir_expand(struct au_xino *xi)
1870{
1871 int err, sz;
1872 ino_t *p;
1873
1874 BUILD_BUG_ON(KMALLOC_MAX_SIZE > INT_MAX);
1875
1876 err = -ENOMEM;
1877 sz = xi->xi_nondir.total * sizeof(ino_t);
1878 if (unlikely(sz > KMALLOC_MAX_SIZE / 2))
1879 goto out;
1880 p = au_kzrealloc(xi->xi_nondir.array, sz, sz << 1, GFP_ATOMIC,
1881 /*may_shrink*/0);
1882 if (p) {
1883 xi->xi_nondir.array = p;
1884 xi->xi_nondir.total <<= 1;
1885 AuDbg("xi_nondir.total %d\n", xi->xi_nondir.total);
1886 err = 0;
1887 }
1888
1889out:
1890 return err;
1891}
1892
1893void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
1894 ino_t h_ino, int idx)
1895{
1896 struct au_xino *xi;
1897
1898 AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1899 xi = au_sbr(sb, bindex)->br_xino;
1900 AuDebugOn(idx < 0 || xi->xi_nondir.total <= idx);
1901
1902 spin_lock(&xi->xi_nondir.spin);
1903 AuDebugOn(xi->xi_nondir.array[idx] != h_ino);
1904 xi->xi_nondir.array[idx] = 0;
1905 spin_unlock(&xi->xi_nondir.spin);
1906 wake_up_all(&xi->xi_nondir.wqh);
1907}
1908
1909int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
1910 int *idx)
1911{
1912 int err, found, empty;
1913 struct au_xino *xi;
1914
1915 err = 0;
1916 *idx = -1;
1917 if (!au_opt_test(au_mntflags(sb), XINO))
1918 goto out; /* no xino */
1919
1920 xi = au_sbr(sb, bindex)->br_xino;
1921
1922again:
1923 spin_lock(&xi->xi_nondir.spin);
1924 found = au_xinondir_find(xi, h_ino);
1925 if (found == -1) {
1926 empty = au_xinondir_find(xi, /*h_ino*/0);
1927 if (empty == -1) {
1928 empty = xi->xi_nondir.total;
1929 err = au_xinondir_expand(xi);
1930 if (unlikely(err))
1931 goto out_unlock;
1932 }
1933 xi->xi_nondir.array[empty] = h_ino;
1934 *idx = empty;
1935 } else {
1936 spin_unlock(&xi->xi_nondir.spin);
1937 wait_event(xi->xi_nondir.wqh,
1938 xi->xi_nondir.array[found] != h_ino);
1939 goto again;
1940 }
1941
1942out_unlock:
1943 spin_unlock(&xi->xi_nondir.spin);
1944out:
1945 return err;
1946}
1947
1948/* ---------------------------------------------------------------------- */
1949
1950int au_xino_path(struct seq_file *seq, struct file *file)
1951{
1952 int err;
1953
1954 err = au_seq_path(seq, &file->f_path);
1955 if (unlikely(err))
1956 goto out;
1957
1958#define Deleted "\\040(deleted)"
1959 seq->count -= sizeof(Deleted) - 1;
1960 AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
1961 sizeof(Deleted) - 1));
1962#undef Deleted
1963
1964out:
1965 return err;
1966}