]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - fs/aufs/xino.c
UBUNTU: SAUCE: Import aufs driver
[mirror_ubuntu-artful-kernel.git] / fs / aufs / xino.c
1 /*
2 * Copyright (C) 2005-2017 Junjiro R. Okajima
3 *
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /*
19 * external inode number translation table and bitmap
20 */
21
22 #include <linux/seq_file.h>
23 #include <linux/statfs.h>
24 #include "aufs.h"
25
26 /* todo: unnecessary to support mmap_sem since kernel-space? */
27 ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
28 loff_t *pos)
29 {
30 ssize_t err;
31 mm_segment_t oldfs;
32 union {
33 void *k;
34 char __user *u;
35 } buf;
36
37 buf.k = kbuf;
38 oldfs = get_fs();
39 set_fs(KERNEL_DS);
40 do {
41 /* todo: signal_pending? */
42 err = func(file, buf.u, size, pos);
43 } while (err == -EAGAIN || err == -EINTR);
44 set_fs(oldfs);
45
46 #if 0 /* reserved for future use */
47 if (err > 0)
48 fsnotify_access(file->f_path.dentry);
49 #endif
50
51 return err;
52 }
53
54 /* ---------------------------------------------------------------------- */
55
56 static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
57 size_t size, loff_t *pos);
58
59 static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
60 size_t size, loff_t *pos)
61 {
62 ssize_t err;
63 mm_segment_t oldfs;
64 union {
65 void *k;
66 const char __user *u;
67 } buf;
68 int i;
69 const int prevent_endless = 10;
70
71 i = 0;
72 buf.k = kbuf;
73 oldfs = get_fs();
74 set_fs(KERNEL_DS);
75 do {
76 err = func(file, buf.u, size, pos);
77 if (err == -EINTR
78 && !au_wkq_test()
79 && fatal_signal_pending(current)) {
80 set_fs(oldfs);
81 err = xino_fwrite_wkq(func, file, kbuf, size, pos);
82 BUG_ON(err == -EINTR);
83 oldfs = get_fs();
84 set_fs(KERNEL_DS);
85 }
86 } while (i++ < prevent_endless
87 && (err == -EAGAIN || err == -EINTR));
88 set_fs(oldfs);
89
90 #if 0 /* reserved for future use */
91 if (err > 0)
92 fsnotify_modify(file->f_path.dentry);
93 #endif
94
95 return err;
96 }
97
98 struct do_xino_fwrite_args {
99 ssize_t *errp;
100 vfs_writef_t func;
101 struct file *file;
102 void *buf;
103 size_t size;
104 loff_t *pos;
105 };
106
107 static void call_do_xino_fwrite(void *args)
108 {
109 struct do_xino_fwrite_args *a = args;
110 *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
111 }
112
113 static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
114 size_t size, loff_t *pos)
115 {
116 ssize_t err;
117 int wkq_err;
118 struct do_xino_fwrite_args args = {
119 .errp = &err,
120 .func = func,
121 .file = file,
122 .buf = buf,
123 .size = size,
124 .pos = pos
125 };
126
127 /*
128 * it breaks RLIMIT_FSIZE and normal user's limit,
129 * users should care about quota and real 'filesystem full.'
130 */
131 wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
132 if (unlikely(wkq_err))
133 err = wkq_err;
134
135 return err;
136 }
137
138 ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
139 size_t size, loff_t *pos)
140 {
141 ssize_t err;
142
143 if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
144 lockdep_off();
145 err = do_xino_fwrite(func, file, buf, size, pos);
146 lockdep_on();
147 } else
148 err = xino_fwrite_wkq(func, file, buf, size, pos);
149
150 return err;
151 }
152
153 /* ---------------------------------------------------------------------- */
154
155 /*
156 * create a new xinofile at the same place/path as @base_file.
157 */
158 struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
159 {
160 struct file *file;
161 struct dentry *base, *parent;
162 struct inode *dir, *delegated;
163 struct qstr *name;
164 struct path path;
165 int err;
166
167 base = base_file->f_path.dentry;
168 parent = base->d_parent; /* dir inode is locked */
169 dir = d_inode(parent);
170 IMustLock(dir);
171
172 file = ERR_PTR(-EINVAL);
173 name = &base->d_name;
174 path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
175 if (IS_ERR(path.dentry)) {
176 file = (void *)path.dentry;
177 pr_err("%pd lookup err %ld\n",
178 base, PTR_ERR(path.dentry));
179 goto out;
180 }
181
182 /* no need to mnt_want_write() since we call dentry_open() later */
183 err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
184 if (unlikely(err)) {
185 file = ERR_PTR(err);
186 pr_err("%pd create err %d\n", base, err);
187 goto out_dput;
188 }
189
190 path.mnt = base_file->f_path.mnt;
191 file = vfsub_dentry_open(&path,
192 O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
193 /* | __FMODE_NONOTIFY */);
194 if (IS_ERR(file)) {
195 pr_err("%pd open err %ld\n", base, PTR_ERR(file));
196 goto out_dput;
197 }
198
199 delegated = NULL;
200 err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
201 if (unlikely(err == -EWOULDBLOCK)) {
202 pr_warn("cannot retry for NFSv4 delegation"
203 " for an internal unlink\n");
204 iput(delegated);
205 }
206 if (unlikely(err)) {
207 pr_err("%pd unlink err %d\n", base, err);
208 goto out_fput;
209 }
210
211 if (copy_src) {
212 /* no one can touch copy_src xino */
213 err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
214 if (unlikely(err)) {
215 pr_err("%pd copy err %d\n", base, err);
216 goto out_fput;
217 }
218 }
219 goto out_dput; /* success */
220
221 out_fput:
222 fput(file);
223 file = ERR_PTR(err);
224 out_dput:
225 dput(path.dentry);
226 out:
227 return file;
228 }
229
230 struct au_xino_lock_dir {
231 struct au_hinode *hdir;
232 struct dentry *parent;
233 struct inode *dir;
234 };
235
236 static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
237 struct au_xino_lock_dir *ldir)
238 {
239 aufs_bindex_t brid, bindex;
240
241 ldir->hdir = NULL;
242 bindex = -1;
243 brid = au_xino_brid(sb);
244 if (brid >= 0)
245 bindex = au_br_index(sb, brid);
246 if (bindex >= 0) {
247 ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
248 au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
249 } else {
250 ldir->parent = dget_parent(xino->f_path.dentry);
251 ldir->dir = d_inode(ldir->parent);
252 inode_lock_nested(ldir->dir, AuLsc_I_PARENT);
253 }
254 }
255
256 static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
257 {
258 if (ldir->hdir)
259 au_hn_inode_unlock(ldir->hdir);
260 else {
261 inode_unlock(ldir->dir);
262 dput(ldir->parent);
263 }
264 }
265
266 /* ---------------------------------------------------------------------- */
267
/* truncate xino files asynchronously */
269
270 int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
271 {
272 int err;
273 unsigned long jiffy;
274 blkcnt_t blocks;
275 aufs_bindex_t bi, bbot;
276 struct kstatfs *st;
277 struct au_branch *br;
278 struct file *new_xino, *file;
279 struct super_block *h_sb;
280 struct au_xino_lock_dir ldir;
281
282 err = -ENOMEM;
283 st = kmalloc(sizeof(*st), GFP_NOFS);
284 if (unlikely(!st))
285 goto out;
286
287 err = -EINVAL;
288 bbot = au_sbbot(sb);
289 if (unlikely(bindex < 0 || bbot < bindex))
290 goto out_st;
291 br = au_sbr(sb, bindex);
292 file = br->br_xino.xi_file;
293 if (!file)
294 goto out_st;
295
296 err = vfs_statfs(&file->f_path, st);
297 if (unlikely(err))
298 AuErr1("statfs err %d, ignored\n", err);
299 jiffy = jiffies;
300 blocks = file_inode(file)->i_blocks;
301 pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
302 bindex, (u64)blocks, st->f_bfree, st->f_blocks);
303
304 au_xino_lock_dir(sb, file, &ldir);
305 /* mnt_want_write() is unnecessary here */
306 new_xino = au_xino_create2(file, file);
307 au_xino_unlock_dir(&ldir);
308 err = PTR_ERR(new_xino);
309 if (IS_ERR(new_xino)) {
310 pr_err("err %d, ignored\n", err);
311 goto out_st;
312 }
313 err = 0;
314 fput(file);
315 br->br_xino.xi_file = new_xino;
316
317 h_sb = au_br_sb(br);
318 for (bi = 0; bi <= bbot; bi++) {
319 if (unlikely(bi == bindex))
320 continue;
321 br = au_sbr(sb, bi);
322 if (au_br_sb(br) != h_sb)
323 continue;
324
325 fput(br->br_xino.xi_file);
326 br->br_xino.xi_file = new_xino;
327 get_file(new_xino);
328 }
329
330 err = vfs_statfs(&new_xino->f_path, st);
331 if (!err) {
332 pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
333 bindex, (u64)file_inode(new_xino)->i_blocks,
334 st->f_bfree, st->f_blocks);
335 if (file_inode(new_xino)->i_blocks < blocks)
336 au_sbi(sb)->si_xino_jiffy = jiffy;
337 } else
338 AuErr1("statfs err %d, ignored\n", err);
339
340 out_st:
341 kfree(st);
342 out:
343 return err;
344 }
345
346 struct xino_do_trunc_args {
347 struct super_block *sb;
348 struct au_branch *br;
349 };
350
351 static void xino_do_trunc(void *_args)
352 {
353 struct xino_do_trunc_args *args = _args;
354 struct super_block *sb;
355 struct au_branch *br;
356 struct inode *dir;
357 int err;
358 aufs_bindex_t bindex;
359
360 err = 0;
361 sb = args->sb;
362 dir = d_inode(sb->s_root);
363 br = args->br;
364
365 si_noflush_write_lock(sb);
366 ii_read_lock_parent(dir);
367 bindex = au_br_index(sb, br->br_id);
368 err = au_xino_trunc(sb, bindex);
369 ii_read_unlock(dir);
370 if (unlikely(err))
371 pr_warn("err b%d, (%d)\n", bindex, err);
372 atomic_dec(&br->br_xino_running);
373 au_br_put(br);
374 si_write_unlock(sb);
375 au_nwt_done(&au_sbi(sb)->si_nowait);
376 kfree(args);
377 }
378
379 static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
380 {
381 int err;
382 struct kstatfs st;
383 struct au_sbinfo *sbinfo;
384
385 /* todo: si_xino_expire and the ratio should be customizable */
386 sbinfo = au_sbi(sb);
387 if (time_before(jiffies,
388 sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
389 return 0;
390
391 /* truncation border */
392 err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
393 if (unlikely(err)) {
394 AuErr1("statfs err %d, ignored\n", err);
395 return 0;
396 }
397 if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
398 return 0;
399
400 return 1;
401 }
402
403 static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
404 {
405 struct xino_do_trunc_args *args;
406 int wkq_err;
407
408 if (!xino_trunc_test(sb, br))
409 return;
410
411 if (atomic_inc_return(&br->br_xino_running) > 1)
412 goto out;
413
414 /* lock and kfree() will be called in trunc_xino() */
415 args = kmalloc(sizeof(*args), GFP_NOFS);
416 if (unlikely(!args)) {
417 AuErr1("no memory\n");
418 goto out;
419 }
420
421 au_br_get(br);
422 args->sb = sb;
423 args->br = br;
424 wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
425 if (!wkq_err)
426 return; /* success */
427
428 pr_err("wkq %d\n", wkq_err);
429 au_br_put(br);
430 kfree(args);
431
432 out:
433 atomic_dec(&br->br_xino_running);
434 }
435
436 /* ---------------------------------------------------------------------- */
437
438 static int au_xino_do_write(vfs_writef_t write, struct file *file,
439 ino_t h_ino, ino_t ino)
440 {
441 loff_t pos;
442 ssize_t sz;
443
444 pos = h_ino;
445 if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
446 AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
447 return -EFBIG;
448 }
449 pos *= sizeof(ino);
450 sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
451 if (sz == sizeof(ino))
452 return 0; /* success */
453
454 AuIOErr("write failed (%zd)\n", sz);
455 return -EIO;
456 }
457
458 /*
459 * write @ino to the xinofile for the specified branch{@sb, @bindex}
460 * at the position of @h_ino.
461 * even if @ino is zero, it is written to the xinofile and means no entry.
462 * if the size of the xino file on a specific filesystem exceeds the watermark,
463 * try truncating it.
464 */
465 int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
466 ino_t ino)
467 {
468 int err;
469 unsigned int mnt_flags;
470 struct au_branch *br;
471
472 BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
473 || ((loff_t)-1) > 0);
474 SiMustAnyLock(sb);
475
476 mnt_flags = au_mntflags(sb);
477 if (!au_opt_test(mnt_flags, XINO))
478 return 0;
479
480 br = au_sbr(sb, bindex);
481 err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
482 h_ino, ino);
483 if (!err) {
484 if (au_opt_test(mnt_flags, TRUNC_XINO)
485 && au_test_fs_trunc_xino(au_br_sb(br)))
486 xino_try_trunc(sb, br);
487 return 0; /* success */
488 }
489
490 AuIOErr("write failed (%d)\n", err);
491 return -EIO;
492 }
493
494 /* ---------------------------------------------------------------------- */
495
496 /* aufs inode number bitmap */
497
498 static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
499 static ino_t xib_calc_ino(unsigned long pindex, int bit)
500 {
501 ino_t ino;
502
503 AuDebugOn(bit < 0 || page_bits <= bit);
504 ino = AUFS_FIRST_INO + pindex * page_bits + bit;
505 return ino;
506 }
507
508 static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
509 {
510 AuDebugOn(ino < AUFS_FIRST_INO);
511 ino -= AUFS_FIRST_INO;
512 *pindex = ino / page_bits;
513 *bit = ino % page_bits;
514 }
515
516 static int xib_pindex(struct super_block *sb, unsigned long pindex)
517 {
518 int err;
519 loff_t pos;
520 ssize_t sz;
521 struct au_sbinfo *sbinfo;
522 struct file *xib;
523 unsigned long *p;
524
525 sbinfo = au_sbi(sb);
526 MtxMustLock(&sbinfo->si_xib_mtx);
527 AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
528 || !au_opt_test(sbinfo->si_mntflags, XINO));
529
530 if (pindex == sbinfo->si_xib_last_pindex)
531 return 0;
532
533 xib = sbinfo->si_xib;
534 p = sbinfo->si_xib_buf;
535 pos = sbinfo->si_xib_last_pindex;
536 pos *= PAGE_SIZE;
537 sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
538 if (unlikely(sz != PAGE_SIZE))
539 goto out;
540
541 pos = pindex;
542 pos *= PAGE_SIZE;
543 if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
544 sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
545 else {
546 memset(p, 0, PAGE_SIZE);
547 sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
548 }
549 if (sz == PAGE_SIZE) {
550 sbinfo->si_xib_last_pindex = pindex;
551 return 0; /* success */
552 }
553
554 out:
555 AuIOErr1("write failed (%zd)\n", sz);
556 err = sz;
557 if (sz >= 0)
558 err = -EIO;
559 return err;
560 }
561
562 /* ---------------------------------------------------------------------- */
563
564 static void au_xib_clear_bit(struct inode *inode)
565 {
566 int err, bit;
567 unsigned long pindex;
568 struct super_block *sb;
569 struct au_sbinfo *sbinfo;
570
571 AuDebugOn(inode->i_nlink);
572
573 sb = inode->i_sb;
574 xib_calc_bit(inode->i_ino, &pindex, &bit);
575 AuDebugOn(page_bits <= bit);
576 sbinfo = au_sbi(sb);
577 mutex_lock(&sbinfo->si_xib_mtx);
578 err = xib_pindex(sb, pindex);
579 if (!err) {
580 clear_bit(bit, sbinfo->si_xib_buf);
581 sbinfo->si_xib_next_bit = bit;
582 }
583 mutex_unlock(&sbinfo->si_xib_mtx);
584 }
585
586 /* for s_op->delete_inode() */
587 void au_xino_delete_inode(struct inode *inode, const int unlinked)
588 {
589 int err;
590 unsigned int mnt_flags;
591 aufs_bindex_t bindex, bbot, bi;
592 unsigned char try_trunc;
593 struct au_iinfo *iinfo;
594 struct super_block *sb;
595 struct au_hinode *hi;
596 struct inode *h_inode;
597 struct au_branch *br;
598 vfs_writef_t xwrite;
599
600 AuDebugOn(au_is_bad_inode(inode));
601
602 sb = inode->i_sb;
603 mnt_flags = au_mntflags(sb);
604 if (!au_opt_test(mnt_flags, XINO)
605 || inode->i_ino == AUFS_ROOT_INO)
606 return;
607
608 if (unlinked) {
609 au_xigen_inc(inode);
610 au_xib_clear_bit(inode);
611 }
612
613 iinfo = au_ii(inode);
614 bindex = iinfo->ii_btop;
615 if (bindex < 0)
616 return;
617
618 xwrite = au_sbi(sb)->si_xwrite;
619 try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
620 hi = au_hinode(iinfo, bindex);
621 bbot = iinfo->ii_bbot;
622 for (; bindex <= bbot; bindex++, hi++) {
623 h_inode = hi->hi_inode;
624 if (!h_inode
625 || (!unlinked && h_inode->i_nlink))
626 continue;
627
628 /* inode may not be revalidated */
629 bi = au_br_index(sb, hi->hi_id);
630 if (bi < 0)
631 continue;
632
633 br = au_sbr(sb, bi);
634 err = au_xino_do_write(xwrite, br->br_xino.xi_file,
635 h_inode->i_ino, /*ino*/0);
636 if (!err && try_trunc
637 && au_test_fs_trunc_xino(au_br_sb(br)))
638 xino_try_trunc(sb, br);
639 }
640 }
641
642 /* get an unused inode number from bitmap */
643 ino_t au_xino_new_ino(struct super_block *sb)
644 {
645 ino_t ino;
646 unsigned long *p, pindex, ul, pend;
647 struct au_sbinfo *sbinfo;
648 struct file *file;
649 int free_bit, err;
650
651 if (!au_opt_test(au_mntflags(sb), XINO))
652 return iunique(sb, AUFS_FIRST_INO);
653
654 sbinfo = au_sbi(sb);
655 mutex_lock(&sbinfo->si_xib_mtx);
656 p = sbinfo->si_xib_buf;
657 free_bit = sbinfo->si_xib_next_bit;
658 if (free_bit < page_bits && !test_bit(free_bit, p))
659 goto out; /* success */
660 free_bit = find_first_zero_bit(p, page_bits);
661 if (free_bit < page_bits)
662 goto out; /* success */
663
664 pindex = sbinfo->si_xib_last_pindex;
665 for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
666 err = xib_pindex(sb, ul);
667 if (unlikely(err))
668 goto out_err;
669 free_bit = find_first_zero_bit(p, page_bits);
670 if (free_bit < page_bits)
671 goto out; /* success */
672 }
673
674 file = sbinfo->si_xib;
675 pend = vfsub_f_size_read(file) / PAGE_SIZE;
676 for (ul = pindex + 1; ul <= pend; ul++) {
677 err = xib_pindex(sb, ul);
678 if (unlikely(err))
679 goto out_err;
680 free_bit = find_first_zero_bit(p, page_bits);
681 if (free_bit < page_bits)
682 goto out; /* success */
683 }
684 BUG();
685
686 out:
687 set_bit(free_bit, p);
688 sbinfo->si_xib_next_bit = free_bit + 1;
689 pindex = sbinfo->si_xib_last_pindex;
690 mutex_unlock(&sbinfo->si_xib_mtx);
691 ino = xib_calc_ino(pindex, free_bit);
692 AuDbg("i%lu\n", (unsigned long)ino);
693 return ino;
694 out_err:
695 mutex_unlock(&sbinfo->si_xib_mtx);
696 AuDbg("i0\n");
697 return 0;
698 }
699
700 /*
701 * read @ino from xinofile for the specified branch{@sb, @bindex}
702 * at the position of @h_ino.
703 * if @ino does not exist and @do_new is true, get new one.
704 */
705 int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
706 ino_t *ino)
707 {
708 int err;
709 ssize_t sz;
710 loff_t pos;
711 struct file *file;
712 struct au_sbinfo *sbinfo;
713
714 *ino = 0;
715 if (!au_opt_test(au_mntflags(sb), XINO))
716 return 0; /* no xino */
717
718 err = 0;
719 sbinfo = au_sbi(sb);
720 pos = h_ino;
721 if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
722 AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
723 return -EFBIG;
724 }
725 pos *= sizeof(*ino);
726
727 file = au_sbr(sb, bindex)->br_xino.xi_file;
728 if (vfsub_f_size_read(file) < pos + sizeof(*ino))
729 return 0; /* no ino */
730
731 sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
732 if (sz == sizeof(*ino))
733 return 0; /* success */
734
735 err = sz;
736 if (unlikely(sz >= 0)) {
737 err = -EIO;
738 AuIOErr("xino read error (%zd)\n", sz);
739 }
740
741 return err;
742 }
743
744 /* ---------------------------------------------------------------------- */
745
746 /* create and set a new xino file */
747
748 struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
749 {
750 struct file *file;
751 struct dentry *h_parent, *d;
752 struct inode *h_dir, *inode;
753 int err;
754
755 /*
756 * at mount-time, and the xino file is the default path,
757 * hnotify is disabled so we have no notify events to ignore.
758 * when a user specified the xino, we cannot get au_hdir to be ignored.
759 */
760 file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
761 /* | __FMODE_NONOTIFY */,
762 S_IRUGO | S_IWUGO);
763 if (IS_ERR(file)) {
764 if (!silent)
765 pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
766 return file;
767 }
768
769 /* keep file count */
770 err = 0;
771 inode = file_inode(file);
772 h_parent = dget_parent(file->f_path.dentry);
773 h_dir = d_inode(h_parent);
774 inode_lock_nested(h_dir, AuLsc_I_PARENT);
775 /* mnt_want_write() is unnecessary here */
776 /* no delegation since it is just created */
777 if (inode->i_nlink)
778 err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
779 /*force*/0);
780 inode_unlock(h_dir);
781 dput(h_parent);
782 if (unlikely(err)) {
783 if (!silent)
784 pr_err("unlink %s(%d)\n", fname, err);
785 goto out;
786 }
787
788 err = -EINVAL;
789 d = file->f_path.dentry;
790 if (unlikely(sb == d->d_sb)) {
791 if (!silent)
792 pr_err("%s must be outside\n", fname);
793 goto out;
794 }
795 if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
796 if (!silent)
797 pr_err("xino doesn't support %s(%s)\n",
798 fname, au_sbtype(d->d_sb));
799 goto out;
800 }
801 return file; /* success */
802
803 out:
804 fput(file);
805 file = ERR_PTR(err);
806 return file;
807 }
808
809 /*
810 * find another branch who is on the same filesystem of the specified
811 * branch{@btgt}. search until @bbot.
812 */
813 static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
814 aufs_bindex_t bbot)
815 {
816 aufs_bindex_t bindex;
817 struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
818
819 for (bindex = 0; bindex < btgt; bindex++)
820 if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
821 return bindex;
822 for (bindex++; bindex <= bbot; bindex++)
823 if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
824 return bindex;
825 return -1;
826 }
827
828 /* ---------------------------------------------------------------------- */
829
830 /*
831 * initialize the xinofile for the specified branch @br
832 * at the place/path where @base_file indicates.
833 * test whether another branch is on the same filesystem or not,
834 * if @do_test is true.
835 */
836 int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
837 struct file *base_file, int do_test)
838 {
839 int err;
840 ino_t ino;
841 aufs_bindex_t bbot, bindex;
842 struct au_branch *shared_br, *b;
843 struct file *file;
844 struct super_block *tgt_sb;
845
846 shared_br = NULL;
847 bbot = au_sbbot(sb);
848 if (do_test) {
849 tgt_sb = au_br_sb(br);
850 for (bindex = 0; bindex <= bbot; bindex++) {
851 b = au_sbr(sb, bindex);
852 if (tgt_sb == au_br_sb(b)) {
853 shared_br = b;
854 break;
855 }
856 }
857 }
858
859 if (!shared_br || !shared_br->br_xino.xi_file) {
860 struct au_xino_lock_dir ldir;
861
862 au_xino_lock_dir(sb, base_file, &ldir);
863 /* mnt_want_write() is unnecessary here */
864 file = au_xino_create2(base_file, NULL);
865 au_xino_unlock_dir(&ldir);
866 err = PTR_ERR(file);
867 if (IS_ERR(file))
868 goto out;
869 br->br_xino.xi_file = file;
870 } else {
871 br->br_xino.xi_file = shared_br->br_xino.xi_file;
872 get_file(br->br_xino.xi_file);
873 }
874
875 ino = AUFS_ROOT_INO;
876 err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
877 h_ino, ino);
878 if (unlikely(err)) {
879 fput(br->br_xino.xi_file);
880 br->br_xino.xi_file = NULL;
881 }
882
883 out:
884 return err;
885 }
886
887 /* ---------------------------------------------------------------------- */
888
/* truncate a xino bitmap file */
890
891 /* todo: slow */
892 static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
893 {
894 int err, bit;
895 ssize_t sz;
896 unsigned long pindex;
897 loff_t pos, pend;
898 struct au_sbinfo *sbinfo;
899 vfs_readf_t func;
900 ino_t *ino;
901 unsigned long *p;
902
903 err = 0;
904 sbinfo = au_sbi(sb);
905 MtxMustLock(&sbinfo->si_xib_mtx);
906 p = sbinfo->si_xib_buf;
907 func = sbinfo->si_xread;
908 pend = vfsub_f_size_read(file);
909 pos = 0;
910 while (pos < pend) {
911 sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
912 err = sz;
913 if (unlikely(sz <= 0))
914 goto out;
915
916 err = 0;
917 for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
918 if (unlikely(*ino < AUFS_FIRST_INO))
919 continue;
920
921 xib_calc_bit(*ino, &pindex, &bit);
922 AuDebugOn(page_bits <= bit);
923 err = xib_pindex(sb, pindex);
924 if (!err)
925 set_bit(bit, p);
926 else
927 goto out;
928 }
929 }
930
931 out:
932 return err;
933 }
934
935 static int xib_restore(struct super_block *sb)
936 {
937 int err;
938 aufs_bindex_t bindex, bbot;
939 void *page;
940
941 err = -ENOMEM;
942 page = (void *)__get_free_page(GFP_NOFS);
943 if (unlikely(!page))
944 goto out;
945
946 err = 0;
947 bbot = au_sbbot(sb);
948 for (bindex = 0; !err && bindex <= bbot; bindex++)
949 if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
950 err = do_xib_restore
951 (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
952 else
953 AuDbg("b%d\n", bindex);
954 free_page((unsigned long)page);
955
956 out:
957 return err;
958 }
959
960 int au_xib_trunc(struct super_block *sb)
961 {
962 int err;
963 ssize_t sz;
964 loff_t pos;
965 struct au_xino_lock_dir ldir;
966 struct au_sbinfo *sbinfo;
967 unsigned long *p;
968 struct file *file;
969
970 SiMustWriteLock(sb);
971
972 err = 0;
973 sbinfo = au_sbi(sb);
974 if (!au_opt_test(sbinfo->si_mntflags, XINO))
975 goto out;
976
977 file = sbinfo->si_xib;
978 if (vfsub_f_size_read(file) <= PAGE_SIZE)
979 goto out;
980
981 au_xino_lock_dir(sb, file, &ldir);
982 /* mnt_want_write() is unnecessary here */
983 file = au_xino_create2(sbinfo->si_xib, NULL);
984 au_xino_unlock_dir(&ldir);
985 err = PTR_ERR(file);
986 if (IS_ERR(file))
987 goto out;
988 fput(sbinfo->si_xib);
989 sbinfo->si_xib = file;
990
991 p = sbinfo->si_xib_buf;
992 memset(p, 0, PAGE_SIZE);
993 pos = 0;
994 sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
995 if (unlikely(sz != PAGE_SIZE)) {
996 err = sz;
997 AuIOErr("err %d\n", err);
998 if (sz >= 0)
999 err = -EIO;
1000 goto out;
1001 }
1002
1003 mutex_lock(&sbinfo->si_xib_mtx);
1004 /* mnt_want_write() is unnecessary here */
1005 err = xib_restore(sb);
1006 mutex_unlock(&sbinfo->si_xib_mtx);
1007
1008 out:
1009 return err;
1010 }
1011
1012 /* ---------------------------------------------------------------------- */
1013
1014 /*
1015 * xino mount option handlers
1016 */
1017
1018 /* xino bitmap */
1019 static void xino_clear_xib(struct super_block *sb)
1020 {
1021 struct au_sbinfo *sbinfo;
1022
1023 SiMustWriteLock(sb);
1024
1025 sbinfo = au_sbi(sb);
1026 sbinfo->si_xread = NULL;
1027 sbinfo->si_xwrite = NULL;
1028 if (sbinfo->si_xib)
1029 fput(sbinfo->si_xib);
1030 sbinfo->si_xib = NULL;
1031 if (sbinfo->si_xib_buf)
1032 free_page((unsigned long)sbinfo->si_xib_buf);
1033 sbinfo->si_xib_buf = NULL;
1034 }
1035
1036 static int au_xino_set_xib(struct super_block *sb, struct file *base)
1037 {
1038 int err;
1039 loff_t pos;
1040 struct au_sbinfo *sbinfo;
1041 struct file *file;
1042
1043 SiMustWriteLock(sb);
1044
1045 sbinfo = au_sbi(sb);
1046 file = au_xino_create2(base, sbinfo->si_xib);
1047 err = PTR_ERR(file);
1048 if (IS_ERR(file))
1049 goto out;
1050 if (sbinfo->si_xib)
1051 fput(sbinfo->si_xib);
1052 sbinfo->si_xib = file;
1053 sbinfo->si_xread = vfs_readf(file);
1054 sbinfo->si_xwrite = vfs_writef(file);
1055
1056 err = -ENOMEM;
1057 if (!sbinfo->si_xib_buf)
1058 sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
1059 if (unlikely(!sbinfo->si_xib_buf))
1060 goto out_unset;
1061
1062 sbinfo->si_xib_last_pindex = 0;
1063 sbinfo->si_xib_next_bit = 0;
1064 if (vfsub_f_size_read(file) < PAGE_SIZE) {
1065 pos = 0;
1066 err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
1067 PAGE_SIZE, &pos);
1068 if (unlikely(err != PAGE_SIZE))
1069 goto out_free;
1070 }
1071 err = 0;
1072 goto out; /* success */
1073
1074 out_free:
1075 if (sbinfo->si_xib_buf)
1076 free_page((unsigned long)sbinfo->si_xib_buf);
1077 sbinfo->si_xib_buf = NULL;
1078 if (err >= 0)
1079 err = -EIO;
1080 out_unset:
1081 fput(sbinfo->si_xib);
1082 sbinfo->si_xib = NULL;
1083 sbinfo->si_xread = NULL;
1084 sbinfo->si_xwrite = NULL;
1085 out:
1086 return err;
1087 }
1088
1089 /* xino for each branch */
1090 static void xino_clear_br(struct super_block *sb)
1091 {
1092 aufs_bindex_t bindex, bbot;
1093 struct au_branch *br;
1094
1095 bbot = au_sbbot(sb);
1096 for (bindex = 0; bindex <= bbot; bindex++) {
1097 br = au_sbr(sb, bindex);
1098 if (!br || !br->br_xino.xi_file)
1099 continue;
1100
1101 fput(br->br_xino.xi_file);
1102 br->br_xino.xi_file = NULL;
1103 }
1104 }
1105
1106 static int au_xino_set_br(struct super_block *sb, struct file *base)
1107 {
1108 int err;
1109 ino_t ino;
1110 aufs_bindex_t bindex, bbot, bshared;
1111 struct {
1112 struct file *old, *new;
1113 } *fpair, *p;
1114 struct au_branch *br;
1115 struct inode *inode;
1116 vfs_writef_t writef;
1117
1118 SiMustWriteLock(sb);
1119
1120 err = -ENOMEM;
1121 bbot = au_sbbot(sb);
1122 fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
1123 if (unlikely(!fpair))
1124 goto out;
1125
1126 inode = d_inode(sb->s_root);
1127 ino = AUFS_ROOT_INO;
1128 writef = au_sbi(sb)->si_xwrite;
1129 for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
1130 bshared = is_sb_shared(sb, bindex, bindex - 1);
1131 if (bshared >= 0) {
1132 /* shared xino */
1133 *p = fpair[bshared];
1134 get_file(p->new);
1135 }
1136
1137 if (!p->new) {
1138 /* new xino */
1139 br = au_sbr(sb, bindex);
1140 p->old = br->br_xino.xi_file;
1141 p->new = au_xino_create2(base, br->br_xino.xi_file);
1142 err = PTR_ERR(p->new);
1143 if (IS_ERR(p->new)) {
1144 p->new = NULL;
1145 goto out_pair;
1146 }
1147 }
1148
1149 err = au_xino_do_write(writef, p->new,
1150 au_h_iptr(inode, bindex)->i_ino, ino);
1151 if (unlikely(err))
1152 goto out_pair;
1153 }
1154
1155 for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
1156 br = au_sbr(sb, bindex);
1157 if (br->br_xino.xi_file)
1158 fput(br->br_xino.xi_file);
1159 get_file(p->new);
1160 br->br_xino.xi_file = p->new;
1161 }
1162
1163 out_pair:
1164 for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
1165 if (p->new)
1166 fput(p->new);
1167 else
1168 break;
1169 kfree(fpair);
1170 out:
1171 return err;
1172 }
1173
1174 void au_xino_clr(struct super_block *sb)
1175 {
1176 struct au_sbinfo *sbinfo;
1177
1178 au_xigen_clr(sb);
1179 xino_clear_xib(sb);
1180 xino_clear_br(sb);
1181 sbinfo = au_sbi(sb);
1182 /* lvalue, do not call au_mntflags() */
1183 au_opt_clr(sbinfo->si_mntflags, XINO);
1184 }
1185
1186 int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
1187 {
1188 int err, skip;
1189 struct dentry *parent, *cur_parent;
1190 struct qstr *dname, *cur_name;
1191 struct file *cur_xino;
1192 struct inode *dir;
1193 struct au_sbinfo *sbinfo;
1194
1195 SiMustWriteLock(sb);
1196
1197 err = 0;
1198 sbinfo = au_sbi(sb);
1199 parent = dget_parent(xino->file->f_path.dentry);
1200 if (remount) {
1201 skip = 0;
1202 dname = &xino->file->f_path.dentry->d_name;
1203 cur_xino = sbinfo->si_xib;
1204 if (cur_xino) {
1205 cur_parent = dget_parent(cur_xino->f_path.dentry);
1206 cur_name = &cur_xino->f_path.dentry->d_name;
1207 skip = (cur_parent == parent
1208 && au_qstreq(dname, cur_name));
1209 dput(cur_parent);
1210 }
1211 if (skip)
1212 goto out;
1213 }
1214
1215 au_opt_set(sbinfo->si_mntflags, XINO);
1216 dir = d_inode(parent);
1217 inode_lock_nested(dir, AuLsc_I_PARENT);
1218 /* mnt_want_write() is unnecessary here */
1219 err = au_xino_set_xib(sb, xino->file);
1220 if (!err)
1221 err = au_xigen_set(sb, xino->file);
1222 if (!err)
1223 err = au_xino_set_br(sb, xino->file);
1224 inode_unlock(dir);
1225 if (!err)
1226 goto out; /* success */
1227
1228 /* reset all */
1229 AuIOErr("failed creating xino(%d).\n", err);
1230 au_xigen_clr(sb);
1231 xino_clear_xib(sb);
1232
1233 out:
1234 dput(parent);
1235 return err;
1236 }
1237
1238 /* ---------------------------------------------------------------------- */
1239
1240 /*
1241 * create a xinofile at the default place/path.
1242 */
1243 struct file *au_xino_def(struct super_block *sb)
1244 {
1245 struct file *file;
1246 char *page, *p;
1247 struct au_branch *br;
1248 struct super_block *h_sb;
1249 struct path path;
1250 aufs_bindex_t bbot, bindex, bwr;
1251
1252 br = NULL;
1253 bbot = au_sbbot(sb);
1254 bwr = -1;
1255 for (bindex = 0; bindex <= bbot; bindex++) {
1256 br = au_sbr(sb, bindex);
1257 if (au_br_writable(br->br_perm)
1258 && !au_test_fs_bad_xino(au_br_sb(br))) {
1259 bwr = bindex;
1260 break;
1261 }
1262 }
1263
1264 if (bwr >= 0) {
1265 file = ERR_PTR(-ENOMEM);
1266 page = (void *)__get_free_page(GFP_NOFS);
1267 if (unlikely(!page))
1268 goto out;
1269 path.mnt = au_br_mnt(br);
1270 path.dentry = au_h_dptr(sb->s_root, bwr);
1271 p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
1272 file = (void *)p;
1273 if (!IS_ERR(p)) {
1274 strcat(p, "/" AUFS_XINO_FNAME);
1275 AuDbg("%s\n", p);
1276 file = au_xino_create(sb, p, /*silent*/0);
1277 if (!IS_ERR(file))
1278 au_xino_brid_set(sb, br->br_id);
1279 }
1280 free_page((unsigned long)page);
1281 } else {
1282 file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
1283 if (IS_ERR(file))
1284 goto out;
1285 h_sb = file->f_path.dentry->d_sb;
1286 if (unlikely(au_test_fs_bad_xino(h_sb))) {
1287 pr_err("xino doesn't support %s(%s)\n",
1288 AUFS_XINO_DEFPATH, au_sbtype(h_sb));
1289 fput(file);
1290 file = ERR_PTR(-EINVAL);
1291 }
1292 if (!IS_ERR(file))
1293 au_xino_brid_set(sb, -1);
1294 }
1295
1296 out:
1297 return file;
1298 }
1299
1300 /* ---------------------------------------------------------------------- */
1301
1302 int au_xino_path(struct seq_file *seq, struct file *file)
1303 {
1304 int err;
1305
1306 err = au_seq_path(seq, &file->f_path);
1307 if (unlikely(err))
1308 goto out;
1309
1310 #define Deleted "\\040(deleted)"
1311 seq->count -= sizeof(Deleted) - 1;
1312 AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
1313 sizeof(Deleted) - 1));
1314 #undef Deleted
1315
1316 out:
1317 return err;
1318 }
1319
1320 /* ---------------------------------------------------------------------- */
1321
1322 void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
1323 ino_t h_ino, int idx)
1324 {
1325 struct au_xino_file *xino;
1326
1327 AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1328 xino = &au_sbr(sb, bindex)->br_xino;
1329 AuDebugOn(idx < 0 || xino->xi_nondir.total <= idx);
1330
1331 spin_lock(&xino->xi_nondir.spin);
1332 AuDebugOn(xino->xi_nondir.array[idx] != h_ino);
1333 xino->xi_nondir.array[idx] = 0;
1334 spin_unlock(&xino->xi_nondir.spin);
1335 wake_up_all(&xino->xi_nondir.wqh);
1336 }
1337
1338 static int au_xinondir_find(struct au_xino_file *xino, ino_t h_ino)
1339 {
1340 int found, total, i;
1341
1342 found = -1;
1343 total = xino->xi_nondir.total;
1344 for (i = 0; i < total; i++) {
1345 if (xino->xi_nondir.array[i] != h_ino)
1346 continue;
1347 found = i;
1348 break;
1349 }
1350
1351 return found;
1352 }
1353
1354 static int au_xinondir_expand(struct au_xino_file *xino)
1355 {
1356 int err, sz;
1357 ino_t *p;
1358
1359 BUILD_BUG_ON(KMALLOC_MAX_SIZE > INT_MAX);
1360
1361 err = -ENOMEM;
1362 sz = xino->xi_nondir.total * sizeof(ino_t);
1363 if (unlikely(sz > KMALLOC_MAX_SIZE / 2))
1364 goto out;
1365 p = au_kzrealloc(xino->xi_nondir.array, sz, sz << 1, GFP_ATOMIC,
1366 /*may_shrink*/0);
1367 if (p) {
1368 xino->xi_nondir.array = p;
1369 xino->xi_nondir.total <<= 1;
1370 AuDbg("xi_nondir.total %d\n", xino->xi_nondir.total);
1371 err = 0;
1372 }
1373
1374 out:
1375 return err;
1376 }
1377
1378 int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
1379 int *idx)
1380 {
1381 int err, found, empty;
1382 struct au_xino_file *xino;
1383
1384 err = 0;
1385 *idx = -1;
1386 if (!au_opt_test(au_mntflags(sb), XINO))
1387 goto out; /* no xino */
1388
1389 xino = &au_sbr(sb, bindex)->br_xino;
1390
1391 again:
1392 spin_lock(&xino->xi_nondir.spin);
1393 found = au_xinondir_find(xino, h_ino);
1394 if (found == -1) {
1395 empty = au_xinondir_find(xino, /*h_ino*/0);
1396 if (empty == -1) {
1397 empty = xino->xi_nondir.total;
1398 err = au_xinondir_expand(xino);
1399 if (unlikely(err))
1400 goto out_unlock;
1401 }
1402 xino->xi_nondir.array[empty] = h_ino;
1403 *idx = empty;
1404 } else {
1405 spin_unlock(&xino->xi_nondir.spin);
1406 wait_event(xino->xi_nondir.wqh,
1407 xino->xi_nondir.array[found] != h_ino);
1408 goto again;
1409 }
1410
1411 out_unlock:
1412 spin_unlock(&xino->xi_nondir.spin);
1413 out:
1414 return err;
1415 }