2 * Copyright (C) 2005-2017 Junjiro R. Okajima
4 * This program, aufs is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 * policies for selecting one among multiple writable branches
22 #include <linux/statfs.h>
25 /* subset of cpup_attr() */
/*
 * Apply the mode and ownership of h_src's inode to the object at h_path.
 *
 * h_path: destination; the inode of h_path->dentry also receives the
 *         source inode flags through au_cpup_attr_flags().
 * h_src:  dentry whose inode supplies i_mode, i_uid, i_gid and i_flags.
 *
 * The change is issued with vfsub_sio_notify_change() and its result is
 * stored in err.
 * NOTE(review): the local declarations and the return statement are not
 * visible in this view; presumably err is returned -- confirm.
 */
26 static noinline_for_stack
27 int au_cpdown_attr(struct path
*h_path
, struct dentry
*h_src
)
33 h_isrc
= d_inode(h_src
);
/* request a forced update of mode, uid and gid in one call */
34 ia
.ia_valid
= ATTR_FORCE
| ATTR_MODE
| ATTR_UID
| ATTR_GID
;
35 ia
.ia_mode
= h_isrc
->i_mode
;
36 ia
.ia_uid
= h_isrc
->i_uid
;
37 ia
.ia_gid
= h_isrc
->i_gid
;
/* sbits is 1 when the source mode has a setuid or setgid bit, else 0 */
38 sbits
= !!(ia
.ia_mode
& (S_ISUID
| S_ISGID
));
39 au_cpup_attr_flags(d_inode(h_path
->dentry
), h_isrc
->i_flags
);
40 /* no delegation since it is just created */
41 err
= vfsub_sio_notify_change(h_path
, &ia
, /*delegated*/NULL
);
/*
 * When the first change succeeded, set[ug]id bits are involved and the
 * destination superblock passes au_test_nfs(), the mode alone is applied
 * a second time.
 */
43 /* is this nfs only? */
44 if (!err
&& sbits
&& au_test_nfs(h_path
->dentry
->d_sb
)) {
45 ia
.ia_valid
= ATTR_FORCE
| ATTR_MODE
;
46 ia
.ia_mode
= h_isrc
->i_mode
;
47 err
= vfsub_sio_notify_change(h_path
, &ia
, /*delegated*/NULL
);
/*
 * Bit flags shared by the au_cpdown_dir*() helpers through an
 * "unsigned int *flags" argument:
 *   PARENT_OPQ - set by au_cpdown_dir() when the dentry's diropq branch
 *                index is <= bdst
 *   WHED       - set by au_cpdown_dir() when au_dbwh(dentry) == bdst
 *   MADE_DIR   - set by au_cpdown_dir() after its vfsub_sio_mkdir() call
 *   DIROPQ     - set by au_cpdown_dir_opq() after au_diropq_create()
 */
53 #define AuCpdown_PARENT_OPQ 1
54 #define AuCpdown_WHED (1 << 1)
55 #define AuCpdown_MADE_DIR (1 << 2)
56 #define AuCpdown_DIROPQ (1 << 3)
/* test / set / clear one flag; "name" is the suffix after AuCpdown_ */
57 #define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
58 #define au_fset_cpdown(flags, name) \
59 do { (flags) |= AuCpdown_##name; } while (0)
60 #define au_fclr_cpdown(flags, name) \
61 do { (flags) &= ~AuCpdown_##name; } while (0)
/*
 * Call au_diropq_create() for dentry on branch bdst and record the step by
 * setting AuCpdown_DIROPQ in *flags.
 *
 * err is loaded with PTR_ERR() of the returned dentry and the pointer is
 * checked with IS_ERR().
 * NOTE(review): the remaining parameter(s), the error branch taken after
 * the IS_ERR() test and the return statement are not visible in this view;
 * "flags" is presumably a trailing "unsigned int *" parameter -- confirm.
 */
63 static int au_cpdown_dir_opq(struct dentry
*dentry
, aufs_bindex_t bdst
,
67 struct dentry
*opq_dentry
;
69 opq_dentry
= au_diropq_create(dentry
, bdst
);
70 err
= PTR_ERR(opq_dentry
);
71 if (IS_ERR(opq_dentry
))
74 au_fset_cpdown(*flags
, DIROPQ
);
/*
 * Look up the whiteout for dentry's name under h_parent on branch bdst
 * (au_wh_lkup()) and, when the looked-up dentry is positive, unlink it
 * with au_wh_unlink_dentry() using the branch inode of dir as the parent.
 *
 * dentry:   supplies the name (d_name) and the superblock
 * h_parent: directory passed to au_wh_lkup()
 * dir:      inode whose bdst-branch inode is passed to the unlink call
 * bdst:     branch index
 *
 * NOTE(review): the declarations, the branch taken when the lookup fails,
 * the trailing arguments of au_wh_unlink_dentry() and the return path are
 * not visible in this view.
 */
80 static int au_cpdown_dir_wh(struct dentry
*dentry
, struct dentry
*h_parent
,
81 struct inode
*dir
, aufs_bindex_t bdst
)
87 br
= au_sbr(dentry
->d_sb
, bdst
);
88 h_path
.dentry
= au_wh_lkup(h_parent
, &dentry
->d_name
, br
);
89 err
= PTR_ERR(h_path
.dentry
);
90 if (IS_ERR(h_path
.dentry
))
/* a positive lookup result is unlinked; h_path.mnt is filled in first */
94 if (d_is_positive(h_path
.dentry
)) {
95 h_path
.mnt
= au_br_mnt(br
);
96 err
= au_wh_unlink_dentry(au_h_iptr(dir
, bdst
), &h_path
,
/*
 * Create the directory for dentry on branch bdst and bring it in line with
 * the copy on the dentry's top branch.  It is passed as the callback
 * argument of au_cp_dirs() by au_cpdown_dirs().
 *
 * dentry:   aufs dentry being processed
 * bdst:     branch index on which the directory is made
 * h_parent: parent dentry on that branch; its inode is expected to equal
 *           au_h_iptr(dir, bdst) (checked with AuDebugOn())
 * arg:      really an "unsigned int *" holding AuCpdown_* flags
 *
 * Visible steps, in order: negative lookup on bdst, mkdir, flag
 * bookkeeping (MADE_DIR, WHED, DIROPQ, PARENT_OPQ), optional
 * au_cpdown_dir_opq(), au_cpdown_attr() from the top-branch dentry,
 * optional au_cpdown_dir_wh(), then registration of the new inode on the
 * aufs inode.  The lines after "goto out" undo the diropq and the mkdir
 * according to the flags.
 * NOTE(review): several declarations, error checks, goto targets/labels
 * and the return statement are not visible in this view.
 */
105 static int au_cpdown_dir(struct dentry
*dentry
, aufs_bindex_t bdst
,
107 struct dentry
*h_parent
, void *arg
)
110 aufs_bindex_t bopq
, btop
;
112 struct dentry
*parent
;
113 struct inode
*h_dir
, *h_inode
, *inode
, *dir
;
114 unsigned int *flags
= arg
;
116 btop
= au_dbtop(dentry
);
117 /* dentry is di-locked */
118 parent
= dget_parent(dentry
);
119 dir
= d_inode(parent
);
120 h_dir
= d_inode(h_parent
);
121 AuDebugOn(h_dir
!= au_h_iptr(dir
, bdst
));
124 err
= au_lkup_neg(dentry
, bdst
, /*wh*/0);
125 if (unlikely(err
< 0))
127 h_path
.dentry
= au_h_dptr(dentry
, bdst
);
128 h_path
.mnt
= au_sbr_mnt(dentry
->d_sb
, bdst
);
/* make the directory with rwx for the owner and r-x for group/others */
129 err
= vfsub_sio_mkdir(au_h_iptr(dir
, bdst
), &h_path
,
130 S_IRWXU
| S_IRUGO
| S_IXUGO
);
133 au_fset_cpdown(*flags
, MADE_DIR
);
/* WHED and DIROPQ are recomputed for this dentry; PARENT_OPQ is sticky */
135 bopq
= au_dbdiropq(dentry
);
136 au_fclr_cpdown(*flags
, WHED
);
137 au_fclr_cpdown(*flags
, DIROPQ
);
138 if (au_dbwh(dentry
) == bdst
)
139 au_fset_cpdown(*flags
, WHED
);
140 if (!au_ftest_cpdown(*flags
, PARENT_OPQ
) && bopq
<= bdst
)
141 au_fset_cpdown(*flags
, PARENT_OPQ
);
142 h_inode
= d_inode(h_path
.dentry
);
143 inode_lock_nested(h_inode
, AuLsc_I_CHILD
);
144 if (au_ftest_cpdown(*flags
, WHED
)) {
145 err
= au_cpdown_dir_opq(dentry
, bdst
, flags
);
147 inode_unlock(h_inode
);
/* copy mode/owner from the dentry's top-branch counterpart */
152 err
= au_cpdown_attr(&h_path
, au_h_dptr(dentry
, btop
));
153 inode_unlock(h_inode
);
157 if (au_ftest_cpdown(*flags
, WHED
)) {
158 err
= au_cpdown_dir_wh(dentry
, h_parent
, dir
, bdst
);
/* attach the new branch inode to the aufs inode, extending ibbot if needed */
163 inode
= d_inode(dentry
);
164 if (au_ibbot(inode
) < bdst
)
165 au_set_ibbot(inode
, bdst
);
166 au_set_h_iptr(inode
, bdst
, au_igrab(h_inode
),
167 au_hi_flags(inode
, /*isdir*/1));
168 au_fhsm_wrote(dentry
->d_sb
, bdst
, /*force*/0);
169 goto out
; /* success */
/*
 * Presumably the failure path (labels not visible): remove the diropq
 * and the directory if this call created them, then drop the bdst entry
 * from the dentry.  Failures of the undo steps are reported via AuIOErr().
 */
173 if (au_ftest_cpdown(*flags
, DIROPQ
)) {
174 inode_lock_nested(h_inode
, AuLsc_I_CHILD
);
175 rerr
= au_diropq_remove(dentry
, bdst
);
176 inode_unlock(h_inode
);
177 if (unlikely(rerr
)) {
178 AuIOErr("failed removing diropq for %pd b%d (%d)\n",
185 if (au_ftest_cpdown(*flags
, MADE_DIR
)) {
186 rerr
= vfsub_sio_rmdir(au_h_iptr(dir
, bdst
), &h_path
);
187 if (unlikely(rerr
)) {
188 AuIOErr("failed removing %pd b%d (%d)\n",
194 au_set_h_dptr(dentry
, bdst
, NULL
);
195 if (au_dbbot(dentry
) == bdst
)
196 au_update_dbbot(dentry
);
/*
 * Run au_cp_dirs() for dentry on branch bdst with au_cpdown_dir() as the
 * per-directory callback and a local flags word as its argument.
 * NOTE(review): the declaration/initialisation of flags and the return
 * statement are not visible in this view; presumably err is returned.
 */
202 int au_cpdown_dirs(struct dentry
*dentry
, aufs_bindex_t bdst
)
208 err
= au_cp_dirs(dentry
, bdst
, au_cpdown_dir
, &flags
);
213 /* ---------------------------------------------------------------------- */
215 /* policies for create */
/*
 * Walk the dentries collected by au_dcsub_pages_rev_aufs() starting from
 * dentry's parent and read each one's diropq branch index (au_dbdiropq())
 * under a di read lock.  An index that is >= 0 and smaller than the
 * current err value is singled out by the test at the end of the inner
 * loop.
 *
 * dentry: starting point; only its parent is handed to the collector
 * bindex: candidate branch index supplied by the caller
 *
 * NOTE(review): the assignment of d inside the loop, the statement guarded
 * by the final "bopq < err" test, the error handling after the two
 * au_dpages_*() calls and the return path are not visible in this view.
 * Presumably err starts as bindex and is lowered to bopq -- confirm.
 */
217 int au_wbr_nonopq(struct dentry
*dentry
, aufs_bindex_t bindex
)
219 int err
, i
, j
, ndentry
;
221 struct au_dcsub_pages dpages
;
222 struct au_dpage
*dpage
;
223 struct dentry
**dentries
, *parent
, *d
;
225 err
= au_dpages_init(&dpages
, GFP_NOFS
);
228 parent
= dget_parent(dentry
);
229 err
= au_dcsub_pages_rev_aufs(&dpages
, parent
, /*do_include*/0);
/* outer loop: collected pages; inner loop: dentries within one page */
234 for (i
= 0; i
< dpages
.ndpage
; i
++) {
235 dpage
= dpages
.dpages
+ i
;
236 dentries
= dpage
->dentries
;
237 ndentry
= dpage
->ndentry
;
238 for (j
= 0; j
< ndentry
; j
++) {
240 di_read_lock_parent2(d
, !AuLock_IR
);
241 bopq
= au_dbdiropq(d
);
242 di_read_unlock(d
, !AuLock_IR
);
243 if (bopq
>= 0 && bopq
< err
)
250 au_dpages_free(&dpages
);
/*
 * Scan branches from bindex down to 0, testing each with au_br_rdonly().
 *
 * sb:     aufs superblock whose branches are examined
 * bindex: index to start from; a negative value skips the loop entirely
 *
 * NOTE(review): the statements executed when a non-read-only branch is
 * found and when the loop runs out are not visible in this view.
 * Presumably the first writable index is returned, or a negative error
 * when there is none -- confirm.
 */
255 static int au_wbr_bu(struct super_block
*sb
, aufs_bindex_t bindex
)
257 for (; bindex
>= 0; bindex
--)
258 if (!au_br_rdonly(au_sbr(sb
, bindex
)))
263 /* top down parent */
/*
 * "Top down parent" create policy.
 *
 * Visible logic: test whether the dentry's top branch (au_dbtop(dentry))
 * is writable; otherwise walk the parent's branches from its top up to
 * (but not including) btop, skipping indices where the parent has no
 * positive dentry, and test each for writability.  When err is still
 * negative afterwards, fall back to au_wbr_bu() starting at btop - 1.
 * The candidate in err is finally passed through au_wbr_nonopq().
 *
 * dentry: dentry being created
 * flags:  unused here
 *
 * NOTE(review): the assignments made when a writable branch is found, the
 * initial value of err, the guard around the last au_wbr_nonopq() call and
 * the return path are not visible in this view.
 */
264 static int au_wbr_create_tdp(struct dentry
*dentry
,
265 unsigned int flags __maybe_unused
)
268 aufs_bindex_t btop
, bindex
;
269 struct super_block
*sb
;
270 struct dentry
*parent
, *h_parent
;
273 btop
= au_dbtop(dentry
);
275 if (!au_br_rdonly(au_sbr(sb
, btop
)))
279 parent
= dget_parent(dentry
);
280 for (bindex
= au_dbtop(parent
); bindex
< btop
; bindex
++) {
281 h_parent
= au_h_dptr(parent
, bindex
);
/* only branches where the parent directory actually exists qualify */
282 if (!h_parent
|| d_is_negative(h_parent
))
285 if (!au_br_rdonly(au_sbr(sb
, bindex
))) {
293 if (unlikely(err
< 0)) {
294 err
= au_wbr_bu(sb
, btop
- 1);
296 err
= au_wbr_nonopq(dentry
, err
);
304 /* ---------------------------------------------------------------------- */
306 /* an exception for the policy other than tdp */
/*
 * Exception shared by the create policies other than tdp: derive a branch
 * from the dentry's whiteout index (au_dbwh(dentry)) and the parent's
 * diropq index (au_dbdiropq(parent)).
 *
 * One visible branch takes min(bdiropq, bwh); another is entered when
 * bdiropq >= 0.  The candidate is passed through au_wbr_nonopq(), and a
 * non-negative result is additionally tested with au_br_rdonly().
 *
 * NOTE(review): the conditions selecting the first branch, the body of the
 * "bdiropq >= 0" branch, the statement guarded by the final read-only test
 * and the return path are not visible in this view.
 */
307 static int au_wbr_create_exp(struct dentry
*dentry
)
310 aufs_bindex_t bwh
, bdiropq
;
311 struct dentry
*parent
;
314 bwh
= au_dbwh(dentry
);
315 parent
= dget_parent(dentry
);
316 bdiropq
= au_dbdiropq(parent
);
319 err
= min(bdiropq
, bwh
);
323 } else if (bdiropq
>= 0) {
330 err
= au_wbr_nonopq(dentry
, err
);
332 if (err
>= 0 && au_br_rdonly(au_sbr(dentry
->d_sb
, err
)))
339 /* ---------------------------------------------------------------------- */
/*
 * Initialise the round-robin counter of sb: run au_wbr_bu() from the
 * bottom branch (au_sbbot(sb)) and store the negated result in
 * si_wbr_rr_next.
 * NOTE(review): the declaration of err, any check on it and the return
 * statement are not visible in this view.
 */
342 static int au_wbr_create_init_rr(struct super_block
*sb
)
346 err
= au_wbr_bu(sb
, au_sbbot(sb
));
347 atomic_set(&au_sbi(sb
)->si_wbr_rr_next
, -err
); /* less important */
/*
 * Round-robin create policy.
 *
 * au_wbr_create_exp() is tried first.  The loop then makes at most
 * bbot + 1 attempts: for a non-directory (AuWbr DIR flag clear) the shared
 * counter si_wbr_rr_next is decremented atomically and err is set to the
 * returned value + 1; otherwise the counter is only read.  The resulting
 * err is tested with au_br_rdonly(), and the final candidate goes through
 * au_wbr_nonopq().
 *
 * dentry: dentry being created
 * flags:  AuWbr_* flags; only DIR is examined in the visible lines
 *
 * NOTE(review): the check after au_wbr_create_exp(), the assignments of sb
 * and bbot, the condition guarding the second atomic_dec_return(), the
 * mapping of the counter value onto a branch index, the loop exit and the
 * return path are not visible in this view.
 */
354 static int au_wbr_create_rr(struct dentry
*dentry
, unsigned int flags
)
358 aufs_bindex_t bindex
, bbot
;
359 struct super_block
*sb
;
362 err
= au_wbr_create_exp(dentry
);
367 next
= &au_sbi(sb
)->si_wbr_rr_next
;
370 for (bindex
= 0; bindex
<= bbot
; bindex
++) {
371 if (!au_ftest_wbr(flags
, DIR)) {
372 err
= atomic_dec_return(next
) + 1;
373 /* modulo for 0 is meaningless */
375 err
= atomic_dec_return(next
) + 1;
377 err
= atomic_read(next
);
382 if (!au_br_rdonly(au_sbr(sb
, err
)))
388 err
= au_wbr_nonopq(dentry
, err
);
395 /* ---------------------------------------------------------------------- */
397 /* most free space */
/*
 * Refresh the "most free space" record (si_wbr_mfs) of the superblock.
 *
 * The caller must hold mfs->mfs_lock (asserted with MtxMustLock()).
 * The record is first reset (mfs_bindex = -EROFS, mfsrr_bytes = 0).  Each
 * branch in the scanned range is then examined: indices where the parent
 * has no positive dentry and read-only branches are filtered out by the
 * two tests at the top of the loop, vfs_statfs() is called on the branch
 * mount root, and f_bavail * f_bsize is stored in br->br_wbr->wbr_bytes.
 * mfs_bindex, mfs_jiffy and mfsrr_bytes are updated from the scan.
 *
 * dentry: dentry being created
 * parent: parent whose branch range bounds the scan; one caller in this
 *         file passes NULL
 *
 * The statfs buffer is heap-allocated to keep stack usage down; when the
 * allocation fails a warning is printed.
 * NOTE(review): the NULL check on st, the handling of a NULL parent, the
 * comparison that decides when a branch becomes the new best, the "skip"
 * statements after the filtering tests and the release of st are not
 * visible in this view.
 */
398 static void au_mfs(struct dentry
*dentry
, struct dentry
*parent
)
400 struct super_block
*sb
;
401 struct au_branch
*br
;
402 struct au_wbr_mfs
*mfs
;
403 struct dentry
*h_parent
;
404 aufs_bindex_t bindex
, bbot
;
406 unsigned long long b
, bavail
;
408 /* reduce the stack usage */
411 st
= kmalloc(sizeof(*st
), GFP_NOFS
);
413 AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM
);
419 mfs
= &au_sbi(sb
)->si_wbr_mfs
;
420 MtxMustLock(&mfs
->mfs_lock
);
421 mfs
->mfs_bindex
= -EROFS
;
422 mfs
->mfsrr_bytes
= 0;
427 bindex
= au_dbtop(parent
);
428 bbot
= au_dbtaildir(parent
);
431 for (; bindex
<= bbot
; bindex
++) {
433 h_parent
= au_h_dptr(parent
, bindex
);
434 if (!h_parent
|| d_is_negative(h_parent
))
437 br
= au_sbr(sb
, bindex
);
438 if (au_br_rdonly(br
))
441 /* sb->s_root for NFS is unreliable */
442 h_path
.mnt
= au_br_mnt(br
);
443 h_path
.dentry
= h_path
.mnt
->mnt_root
;
444 err
= vfs_statfs(&h_path
, st
);
446 AuWarn1("failed statfs, b%d, %d\n", bindex
, err
);
450 /* when the available size is equal, select the lower one */
/* compile-time guarantee that b is at least as wide as both factors */
451 BUILD_BUG_ON(sizeof(b
) < sizeof(st
->f_bavail
)
452 || sizeof(b
) < sizeof(st
->f_bsize
));
453 b
= st
->f_bavail
* st
->f_bsize
;
454 br
->br_wbr
->wbr_bytes
= b
;
457 mfs
->mfs_bindex
= bindex
;
458 mfs
->mfs_jiffy
= jiffies
;
462 mfs
->mfsrr_bytes
= bavail
;
463 AuDbg("b%d\n", mfs
->mfs_bindex
);
/*
 * "Most free space" create policy.
 *
 * au_wbr_create_exp() is tried first.  Under mfs_lock, the cached record
 * is refreshed with au_mfs() when it has expired (mfs_jiffy + mfs_expire
 * is in the past), when mfs_bindex is negative, or when the recorded
 * branch is read-only.  err is then taken from mfs_bindex and passed
 * through au_wbr_nonopq().
 *
 * dentry: dentry being created
 * flags:  AuWbr_* flags; when PARENT is set, the dentry's parent is looked
 *         up and handed to au_mfs()
 *
 * NOTE(review): the check after au_wbr_create_exp(), the initial value of
 * parent, the assignment of sb, the guard around au_wbr_nonopq(), the
 * release of parent and the return path are not visible in this view.
 */
467 static int au_wbr_create_mfs(struct dentry
*dentry
, unsigned int flags
)
470 struct dentry
*parent
;
471 struct super_block
*sb
;
472 struct au_wbr_mfs
*mfs
;
474 err
= au_wbr_create_exp(dentry
);
480 if (au_ftest_wbr(flags
, PARENT
))
481 parent
= dget_parent(dentry
);
482 mfs
= &au_sbi(sb
)->si_wbr_mfs
;
483 mutex_lock(&mfs
->mfs_lock
);
484 if (time_after(jiffies
, mfs
->mfs_jiffy
+ mfs
->mfs_expire
)
485 || mfs
->mfs_bindex
< 0
486 || au_br_rdonly(au_sbr(sb
, mfs
->mfs_bindex
)))
487 au_mfs(dentry
, parent
);
488 mutex_unlock(&mfs
->mfs_lock
);
489 err
= mfs
->mfs_bindex
;
493 err
= au_wbr_nonopq(dentry
, err
);
/*
 * Initialise the mfs record of sb: set up mfs_lock and mark the cached
 * branch index as invalid (-EROFS).
 * NOTE(review): other field initialisations and the return statement are
 * not visible in this view.
 */
500 static int au_wbr_create_init_mfs(struct super_block
*sb
)
502 struct au_wbr_mfs
*mfs
;
504 mfs
= &au_sbi(sb
)->si_wbr_mfs
;
505 mutex_init(&mfs
->mfs_lock
);
507 mfs
->mfs_bindex
= -EROFS
;
/*
 * Tear down the mfs record of sb by destroying its mfs_lock mutex.
 * NOTE(review): the return statement is not visible in this view.
 */
512 static int au_wbr_create_fin_mfs(struct super_block
*sb __maybe_unused
)
514 mutex_destroy(&au_sbi(sb
)->si_wbr_mfs
.mfs_lock
);
518 /* ---------------------------------------------------------------------- */
520 /* top down regardless parent, and then mfs */
/*
 * "Top down regardless of the parent, then mfs" create policy.
 *
 * Under mfs_lock the mfs record is refreshed (au_mfs() with a NULL parent)
 * when it has expired or holds a negative index; mfsrr_watermark and
 * mfs_bindex are then copied into locals.  After that the dentry's
 * whiteout index and the parent's tail directory index are consulted,
 * au_wbr_nonopq() is applied to btail, and branches 0..btail are scanned:
 * read-only ones are filtered out and the remaining ones are compared
 * against the watermark via br_wbr->wbr_bytes.
 *
 * dentry: dentry being created
 * flags:  unused here
 *
 * NOTE(review): the assignment of sb, the statements guarded by the
 * "bwh < btail" and "err < 0" tests, the body executed when a branch
 * exceeds the watermark, the use of bfound/bmfs and the return path are
 * not visible in this view.
 */
521 static int au_wbr_create_tdmfs(struct dentry
*dentry
,
522 unsigned int flags __maybe_unused
)
525 aufs_bindex_t bwh
, btail
, bindex
, bfound
, bmfs
;
526 unsigned long long watermark
;
527 struct super_block
*sb
;
528 struct au_wbr_mfs
*mfs
;
529 struct au_branch
*br
;
530 struct dentry
*parent
;
533 mfs
= &au_sbi(sb
)->si_wbr_mfs
;
534 mutex_lock(&mfs
->mfs_lock
);
535 if (time_after(jiffies
, mfs
->mfs_jiffy
+ mfs
->mfs_expire
)
536 || mfs
->mfs_bindex
< 0)
537 au_mfs(dentry
, /*parent*/NULL
);
/* snapshot the shared values while the lock is still held */
538 watermark
= mfs
->mfsrr_watermark
;
539 bmfs
= mfs
->mfs_bindex
;
540 mutex_unlock(&mfs
->mfs_lock
);
542 /* another style of au_wbr_create_exp() */
543 bwh
= au_dbwh(dentry
);
544 parent
= dget_parent(dentry
);
545 btail
= au_dbtaildir(parent
);
546 if (bwh
>= 0 && bwh
< btail
)
549 err
= au_wbr_nonopq(dentry
, btail
);
550 if (unlikely(err
< 0))
554 for (bindex
= 0; bindex
<= btail
; bindex
++) {
555 br
= au_sbr(sb
, bindex
);
556 if (au_br_rdonly(br
))
558 if (br
->br_wbr
->wbr_bytes
> watermark
) {
573 /* ---------------------------------------------------------------------- */
575 /* most free space and then round robin */
/*
 * "Most free space, then round robin" create policy.
 *
 * au_wbr_create_mfs() is evaluated first.  Then, with mfs_lock held, the
 * result is replaced by au_wbr_create_rr() when the recorded mfsrr_bytes
 * is below mfsrr_watermark.
 *
 * dentry: dentry being created
 * flags:  AuWbr_* flags forwarded to both policies
 *
 * NOTE(review): the check on the first result and the return path are not
 * visible in this view.
 */
576 static int au_wbr_create_mfsrr(struct dentry
*dentry
, unsigned int flags
)
579 struct au_wbr_mfs
*mfs
;
581 err
= au_wbr_create_mfs(dentry
, flags
);
583 mfs
= &au_sbi(dentry
->d_sb
)->si_wbr_mfs
;
584 mutex_lock(&mfs
->mfs_lock
);
585 if (mfs
->mfsrr_bytes
< mfs
->mfsrr_watermark
)
586 err
= au_wbr_create_rr(dentry
, flags
);
587 mutex_unlock(&mfs
->mfs_lock
);
/*
 * Initialise both halves of the mfsrr policy: the mfs record (its result
 * is deliberately ignored) and the round-robin counter, whose result is
 * kept in err.
 * NOTE(review): the return statement is not visible in this view.
 */
594 static int au_wbr_create_init_mfsrr(struct super_block
*sb
)
598 au_wbr_create_init_mfs(sb
); /* ignore */
599 err
= au_wbr_create_init_rr(sb
);
604 /* ---------------------------------------------------------------------- */
606 /* top down parent and most free space */
/*
 * "Top down parent and most free space" create policy.
 *
 * The tdp policy supplies the first candidate; a second value (e2) comes
 * from au_wbr_create_mfs().  The free bytes of the candidate branch
 * (br_wbr->wbr_bytes) are then compared with every writable branch in the
 * parent's range [btop, bbot] on which the parent has a positive dentry,
 * and a strictly larger value replaces the running maximum b.  The result
 * is finally passed through au_wbr_nonopq().
 *
 * dentry: dentry being created
 * flags:  AuWbr_* flags forwarded to the underlying policies
 *
 * NOTE(review): the conditions in front of the two "goto out_parent"
 * statements, the assignment of sb, the update of err inside the loop, the
 * out_parent label and the return path are not visible in this view.
 */
607 static int au_wbr_create_pmfs(struct dentry
*dentry
, unsigned int flags
)
610 unsigned long long b
;
611 aufs_bindex_t bindex
, btop
, bbot
;
612 struct super_block
*sb
;
613 struct dentry
*parent
, *h_parent
;
614 struct au_branch
*br
;
616 err
= au_wbr_create_tdp(dentry
, flags
);
617 if (unlikely(err
< 0))
619 parent
= dget_parent(dentry
);
620 btop
= au_dbtop(parent
);
621 bbot
= au_dbtaildir(parent
);
623 goto out_parent
; /* success */
625 e2
= au_wbr_create_mfs(dentry
, flags
);
627 goto out_parent
; /* success */
629 /* when the available size is equal, select upper one */
631 br
= au_sbr(sb
, err
);
632 b
= br
->br_wbr
->wbr_bytes
;
633 AuDbg("b%d, %llu\n", err
, b
);
635 for (bindex
= btop
; bindex
<= bbot
; bindex
++) {
636 h_parent
= au_h_dptr(parent
, bindex
);
637 if (!h_parent
|| d_is_negative(h_parent
))
640 br
= au_sbr(sb
, bindex
);
/* strict ">" keeps the earlier (upper) branch when sizes are equal */
641 if (!au_br_rdonly(br
) && br
->br_wbr
->wbr_bytes
> b
) {
642 b
= br
->br_wbr
->wbr_bytes
;
644 AuDbg("b%d, %llu\n", err
, b
);
649 err
= au_wbr_nonopq(dentry
, err
);
658 /* ---------------------------------------------------------------------- */
662 * - most free space with parent
663 * - most free space round-robin regardless parent
/*
 * Combined policy: pmfs restricted to the parent first, then mfsrr
 * regardless of the parent.
 *
 * au_wbr_create_pmfs() is called with AuWbr_PARENT added to flags.  The
 * watermark is read under mfs_lock, and when the chosen branch has fewer
 * free bytes (br_wbr->wbr_bytes) than the watermark the result is
 * replaced by au_wbr_create_mfsrr() with the caller's original flags.
 *
 * dentry: dentry being created
 * flags:  AuWbr_* flags
 *
 * NOTE(review): the statement guarded by the "err < 0" test, the
 * assignment of sb and the return path are not visible in this view.
 */
665 static int au_wbr_create_pmfsrr(struct dentry
*dentry
, unsigned int flags
)
668 unsigned long long watermark
;
669 struct super_block
*sb
;
670 struct au_branch
*br
;
671 struct au_wbr_mfs
*mfs
;
673 err
= au_wbr_create_pmfs(dentry
, flags
| AuWbr_PARENT
);
674 if (unlikely(err
< 0))
678 br
= au_sbr(sb
, err
);
679 mfs
= &au_sbi(sb
)->si_wbr_mfs
;
680 mutex_lock(&mfs
->mfs_lock
);
681 watermark
= mfs
->mfsrr_watermark
;
682 mutex_unlock(&mfs
->mfs_lock
);
683 if (br
->br_wbr
->wbr_bytes
< watermark
)
684 /* regardless the parent dir */
685 err
= au_wbr_create_mfsrr(dentry
, flags
);
692 /* ---------------------------------------------------------------------- */
694 /* policies for copyup */
696 /* top down parent */
/*
 * Copy-up variant of the "top down parent" policy: delegates to
 * au_wbr_create_tdp() with flags 0 (the callee does not use its flags).
 */
697 static int au_wbr_copyup_tdp(struct dentry
*dentry
)
699 return au_wbr_create_tdp(dentry
, /*flags, anything is ok*/0);
702 /* bottom up parent */
/*
 * "Bottom up parent" copy-up policy.
 *
 * Walk from the dentry's top branch down to the parent's top branch,
 * considering only indices where the parent has a positive dentry, and
 * test each for writability.  When err is still negative afterwards, fall
 * back to au_wbr_bu() starting just above the parent's top branch
 * (btop - 1).
 *
 * NOTE(review): the initial value of err, the assignment of sb, the body
 * executed for a writable branch, any later post-processing and the
 * return path are not visible in this view.
 */
703 static int au_wbr_copyup_bup(struct dentry
*dentry
)
706 aufs_bindex_t bindex
, btop
;
707 struct dentry
*parent
, *h_parent
;
708 struct super_block
*sb
;
712 parent
= dget_parent(dentry
);
713 btop
= au_dbtop(parent
);
714 for (bindex
= au_dbtop(dentry
); bindex
>= btop
; bindex
--) {
715 h_parent
= au_h_dptr(parent
, bindex
);
716 if (!h_parent
|| d_is_negative(h_parent
))
719 if (!au_br_rdonly(au_sbr(sb
, bindex
))) {
727 if (unlikely(err
< 0))
728 err
= au_wbr_bu(sb
, btop
- 1);
/*
 * Bottom-up copy-up helper: run au_wbr_bu() from btop on the dentry's
 * superblock and pass the result through au_wbr_nonopq().
 * NOTE(review): the guard between the two calls and the return statement
 * are not visible in this view.
 */
735 int au_wbr_do_copyup_bu(struct dentry
*dentry
, aufs_bindex_t btop
)
739 err
= au_wbr_bu(dentry
->d_sb
, btop
);
742 err
= au_wbr_nonopq(dentry
, err
);
/*
 * "Bottom up" copy-up policy: start au_wbr_do_copyup_bu() from the
 * dentry's own top branch.
 * NOTE(review): the declarations and the return statement are not visible
 * in this view.
 */
748 static int au_wbr_copyup_bu(struct dentry
*dentry
)
753 btop
= au_dbtop(dentry
);
754 err
= au_wbr_do_copyup_bu(dentry
, btop
);
758 /* ---------------------------------------------------------------------- */
/*
 * Copy-up policy table, indexed by AuWbrCopyup_* and binding each policy
 * to its .copyup handler.
 * NOTE(review): the designator of the entry using au_wbr_copyup_bu and
 * the closing braces are not visible in this view.
 */
760 struct au_wbr_copyup_operations au_wbr_copyup_ops
[] = {
761 [AuWbrCopyup_TDP
] = {
762 .copyup
= au_wbr_copyup_tdp
764 [AuWbrCopyup_BUP
] = {
765 .copyup
= au_wbr_copyup_bup
768 .copyup
= au_wbr_copyup_bu
772 struct au_wbr_create_operations au_wbr_create_ops
[] = {
773 [AuWbrCreate_TDP
] = {
774 .create
= au_wbr_create_tdp
777 .create
= au_wbr_create_rr
,
778 .init
= au_wbr_create_init_rr
780 [AuWbrCreate_MFS
] = {
781 .create
= au_wbr_create_mfs
,
782 .init
= au_wbr_create_init_mfs
,
783 .fin
= au_wbr_create_fin_mfs
785 [AuWbrCreate_MFSV
] = {
786 .create
= au_wbr_create_mfs
,
787 .init
= au_wbr_create_init_mfs
,
788 .fin
= au_wbr_create_fin_mfs
790 [AuWbrCreate_MFSRR
] = {
791 .create
= au_wbr_create_mfsrr
,
792 .init
= au_wbr_create_init_mfsrr
,
793 .fin
= au_wbr_create_fin_mfs
795 [AuWbrCreate_MFSRRV
] = {
796 .create
= au_wbr_create_mfsrr
,
797 .init
= au_wbr_create_init_mfsrr
,
798 .fin
= au_wbr_create_fin_mfs
800 [AuWbrCreate_TDMFS
] = {
801 .create
= au_wbr_create_tdmfs
,
802 .init
= au_wbr_create_init_mfs
,
803 .fin
= au_wbr_create_fin_mfs
805 [AuWbrCreate_TDMFSV
] = {
806 .create
= au_wbr_create_tdmfs
,
807 .init
= au_wbr_create_init_mfs
,
808 .fin
= au_wbr_create_fin_mfs
810 [AuWbrCreate_PMFS
] = {
811 .create
= au_wbr_create_pmfs
,
812 .init
= au_wbr_create_init_mfs
,
813 .fin
= au_wbr_create_fin_mfs
815 [AuWbrCreate_PMFSV
] = {
816 .create
= au_wbr_create_pmfs
,
817 .init
= au_wbr_create_init_mfs
,
818 .fin
= au_wbr_create_fin_mfs
820 [AuWbrCreate_PMFSRR
] = {
821 .create
= au_wbr_create_pmfsrr
,
822 .init
= au_wbr_create_init_mfsrr
,
823 .fin
= au_wbr_create_fin_mfs
825 [AuWbrCreate_PMFSRRV
] = {
826 .create
= au_wbr_create_pmfsrr
,
827 .init
= au_wbr_create_init_mfsrr
,
828 .fin
= au_wbr_create_fin_mfs