// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"
#include <linux/pagevec.h>

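/*
 * Grab a page from the caller-supplied page pool if one is available
 * (pooled pages must hold exactly one reference), otherwise fall back
 * to a fresh allocation with @gfp.
 */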
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
        struct page *page;

        if (!list_empty(pool)) {
                page = lru_to_page(pool);
                DBG_BUGON(page_ref_count(page) != 1);
                list_del(&page->lru);
        } else {
                page = alloc_page(gfp);
        }
        return page;
}

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

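/*
 * Try to take one more reference on @grp: wait until it is not frozen,
 * then bump the refcount with cmpxchg so a concurrent freezer cannot
 * be raced past.  Taking the reference that lifts the count above 1
 * makes the workgroup non-reclaimable, so the global shrink count is
 * decreased.  Returns 0 on success, -1 if the workgroup is going away.
 */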
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
        int o;

repeat:
        o = erofs_wait_on_workgroup_freezed(grp);
        if (o <= 0)
                return -1;

        if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
                goto repeat;

        /* decrease the shrink count paired with erofs_workgroup_put() */
        if (o == 1)
                atomic_long_dec(&erofs_global_shrink_cnt);
        return 0;
}

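/*
 * Look up the workgroup at @index under the RCU read lock.  If a
 * reference cannot be taken (the workgroup is being freed), leave the
 * RCU read section and retry so that a new workgroup inserted at the
 * same index can be found.
 */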
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = xa_load(&sbi->managed_pslots, index);
        if (grp) {
                if (erofs_workgroup_get(grp)) {
                        /* prefer to relax rcu read side */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

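/*
 * Insert @grp into the managed XArray at grp->index.  If another
 * workgroup already occupies the slot, take a reference on it and
 * return it instead; if the in-tree one is on its way out, back off
 * with cond_resched() and retry until the slot can be claimed.
 */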
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
                                               struct erofs_workgroup *grp)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
        struct erofs_workgroup *pre;

        /*
         * Bump up the reference count before making this workgroup
         * visible to others in the XArray, in order to avoid potential
         * UAF without xa_lock serialization.
         */
        atomic_inc(&grp->refcount);

repeat:
        xa_lock(&sbi->managed_pslots);
        pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
                           NULL, grp, GFP_NOFS);
        if (pre) {
                if (xa_is_err(pre)) {
                        pre = ERR_PTR(xa_err(pre));
                } else if (erofs_workgroup_get(pre)) {
                        /* try to legitimize the current in-tree one */
                        xa_unlock(&sbi->managed_pslots);
                        cond_resched();
                        goto repeat;
                }
                atomic_dec(&grp->refcount);
                grp = pre;
        }
        xa_unlock(&sbi->managed_pslots);
        return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

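/*
 * Drop one reference on @grp.  Once only the XArray reference remains
 * (count == 1), the workgroup becomes reclaimable and the global
 * shrink count is bumped; when the last reference is dropped, the
 * workgroup is freed via RCU.
 */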
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
        int count = atomic_dec_return(&grp->refcount);

        if (count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        else if (!count)
                __erofs_workgroup_free(grp);
        return count;
}

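/*
 * Attempt to reclaim a single workgroup: freeze it if its refcount is
 * exactly 1 (only the XArray reference left), detach all its cached
 * pages, then erase it from the XArray and free it.  Returns false
 * (leaving the workgroup unfrozen) if it is still in use.
 */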
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp)
{
        /*
         * If the managed cache is on, the refcount of workgroups
         * themselves could be < 0 (frozen).  In other words, there is
         * no guarantee that all refcounts are > 0.
         */
        if (!erofs_workgroup_try_to_freeze(grp, 1))
                return false;

        /*
         * Note that all cached pages should be detached before the
         * workgroup is deleted from the XArray.  Otherwise some cached
         * pages could still be attached to the orphaned old workgroup
         * when the new one is available in the tree.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
                erofs_workgroup_unfreeze(grp, 1);
                return false;
        }

        /*
         * It's impossible to fail after the workgroup is frozen, but in
         * order to avoid some race conditions, add a DBG_BUGON to
         * observe this in advance.
         */
        DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

        /* the last refcount should be connected with its managed pslot */
        erofs_workgroup_unfreeze(grp, 0);
        __erofs_workgroup_free(grp);
        return true;
}

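/*
 * Scan the managed XArray and release up to @nr_shrink reclaimable
 * workgroups, dropping xa_lock after each successful release and
 * retaking it before the walk continues.  Returns the number of
 * workgroups actually freed.
 */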
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink)
{
        struct erofs_workgroup *grp;
        unsigned int freed = 0;
        unsigned long index;

        xa_lock(&sbi->managed_pslots);
        xa_for_each(&sbi->managed_pslots, index, grp) {
                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp))
                        continue;
                xa_unlock(&sbi->managed_pslots);

                ++freed;
                if (!--nr_shrink)
                        return freed;
                xa_lock(&sbi->managed_pslots);
        }
        xa_unlock(&sbi->managed_pslots);
        return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

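/*
 * Hook a newly-mounted instance into the global superblock list so the
 * shrinker below can reach its workgroups.
 */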
void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        /* clean up all remaining workgroups in memory */
        erofs_shrink_workstation(sbi, ~0UL);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

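/*
 * The reclaim pass itself.  Each run is stamped with a non-zero
 * sequence number: superblocks already visited in this run terminate
 * the walk, serviced ones are rotated to the list tail for fairness,
 * and instances whose umount_mutex cannot be trylocked are skipped.
 */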
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr - freed);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}

static struct shrinker erofs_shrinker_info = {
        .scan_objects = erofs_shrink_scan,
        .count_objects = erofs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
        return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
        unregister_shrinker(&erofs_shrinker_info);
}
#endif  /* !CONFIG_EROFS_FS_ZIP */