]>
Commit | Line | Data |
---|---|---|
29b24f6c | 1 | // SPDX-License-Identifier: GPL-2.0-only |
b29e64d8 | 2 | /* |
b29e64d8 | 3 | * Copyright (C) 2018 HUAWEI, Inc. |
592e7cd0 | 4 | * https://www.huawei.com/ |
b29e64d8 | 5 | * Created by Gao Xiang <gaoxiang25@huawei.com> |
b29e64d8 | 6 | */ |
b29e64d8 | 7 | #include "internal.h" |
3883a79a | 8 | #include <linux/pagevec.h> |
b29e64d8 | 9 | |
5ddcee1f | 10 | struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) |
b29e64d8 GX |
11 | { |
12 | struct page *page; | |
13 | ||
14 | if (!list_empty(pool)) { | |
15 | page = lru_to_page(pool); | |
b25a1519 | 16 | DBG_BUGON(page_ref_count(page) != 1); |
b29e64d8 GX |
17 | list_del(&page->lru); |
18 | } else { | |
5ddcee1f | 19 | page = alloc_page(gfp); |
b29e64d8 GX |
20 | } |
21 | return page; | |
22 | } | |
23 | ||
#if (EROFS_PCPUBUF_NR_PAGES > 0)
/* per-CPU scratch space, EROFS_PCPUBUF_NR_PAGES pages for each CPU */
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

/*
 * Return a pointer to page @pagenr of the current CPU's scratch buffer.
 *
 * Preemption is disabled here so the buffer stays bound to this CPU;
 * the caller is responsible for re-enabling preemption when done
 * (presumably via a matching "put" helper — not visible in this file).
 */
void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif
35 | ||
#ifdef CONFIG_EROFS_FS_ZIP
/*
 * Global shrink count (for all mounted EROFS instances): the number of
 * workgroups whose only remaining reference is their managed pslot,
 * i.e. the population reported to the memory shrinker.
 */
static atomic_long_t erofs_global_shrink_cnt;
39 | ||
/*
 * Try to take an extra reference on @grp locklessly.
 *
 * Returns 0 with the reference held, or -1 if the workgroup is being
 * freezed (refcount <= 0) so the caller should redo its lookup.
 */
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	/* wait until any concurrent freezer releases the workgroup */
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	/* lockless increment; retry if the refcount moved under us */
	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease refcount paired by erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}
e7e9a307 | 57 | |
/*
 * Look up the workgroup at @index in the per-sb XArray and take a
 * reference on it.
 *
 * Returns the referenced workgroup, or NULL if nothing is inserted at
 * @index.  If the found workgroup is currently freezed, drop out of
 * the RCU read section and retry the whole lookup.
 */
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		/* the slot and the workgroup must agree on the index */
		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}
79 | ||
/*
 * Insert @grp into the per-sb XArray at grp->index.
 *
 * Returns @grp itself on successful insertion, the already-inserted
 * workgroup (with a reference taken on it) if one exists at that
 * index, or an ERR_PTR() on XArray store failure.  In the latter two
 * cases the extra reference taken on @grp below is dropped again.
 */
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up a reference count before making this visible
	 * to others for the XArray in order to avoid potential
	 * UAF without serialized by xa_lock.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	/* store @grp only if the slot is still empty */
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			/* propagate the XArray error to the caller */
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		/* lost the race: undo our pre-insertion reference */
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}
112 | ||
/*
 * Really free @grp: remove it from the global shrink population and
 * schedule the RCU-deferred release of the object itself.
 */
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}
118 | ||
/*
 * Drop one reference on @grp and return the remaining refcount.
 *
 * When the count drops to 1, only the managed-pslot reference remains,
 * so the workgroup becomes reclaimable and is added to the global
 * shrink population (paired with the decrement in
 * erofs_workgroup_get()).  When it reaches 0, free the workgroup.
 */
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}
129 | ||
/*
 * Attempt to reclaim a single workgroup: freeze it, strip its cached
 * pages, erase it from the managed XArray and free it.
 *
 * Returns true if @grp was released, false if it is still in use
 * (could not be freezed, or some cached pages are still attached).
 */
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If managed cache is on, refcount of workgroups
	 * themselves could be < 0 (freezed). In other words,
	 * there is no guarantee that all refcounts > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be unattached
	 * before deleted from the XArray. Otherwise some
	 * cached pages could be still attached to the orphan
	 * old workgroup when the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		/* still in use — thaw it and leave it in the tree */
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is freezed,
	 * however in order to avoid some race conditions, add a
	 * DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/* last refcount should be connected with its managed pslot. */
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
	return true;
}
164 | ||
/*
 * Walk all workgroups of @sbi and release up to @nr_shrink of them.
 *
 * Returns the number of workgroups actually freed; workgroups that
 * are still in use are simply skipped.
 */
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	return freed;
}
183 | ||
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
/* all currently-mounted EROFS instances, linked via sbi->list */
static LIST_HEAD(erofs_sb_list);
190 | ||
/*
 * Make @sb's workgroups visible to the global shrinker; called at
 * mount time.  The umount_mutex must be initialized before the sbi
 * is published on the list, since the shrinker may trylock it.
 */
void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}
201 | ||
/*
 * Detach @sb from the global shrinker and reclaim every remaining
 * workgroup; called at unmount time.  Holding umount_mutex keeps the
 * shrinker (which only trylocks it) away during teardown.
 */
void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}
215 | ||
/*
 * Shrinker .count_objects callback: report how many workgroups are
 * currently reclaimable across all mounted EROFS instances.
 */
static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}
221 | ||
/*
 * Shrinker .scan_objects callback: walk the mounted EROFS instances
 * round-robin and reclaim up to sc->nr_to_scan workgroups in total.
 *
 * Returns the number of workgroups freed.  The list lock is dropped
 * around the actual shrinking, so the walk relies on per-run sequence
 * numbers and list_move_tail() for termination and fairness.
 */
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	/* take the next non-zero sequence number for this run */
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		/* skip instances currently unmounting (see unregister) */
		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		/* only ask for the still-outstanding amount */
		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}
276 | ||
/* shrinker descriptor registered with the MM subsystem */
static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};
282 | ||
/*
 * Register the EROFS shrinker at module init; returns 0 or the
 * negative error from register_shrinker().
 */
int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}
287 | ||
/* unregister the EROFS shrinker at module exit */
void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */
293 |