]>
Commit | Line | Data |
---|---|---|
b0643e59 DZ |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include <linux/jiffies.h> | |
4 | #include <linux/kernel.h> | |
5 | #include <linux/ktime.h> | |
6 | #include <linux/list.h> | |
e93591bb | 7 | #include <linux/math64.h> |
b0643e59 DZ |
8 | #include <linux/sizes.h> |
9 | #include <linux/workqueue.h> | |
10 | #include "ctree.h" | |
11 | #include "block-group.h" | |
12 | #include "discard.h" | |
13 | #include "free-space-cache.h" | |
14 | ||
15 | /* This is an initial delay to give some chance for block reuse */ | |
16 | #define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC) | |
6e80d4f8 | 17 | #define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC) |
b0643e59 | 18 | |
a2309300 DZ |
19 | /* Target completion latency of discarding all discardable extents */ |
20 | #define BTRFS_DISCARD_TARGET_MSEC (6 * 60 * 60UL * MSEC_PER_SEC) | |
21 | #define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL) | |
22 | #define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL) | |
23 | #define BTRFS_DISCARD_MAX_IOPS (10U) | |
24 | ||
/* Monotonically decreasing minimum length filters after index 0 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};
31 | ||
b0643e59 DZ |
32 | static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl, |
33 | struct btrfs_block_group *block_group) | |
34 | { | |
35 | return &discard_ctl->discard_list[block_group->discard_index]; | |
36 | } | |
37 | ||
/*
 * Queue a block group on its discard list in LRU order.
 *
 * Caller must hold discard_ctl->lock (see add_to_discard_list() and
 * peek_discard_list()).  If the block group is newly queued, or is being
 * demoted off the unused list, restamp its eligibility time so recently
 * freed space has a chance to be reused before being discarded, and flag
 * the discard cursor for a reset.
 */
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	/* Async discard is not running, so don't queue anything */
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		/* A formerly-unused block group re-enters the regular lists */
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}

	/* list_move_tail() keeps each discard list in LRU order */
	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}
b0643e59 | 56 | |
/*
 * Lock-taking wrapper for __add_to_discard_list().
 *
 * Only data-only block groups participate in async discard (see the same
 * filter in btrfs_discard_update_discardable()); anything else is ignored.
 */
static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				struct btrfs_block_group *block_group)
{
	if (!btrfs_is_block_group_data_only(block_group))
		return;

	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
}
67 | ||
/*
 * Queue an empty block group on the dedicated unused discard list.
 *
 * Unused block groups get the shorter BTRFS_DISCARD_UNUSED_DELAY since
 * there is no used space left to preserve.  The block group is first
 * detached from whatever list it is currently on, then re-added to the
 * BTRFS_DISCARD_INDEX_UNUSED list with a fresh eligibility time and a
 * cursor reset pending.
 */
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	spin_lock(&discard_ctl->lock);

	/* Async discard is not running, so don't queue anything */
	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}
89 | ||
/*
 * Remove a block group from the discard lists.
 *
 * Returns true if @block_group was the block group currently owned by the
 * discard worker (discard_ctl->block_group), in which case the caller may
 * need to cancel/reschedule the discard work (see
 * btrfs_discard_cancel_work()).
 */
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	/* Clearing the eligibility time makes a future requeue restamp it */
	block_group->discard_eligible_time = 0;
	list_del_init(&block_group->discard_list);

	spin_unlock(&discard_ctl->lock);

	return running;
}
109 | ||
110 | /** | |
111 | * find_next_block_group - find block_group that's up next for discarding | |
112 | * @discard_ctl: discard control | |
113 | * @now: current time | |
114 | * | |
115 | * Iterate over the discard lists to find the next block_group up for | |
116 | * discarding checking the discard_eligible_time of block_group. | |
117 | */ | |
118 | static struct btrfs_block_group *find_next_block_group( | |
119 | struct btrfs_discard_ctl *discard_ctl, | |
120 | u64 now) | |
121 | { | |
122 | struct btrfs_block_group *ret_block_group = NULL, *block_group; | |
123 | int i; | |
124 | ||
125 | for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { | |
126 | struct list_head *discard_list = &discard_ctl->discard_list[i]; | |
127 | ||
128 | if (!list_empty(discard_list)) { | |
129 | block_group = list_first_entry(discard_list, | |
130 | struct btrfs_block_group, | |
131 | discard_list); | |
132 | ||
133 | if (!ret_block_group) | |
134 | ret_block_group = block_group; | |
135 | ||
136 | if (ret_block_group->discard_eligible_time < now) | |
137 | break; | |
138 | ||
139 | if (ret_block_group->discard_eligible_time > | |
140 | block_group->discard_eligible_time) | |
141 | ret_block_group = block_group; | |
142 | } | |
143 | } | |
144 | ||
145 | return ret_block_group; | |
146 | } | |
147 | ||
/**
 * peek_discard_list - wrap find_next_block_group()
 * @discard_ctl: discard control
 * @discard_state: the discard_state of the block_group after state management
 * @discard_index: the discard_index of the block_group after state management
 *
 * This wraps find_next_block_group() and sets the block_group to be in use.
 * discard_state's control flow is managed here.  Variables related to
 * discard_state are reset here as needed (eg discard_cursor).  @discard_state
 * and @discard_index are remembered as it may change while we're discarding,
 * but we want the discard to execute in the context determined here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index)
{
	struct btrfs_block_group *block_group;
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now > block_group->discard_eligible_time) {
		/*
		 * A block group on the unused list that gained data since it
		 * was queued must be rerouted: back onto the regular lists if
		 * it is data-only, otherwise dropped entirely.  Then retry.
		 */
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group))
				__add_to_discard_list(discard_ctl, block_group);
			else
				list_del_init(&block_group->discard_list);
			goto again;
		}
		/* Start a fresh two-pass (extents then bitmaps) cycle */
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	} else {
		/* Nothing eligible yet */
		block_group = NULL;
	}

	spin_unlock(&discard_ctl->lock);

	return block_group;
}
196 | ||
/**
 * btrfs_discard_check_filter - updates a block groups filters
 * @block_group: block group of interest
 * @bytes: recently freed region size after coalescing
 *
 * Async discard maintains multiple lists with progressively smaller filters
 * to prioritize discarding based on size.  Should a free space that matches
 * a larger filter be returned to the free_space_cache, prioritize that discard
 * by moving @block_group to the proper filter.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	/*
	 * Only promote: the freed region must beat the previous (larger)
	 * level's minimum length filter for a move to be worthwhile.
	 */
	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		/* Find the largest filter level the region qualifies for */
		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}
234 | ||
235 | /** | |
236 | * btrfs_update_discard_index - moves a block group along the discard lists | |
237 | * @discard_ctl: discard control | |
238 | * @block_group: block_group of interest | |
239 | * | |
240 | * Increment @block_group's discard_index. If it falls of the list, let it be. | |
241 | * Otherwise add it back to the appropriate list. | |
242 | */ | |
243 | static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl, | |
244 | struct btrfs_block_group *block_group) | |
245 | { | |
246 | block_group->discard_index++; | |
247 | if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) { | |
248 | block_group->discard_index = 1; | |
249 | return; | |
250 | } | |
251 | ||
252 | add_to_discard_list(discard_ctl, block_group); | |
253 | } | |
254 | ||
/**
 * btrfs_discard_cancel_work - remove a block_group from the discard lists
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * This removes @block_group from the discard lists.  If necessary, it waits on
 * the current work and then reschedules the delayed work.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	/* Only synchronize if @block_group was the one being worked on */
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}
271 | ||
/**
 * btrfs_discard_queue_work - handles queuing the block_groups
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * This maintains the LRU order of the discard lists.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	/* Empty block groups go on the fast-path unused list */
	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	/* Kick the worker if it isn't already scheduled */
	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}
293 | ||
/**
 * btrfs_discard_schedule_work - responsible for scheduling the discard work
 * @discard_ctl: discard control
 * @override: override the current timer
 *
 * Discards are issued by a delayed workqueue item.  @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit.  This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	struct btrfs_block_group *block_group;
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);

	if (!btrfs_run_discard_work(discard_ctl))
		goto out;

	/* Unless overriding, an already-pending timer stays as-is */
	if (!override && delayed_work_pending(&discard_ctl->work))
		goto out;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		unsigned long delay = discard_ctl->delay;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so we can manage the bytes rate limit by keeping
		 * track of the previous discard.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  MSEC_PER_SEC, bps_limit);

			delay = max(delay, msecs_to_jiffies(bps_delay));
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, nsecs_to_jiffies(bg_timeout));
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, delay);
	}
out:
	spin_unlock(&discard_ctl->lock);
}
351 | ||
/**
 * btrfs_finish_discard_pass - determine next step of a block_group
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * This determines the next step for a block group after it's finished going
 * through a pass on a discard list.  If it is unused and fully trimmed, we can
 * mark it unused and send it to the unused_bgs path.  Otherwise, pass it onto
 * the appropriate filter list or let it fall off.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		/* Fully trimmed and empty: hand off to the unused_bgs path */
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}
376 | ||
b0643e59 DZ |
377 | /** |
378 | * btrfs_discard_workfn - discard work function | |
379 | * @work: work | |
380 | * | |
2bee7eb8 DZ |
381 | * This finds the next block_group to start discarding and then discards a |
382 | * single region. It does this in a two-pass fashion: first extents and second | |
383 | * bitmaps. Completely discarded block groups are sent to the unused_bgs path. | |
b0643e59 DZ |
384 | */ |
385 | static void btrfs_discard_workfn(struct work_struct *work) | |
386 | { | |
387 | struct btrfs_discard_ctl *discard_ctl; | |
388 | struct btrfs_block_group *block_group; | |
2bee7eb8 | 389 | enum btrfs_discard_state discard_state; |
7fe6d45e | 390 | int discard_index = 0; |
b0643e59 | 391 | u64 trimmed = 0; |
7fe6d45e | 392 | u64 minlen = 0; |
b0643e59 DZ |
393 | |
394 | discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); | |
395 | ||
7fe6d45e DZ |
396 | block_group = peek_discard_list(discard_ctl, &discard_state, |
397 | &discard_index); | |
b0643e59 DZ |
398 | if (!block_group || !btrfs_run_discard_work(discard_ctl)) |
399 | return; | |
400 | ||
2bee7eb8 | 401 | /* Perform discarding */ |
7fe6d45e DZ |
402 | minlen = discard_minlen[discard_index]; |
403 | ||
404 | if (discard_state == BTRFS_DISCARD_BITMAPS) { | |
405 | u64 maxlen = 0; | |
406 | ||
407 | /* | |
408 | * Use the previous levels minimum discard length as the max | |
409 | * length filter. In the case something is added to make a | |
410 | * region go beyond the max filter, the entire bitmap is set | |
411 | * back to BTRFS_TRIM_STATE_UNTRIMMED. | |
412 | */ | |
413 | if (discard_index != BTRFS_DISCARD_INDEX_UNUSED) | |
414 | maxlen = discard_minlen[discard_index - 1]; | |
415 | ||
2bee7eb8 DZ |
416 | btrfs_trim_block_group_bitmaps(block_group, &trimmed, |
417 | block_group->discard_cursor, | |
418 | btrfs_block_group_end(block_group), | |
7fe6d45e DZ |
419 | minlen, maxlen, true); |
420 | } else { | |
2bee7eb8 DZ |
421 | btrfs_trim_block_group_extents(block_group, &trimmed, |
422 | block_group->discard_cursor, | |
423 | btrfs_block_group_end(block_group), | |
7fe6d45e DZ |
424 | minlen, true); |
425 | } | |
2bee7eb8 | 426 | |
e93591bb DZ |
427 | discard_ctl->prev_discard = trimmed; |
428 | ||
2bee7eb8 DZ |
429 | /* Determine next steps for a block_group */ |
430 | if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) { | |
431 | if (discard_state == BTRFS_DISCARD_BITMAPS) { | |
432 | btrfs_finish_discard_pass(discard_ctl, block_group); | |
433 | } else { | |
434 | block_group->discard_cursor = block_group->start; | |
435 | spin_lock(&discard_ctl->lock); | |
436 | if (block_group->discard_state != | |
437 | BTRFS_DISCARD_RESET_CURSOR) | |
438 | block_group->discard_state = | |
439 | BTRFS_DISCARD_BITMAPS; | |
440 | spin_unlock(&discard_ctl->lock); | |
441 | } | |
442 | } | |
443 | ||
444 | spin_lock(&discard_ctl->lock); | |
445 | discard_ctl->block_group = NULL; | |
446 | spin_unlock(&discard_ctl->lock); | |
b0643e59 | 447 | |
b0643e59 DZ |
448 | btrfs_discard_schedule_work(discard_ctl, false); |
449 | } | |
450 | ||
451 | /** | |
452 | * btrfs_run_discard_work - determines if async discard should be running | |
453 | * @discard_ctl: discard control | |
454 | * | |
455 | * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set. | |
456 | */ | |
457 | bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl) | |
458 | { | |
459 | struct btrfs_fs_info *fs_info = container_of(discard_ctl, | |
460 | struct btrfs_fs_info, | |
461 | discard_ctl); | |
462 | ||
463 | return (!(fs_info->sb->s_flags & SB_RDONLY) && | |
464 | test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags)); | |
465 | } | |
466 | ||
/**
 * btrfs_discard_calc_delay - recalculate the base delay
 * @discard_ctl: discard control
 *
 * Recalculate the base delay which is based off the total number of
 * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	u32 iops_limit;
	unsigned long delay;
	unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/* An iops limit raises the floor on the per-extent delay */
	iops_limit = READ_ONCE(discard_ctl->iops_limit);
	if (iops_limit)
		lower_limit = max_t(unsigned long, lower_limit,
				    MSEC_PER_SEC / iops_limit);

	/* Spread the target completion latency evenly across all extents */
	delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
	delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay = msecs_to_jiffies(delay);

	spin_unlock(&discard_ctl->lock);
}
499 | ||
/**
 * btrfs_discard_update_discardable - propagate discard counters
 * @block_group: block_group of interest
 * @ctl: free_space_ctl of @block_group
 *
 * This propagates deltas of counters up to the discard_ctl.  It maintains a
 * current counter and a previous counter passing the delta up to the global
 * stat.  Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
				      struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	/* Only data-only block groups are tracked for async discard */
	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}
539 | ||
/**
 * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed.  In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit.  This
 * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}
563 | ||
/**
 * btrfs_discard_purge_list - purge discard lists
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path.  As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			/*
			 * Drop the lock around btrfs_mark_bg_unused() since it
			 * takes other locks; the entry was already unlinked so
			 * the _safe iteration can continue afterwards.
			 */
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
		}
	}
	spin_unlock(&discard_ctl->lock);
}
593 | ||
/*
 * Enable async discard for the filesystem, or tear it down if the
 * DISCARD_ASYNC mount option is not set.  Pending unused block groups are
 * punted to the discard lists before the running flag is raised.
 */
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}
605 | ||
/*
 * Stop async discard: clearing the running flag makes
 * btrfs_run_discard_work() return false, which the workers check.
 */
void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}
610 | ||
611 | void btrfs_discard_init(struct btrfs_fs_info *fs_info) | |
612 | { | |
613 | struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl; | |
614 | int i; | |
615 | ||
616 | spin_lock_init(&discard_ctl->lock); | |
617 | INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn); | |
618 | ||
619 | for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) | |
620 | INIT_LIST_HEAD(&discard_ctl->discard_list[i]); | |
dfb79ddb | 621 | |
e93591bb | 622 | discard_ctl->prev_discard = 0; |
dfb79ddb | 623 | atomic_set(&discard_ctl->discardable_extents, 0); |
5dc7c10b | 624 | atomic64_set(&discard_ctl->discardable_bytes, 0); |
19b2a2c7 | 625 | discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE; |
a2309300 DZ |
626 | discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC; |
627 | discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS; | |
e93591bb | 628 | discard_ctl->kbps_limit = 0; |
b0643e59 DZ |
629 | } |
630 | ||
/*
 * Shut async discard down.  Order matters: stop new work first, then wait
 * for any in-flight work item, then purge the leftover discard lists.
 */
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}