// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include "pblk-trace.h"
#include <linux/delay.h>
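
/* Free a GC request along with its sector payload buffer. */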
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
	if (gc_rq->data)
		vfree(gc_rq->data);

	kfree(gc_rq);
}
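
/* Drain the queued GC requests into the write cache; returns 1 when there
 * was nothing to write so the writer kthread can go back to sleep.
 */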
static int pblk_gc_write(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_gc_rq *gc_rq, *tgc_rq;
	LIST_HEAD(w_list);

	spin_lock(&gc->w_lock);
	if (list_empty(&gc->w_list)) {
		spin_unlock(&gc->w_lock);
		return 1;
	}

	list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
	gc->w_entries = 0;
	spin_unlock(&gc->w_lock);

	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
		pblk_write_gc_to_cache(pblk, gc_rq);
		list_del(&gc_rq->list);
		kref_put(&gc_rq->line->ref, pblk_line_put);
		pblk_gc_free_gc_rq(gc_rq);
	}

	return 0;
}
static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_writer_ts);
}
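
/* Move a line that was picked for GC back to the closed state and
 * re-insert it on the GC group list it now belongs to.
 */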
void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list;

	spin_lock(&l_mg->gc_lock);
	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_GC);
	line->state = PBLK_LINESTATE_CLOSED;
	trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);

	/* We need to reset gc_group in order to ensure that
	 * pblk_line_gc_list will return proper move_list
	 * since right now current line is not on any of the
	 * gc lists.
	 */
	line->gc_group = PBLK_LINEGC_NONE;
	move_list = pblk_line_gc_list(pblk, line);
	spin_unlock(&line->lock);
	list_add_tail(&line->list, move_list);
	spin_unlock(&l_mg->gc_lock);
}
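
/* Read worker: reads the valid sectors of a victim line and queues the
 * resulting GC request for the GC writer, honoring the write queue depth.
 */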
static void pblk_gc_line_ws(struct work_struct *work)
{
	struct pblk_line_ws *gc_rq_ws = container_of(work,
						struct pblk_line_ws, ws);
	struct pblk *pblk = gc_rq_ws->pblk;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line = gc_rq_ws->line;
	struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
	int ret;

	up(&gc->gc_sem);

	/* Read from GC victim block */
	ret = pblk_submit_read_gc(pblk, gc_rq);
	if (ret) {
		line->w_err_gc->has_gc_err = 1;
		goto out;
	}

	if (!gc_rq->secs_to_gc)
		goto out;

retry:
	spin_lock(&gc->w_lock);
	if (gc->w_entries >= PBLK_GC_RQ_QD) {
		spin_unlock(&gc->w_lock);
		pblk_gc_writer_kick(&pblk->gc);
		usleep_range(128, 256);
		goto retry;
	}
	gc->w_entries++;
	list_add_tail(&gc_rq->list, &gc->w_list);
	spin_unlock(&gc->w_lock);

	pblk_gc_writer_kick(&pblk->gc);

	kfree(gc_rq_ws);
	return;

out:
	pblk_gc_free_gc_rq(gc_rq);
	kref_put(&line->ref, pblk_line_put);
	kfree(gc_rq_ws);
}
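
/* Recover the lba list for a line from its emeta (end-of-line metadata);
 * returns NULL if emeta cannot be read or is inconsistent.
 */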
static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
				       struct pblk_line *line)
{
	struct line_emeta *emeta_buf;
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned int lba_list_size = lm->emeta_len[2];
	__le64 *lba_list;
	int ret;

	emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
	if (!emeta_buf)
		return NULL;

	ret = pblk_line_emeta_read(pblk, line, emeta_buf);
	if (ret) {
		pblk_err(pblk, "line %d read emeta failed (%d)\n",
				line->id, ret);
		kvfree(emeta_buf);
		return NULL;
	}

	/* If this read fails, it means that emeta is corrupted.
	 * For now, leave the line untouched.
	 * TODO: Implement a recovery routine that scans and moves
	 * all sectors on the line.
	 */

	ret = pblk_recov_check_emeta(pblk, emeta_buf);
	if (ret) {
		pblk_err(pblk, "inconsistent emeta (line %d)\n",
				line->id);
		kvfree(emeta_buf);
		return NULL;
	}

	lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
	if (lba_list)
		memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);

	kvfree(emeta_buf);

	return lba_list;
}
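
/* Prepare worker: snapshots the invalid-sector bitmap of a victim line,
 * chops its valid sectors into GC requests and queues them for reading.
 */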
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line *line = line_ws->line;
	struct pblk_line_meta *lm = &pblk->lm;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line_ws *gc_rq_ws;
	struct pblk_gc_rq *gc_rq;
	__le64 *lba_list;
	unsigned long *invalid_bitmap;
	int sec_left, nr_secs, bit;

	invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!invalid_bitmap)
		goto fail_free_ws;

	if (line->w_err_gc->has_write_err) {
		lba_list = line->w_err_gc->lba_list;
		line->w_err_gc->lba_list = NULL;
	} else {
		lba_list = get_lba_list_from_emeta(pblk, line);
		if (!lba_list) {
			pblk_err(pblk, "could not interpret emeta (line %d)\n",
					line->id);
			goto fail_free_invalid_bitmap;
		}
	}

	spin_lock(&line->lock);
	bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
	sec_left = pblk_line_vsc(line);
	spin_unlock(&line->lock);

	if (sec_left < 0) {
		pblk_err(pblk, "corrupted GC line (%d)\n", line->id);
		goto fail_free_lba_list;
	}

	bit = -1;
next_rq:
	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
	if (!gc_rq)
		goto fail_free_lba_list;

	nr_secs = 0;
	do {
		bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
								bit + 1);
		if (bit > line->emeta_ssec)
			break;

		gc_rq->paddr_list[nr_secs] = bit;
		gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
	} while (nr_secs < pblk->max_write_pgs);

	if (unlikely(!nr_secs)) {
		kfree(gc_rq);
		goto out;
	}

	gc_rq->nr_secs = nr_secs;
	gc_rq->line = line;

	gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
	if (!gc_rq->data)
		goto fail_free_gc_rq;

	gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
	if (!gc_rq_ws)
		goto fail_free_gc_data;

	gc_rq_ws->pblk = pblk;
	gc_rq_ws->line = line;
	gc_rq_ws->priv = gc_rq;

	/* The write GC path can be much slower than the read GC one due to
	 * the budget imposed by the rate-limiter. Balance in case that we get
	 * back pressure from the write GC path.
	 */
	while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
		io_schedule();

	kref_get(&line->ref);

	INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
	queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);

	sec_left -= nr_secs;
	if (sec_left > 0)
		goto next_rq;

out:
	kvfree(lba_list);
	kfree(line_ws);
	kfree(invalid_bitmap);

	kref_put(&line->ref, pblk_line_put);
	atomic_dec(&gc->read_inflight_gc);

	return;

fail_free_gc_data:
	vfree(gc_rq->data);
fail_free_gc_rq:
	kfree(gc_rq);
fail_free_lba_list:
	kvfree(lba_list);
fail_free_invalid_bitmap:
	kfree(invalid_bitmap);
fail_free_ws:
	kfree(line_ws);

	/* Line goes back to closed state, so we cannot release additional
	 * reference for line, since we do that only when we want to do
	 * gc to free line state transition.
	 */
	pblk_put_line_back(pblk, line);
	atomic_dec(&gc->read_inflight_gc);

	pblk_err(pblk, "failed to GC line %d\n", line->id);
}
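
/* Hand a victim line over to the GC prepare workqueue. */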
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line_ws *line_ws;

	pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id);

	line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
	if (!line_ws)
		return -ENOMEM;

	line_ws->pblk = pblk;
	line_ws->line = line;

	atomic_inc(&gc->pipeline_gc);
	INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
	queue_work(gc->gc_reader_wq, &line_ws->ws);

	return 0;
}

static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_reader_ts);
}
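
/* Wake up all GC kthreads and, while GC is enabled, re-arm the GC timer. */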
static void pblk_gc_kick(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	pblk_gc_writer_kick(gc);
	pblk_gc_reader_kick(gc);

	/* If we're shutting down GC, let's not start it up again */
	if (gc->gc_enabled) {
		wake_up_process(gc->gc_ts);
		mod_timer(&gc->gc_timer,
			  jiffies + msecs_to_jiffies(GC_TIME_MSECS));
	}
}
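
/* Take one line off the GC read list and start GC on it; on failure the
 * line is put back on the list so it can be retried later.
 */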
static int pblk_gc_read(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;

	spin_lock(&gc->r_lock);
	if (list_empty(&gc->r_list)) {
		spin_unlock(&gc->r_lock);
		return 1;
	}

	line = list_first_entry(&gc->r_list, struct pblk_line, list);
	list_del(&line->list);
	spin_unlock(&gc->r_lock);

	pblk_gc_kick(pblk);

	if (pblk_gc_line(pblk, line)) {
		pblk_err(pblk, "failed to GC line %d\n", line->id);
		/* rollback */
		spin_lock(&gc->r_lock);
		list_add_tail(&line->list, &gc->r_list);
		spin_unlock(&gc->r_lock);
	}

	return 0;
}
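
/* Pick the line with the fewest valid sectors from a GC group list,
 * skipping lines that still have sectors pending an L2P update.
 */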
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
						 struct list_head *group_list)
{
	struct pblk_line *line, *victim;
	unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L;

	victim = list_first_entry(group_list, struct pblk_line, list);

	list_for_each_entry(line, group_list, list) {
		if (!atomic_read(&line->sec_to_update))
			line_vsc = le32_to_cpu(*line->vsc);
		if (line_vsc < victim_vsc) {
			victim = line;
			victim_vsc = le32_to_cpu(*victim->vsc);
		}
	}

	if (victim_vsc == ~0x0)
		return NULL;

	return victim;
}
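
/* GC runs when there are write-error lines to recover, or when GC is
 * active and the free block count has fallen below the high threshold.
 */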
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
	unsigned int nr_blocks_free, nr_blocks_need;
	unsigned int werr_lines = atomic_read(&rl->werr_lines);

	nr_blocks_need = pblk_rl_high_thrs(rl);
	nr_blocks_free = pblk_rl_nr_free_blks(rl);

	/* This is not critical, no need to take lock here */
	return ((werr_lines > 0) ||
		((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
}
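
/* Lines that contain no valid sectors go straight back to the free list:
 * drop the GC reference on each so they can be erased and reused.
 */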
void pblk_gc_free_full_lines(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;

	do {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(&l_mg->gc_full_list)) {
			spin_unlock(&l_mg->gc_lock);
			return;
		}

		line = list_first_entry(&l_mg->gc_full_list,
							struct pblk_line, list);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		atomic_inc(&gc->pipeline_gc);
		kref_put(&line->ref, pblk_line_put);
	} while (1);
}

/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;
	struct list_head *group_list;
	bool run_gc;
	int read_inflight_gc, gc_group = 0, prev_group = 0;

	pblk_gc_free_full_lines(pblk);

	run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
	if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
		return;

next_gc_group:
	group_list = l_mg->gc_lists[gc_group++];

	do {
		spin_lock(&l_mg->gc_lock);

		line = pblk_gc_get_victim_line(pblk, group_list);
		if (!line) {
			spin_unlock(&l_mg->gc_lock);
			break;
		}

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
					line->state);
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		spin_lock(&gc->r_lock);
		list_add_tail(&line->list, &gc->r_list);
		spin_unlock(&gc->r_lock);

		read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
		pblk_gc_reader_kick(gc);

		prev_group = 1;

		/* No need to queue up more GC lines than we can handle */
		run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
		if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
			break;
	} while (1);

	if (!prev_group && pblk->rl.rb_state > gc_group &&
						gc_group < PBLK_GC_NR_LISTS)
		goto next_gc_group;
}

static void pblk_gc_timer(struct timer_list *t)
{
	struct pblk *pblk = from_timer(pblk, t, gc.gc_timer);

	pblk_gc_kick(pblk);
}

static int pblk_gc_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		pblk_gc_run(pblk);
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

static int pblk_gc_writer_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_gc_write(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

static int pblk_gc_reader_ts(void *data)
{
	struct pblk *pblk = data;
	struct pblk_gc *gc = &pblk->gc;

	while (!kthread_should_stop()) {
		if (!pblk_gc_read(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

#ifdef CONFIG_NVM_PBLK_DEBUG
	pblk_info(pblk, "flushing gc pipeline, %d lines left\n",
			atomic_read(&gc->pipeline_gc));
#endif

	do {
		if (!atomic_read(&gc->pipeline_gc))
			break;

		schedule();
	} while (1);

	return 0;
}

static void pblk_gc_start(struct pblk *pblk)
{
	pblk->gc.gc_active = 1;
	pblk_debug(pblk, "gc start\n");
}

void pblk_gc_should_start(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_enabled && !gc->gc_active) {
		pblk_gc_start(pblk);
		pblk_gc_kick(pblk);
	}
}

void pblk_gc_should_stop(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_active && !gc->gc_forced)
		gc->gc_active = 0;
}

void pblk_gc_should_kick(struct pblk *pblk)
{
	pblk_rl_update_rates(&pblk->rl);
}

void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
			      int *gc_active)
{
	struct pblk_gc *gc = &pblk->gc;

	spin_lock(&gc->lock);
	*gc_enabled = gc->gc_enabled;
	*gc_active = gc->gc_active;
	spin_unlock(&gc->lock);
}
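
/* Force GC on or off from sysfs; forcing GC on also enables it. */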
int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
	struct pblk_gc *gc = &pblk->gc;

	if (force < 0 || force > 1)
		return -EINVAL;

	spin_lock(&gc->lock);
	gc->gc_forced = force;

	if (force)
		gc->gc_enabled = 1;
	else
		gc->gc_enabled = 0;
	spin_unlock(&gc->lock);

	pblk_gc_should_start(pblk);

	return 0;
}
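
/* Set up the GC kthreads, workqueues, timer and state at target creation. */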
int pblk_gc_init(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	int ret;

	gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
	if (IS_ERR(gc->gc_ts)) {
		pblk_err(pblk, "could not allocate GC main kthread\n");
		return PTR_ERR(gc->gc_ts);
	}

	gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
							"pblk-gc-writer-ts");
	if (IS_ERR(gc->gc_writer_ts)) {
		pblk_err(pblk, "could not allocate GC writer kthread\n");
		ret = PTR_ERR(gc->gc_writer_ts);
		goto fail_free_main_kthread;
	}

	gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
							"pblk-gc-reader-ts");
	if (IS_ERR(gc->gc_reader_ts)) {
		pblk_err(pblk, "could not allocate GC reader kthread\n");
		ret = PTR_ERR(gc->gc_reader_ts);
		goto fail_free_writer_kthread;
	}

	timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

	gc->gc_active = 0;
	gc->gc_forced = 0;
	gc->gc_enabled = 1;
	gc->w_entries = 0;
	atomic_set(&gc->read_inflight_gc, 0);
	atomic_set(&gc->pipeline_gc, 0);

	/* Workqueue that reads valid sectors from a line and submit them to the
	 * GC writer to be recycled.
	 */
	gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
	if (!gc->gc_line_reader_wq) {
		pblk_err(pblk, "could not allocate GC line reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_kthread;
	}

	/* Workqueue that prepare lines for GC */
	gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!gc->gc_reader_wq) {
		pblk_err(pblk, "could not allocate GC reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_line_wq;
	}

	spin_lock_init(&gc->lock);
	spin_lock_init(&gc->w_lock);
	spin_lock_init(&gc->r_lock);

	sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);

	INIT_LIST_HEAD(&gc->w_list);
	INIT_LIST_HEAD(&gc->r_list);

	return 0;

fail_free_reader_line_wq:
	destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
	kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
	kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
	kthread_stop(gc->gc_ts);

	return ret;
}
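
/* Tear down GC on target removal; if graceful, let queued GC work finish
 * before the workqueues are destroyed.
 */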
void pblk_gc_exit(struct pblk *pblk, bool graceful)
{
	struct pblk_gc *gc = &pblk->gc;

	gc->gc_enabled = 0;
	del_timer_sync(&gc->gc_timer);
	gc->gc_active = 0;

	if (gc->gc_ts)
		kthread_stop(gc->gc_ts);

	if (gc->gc_reader_ts)
		kthread_stop(gc->gc_reader_ts);

	if (graceful) {
		flush_workqueue(gc->gc_reader_wq);
		flush_workqueue(gc->gc_line_reader_wq);
	}

	destroy_workqueue(gc->gc_reader_wq);
	destroy_workqueue(gc->gc_line_reader_wq);

	if (gc->gc_writer_ts)
		kthread_stop(gc->gc_writer_ts);
}