]>
git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - drivers/lightnvm/pblk-gc.c
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * pblk-gc.c - pblk's garbage collector
19 #include <linux/delay.h>
21 static void pblk_gc_free_gc_rq(struct pblk_gc_rq
*gc_rq
)
28 static int pblk_gc_write(struct pblk
*pblk
)
30 struct pblk_gc
*gc
= &pblk
->gc
;
31 struct pblk_gc_rq
*gc_rq
, *tgc_rq
;
34 spin_lock(&gc
->w_lock
);
35 if (list_empty(&gc
->w_list
)) {
36 spin_unlock(&gc
->w_lock
);
40 list_cut_position(&w_list
, &gc
->w_list
, gc
->w_list
.prev
);
42 spin_unlock(&gc
->w_lock
);
44 list_for_each_entry_safe(gc_rq
, tgc_rq
, &w_list
, list
) {
45 pblk_write_gc_to_cache(pblk
, gc_rq
);
46 list_del(&gc_rq
->list
);
47 kref_put(&gc_rq
->line
->ref
, pblk_line_put
);
48 pblk_gc_free_gc_rq(gc_rq
);
54 static void pblk_gc_writer_kick(struct pblk_gc
*gc
)
56 wake_up_process(gc
->gc_writer_ts
);
59 static void pblk_put_line_back(struct pblk
*pblk
, struct pblk_line
*line
)
61 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
62 struct list_head
*move_list
;
64 spin_lock(&line
->lock
);
65 WARN_ON(line
->state
!= PBLK_LINESTATE_GC
);
66 line
->state
= PBLK_LINESTATE_CLOSED
;
67 move_list
= pblk_line_gc_list(pblk
, line
);
68 spin_unlock(&line
->lock
);
71 spin_lock(&l_mg
->gc_lock
);
72 list_add_tail(&line
->list
, move_list
);
73 spin_unlock(&l_mg
->gc_lock
);
77 static void pblk_gc_line_ws(struct work_struct
*work
)
79 struct pblk_line_ws
*gc_rq_ws
= container_of(work
,
80 struct pblk_line_ws
, ws
);
81 struct pblk
*pblk
= gc_rq_ws
->pblk
;
82 struct nvm_tgt_dev
*dev
= pblk
->dev
;
83 struct nvm_geo
*geo
= &dev
->geo
;
84 struct pblk_gc
*gc
= &pblk
->gc
;
85 struct pblk_line
*line
= gc_rq_ws
->line
;
86 struct pblk_gc_rq
*gc_rq
= gc_rq_ws
->priv
;
91 gc_rq
->data
= vmalloc(gc_rq
->nr_secs
* geo
->sec_size
);
93 pr_err("pblk: could not GC line:%d (%d/%d)\n",
94 line
->id
, *line
->vsc
, gc_rq
->nr_secs
);
98 /* Read from GC victim block */
99 ret
= pblk_submit_read_gc(pblk
, gc_rq
);
101 pr_err("pblk: failed GC read in line:%d (err:%d)\n",
106 if (!gc_rq
->secs_to_gc
)
110 spin_lock(&gc
->w_lock
);
111 if (gc
->w_entries
>= PBLK_GC_RQ_QD
) {
112 spin_unlock(&gc
->w_lock
);
113 pblk_gc_writer_kick(&pblk
->gc
);
114 usleep_range(128, 256);
118 list_add_tail(&gc_rq
->list
, &gc
->w_list
);
119 spin_unlock(&gc
->w_lock
);
121 pblk_gc_writer_kick(&pblk
->gc
);
127 pblk_gc_free_gc_rq(gc_rq
);
128 kref_put(&line
->ref
, pblk_line_put
);
132 static void pblk_gc_line_prepare_ws(struct work_struct
*work
)
134 struct pblk_line_ws
*line_ws
= container_of(work
, struct pblk_line_ws
,
136 struct pblk
*pblk
= line_ws
->pblk
;
137 struct pblk_line
*line
= line_ws
->line
;
138 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
139 struct pblk_line_meta
*lm
= &pblk
->lm
;
140 struct pblk_gc
*gc
= &pblk
->gc
;
141 struct line_emeta
*emeta_buf
;
142 struct pblk_line_ws
*gc_rq_ws
;
143 struct pblk_gc_rq
*gc_rq
;
145 unsigned long *invalid_bitmap
;
146 int sec_left
, nr_secs
, bit
;
149 invalid_bitmap
= kmalloc(lm
->sec_bitmap_len
, GFP_KERNEL
);
150 if (!invalid_bitmap
) {
151 pr_err("pblk: could not allocate GC invalid bitmap\n");
155 emeta_buf
= pblk_malloc(lm
->emeta_len
[0], l_mg
->emeta_alloc_type
,
158 pr_err("pblk: cannot use GC emeta\n");
159 goto fail_free_bitmap
;
162 ret
= pblk_line_read_emeta(pblk
, line
, emeta_buf
);
164 pr_err("pblk: line %d read emeta failed (%d)\n", line
->id
, ret
);
165 goto fail_free_emeta
;
168 /* If this read fails, it means that emeta is corrupted. For now, leave
169 * the line untouched. TODO: Implement a recovery routine that scans and
170 * moves all sectors on the line.
172 lba_list
= pblk_recov_get_lba_list(pblk
, emeta_buf
);
174 pr_err("pblk: could not interpret emeta (line %d)\n", line
->id
);
175 goto fail_free_emeta
;
178 spin_lock(&line
->lock
);
179 bitmap_copy(invalid_bitmap
, line
->invalid_bitmap
, lm
->sec_per_line
);
180 sec_left
= pblk_line_vsc(line
);
181 spin_unlock(&line
->lock
);
184 pr_err("pblk: corrupted GC line (%d)\n", line
->id
);
185 goto fail_free_emeta
;
190 gc_rq
= kmalloc(sizeof(struct pblk_gc_rq
), GFP_KERNEL
);
192 goto fail_free_emeta
;
196 bit
= find_next_zero_bit(invalid_bitmap
, lm
->sec_per_line
,
198 if (bit
> line
->emeta_ssec
)
201 gc_rq
->paddr_list
[nr_secs
] = bit
;
202 gc_rq
->lba_list
[nr_secs
++] = le64_to_cpu(lba_list
[bit
]);
203 } while (nr_secs
< pblk
->max_write_pgs
);
205 if (unlikely(!nr_secs
)) {
210 gc_rq
->nr_secs
= nr_secs
;
213 gc_rq_ws
= kmalloc(sizeof(struct pblk_line_ws
), GFP_KERNEL
);
215 goto fail_free_gc_rq
;
217 gc_rq_ws
->pblk
= pblk
;
218 gc_rq_ws
->line
= line
;
219 gc_rq_ws
->priv
= gc_rq
;
221 /* The write GC path can be much slower than the read GC one due to
222 * the budget imposed by the rate-limiter. Balance in case that we get
223 * back pressure from the write GC path.
225 while (down_timeout(&gc
->gc_sem
, msecs_to_jiffies(30000)))
228 kref_get(&line
->ref
);
230 INIT_WORK(&gc_rq_ws
->ws
, pblk_gc_line_ws
);
231 queue_work(gc
->gc_line_reader_wq
, &gc_rq_ws
->ws
);
238 pblk_mfree(emeta_buf
, l_mg
->emeta_alloc_type
);
240 kfree(invalid_bitmap
);
242 kref_put(&line
->ref
, pblk_line_put
);
243 atomic_dec(&gc
->read_inflight_gc
);
250 pblk_mfree(emeta_buf
, l_mg
->emeta_alloc_type
);
252 kfree(invalid_bitmap
);
256 pblk_put_line_back(pblk
, line
);
257 kref_put(&line
->ref
, pblk_line_put
);
258 atomic_dec(&gc
->read_inflight_gc
);
260 pr_err("pblk: Failed to GC line %d\n", line
->id
);
263 static int pblk_gc_line(struct pblk
*pblk
, struct pblk_line
*line
)
265 struct pblk_gc
*gc
= &pblk
->gc
;
266 struct pblk_line_ws
*line_ws
;
268 pr_debug("pblk: line '%d' being reclaimed for GC\n", line
->id
);
270 line_ws
= kmalloc(sizeof(struct pblk_line_ws
), GFP_KERNEL
);
274 line_ws
->pblk
= pblk
;
275 line_ws
->line
= line
;
277 atomic_inc(&gc
->pipeline_gc
);
278 INIT_WORK(&line_ws
->ws
, pblk_gc_line_prepare_ws
);
279 queue_work(gc
->gc_reader_wq
, &line_ws
->ws
);
284 static void pblk_gc_reader_kick(struct pblk_gc
*gc
)
286 wake_up_process(gc
->gc_reader_ts
);
289 static void pblk_gc_kick(struct pblk
*pblk
)
291 struct pblk_gc
*gc
= &pblk
->gc
;
293 pblk_gc_writer_kick(gc
);
294 pblk_gc_reader_kick(gc
);
296 /* If we're shutting down GC, let's not start it up again */
297 if (gc
->gc_enabled
) {
298 wake_up_process(gc
->gc_ts
);
299 mod_timer(&gc
->gc_timer
,
300 jiffies
+ msecs_to_jiffies(GC_TIME_MSECS
));
304 static int pblk_gc_read(struct pblk
*pblk
)
306 struct pblk_gc
*gc
= &pblk
->gc
;
307 struct pblk_line
*line
;
309 spin_lock(&gc
->r_lock
);
310 if (list_empty(&gc
->r_list
)) {
311 spin_unlock(&gc
->r_lock
);
315 line
= list_first_entry(&gc
->r_list
, struct pblk_line
, list
);
316 list_del(&line
->list
);
317 spin_unlock(&gc
->r_lock
);
321 if (pblk_gc_line(pblk
, line
))
322 pr_err("pblk: failed to GC line %d\n", line
->id
);
327 static struct pblk_line
*pblk_gc_get_victim_line(struct pblk
*pblk
,
328 struct list_head
*group_list
)
330 struct pblk_line
*line
, *victim
;
331 int line_vsc
, victim_vsc
;
333 victim
= list_first_entry(group_list
, struct pblk_line
, list
);
334 list_for_each_entry(line
, group_list
, list
) {
335 line_vsc
= le32_to_cpu(*line
->vsc
);
336 victim_vsc
= le32_to_cpu(*victim
->vsc
);
337 if (line_vsc
< victim_vsc
)
344 static bool pblk_gc_should_run(struct pblk_gc
*gc
, struct pblk_rl
*rl
)
346 unsigned int nr_blocks_free
, nr_blocks_need
;
348 nr_blocks_need
= pblk_rl_high_thrs(rl
);
349 nr_blocks_free
= pblk_rl_nr_free_blks(rl
);
351 /* This is not critical, no need to take lock here */
352 return ((gc
->gc_active
) && (nr_blocks_need
> nr_blocks_free
));
355 void pblk_gc_free_full_lines(struct pblk
*pblk
)
357 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
358 struct pblk_gc
*gc
= &pblk
->gc
;
359 struct pblk_line
*line
;
362 spin_lock(&l_mg
->gc_lock
);
363 if (list_empty(&l_mg
->gc_full_list
)) {
364 spin_unlock(&l_mg
->gc_lock
);
368 line
= list_first_entry(&l_mg
->gc_full_list
,
369 struct pblk_line
, list
);
371 spin_lock(&line
->lock
);
372 WARN_ON(line
->state
!= PBLK_LINESTATE_CLOSED
);
373 line
->state
= PBLK_LINESTATE_GC
;
374 spin_unlock(&line
->lock
);
376 list_del(&line
->list
);
377 spin_unlock(&l_mg
->gc_lock
);
379 atomic_inc(&gc
->pipeline_gc
);
380 kref_put(&line
->ref
, pblk_line_put
);
385 * Lines with no valid sectors will be returned to the free list immediately. If
386 * GC is activated - either because the free block count is under the determined
387 * threshold, or because it is being forced from user space - only lines with a
388 * high count of invalid sectors will be recycled.
390 static void pblk_gc_run(struct pblk
*pblk
)
392 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
393 struct pblk_gc
*gc
= &pblk
->gc
;
394 struct pblk_line
*line
;
395 struct list_head
*group_list
;
397 int read_inflight_gc
, gc_group
= 0, prev_group
= 0;
399 pblk_gc_free_full_lines(pblk
);
401 run_gc
= pblk_gc_should_run(&pblk
->gc
, &pblk
->rl
);
402 if (!run_gc
|| (atomic_read(&gc
->read_inflight_gc
) >= PBLK_GC_L_QD
))
406 group_list
= l_mg
->gc_lists
[gc_group
++];
409 spin_lock(&l_mg
->gc_lock
);
410 if (list_empty(group_list
)) {
411 spin_unlock(&l_mg
->gc_lock
);
415 line
= pblk_gc_get_victim_line(pblk
, group_list
);
417 spin_lock(&line
->lock
);
418 WARN_ON(line
->state
!= PBLK_LINESTATE_CLOSED
);
419 line
->state
= PBLK_LINESTATE_GC
;
420 spin_unlock(&line
->lock
);
422 list_del(&line
->list
);
423 spin_unlock(&l_mg
->gc_lock
);
425 spin_lock(&gc
->r_lock
);
426 list_add_tail(&line
->list
, &gc
->r_list
);
427 spin_unlock(&gc
->r_lock
);
429 read_inflight_gc
= atomic_inc_return(&gc
->read_inflight_gc
);
430 pblk_gc_reader_kick(gc
);
434 /* No need to queue up more GC lines than we can handle */
435 run_gc
= pblk_gc_should_run(&pblk
->gc
, &pblk
->rl
);
436 if (!run_gc
|| read_inflight_gc
>= PBLK_GC_L_QD
)
440 if (!prev_group
&& pblk
->rl
.rb_state
> gc_group
&&
441 gc_group
< PBLK_GC_NR_LISTS
)
445 static void pblk_gc_timer(unsigned long data
)
447 struct pblk
*pblk
= (struct pblk
*)data
;
452 static int pblk_gc_ts(void *data
)
454 struct pblk
*pblk
= data
;
456 while (!kthread_should_stop()) {
458 set_current_state(TASK_INTERRUPTIBLE
);
465 static int pblk_gc_writer_ts(void *data
)
467 struct pblk
*pblk
= data
;
469 while (!kthread_should_stop()) {
470 if (!pblk_gc_write(pblk
))
472 set_current_state(TASK_INTERRUPTIBLE
);
479 static int pblk_gc_reader_ts(void *data
)
481 struct pblk
*pblk
= data
;
482 struct pblk_gc
*gc
= &pblk
->gc
;
484 while (!kthread_should_stop()) {
485 if (!pblk_gc_read(pblk
))
487 set_current_state(TASK_INTERRUPTIBLE
);
491 #ifdef CONFIG_NVM_DEBUG
492 pr_info("pblk: flushing gc pipeline, %d lines left\n",
493 atomic_read(&gc
->pipeline_gc
));
497 if (!atomic_read(&gc
->pipeline_gc
))
506 static void pblk_gc_start(struct pblk
*pblk
)
508 pblk
->gc
.gc_active
= 1;
509 pr_debug("pblk: gc start\n");
512 void pblk_gc_should_start(struct pblk
*pblk
)
514 struct pblk_gc
*gc
= &pblk
->gc
;
516 if (gc
->gc_enabled
&& !gc
->gc_active
) {
523 * If flush_wq == 1 then no lock should be held by the caller since
524 * flush_workqueue can sleep
526 static void pblk_gc_stop(struct pblk
*pblk
, int flush_wq
)
528 pblk
->gc
.gc_active
= 0;
529 pr_debug("pblk: gc stop\n");
532 void pblk_gc_should_stop(struct pblk
*pblk
)
534 struct pblk_gc
*gc
= &pblk
->gc
;
536 if (gc
->gc_active
&& !gc
->gc_forced
)
537 pblk_gc_stop(pblk
, 0);
540 void pblk_gc_should_kick(struct pblk
*pblk
)
542 pblk_rl_update_rates(&pblk
->rl
);
545 void pblk_gc_sysfs_state_show(struct pblk
*pblk
, int *gc_enabled
,
548 struct pblk_gc
*gc
= &pblk
->gc
;
550 spin_lock(&gc
->lock
);
551 *gc_enabled
= gc
->gc_enabled
;
552 *gc_active
= gc
->gc_active
;
553 spin_unlock(&gc
->lock
);
556 int pblk_gc_sysfs_force(struct pblk
*pblk
, int force
)
558 struct pblk_gc
*gc
= &pblk
->gc
;
560 if (force
< 0 || force
> 1)
563 spin_lock(&gc
->lock
);
564 gc
->gc_forced
= force
;
570 spin_unlock(&gc
->lock
);
572 pblk_gc_should_start(pblk
);
577 int pblk_gc_init(struct pblk
*pblk
)
579 struct pblk_gc
*gc
= &pblk
->gc
;
582 gc
->gc_ts
= kthread_create(pblk_gc_ts
, pblk
, "pblk-gc-ts");
583 if (IS_ERR(gc
->gc_ts
)) {
584 pr_err("pblk: could not allocate GC main kthread\n");
585 return PTR_ERR(gc
->gc_ts
);
588 gc
->gc_writer_ts
= kthread_create(pblk_gc_writer_ts
, pblk
,
589 "pblk-gc-writer-ts");
590 if (IS_ERR(gc
->gc_writer_ts
)) {
591 pr_err("pblk: could not allocate GC writer kthread\n");
592 ret
= PTR_ERR(gc
->gc_writer_ts
);
593 goto fail_free_main_kthread
;
596 gc
->gc_reader_ts
= kthread_create(pblk_gc_reader_ts
, pblk
,
597 "pblk-gc-reader-ts");
598 if (IS_ERR(gc
->gc_reader_ts
)) {
599 pr_err("pblk: could not allocate GC reader kthread\n");
600 ret
= PTR_ERR(gc
->gc_reader_ts
);
601 goto fail_free_writer_kthread
;
604 setup_timer(&gc
->gc_timer
, pblk_gc_timer
, (unsigned long)pblk
);
605 mod_timer(&gc
->gc_timer
, jiffies
+ msecs_to_jiffies(GC_TIME_MSECS
));
611 atomic_set(&gc
->read_inflight_gc
, 0);
612 atomic_set(&gc
->pipeline_gc
, 0);
614 /* Workqueue that reads valid sectors from a line and submit them to the
615 * GC writer to be recycled.
617 gc
->gc_line_reader_wq
= alloc_workqueue("pblk-gc-line-reader-wq",
618 WQ_MEM_RECLAIM
| WQ_UNBOUND
, PBLK_GC_MAX_READERS
);
619 if (!gc
->gc_line_reader_wq
) {
620 pr_err("pblk: could not allocate GC line reader workqueue\n");
622 goto fail_free_reader_kthread
;
625 /* Workqueue that prepare lines for GC */
626 gc
->gc_reader_wq
= alloc_workqueue("pblk-gc-line_wq",
627 WQ_MEM_RECLAIM
| WQ_UNBOUND
, 1);
628 if (!gc
->gc_reader_wq
) {
629 pr_err("pblk: could not allocate GC reader workqueue\n");
631 goto fail_free_reader_line_wq
;
634 spin_lock_init(&gc
->lock
);
635 spin_lock_init(&gc
->w_lock
);
636 spin_lock_init(&gc
->r_lock
);
638 sema_init(&gc
->gc_sem
, PBLK_GC_RQ_QD
);
640 INIT_LIST_HEAD(&gc
->w_list
);
641 INIT_LIST_HEAD(&gc
->r_list
);
645 fail_free_reader_line_wq
:
646 destroy_workqueue(gc
->gc_line_reader_wq
);
647 fail_free_reader_kthread
:
648 kthread_stop(gc
->gc_reader_ts
);
649 fail_free_writer_kthread
:
650 kthread_stop(gc
->gc_writer_ts
);
651 fail_free_main_kthread
:
652 kthread_stop(gc
->gc_ts
);
657 void pblk_gc_exit(struct pblk
*pblk
)
659 struct pblk_gc
*gc
= &pblk
->gc
;
662 del_timer_sync(&gc
->gc_timer
);
663 pblk_gc_stop(pblk
, 1);
666 kthread_stop(gc
->gc_ts
);
668 if (gc
->gc_reader_ts
)
669 kthread_stop(gc
->gc_reader_ts
);
671 flush_workqueue(gc
->gc_reader_wq
);
672 if (gc
->gc_reader_wq
)
673 destroy_workqueue(gc
->gc_reader_wq
);
675 flush_workqueue(gc
->gc_line_reader_wq
);
676 if (gc
->gc_line_reader_wq
)
677 destroy_workqueue(gc
->gc_line_reader_wq
);
679 if (gc
->gc_writer_ts
)
680 kthread_stop(gc
->gc_writer_ts
);