]>
git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - drivers/lightnvm/pblk-gc.c
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */
#include "pblk.h"
#include <linux/delay.h>
21 static void pblk_gc_free_gc_rq(struct pblk_gc_rq
*gc_rq
)
24 kfree(gc_rq
->lba_list
);
28 static int pblk_gc_write(struct pblk
*pblk
)
30 struct pblk_gc
*gc
= &pblk
->gc
;
31 struct pblk_gc_rq
*gc_rq
, *tgc_rq
;
34 spin_lock(&gc
->w_lock
);
35 if (list_empty(&gc
->w_list
)) {
36 spin_unlock(&gc
->w_lock
);
40 list_for_each_entry_safe(gc_rq
, tgc_rq
, &gc
->w_list
, list
) {
41 list_move_tail(&gc_rq
->list
, &w_list
);
44 spin_unlock(&gc
->w_lock
);
46 list_for_each_entry_safe(gc_rq
, tgc_rq
, &w_list
, list
) {
47 pblk_write_gc_to_cache(pblk
, gc_rq
->data
, gc_rq
->lba_list
,
48 gc_rq
->nr_secs
, gc_rq
->secs_to_gc
,
49 gc_rq
->line
, PBLK_IOTYPE_GC
);
51 kref_put(&gc_rq
->line
->ref
, pblk_line_put
);
53 list_del(&gc_rq
->list
);
54 pblk_gc_free_gc_rq(gc_rq
);
60 static void pblk_gc_writer_kick(struct pblk_gc
*gc
)
62 wake_up_process(gc
->gc_writer_ts
);
66 * Responsible for managing all memory related to a gc request. Also in case of
69 static int pblk_gc_move_valid_secs(struct pblk
*pblk
, struct pblk_line
*line
,
70 u64
*lba_list
, unsigned int nr_secs
)
72 struct nvm_tgt_dev
*dev
= pblk
->dev
;
73 struct nvm_geo
*geo
= &dev
->geo
;
74 struct pblk_gc
*gc
= &pblk
->gc
;
75 struct pblk_gc_rq
*gc_rq
;
77 unsigned int secs_to_gc
;
80 data
= kmalloc(nr_secs
* geo
->sec_size
, GFP_KERNEL
);
86 /* Read from GC victim block */
87 if (pblk_submit_read_gc(pblk
, lba_list
, data
, nr_secs
,
96 gc_rq
= kmalloc(sizeof(struct pblk_gc_rq
), GFP_KERNEL
);
104 gc_rq
->lba_list
= lba_list
;
105 gc_rq
->nr_secs
= nr_secs
;
106 gc_rq
->secs_to_gc
= secs_to_gc
;
108 kref_get(&line
->ref
);
111 spin_lock(&gc
->w_lock
);
112 if (gc
->w_entries
> 256) {
113 spin_unlock(&gc
->w_lock
);
114 usleep_range(256, 1024);
118 list_add_tail(&gc_rq
->list
, &gc
->w_list
);
119 spin_unlock(&gc
->w_lock
);
121 pblk_gc_writer_kick(&pblk
->gc
);
133 static void pblk_put_line_back(struct pblk
*pblk
, struct pblk_line
*line
)
135 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
136 struct list_head
*move_list
;
138 spin_lock(&line
->lock
);
139 WARN_ON(line
->state
!= PBLK_LINESTATE_GC
);
140 line
->state
= PBLK_LINESTATE_CLOSED
;
141 move_list
= pblk_line_gc_list(pblk
, line
);
142 spin_unlock(&line
->lock
);
145 spin_lock(&l_mg
->gc_lock
);
146 list_add_tail(&line
->list
, move_list
);
147 spin_unlock(&l_mg
->gc_lock
);
151 static void pblk_gc_line_ws(struct work_struct
*work
)
153 struct pblk_line_ws
*line_ws
= container_of(work
, struct pblk_line_ws
,
155 struct pblk
*pblk
= line_ws
->pblk
;
156 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
157 struct pblk_line
*line
= line_ws
->line
;
158 struct pblk_line_meta
*lm
= &pblk
->lm
;
159 __le64
*lba_list
= line_ws
->priv
;
165 pr_debug("pblk: line '%d' being reclaimed for GC\n", line
->id
);
167 spin_lock(&line
->lock
);
168 sec_left
= line
->vsc
;
170 /* Lines are erased before being used (l_mg->data_/log_next) */
171 spin_unlock(&line
->lock
);
174 spin_unlock(&line
->lock
);
177 pr_err("pblk: corrupted GC line (%d)\n", line
->id
);
179 pblk_put_line_back(pblk
, line
);
185 gc_list
= kmalloc_array(pblk
->max_write_pgs
, sizeof(u64
), GFP_KERNEL
);
188 pblk_put_line_back(pblk
, line
);
194 bit
= find_next_zero_bit(line
->invalid_bitmap
, lm
->sec_per_line
,
196 if (bit
> line
->emeta_ssec
)
199 gc_list
[nr_ppas
++] = le64_to_cpu(lba_list
[bit
]);
200 } while (nr_ppas
< pblk
->max_write_pgs
);
202 if (unlikely(!nr_ppas
)) {
207 if (pblk_gc_move_valid_secs(pblk
, line
, gc_list
, nr_ppas
)) {
208 pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
212 pblk_put_line_back(pblk
, line
);
221 pblk_mfree(line
->emeta
, l_mg
->emeta_alloc_type
);
222 mempool_free(line_ws
, pblk
->line_ws_pool
);
223 atomic_dec(&pblk
->gc
.inflight_gc
);
225 kref_put(&line
->ref
, pblk_line_put
);
228 static int pblk_gc_line(struct pblk
*pblk
, struct pblk_line
*line
)
230 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
231 struct pblk_line_meta
*lm
= &pblk
->lm
;
232 struct pblk_line_ws
*line_ws
;
236 line_ws
= mempool_alloc(pblk
->line_ws_pool
, GFP_KERNEL
);
237 line
->emeta
= pblk_malloc(lm
->emeta_len
, l_mg
->emeta_alloc_type
,
240 pr_err("pblk: cannot use GC emeta\n");
244 ret
= pblk_line_read_emeta(pblk
, line
);
246 pr_err("pblk: line %d read emeta failed (%d)\n", line
->id
, ret
);
247 goto fail_free_emeta
;
250 /* If this read fails, it means that emeta is corrupted. For now, leave
251 * the line untouched. TODO: Implement a recovery routine that scans and
252 * moves all sectors on the line.
254 lba_list
= pblk_recov_get_lba_list(pblk
, line
->emeta
);
256 pr_err("pblk: could not interpret emeta (line %d)\n", line
->id
);
257 goto fail_free_emeta
;
260 line_ws
->pblk
= pblk
;
261 line_ws
->line
= line
;
262 line_ws
->priv
= lba_list
;
264 INIT_WORK(&line_ws
->ws
, pblk_gc_line_ws
);
265 queue_work(pblk
->gc
.gc_reader_wq
, &line_ws
->ws
);
270 pblk_mfree(line
->emeta
, l_mg
->emeta_alloc_type
);
272 mempool_free(line_ws
, pblk
->line_ws_pool
);
273 pblk_put_line_back(pblk
, line
);
278 static void pblk_gc_lines(struct pblk
*pblk
, struct list_head
*gc_list
)
280 struct pblk_line
*line
, *tline
;
282 list_for_each_entry_safe(line
, tline
, gc_list
, list
) {
283 if (pblk_gc_line(pblk
, line
))
284 pr_err("pblk: failed to GC line %d\n", line
->id
);
285 list_del(&line
->list
);
290 * Lines with no valid sectors will be returned to the free list immediately. If
291 * GC is activated - either because the free block count is under the determined
292 * threshold, or because it is being forced from user space - only lines with a
293 * high count of invalid sectors will be recycled.
295 static void pblk_gc_run(struct pblk
*pblk
)
297 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
298 struct pblk_gc
*gc
= &pblk
->gc
;
299 struct pblk_line
*line
, *tline
;
300 unsigned int nr_blocks_free
, nr_blocks_need
;
301 struct list_head
*group_list
;
302 int run_gc
, gc_group
= 0;
304 int inflight_gc
= atomic_read(&gc
->inflight_gc
);
307 spin_lock(&l_mg
->gc_lock
);
308 list_for_each_entry_safe(line
, tline
, &l_mg
->gc_full_list
, list
) {
309 spin_lock(&line
->lock
);
310 WARN_ON(line
->state
!= PBLK_LINESTATE_CLOSED
);
311 line
->state
= PBLK_LINESTATE_GC
;
312 spin_unlock(&line
->lock
);
314 list_del(&line
->list
);
315 kref_put(&line
->ref
, pblk_line_put
);
317 spin_unlock(&l_mg
->gc_lock
);
319 nr_blocks_need
= pblk_rl_gc_thrs(&pblk
->rl
);
320 nr_blocks_free
= pblk_rl_nr_free_blks(&pblk
->rl
);
321 run_gc
= (nr_blocks_need
> nr_blocks_free
|| gc
->gc_forced
);
324 group_list
= l_mg
->gc_lists
[gc_group
++];
325 spin_lock(&l_mg
->gc_lock
);
326 while (run_gc
&& !list_empty(group_list
)) {
327 /* No need to queue up more GC lines than we can handle */
328 if (!run_gc
|| inflight_gc
> gc
->gc_jobs_active
) {
329 spin_unlock(&l_mg
->gc_lock
);
330 pblk_gc_lines(pblk
, &gc_list
);
334 line
= list_first_entry(group_list
, struct pblk_line
, list
);
335 nr_blocks_free
+= atomic_read(&line
->blk_in_line
);
337 spin_lock(&line
->lock
);
338 WARN_ON(line
->state
!= PBLK_LINESTATE_CLOSED
);
339 line
->state
= PBLK_LINESTATE_GC
;
340 list_move_tail(&line
->list
, &gc_list
);
341 atomic_inc(&gc
->inflight_gc
);
343 spin_unlock(&line
->lock
);
346 run_gc
= (nr_blocks_need
> nr_blocks_free
|| gc
->gc_forced
);
348 spin_unlock(&l_mg
->gc_lock
);
350 pblk_gc_lines(pblk
, &gc_list
);
352 if (!prev_gc
&& pblk
->rl
.rb_state
> gc_group
&&
353 gc_group
< PBLK_NR_GC_LISTS
)
358 static void pblk_gc_kick(struct pblk
*pblk
)
360 struct pblk_gc
*gc
= &pblk
->gc
;
362 wake_up_process(gc
->gc_ts
);
363 pblk_gc_writer_kick(gc
);
364 mod_timer(&gc
->gc_timer
, jiffies
+ msecs_to_jiffies(GC_TIME_MSECS
));
/* Timer callback: periodically kick the GC machinery. */
static void pblk_gc_timer(unsigned long data)
{
	struct pblk *pblk = (struct pblk *)data;

	pblk_gc_kick(pblk);
}
374 static int pblk_gc_ts(void *data
)
376 struct pblk
*pblk
= data
;
378 while (!kthread_should_stop()) {
380 set_current_state(TASK_INTERRUPTIBLE
);
387 static int pblk_gc_writer_ts(void *data
)
389 struct pblk
*pblk
= data
;
391 while (!kthread_should_stop()) {
392 if (!pblk_gc_write(pblk
))
394 set_current_state(TASK_INTERRUPTIBLE
);
401 static void pblk_gc_start(struct pblk
*pblk
)
403 pblk
->gc
.gc_active
= 1;
405 pr_debug("pblk: gc start\n");
408 int pblk_gc_status(struct pblk
*pblk
)
410 struct pblk_gc
*gc
= &pblk
->gc
;
413 spin_lock(&gc
->lock
);
415 spin_unlock(&gc
->lock
);
420 static void __pblk_gc_should_start(struct pblk
*pblk
)
422 struct pblk_gc
*gc
= &pblk
->gc
;
424 lockdep_assert_held(&gc
->lock
);
426 if (gc
->gc_enabled
&& !gc
->gc_active
)
430 void pblk_gc_should_start(struct pblk
*pblk
)
432 struct pblk_gc
*gc
= &pblk
->gc
;
434 spin_lock(&gc
->lock
);
435 __pblk_gc_should_start(pblk
);
436 spin_unlock(&gc
->lock
);
440 * If flush_wq == 1 then no lock should be held by the caller since
441 * flush_workqueue can sleep
443 static void pblk_gc_stop(struct pblk
*pblk
, int flush_wq
)
445 spin_lock(&pblk
->gc
.lock
);
446 pblk
->gc
.gc_active
= 0;
447 spin_unlock(&pblk
->gc
.lock
);
449 pr_debug("pblk: gc stop\n");
452 void pblk_gc_should_stop(struct pblk
*pblk
)
454 struct pblk_gc
*gc
= &pblk
->gc
;
456 if (gc
->gc_active
&& !gc
->gc_forced
)
457 pblk_gc_stop(pblk
, 0);
460 void pblk_gc_sysfs_state_show(struct pblk
*pblk
, int *gc_enabled
,
463 struct pblk_gc
*gc
= &pblk
->gc
;
465 spin_lock(&gc
->lock
);
466 *gc_enabled
= gc
->gc_enabled
;
467 *gc_active
= gc
->gc_active
;
468 spin_unlock(&gc
->lock
);
471 void pblk_gc_sysfs_force(struct pblk
*pblk
, int force
)
473 struct pblk_gc
*gc
= &pblk
->gc
;
476 spin_lock(&gc
->lock
);
481 pblk_rl_set_gc_rsc(&pblk
->rl
, rsv
);
482 gc
->gc_forced
= force
;
483 __pblk_gc_should_start(pblk
);
484 spin_unlock(&gc
->lock
);
487 int pblk_gc_init(struct pblk
*pblk
)
489 struct pblk_gc
*gc
= &pblk
->gc
;
492 gc
->gc_ts
= kthread_create(pblk_gc_ts
, pblk
, "pblk-gc-ts");
493 if (IS_ERR(gc
->gc_ts
)) {
494 pr_err("pblk: could not allocate GC main kthread\n");
495 return PTR_ERR(gc
->gc_ts
);
498 gc
->gc_writer_ts
= kthread_create(pblk_gc_writer_ts
, pblk
,
499 "pblk-gc-writer-ts");
500 if (IS_ERR(gc
->gc_writer_ts
)) {
501 pr_err("pblk: could not allocate GC writer kthread\n");
502 ret
= PTR_ERR(gc
->gc_writer_ts
);
503 goto fail_free_main_kthread
;
506 setup_timer(&gc
->gc_timer
, pblk_gc_timer
, (unsigned long)pblk
);
507 mod_timer(&gc
->gc_timer
, jiffies
+ msecs_to_jiffies(GC_TIME_MSECS
));
512 gc
->gc_jobs_active
= 8;
514 atomic_set(&gc
->inflight_gc
, 0);
516 gc
->gc_reader_wq
= alloc_workqueue("pblk-gc-reader-wq",
517 WQ_MEM_RECLAIM
| WQ_UNBOUND
, gc
->gc_jobs_active
);
518 if (!gc
->gc_reader_wq
) {
519 pr_err("pblk: could not allocate GC reader workqueue\n");
521 goto fail_free_writer_kthread
;
524 spin_lock_init(&gc
->lock
);
525 spin_lock_init(&gc
->w_lock
);
526 INIT_LIST_HEAD(&gc
->w_list
);
530 fail_free_writer_kthread
:
531 kthread_stop(gc
->gc_writer_ts
);
532 fail_free_main_kthread
:
533 kthread_stop(gc
->gc_ts
);
538 void pblk_gc_exit(struct pblk
*pblk
)
540 struct pblk_gc
*gc
= &pblk
->gc
;
542 flush_workqueue(gc
->gc_reader_wq
);
544 del_timer(&gc
->gc_timer
);
545 pblk_gc_stop(pblk
, 1);
548 kthread_stop(gc
->gc_ts
);
550 if (pblk
->gc
.gc_reader_wq
)
551 destroy_workqueue(pblk
->gc
.gc_reader_wq
);
553 if (gc
->gc_writer_ts
)
554 kthread_stop(gc
->gc_writer_ts
);