/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */
/* Module-wide slab caches shared by all pblk instances; guarded by pblk_lock.
 * pblk_w_rq_cache is created alongside the others in pblk_init_global_caches().
 */
static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
				*pblk_w_rq_cache;
static DECLARE_RWSEM(pblk_lock);

/* Shared bio_set for pblk-internal bios; allocated in pblk_module_init(). */
struct bio_set *pblk_bio_set;
28 static int pblk_rw_io(struct request_queue
*q
, struct pblk
*pblk
,
33 /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
34 * constraint. Writes can be of arbitrary size.
36 if (bio_data_dir(bio
) == READ
) {
37 blk_queue_split(q
, &bio
);
38 ret
= pblk_submit_read(pblk
, bio
);
39 if (ret
== NVM_IO_DONE
&& bio_flagged(bio
, BIO_CLONED
))
45 /* Prevent deadlock in the case of a modest LUN configuration and large
46 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
47 * available for user I/O.
49 if (pblk_get_secs(bio
) > pblk_rl_max_io(&pblk
->rl
))
50 blk_queue_split(q
, &bio
);
52 return pblk_write_to_cache(pblk
, bio
, PBLK_IOTYPE_USER
);
55 static blk_qc_t
pblk_make_rq(struct request_queue
*q
, struct bio
*bio
)
57 struct pblk
*pblk
= q
->queuedata
;
59 if (bio_op(bio
) == REQ_OP_DISCARD
) {
60 pblk_discard(pblk
, bio
);
61 if (!(bio
->bi_opf
& REQ_PREFLUSH
)) {
67 switch (pblk_rw_io(q
, pblk
, bio
)) {
79 static size_t pblk_trans_map_size(struct pblk
*pblk
)
83 if (pblk
->addrf_len
< 32)
86 return entry_size
* pblk
->rl
.nr_secs
;
#ifdef CONFIG_NVM_DEBUG
/* Debug helper: CRC32 over the whole in-memory L2P table, used to check
 * that recovery reproduces the table that was present at teardown.
 */
static u32 pblk_l2p_crc(struct pblk *pblk)
{
	size_t map_size;
	u32 crc = ~(u32)0;

	map_size = pblk_trans_map_size(pblk);
	crc = crc32_le(crc, pblk->trans_map, map_size);
	return crc;
}
#endif
101 static void pblk_l2p_free(struct pblk
*pblk
)
103 vfree(pblk
->trans_map
);
106 static int pblk_l2p_recover(struct pblk
*pblk
, bool factory_init
)
108 struct pblk_line
*line
= NULL
;
111 pblk_setup_uuid(pblk
);
113 line
= pblk_recov_l2p(pblk
);
115 pr_err("pblk: could not recover l2p table\n");
120 #ifdef CONFIG_NVM_DEBUG
121 pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk
));
124 /* Free full lines directly as GC has not been started yet */
125 pblk_gc_free_full_lines(pblk
);
128 /* Configure next line for user data */
129 line
= pblk_line_get_first_data(pblk
);
131 pr_err("pblk: line list corrupted\n");
139 static int pblk_l2p_init(struct pblk
*pblk
, bool factory_init
)
145 map_size
= pblk_trans_map_size(pblk
);
146 pblk
->trans_map
= vmalloc(map_size
);
147 if (!pblk
->trans_map
)
150 pblk_ppa_set_empty(&ppa
);
152 for (i
= 0; i
< pblk
->rl
.nr_secs
; i
++)
153 pblk_trans_map_set(pblk
, i
, ppa
);
155 return pblk_l2p_recover(pblk
, factory_init
);
158 static void pblk_rwb_free(struct pblk
*pblk
)
160 if (pblk_rb_tear_down_check(&pblk
->rwb
))
161 pr_err("pblk: write buffer error on tear down\n");
163 pblk_rb_data_free(&pblk
->rwb
);
164 vfree(pblk_rb_entries_ref(&pblk
->rwb
));
167 static int pblk_rwb_init(struct pblk
*pblk
)
169 struct nvm_tgt_dev
*dev
= pblk
->dev
;
170 struct nvm_geo
*geo
= &dev
->geo
;
171 struct pblk_rb_entry
*entries
;
172 unsigned long nr_entries
;
173 unsigned int power_size
, power_seg_sz
;
175 nr_entries
= pblk_rb_calculate_size(pblk
->pgs_in_buffer
);
177 entries
= vzalloc(nr_entries
* sizeof(struct pblk_rb_entry
));
181 power_size
= get_count_order(nr_entries
);
182 power_seg_sz
= get_count_order(geo
->csecs
);
184 return pblk_rb_init(&pblk
->rwb
, entries
, power_size
, power_seg_sz
);
/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64
190 static int pblk_set_addrf_12(struct nvm_geo
*geo
, struct nvm_addrf_12
*dst
)
192 struct nvm_addrf_12
*src
= (struct nvm_addrf_12
*)&geo
->addrf
;
195 /* Re-calculate channel and lun format to adapt to configuration */
196 power_len
= get_count_order(geo
->num_ch
);
197 if (1 << power_len
!= geo
->num_ch
) {
198 pr_err("pblk: supports only power-of-two channel config.\n");
201 dst
->ch_len
= power_len
;
203 power_len
= get_count_order(geo
->num_lun
);
204 if (1 << power_len
!= geo
->num_lun
) {
205 pr_err("pblk: supports only power-of-two LUN config.\n");
208 dst
->lun_len
= power_len
;
210 dst
->blk_len
= src
->blk_len
;
211 dst
->pg_len
= src
->pg_len
;
212 dst
->pln_len
= src
->pln_len
;
213 dst
->sec_len
= src
->sec_len
;
216 dst
->pln_offset
= dst
->sec_len
;
217 dst
->ch_offset
= dst
->pln_offset
+ dst
->pln_len
;
218 dst
->lun_offset
= dst
->ch_offset
+ dst
->ch_len
;
219 dst
->pg_offset
= dst
->lun_offset
+ dst
->lun_len
;
220 dst
->blk_offset
= dst
->pg_offset
+ dst
->pg_len
;
222 dst
->sec_mask
= ((1ULL << dst
->sec_len
) - 1) << dst
->sec_offset
;
223 dst
->pln_mask
= ((1ULL << dst
->pln_len
) - 1) << dst
->pln_offset
;
224 dst
->ch_mask
= ((1ULL << dst
->ch_len
) - 1) << dst
->ch_offset
;
225 dst
->lun_mask
= ((1ULL << dst
->lun_len
) - 1) << dst
->lun_offset
;
226 dst
->pg_mask
= ((1ULL << dst
->pg_len
) - 1) << dst
->pg_offset
;
227 dst
->blk_mask
= ((1ULL << dst
->blk_len
) - 1) << dst
->blk_offset
;
229 return dst
->blk_offset
+ src
->blk_len
;
232 static int pblk_set_addrf_20(struct nvm_geo
*geo
, struct nvm_addrf
*adst
,
233 struct pblk_addrf
*udst
)
235 struct nvm_addrf
*src
= &geo
->addrf
;
237 adst
->ch_len
= get_count_order(geo
->num_ch
);
238 adst
->lun_len
= get_count_order(geo
->num_lun
);
239 adst
->chk_len
= src
->chk_len
;
240 adst
->sec_len
= src
->sec_len
;
242 adst
->sec_offset
= 0;
243 adst
->ch_offset
= adst
->sec_len
;
244 adst
->lun_offset
= adst
->ch_offset
+ adst
->ch_len
;
245 adst
->chk_offset
= adst
->lun_offset
+ adst
->lun_len
;
247 adst
->sec_mask
= ((1ULL << adst
->sec_len
) - 1) << adst
->sec_offset
;
248 adst
->chk_mask
= ((1ULL << adst
->chk_len
) - 1) << adst
->chk_offset
;
249 adst
->lun_mask
= ((1ULL << adst
->lun_len
) - 1) << adst
->lun_offset
;
250 adst
->ch_mask
= ((1ULL << adst
->ch_len
) - 1) << adst
->ch_offset
;
252 udst
->sec_stripe
= geo
->ws_opt
;
253 udst
->ch_stripe
= geo
->num_ch
;
254 udst
->lun_stripe
= geo
->num_lun
;
256 udst
->sec_lun_stripe
= udst
->sec_stripe
* udst
->ch_stripe
;
257 udst
->sec_ws_stripe
= udst
->sec_lun_stripe
* udst
->lun_stripe
;
259 return adst
->chk_offset
+ adst
->chk_len
;
262 static int pblk_set_addrf(struct pblk
*pblk
)
264 struct nvm_tgt_dev
*dev
= pblk
->dev
;
265 struct nvm_geo
*geo
= &dev
->geo
;
268 switch (geo
->version
) {
269 case NVM_OCSSD_SPEC_12
:
270 div_u64_rem(geo
->clba
, pblk
->min_write_pgs
, &mod
);
272 pr_err("pblk: bad configuration of sectors/pages\n");
276 pblk
->addrf_len
= pblk_set_addrf_12(geo
, (void *)&pblk
->addrf
);
278 case NVM_OCSSD_SPEC_20
:
279 pblk
->addrf_len
= pblk_set_addrf_20(geo
, (void *)&pblk
->addrf
,
283 pr_err("pblk: OCSSD revision not supported (%d)\n",
291 static int pblk_init_global_caches(struct pblk
*pblk
)
293 down_write(&pblk_lock
);
294 pblk_ws_cache
= kmem_cache_create("pblk_blk_ws",
295 sizeof(struct pblk_line_ws
), 0, 0, NULL
);
296 if (!pblk_ws_cache
) {
297 up_write(&pblk_lock
);
301 pblk_rec_cache
= kmem_cache_create("pblk_rec",
302 sizeof(struct pblk_rec_ctx
), 0, 0, NULL
);
303 if (!pblk_rec_cache
) {
304 kmem_cache_destroy(pblk_ws_cache
);
305 up_write(&pblk_lock
);
309 pblk_g_rq_cache
= kmem_cache_create("pblk_g_rq", pblk_g_rq_size
,
311 if (!pblk_g_rq_cache
) {
312 kmem_cache_destroy(pblk_ws_cache
);
313 kmem_cache_destroy(pblk_rec_cache
);
314 up_write(&pblk_lock
);
318 pblk_w_rq_cache
= kmem_cache_create("pblk_w_rq", pblk_w_rq_size
,
320 if (!pblk_w_rq_cache
) {
321 kmem_cache_destroy(pblk_ws_cache
);
322 kmem_cache_destroy(pblk_rec_cache
);
323 kmem_cache_destroy(pblk_g_rq_cache
);
324 up_write(&pblk_lock
);
327 up_write(&pblk_lock
);
332 static void pblk_free_global_caches(struct pblk
*pblk
)
334 kmem_cache_destroy(pblk_ws_cache
);
335 kmem_cache_destroy(pblk_rec_cache
);
336 kmem_cache_destroy(pblk_g_rq_cache
);
337 kmem_cache_destroy(pblk_w_rq_cache
);
340 static int pblk_core_init(struct pblk
*pblk
)
342 struct nvm_tgt_dev
*dev
= pblk
->dev
;
343 struct nvm_geo
*geo
= &dev
->geo
;
346 atomic64_set(&pblk
->user_wa
, 0);
347 atomic64_set(&pblk
->pad_wa
, 0);
348 atomic64_set(&pblk
->gc_wa
, 0);
349 pblk
->user_rst_wa
= 0;
350 pblk
->pad_rst_wa
= 0;
353 atomic64_set(&pblk
->nr_flush
, 0);
354 pblk
->nr_flush_rst
= 0;
356 pblk
->pgs_in_buffer
= geo
->mw_cunits
* geo
->all_luns
;
358 pblk
->min_write_pgs
= geo
->ws_opt
* (geo
->csecs
/ PAGE_SIZE
);
359 max_write_ppas
= pblk
->min_write_pgs
* geo
->all_luns
;
360 pblk
->max_write_pgs
= min_t(int, max_write_ppas
, NVM_MAX_VLBA
);
361 pblk_set_sec_per_write(pblk
, pblk
->min_write_pgs
);
363 if (pblk
->max_write_pgs
> PBLK_MAX_REQ_ADDRS
) {
364 pr_err("pblk: vector list too big(%u > %u)\n",
365 pblk
->max_write_pgs
, PBLK_MAX_REQ_ADDRS
);
369 pblk
->pad_dist
= kzalloc((pblk
->min_write_pgs
- 1) * sizeof(atomic64_t
),
374 if (pblk_init_global_caches(pblk
))
375 goto fail_free_pad_dist
;
377 /* Internal bios can be at most the sectors signaled by the device. */
378 pblk
->page_bio_pool
= mempool_create_page_pool(NVM_MAX_VLBA
, 0);
379 if (!pblk
->page_bio_pool
)
380 goto free_global_caches
;
382 pblk
->gen_ws_pool
= mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE
,
384 if (!pblk
->gen_ws_pool
)
385 goto free_page_bio_pool
;
387 pblk
->rec_pool
= mempool_create_slab_pool(geo
->all_luns
,
390 goto free_gen_ws_pool
;
392 pblk
->r_rq_pool
= mempool_create_slab_pool(geo
->all_luns
,
394 if (!pblk
->r_rq_pool
)
397 pblk
->e_rq_pool
= mempool_create_slab_pool(geo
->all_luns
,
399 if (!pblk
->e_rq_pool
)
402 pblk
->w_rq_pool
= mempool_create_slab_pool(geo
->all_luns
,
404 if (!pblk
->w_rq_pool
)
407 pblk
->close_wq
= alloc_workqueue("pblk-close-wq",
408 WQ_MEM_RECLAIM
| WQ_UNBOUND
, PBLK_NR_CLOSE_JOBS
);
412 pblk
->bb_wq
= alloc_workqueue("pblk-bb-wq",
413 WQ_MEM_RECLAIM
| WQ_UNBOUND
, 0);
417 pblk
->r_end_wq
= alloc_workqueue("pblk-read-end-wq",
418 WQ_MEM_RECLAIM
| WQ_UNBOUND
, 0);
422 if (pblk_set_addrf(pblk
))
425 INIT_LIST_HEAD(&pblk
->compl_list
);
430 destroy_workqueue(pblk
->r_end_wq
);
432 destroy_workqueue(pblk
->bb_wq
);
434 destroy_workqueue(pblk
->close_wq
);
436 mempool_destroy(pblk
->w_rq_pool
);
438 mempool_destroy(pblk
->e_rq_pool
);
440 mempool_destroy(pblk
->r_rq_pool
);
442 mempool_destroy(pblk
->rec_pool
);
444 mempool_destroy(pblk
->gen_ws_pool
);
446 mempool_destroy(pblk
->page_bio_pool
);
448 pblk_free_global_caches(pblk
);
450 kfree(pblk
->pad_dist
);
454 static void pblk_core_free(struct pblk
*pblk
)
457 destroy_workqueue(pblk
->close_wq
);
460 destroy_workqueue(pblk
->r_end_wq
);
463 destroy_workqueue(pblk
->bb_wq
);
465 mempool_destroy(pblk
->page_bio_pool
);
466 mempool_destroy(pblk
->gen_ws_pool
);
467 mempool_destroy(pblk
->rec_pool
);
468 mempool_destroy(pblk
->r_rq_pool
);
469 mempool_destroy(pblk
->e_rq_pool
);
470 mempool_destroy(pblk
->w_rq_pool
);
472 pblk_free_global_caches(pblk
);
473 kfree(pblk
->pad_dist
);
476 static void pblk_line_mg_free(struct pblk
*pblk
)
478 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
481 kfree(l_mg
->bb_template
);
483 kfree(l_mg
->vsc_list
);
485 for (i
= 0; i
< PBLK_DATA_LINES
; i
++) {
486 kfree(l_mg
->sline_meta
[i
]);
487 pblk_mfree(l_mg
->eline_meta
[i
]->buf
, l_mg
->emeta_alloc_type
);
488 kfree(l_mg
->eline_meta
[i
]);
492 static void pblk_line_meta_free(struct pblk_line
*line
)
494 kfree(line
->blk_bitmap
);
495 kfree(line
->erase_bitmap
);
499 static void pblk_lines_free(struct pblk
*pblk
)
501 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
502 struct pblk_line
*line
;
505 spin_lock(&l_mg
->free_lock
);
506 for (i
= 0; i
< l_mg
->nr_lines
; i
++) {
507 line
= &pblk
->lines
[i
];
509 pblk_line_free(pblk
, line
);
510 pblk_line_meta_free(line
);
512 spin_unlock(&l_mg
->free_lock
);
514 pblk_line_mg_free(pblk
);
520 static int pblk_bb_get_tbl(struct nvm_tgt_dev
*dev
, struct pblk_lun
*rlun
,
521 u8
*blks
, int nr_blks
)
527 ppa
.g
.ch
= rlun
->bppa
.g
.ch
;
528 ppa
.g
.lun
= rlun
->bppa
.g
.lun
;
530 ret
= nvm_get_tgt_bb_tbl(dev
, ppa
, blks
);
534 nr_blks
= nvm_bb_tbl_fold(dev
->parent
, blks
, nr_blks
);
541 static void *pblk_bb_get_meta(struct pblk
*pblk
)
543 struct nvm_tgt_dev
*dev
= pblk
->dev
;
544 struct nvm_geo
*geo
= &dev
->geo
;
546 int i
, nr_blks
, blk_per_lun
;
549 blk_per_lun
= geo
->num_chk
* geo
->pln_mode
;
550 nr_blks
= blk_per_lun
* geo
->all_luns
;
552 meta
= kmalloc(nr_blks
, GFP_KERNEL
);
554 return ERR_PTR(-ENOMEM
);
556 for (i
= 0; i
< geo
->all_luns
; i
++) {
557 struct pblk_lun
*rlun
= &pblk
->luns
[i
];
558 u8
*meta_pos
= meta
+ i
* blk_per_lun
;
560 ret
= pblk_bb_get_tbl(dev
, rlun
, meta_pos
, blk_per_lun
);
563 return ERR_PTR(-EIO
);
570 static void *pblk_chunk_get_meta(struct pblk
*pblk
)
572 struct nvm_tgt_dev
*dev
= pblk
->dev
;
573 struct nvm_geo
*geo
= &dev
->geo
;
575 if (geo
->version
== NVM_OCSSD_SPEC_12
)
576 return pblk_bb_get_meta(pblk
);
578 return pblk_chunk_get_info(pblk
);
581 static int pblk_luns_init(struct pblk
*pblk
)
583 struct nvm_tgt_dev
*dev
= pblk
->dev
;
584 struct nvm_geo
*geo
= &dev
->geo
;
585 struct pblk_lun
*rlun
;
588 /* TODO: Implement unbalanced LUN support */
589 if (geo
->num_lun
< 0) {
590 pr_err("pblk: unbalanced LUN config.\n");
594 pblk
->luns
= kcalloc(geo
->all_luns
, sizeof(struct pblk_lun
),
599 for (i
= 0; i
< geo
->all_luns
; i
++) {
600 /* Stripe across channels */
601 int ch
= i
% geo
->num_ch
;
602 int lun_raw
= i
/ geo
->num_ch
;
603 int lunid
= lun_raw
+ ch
* geo
->num_lun
;
605 rlun
= &pblk
->luns
[i
];
606 rlun
->bppa
= dev
->luns
[lunid
];
608 sema_init(&rlun
->wr_sem
, 1);
614 /* See comment over struct line_emeta definition */
615 static unsigned int calc_emeta_len(struct pblk
*pblk
)
617 struct pblk_line_meta
*lm
= &pblk
->lm
;
618 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
619 struct nvm_tgt_dev
*dev
= pblk
->dev
;
620 struct nvm_geo
*geo
= &dev
->geo
;
622 /* Round to sector size so that lba_list starts on its own sector */
623 lm
->emeta_sec
[1] = DIV_ROUND_UP(
624 sizeof(struct line_emeta
) + lm
->blk_bitmap_len
+
625 sizeof(struct wa_counters
), geo
->csecs
);
626 lm
->emeta_len
[1] = lm
->emeta_sec
[1] * geo
->csecs
;
628 /* Round to sector size so that vsc_list starts on its own sector */
629 lm
->dsec_per_line
= lm
->sec_per_line
- lm
->emeta_sec
[0];
630 lm
->emeta_sec
[2] = DIV_ROUND_UP(lm
->dsec_per_line
* sizeof(u64
),
632 lm
->emeta_len
[2] = lm
->emeta_sec
[2] * geo
->csecs
;
634 lm
->emeta_sec
[3] = DIV_ROUND_UP(l_mg
->nr_lines
* sizeof(u32
),
636 lm
->emeta_len
[3] = lm
->emeta_sec
[3] * geo
->csecs
;
638 lm
->vsc_list_len
= l_mg
->nr_lines
* sizeof(u32
);
640 return (lm
->emeta_len
[1] + lm
->emeta_len
[2] + lm
->emeta_len
[3]);
643 static void pblk_set_provision(struct pblk
*pblk
, long nr_free_blks
)
645 struct nvm_tgt_dev
*dev
= pblk
->dev
;
646 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
647 struct pblk_line_meta
*lm
= &pblk
->lm
;
648 struct nvm_geo
*geo
= &dev
->geo
;
649 sector_t provisioned
;
650 int sec_meta
, blk_meta
;
652 if (geo
->op
== NVM_TARGET_DEFAULT_OP
)
653 pblk
->op
= PBLK_DEFAULT_OP
;
657 provisioned
= nr_free_blks
;
658 provisioned
*= (100 - pblk
->op
);
659 sector_div(provisioned
, 100);
661 pblk
->op_blks
= nr_free_blks
- provisioned
;
663 /* Internally pblk manages all free blocks, but all calculations based
664 * on user capacity consider only provisioned blocks
666 pblk
->rl
.total_blocks
= nr_free_blks
;
667 pblk
->rl
.nr_secs
= nr_free_blks
* geo
->clba
;
669 /* Consider sectors used for metadata */
670 sec_meta
= (lm
->smeta_sec
+ lm
->emeta_sec
[0]) * l_mg
->nr_free_lines
;
671 blk_meta
= DIV_ROUND_UP(sec_meta
, geo
->clba
);
673 pblk
->capacity
= (provisioned
- blk_meta
) * geo
->clba
;
675 atomic_set(&pblk
->rl
.free_blocks
, nr_free_blks
);
676 atomic_set(&pblk
->rl
.free_user_blocks
, nr_free_blks
);
679 static int pblk_setup_line_meta_12(struct pblk
*pblk
, struct pblk_line
*line
,
682 struct nvm_tgt_dev
*dev
= pblk
->dev
;
683 struct nvm_geo
*geo
= &dev
->geo
;
684 struct pblk_line_meta
*lm
= &pblk
->lm
;
685 int i
, chk_per_lun
, nr_bad_chks
= 0;
687 chk_per_lun
= geo
->num_chk
* geo
->pln_mode
;
689 for (i
= 0; i
< lm
->blk_per_line
; i
++) {
690 struct pblk_lun
*rlun
= &pblk
->luns
[i
];
691 struct nvm_chk_meta
*chunk
;
692 int pos
= pblk_ppa_to_pos(geo
, rlun
->bppa
);
693 u8
*lun_bb_meta
= chunk_meta
+ pos
* chk_per_lun
;
695 chunk
= &line
->chks
[pos
];
698 * In 1.2 spec. chunk state is not persisted by the device. Thus
699 * some of the values are reset each time pblk is instantiated.
701 if (lun_bb_meta
[line
->id
] == NVM_BLK_T_FREE
)
702 chunk
->state
= NVM_CHK_ST_FREE
;
704 chunk
->state
= NVM_CHK_ST_OFFLINE
;
706 chunk
->type
= NVM_CHK_TP_W_SEQ
;
709 chunk
->cnlb
= geo
->clba
;
712 if (!(chunk
->state
& NVM_CHK_ST_OFFLINE
))
715 set_bit(pos
, line
->blk_bitmap
);
722 static int pblk_setup_line_meta_20(struct pblk
*pblk
, struct pblk_line
*line
,
723 struct nvm_chk_meta
*meta
)
725 struct nvm_tgt_dev
*dev
= pblk
->dev
;
726 struct nvm_geo
*geo
= &dev
->geo
;
727 struct pblk_line_meta
*lm
= &pblk
->lm
;
728 int i
, nr_bad_chks
= 0;
730 for (i
= 0; i
< lm
->blk_per_line
; i
++) {
731 struct pblk_lun
*rlun
= &pblk
->luns
[i
];
732 struct nvm_chk_meta
*chunk
;
733 struct nvm_chk_meta
*chunk_meta
;
738 pos
= pblk_ppa_to_pos(geo
, ppa
);
739 chunk
= &line
->chks
[pos
];
741 ppa
.m
.chk
= line
->id
;
742 chunk_meta
= pblk_chunk_get_off(pblk
, meta
, ppa
);
744 chunk
->state
= chunk_meta
->state
;
745 chunk
->type
= chunk_meta
->type
;
746 chunk
->wi
= chunk_meta
->wi
;
747 chunk
->slba
= chunk_meta
->slba
;
748 chunk
->cnlb
= chunk_meta
->cnlb
;
749 chunk
->wp
= chunk_meta
->wp
;
751 if (!(chunk
->state
& NVM_CHK_ST_OFFLINE
))
754 if (chunk
->type
& NVM_CHK_TP_SZ_SPEC
) {
755 WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
759 set_bit(pos
, line
->blk_bitmap
);
766 static long pblk_setup_line_meta(struct pblk
*pblk
, struct pblk_line
*line
,
767 void *chunk_meta
, int line_id
)
769 struct nvm_tgt_dev
*dev
= pblk
->dev
;
770 struct nvm_geo
*geo
= &dev
->geo
;
771 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
772 struct pblk_line_meta
*lm
= &pblk
->lm
;
773 long nr_bad_chks
, chk_in_line
;
777 line
->type
= PBLK_LINETYPE_FREE
;
778 line
->state
= PBLK_LINESTATE_NEW
;
779 line
->gc_group
= PBLK_LINEGC_NONE
;
780 line
->vsc
= &l_mg
->vsc_list
[line_id
];
781 spin_lock_init(&line
->lock
);
783 if (geo
->version
== NVM_OCSSD_SPEC_12
)
784 nr_bad_chks
= pblk_setup_line_meta_12(pblk
, line
, chunk_meta
);
786 nr_bad_chks
= pblk_setup_line_meta_20(pblk
, line
, chunk_meta
);
788 chk_in_line
= lm
->blk_per_line
- nr_bad_chks
;
789 if (nr_bad_chks
< 0 || nr_bad_chks
> lm
->blk_per_line
||
790 chk_in_line
< lm
->min_blk_line
) {
791 line
->state
= PBLK_LINESTATE_BAD
;
792 list_add_tail(&line
->list
, &l_mg
->bad_list
);
796 atomic_set(&line
->blk_in_line
, chk_in_line
);
797 list_add_tail(&line
->list
, &l_mg
->free_list
);
798 l_mg
->nr_free_lines
++;
803 static int pblk_alloc_line_meta(struct pblk
*pblk
, struct pblk_line
*line
)
805 struct pblk_line_meta
*lm
= &pblk
->lm
;
807 line
->blk_bitmap
= kzalloc(lm
->blk_bitmap_len
, GFP_KERNEL
);
808 if (!line
->blk_bitmap
)
811 line
->erase_bitmap
= kzalloc(lm
->blk_bitmap_len
, GFP_KERNEL
);
812 if (!line
->erase_bitmap
) {
813 kfree(line
->blk_bitmap
);
817 line
->chks
= kmalloc(lm
->blk_per_line
* sizeof(struct nvm_chk_meta
),
820 kfree(line
->erase_bitmap
);
821 kfree(line
->blk_bitmap
);
828 static int pblk_line_mg_init(struct pblk
*pblk
)
830 struct nvm_tgt_dev
*dev
= pblk
->dev
;
831 struct nvm_geo
*geo
= &dev
->geo
;
832 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
833 struct pblk_line_meta
*lm
= &pblk
->lm
;
836 l_mg
->nr_lines
= geo
->num_chk
;
837 l_mg
->log_line
= l_mg
->data_line
= NULL
;
838 l_mg
->l_seq_nr
= l_mg
->d_seq_nr
= 0;
839 l_mg
->nr_free_lines
= 0;
840 bitmap_zero(&l_mg
->meta_bitmap
, PBLK_DATA_LINES
);
842 INIT_LIST_HEAD(&l_mg
->free_list
);
843 INIT_LIST_HEAD(&l_mg
->corrupt_list
);
844 INIT_LIST_HEAD(&l_mg
->bad_list
);
845 INIT_LIST_HEAD(&l_mg
->gc_full_list
);
846 INIT_LIST_HEAD(&l_mg
->gc_high_list
);
847 INIT_LIST_HEAD(&l_mg
->gc_mid_list
);
848 INIT_LIST_HEAD(&l_mg
->gc_low_list
);
849 INIT_LIST_HEAD(&l_mg
->gc_empty_list
);
851 INIT_LIST_HEAD(&l_mg
->emeta_list
);
853 l_mg
->gc_lists
[0] = &l_mg
->gc_high_list
;
854 l_mg
->gc_lists
[1] = &l_mg
->gc_mid_list
;
855 l_mg
->gc_lists
[2] = &l_mg
->gc_low_list
;
857 spin_lock_init(&l_mg
->free_lock
);
858 spin_lock_init(&l_mg
->close_lock
);
859 spin_lock_init(&l_mg
->gc_lock
);
861 l_mg
->vsc_list
= kcalloc(l_mg
->nr_lines
, sizeof(__le32
), GFP_KERNEL
);
865 l_mg
->bb_template
= kzalloc(lm
->sec_bitmap_len
, GFP_KERNEL
);
866 if (!l_mg
->bb_template
)
867 goto fail_free_vsc_list
;
869 l_mg
->bb_aux
= kzalloc(lm
->sec_bitmap_len
, GFP_KERNEL
);
871 goto fail_free_bb_template
;
873 /* smeta is always small enough to fit on a kmalloc memory allocation,
874 * emeta depends on the number of LUNs allocated to the pblk instance
876 for (i
= 0; i
< PBLK_DATA_LINES
; i
++) {
877 l_mg
->sline_meta
[i
] = kmalloc(lm
->smeta_len
, GFP_KERNEL
);
878 if (!l_mg
->sline_meta
[i
])
879 goto fail_free_smeta
;
882 /* emeta allocates three different buffers for managing metadata with
883 * in-memory and in-media layouts
885 for (i
= 0; i
< PBLK_DATA_LINES
; i
++) {
886 struct pblk_emeta
*emeta
;
888 emeta
= kmalloc(sizeof(struct pblk_emeta
), GFP_KERNEL
);
890 goto fail_free_emeta
;
892 if (lm
->emeta_len
[0] > KMALLOC_MAX_CACHE_SIZE
) {
893 l_mg
->emeta_alloc_type
= PBLK_VMALLOC_META
;
895 emeta
->buf
= vmalloc(lm
->emeta_len
[0]);
898 goto fail_free_emeta
;
901 emeta
->nr_entries
= lm
->emeta_sec
[0];
902 l_mg
->eline_meta
[i
] = emeta
;
904 l_mg
->emeta_alloc_type
= PBLK_KMALLOC_META
;
906 emeta
->buf
= kmalloc(lm
->emeta_len
[0], GFP_KERNEL
);
909 goto fail_free_emeta
;
912 emeta
->nr_entries
= lm
->emeta_sec
[0];
913 l_mg
->eline_meta
[i
] = emeta
;
917 for (i
= 0; i
< l_mg
->nr_lines
; i
++)
918 l_mg
->vsc_list
[i
] = cpu_to_le32(EMPTY_ENTRY
);
920 bb_distance
= (geo
->all_luns
) * geo
->ws_opt
;
921 for (i
= 0; i
< lm
->sec_per_line
; i
+= bb_distance
)
922 bitmap_set(l_mg
->bb_template
, i
, geo
->ws_opt
);
928 if (l_mg
->emeta_alloc_type
== PBLK_VMALLOC_META
)
929 vfree(l_mg
->eline_meta
[i
]->buf
);
931 kfree(l_mg
->eline_meta
[i
]->buf
);
932 kfree(l_mg
->eline_meta
[i
]);
935 for (i
= 0; i
< PBLK_DATA_LINES
; i
++)
936 kfree(l_mg
->sline_meta
[i
]);
938 fail_free_bb_template
:
939 kfree(l_mg
->bb_template
);
941 kfree(l_mg
->vsc_list
);
946 static int pblk_line_meta_init(struct pblk
*pblk
)
948 struct nvm_tgt_dev
*dev
= pblk
->dev
;
949 struct nvm_geo
*geo
= &dev
->geo
;
950 struct pblk_line_meta
*lm
= &pblk
->lm
;
951 unsigned int smeta_len
, emeta_len
;
954 lm
->sec_per_line
= geo
->clba
* geo
->all_luns
;
955 lm
->blk_per_line
= geo
->all_luns
;
956 lm
->blk_bitmap_len
= BITS_TO_LONGS(geo
->all_luns
) * sizeof(long);
957 lm
->sec_bitmap_len
= BITS_TO_LONGS(lm
->sec_per_line
) * sizeof(long);
958 lm
->lun_bitmap_len
= BITS_TO_LONGS(geo
->all_luns
) * sizeof(long);
959 lm
->mid_thrs
= lm
->sec_per_line
/ 2;
960 lm
->high_thrs
= lm
->sec_per_line
/ 4;
961 lm
->meta_distance
= (geo
->all_luns
/ 2) * pblk
->min_write_pgs
;
963 /* Calculate necessary pages for smeta. See comment over struct
964 * line_smeta definition
968 lm
->smeta_sec
= i
* geo
->ws_opt
;
969 lm
->smeta_len
= lm
->smeta_sec
* geo
->csecs
;
971 smeta_len
= sizeof(struct line_smeta
) + lm
->lun_bitmap_len
;
972 if (smeta_len
> lm
->smeta_len
) {
977 /* Calculate necessary pages for emeta. See comment over struct
978 * line_emeta definition
982 lm
->emeta_sec
[0] = i
* geo
->ws_opt
;
983 lm
->emeta_len
[0] = lm
->emeta_sec
[0] * geo
->csecs
;
985 emeta_len
= calc_emeta_len(pblk
);
986 if (emeta_len
> lm
->emeta_len
[0]) {
991 lm
->emeta_bb
= geo
->all_luns
> i
? geo
->all_luns
- i
: 0;
993 lm
->min_blk_line
= 1;
994 if (geo
->all_luns
> 1)
995 lm
->min_blk_line
+= DIV_ROUND_UP(lm
->smeta_sec
+
996 lm
->emeta_sec
[0], geo
->clba
);
998 if (lm
->min_blk_line
> lm
->blk_per_line
) {
999 pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
1007 static int pblk_lines_init(struct pblk
*pblk
)
1009 struct pblk_line_mgmt
*l_mg
= &pblk
->l_mg
;
1010 struct pblk_line
*line
;
1012 long nr_free_chks
= 0;
1015 ret
= pblk_line_meta_init(pblk
);
1019 ret
= pblk_line_mg_init(pblk
);
1023 ret
= pblk_luns_init(pblk
);
1025 goto fail_free_meta
;
1027 chunk_meta
= pblk_chunk_get_meta(pblk
);
1028 if (IS_ERR(chunk_meta
)) {
1029 ret
= PTR_ERR(chunk_meta
);
1030 goto fail_free_luns
;
1033 pblk
->lines
= kcalloc(l_mg
->nr_lines
, sizeof(struct pblk_line
),
1037 goto fail_free_chunk_meta
;
1040 for (i
= 0; i
< l_mg
->nr_lines
; i
++) {
1041 line
= &pblk
->lines
[i
];
1043 ret
= pblk_alloc_line_meta(pblk
, line
);
1045 goto fail_free_lines
;
1047 nr_free_chks
+= pblk_setup_line_meta(pblk
, line
, chunk_meta
, i
);
1050 pblk_set_provision(pblk
, nr_free_chks
);
1057 pblk_line_meta_free(&pblk
->lines
[i
]);
1059 fail_free_chunk_meta
:
1064 pblk_line_mg_free(pblk
);
1069 static int pblk_writer_init(struct pblk
*pblk
)
1071 pblk
->writer_ts
= kthread_create(pblk_write_ts
, pblk
, "pblk-writer-t");
1072 if (IS_ERR(pblk
->writer_ts
)) {
1073 int err
= PTR_ERR(pblk
->writer_ts
);
1076 pr_err("pblk: could not allocate writer kthread (%d)\n",
1081 timer_setup(&pblk
->wtimer
, pblk_write_timer_fn
, 0);
1082 mod_timer(&pblk
->wtimer
, jiffies
+ msecs_to_jiffies(100));
1087 static void pblk_writer_stop(struct pblk
*pblk
)
1089 /* The pipeline must be stopped and the write buffer emptied before the
1090 * write thread is stopped
1092 WARN(pblk_rb_read_count(&pblk
->rwb
),
1093 "Stopping not fully persisted write buffer\n");
1095 WARN(pblk_rb_sync_count(&pblk
->rwb
),
1096 "Stopping not fully synced write buffer\n");
1098 del_timer_sync(&pblk
->wtimer
);
1099 if (pblk
->writer_ts
)
1100 kthread_stop(pblk
->writer_ts
);
/* Release all instance resources and the pblk structure itself. */
static void pblk_free(struct pblk *pblk)
{
	pblk_lines_free(pblk);
	pblk_l2p_free(pblk);
	pblk_rwb_free(pblk);
	pblk_core_free(pblk);

	kfree(pblk);
}
1113 static void pblk_tear_down(struct pblk
*pblk
)
1115 pblk_pipeline_stop(pblk
);
1116 pblk_writer_stop(pblk
);
1117 pblk_rb_sync_l2p(&pblk
->rwb
);
1118 pblk_rl_free(&pblk
->rl
);
1120 pr_debug("pblk: consistent tear down\n");
1123 static void pblk_exit(void *private)
1125 struct pblk
*pblk
= private;
1127 down_write(&pblk_lock
);
1129 pblk_tear_down(pblk
);
1131 #ifdef CONFIG_NVM_DEBUG
1132 pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk
));
1136 up_write(&pblk_lock
);
1139 static sector_t
pblk_capacity(void *private)
1141 struct pblk
*pblk
= private;
1143 return pblk
->capacity
* NR_PHY_IN_LOG
;
1146 static void *pblk_init(struct nvm_tgt_dev
*dev
, struct gendisk
*tdisk
,
1149 struct nvm_geo
*geo
= &dev
->geo
;
1150 struct request_queue
*bqueue
= dev
->q
;
1151 struct request_queue
*tqueue
= tdisk
->queue
;
1155 /* pblk supports 1.2 and 2.0 versions */
1156 if (!(geo
->version
== NVM_OCSSD_SPEC_12
||
1157 geo
->version
== NVM_OCSSD_SPEC_20
)) {
1158 pr_err("pblk: OCSSD version not supported (%u)\n",
1160 return ERR_PTR(-EINVAL
);
1163 if (geo
->version
== NVM_OCSSD_SPEC_12
&& geo
->dom
& NVM_RSP_L2P
) {
1164 pr_err("pblk: host-side L2P table not supported. (%x)\n",
1166 return ERR_PTR(-EINVAL
);
1169 pblk
= kzalloc(sizeof(struct pblk
), GFP_KERNEL
);
1171 return ERR_PTR(-ENOMEM
);
1175 pblk
->state
= PBLK_STATE_RUNNING
;
1176 pblk
->gc
.gc_enabled
= 0;
1178 spin_lock_init(&pblk
->trans_lock
);
1179 spin_lock_init(&pblk
->lock
);
1181 #ifdef CONFIG_NVM_DEBUG
1182 atomic_long_set(&pblk
->inflight_writes
, 0);
1183 atomic_long_set(&pblk
->padded_writes
, 0);
1184 atomic_long_set(&pblk
->padded_wb
, 0);
1185 atomic_long_set(&pblk
->req_writes
, 0);
1186 atomic_long_set(&pblk
->sub_writes
, 0);
1187 atomic_long_set(&pblk
->sync_writes
, 0);
1188 atomic_long_set(&pblk
->inflight_reads
, 0);
1189 atomic_long_set(&pblk
->cache_reads
, 0);
1190 atomic_long_set(&pblk
->sync_reads
, 0);
1191 atomic_long_set(&pblk
->recov_writes
, 0);
1192 atomic_long_set(&pblk
->recov_writes
, 0);
1193 atomic_long_set(&pblk
->recov_gc_writes
, 0);
1194 atomic_long_set(&pblk
->recov_gc_reads
, 0);
1197 atomic_long_set(&pblk
->read_failed
, 0);
1198 atomic_long_set(&pblk
->read_empty
, 0);
1199 atomic_long_set(&pblk
->read_high_ecc
, 0);
1200 atomic_long_set(&pblk
->read_failed_gc
, 0);
1201 atomic_long_set(&pblk
->write_failed
, 0);
1202 atomic_long_set(&pblk
->erase_failed
, 0);
1204 ret
= pblk_core_init(pblk
);
1206 pr_err("pblk: could not initialize core\n");
1210 ret
= pblk_lines_init(pblk
);
1212 pr_err("pblk: could not initialize lines\n");
1213 goto fail_free_core
;
1216 ret
= pblk_rwb_init(pblk
);
1218 pr_err("pblk: could not initialize write buffer\n");
1219 goto fail_free_lines
;
1222 ret
= pblk_l2p_init(pblk
, flags
& NVM_TARGET_FACTORY
);
1224 pr_err("pblk: could not initialize maps\n");
1228 ret
= pblk_writer_init(pblk
);
1231 pr_err("pblk: could not initialize write thread\n");
1235 ret
= pblk_gc_init(pblk
);
1237 pr_err("pblk: could not initialize gc\n");
1238 goto fail_stop_writer
;
1241 /* inherit the size from the underlying device */
1242 blk_queue_logical_block_size(tqueue
, queue_physical_block_size(bqueue
));
1243 blk_queue_max_hw_sectors(tqueue
, queue_max_hw_sectors(bqueue
));
1245 blk_queue_write_cache(tqueue
, true, false);
1247 tqueue
->limits
.discard_granularity
= geo
->clba
* geo
->csecs
;
1248 tqueue
->limits
.discard_alignment
= 0;
1249 blk_queue_max_discard_sectors(tqueue
, UINT_MAX
>> 9);
1250 blk_queue_flag_set(QUEUE_FLAG_DISCARD
, tqueue
);
1252 pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
1254 geo
->all_luns
, pblk
->l_mg
.nr_lines
,
1255 (unsigned long long)pblk
->rl
.nr_secs
,
1256 pblk
->rwb
.nr_entries
);
1258 wake_up_process(pblk
->writer_ts
);
1260 /* Check if we need to start GC */
1261 pblk_gc_should_kick(pblk
);
1266 pblk_writer_stop(pblk
);
1268 pblk_l2p_free(pblk
);
1270 pblk_rwb_free(pblk
);
1272 pblk_lines_free(pblk
);
1274 pblk_core_free(pblk
);
1277 return ERR_PTR(ret
);
1280 /* physical block device target */
1281 static struct nvm_tgt_type tt_pblk
= {
1283 .version
= {1, 0, 0},
1285 .make_rq
= pblk_make_rq
,
1286 .capacity
= pblk_capacity
,
1291 .sysfs_init
= pblk_sysfs_init
,
1292 .sysfs_exit
= pblk_sysfs_exit
,
1293 .owner
= THIS_MODULE
,
1296 static int __init
pblk_module_init(void)
1300 pblk_bio_set
= bioset_create(BIO_POOL_SIZE
, 0, 0);
1303 ret
= nvm_register_tgt_type(&tt_pblk
);
1305 bioset_free(pblk_bio_set
);
1309 static void pblk_module_exit(void)
1311 bioset_free(pblk_bio_set
);
1312 nvm_unregister_tgt_type(&tt_pblk
);
1315 module_init(pblk_module_init
);
1316 module_exit(pblk_module_exit
);
1317 MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
1318 MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
1319 MODULE_LICENSE("GPL v2");
1320 MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");