]>
Commit | Line | Data |
---|---|---|
a4bd217b JG |
1 | /* |
2 | * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) | |
3 | * Copyright (C) 2016 CNEX Labs | |
4 | * Initial release: Javier Gonzalez <javier@cnexlabs.com> | |
5 | * Matias Bjorling <matias@cnexlabs.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License version | |
9 | * 2 as published by the Free Software Foundation. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * General Public License for more details. | |
15 | * | |
16 | * Implementation of a physical block-device target for Open-channel SSDs. | |
17 | * | |
18 | * pblk-init.c - pblk's initialization. | |
19 | */ | |
20 | ||
21 | #include "pblk.h" | |
22 | ||
/* Slab caches created in pblk_init_global_caches() and destroyed in
 * pblk_core_free(). They back the mempools set up in pblk_core_init().
 */
static struct kmem_cache *pblk_blk_ws_cache;
static struct kmem_cache *pblk_rec_cache;
static struct kmem_cache *pblk_r_rq_cache;
static struct kmem_cache *pblk_w_rq_cache;
static struct kmem_cache *pblk_line_meta_cache;

/* Taken for writing while the caches are created and during pblk_exit() */
static DECLARE_RWSEM(pblk_lock);
26 | ||
27 | static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, | |
28 | struct bio *bio) | |
29 | { | |
30 | int ret; | |
31 | ||
32 | /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap | |
33 | * constraint. Writes can be of arbitrary size. | |
34 | */ | |
35 | if (bio_data_dir(bio) == READ) { | |
36 | blk_queue_split(q, &bio, q->bio_split); | |
37 | ret = pblk_submit_read(pblk, bio); | |
38 | if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) | |
39 | bio_put(bio); | |
40 | ||
41 | return ret; | |
42 | } | |
43 | ||
44 | /* Prevent deadlock in the case of a modest LUN configuration and large | |
45 | * user I/Os. Unless stalled, the rate limiter leaves at least 256KB | |
46 | * available for user I/O. | |
47 | */ | |
48 | if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) | |
49 | blk_queue_split(q, &bio, q->bio_split); | |
50 | ||
51 | return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); | |
52 | } | |
53 | ||
54 | static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) | |
55 | { | |
56 | struct pblk *pblk = q->queuedata; | |
57 | ||
58 | if (bio_op(bio) == REQ_OP_DISCARD) { | |
59 | pblk_discard(pblk, bio); | |
60 | if (!(bio->bi_opf & REQ_PREFLUSH)) { | |
61 | bio_endio(bio); | |
62 | return BLK_QC_T_NONE; | |
63 | } | |
64 | } | |
65 | ||
66 | switch (pblk_rw_io(q, pblk, bio)) { | |
67 | case NVM_IO_ERR: | |
68 | bio_io_error(bio); | |
69 | break; | |
70 | case NVM_IO_DONE: | |
71 | bio_endio(bio); | |
72 | break; | |
73 | } | |
74 | ||
75 | return BLK_QC_T_NONE; | |
76 | } | |
77 | ||
78 | static void pblk_l2p_free(struct pblk *pblk) | |
79 | { | |
80 | vfree(pblk->trans_map); | |
81 | } | |
82 | ||
83 | static int pblk_l2p_init(struct pblk *pblk) | |
84 | { | |
85 | sector_t i; | |
86 | struct ppa_addr ppa; | |
87 | int entry_size = 8; | |
88 | ||
89 | if (pblk->ppaf_bitsize < 32) | |
90 | entry_size = 4; | |
91 | ||
92 | pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs); | |
93 | if (!pblk->trans_map) | |
94 | return -ENOMEM; | |
95 | ||
96 | pblk_ppa_set_empty(&ppa); | |
97 | ||
98 | for (i = 0; i < pblk->rl.nr_secs; i++) | |
99 | pblk_trans_map_set(pblk, i, ppa); | |
100 | ||
101 | return 0; | |
102 | } | |
103 | ||
104 | static void pblk_rwb_free(struct pblk *pblk) | |
105 | { | |
106 | if (pblk_rb_tear_down_check(&pblk->rwb)) | |
107 | pr_err("pblk: write buffer error on tear down\n"); | |
108 | ||
109 | pblk_rb_data_free(&pblk->rwb); | |
110 | vfree(pblk_rb_entries_ref(&pblk->rwb)); | |
111 | } | |
112 | ||
113 | static int pblk_rwb_init(struct pblk *pblk) | |
114 | { | |
115 | struct nvm_tgt_dev *dev = pblk->dev; | |
116 | struct nvm_geo *geo = &dev->geo; | |
117 | struct pblk_rb_entry *entries; | |
118 | unsigned long nr_entries; | |
119 | unsigned int power_size, power_seg_sz; | |
120 | ||
121 | nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer); | |
122 | ||
123 | entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry)); | |
124 | if (!entries) | |
125 | return -ENOMEM; | |
126 | ||
127 | power_size = get_count_order(nr_entries); | |
128 | power_seg_sz = get_count_order(geo->sec_size); | |
129 | ||
130 | return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); | |
131 | } | |
132 | ||
/* Minimum pages needed within a lun.
 *
 * PAGE_POOL_SIZE is the minimum element count of the page mempool created in
 * pblk_core_init(). ADDR_POOL_SIZE is not referenced in the part of the file
 * visible here.
 */
#define PAGE_POOL_SIZE 16
#define ADDR_POOL_SIZE 64
136 | ||
137 | static int pblk_set_ppaf(struct pblk *pblk) | |
138 | { | |
139 | struct nvm_tgt_dev *dev = pblk->dev; | |
140 | struct nvm_geo *geo = &dev->geo; | |
141 | struct nvm_addr_format ppaf = geo->ppaf; | |
142 | int power_len; | |
143 | ||
144 | /* Re-calculate channel and lun format to adapt to configuration */ | |
145 | power_len = get_count_order(geo->nr_chnls); | |
146 | if (1 << power_len != geo->nr_chnls) { | |
147 | pr_err("pblk: supports only power-of-two channel config.\n"); | |
148 | return -EINVAL; | |
149 | } | |
150 | ppaf.ch_len = power_len; | |
151 | ||
152 | power_len = get_count_order(geo->luns_per_chnl); | |
153 | if (1 << power_len != geo->luns_per_chnl) { | |
154 | pr_err("pblk: supports only power-of-two LUN config.\n"); | |
155 | return -EINVAL; | |
156 | } | |
157 | ppaf.lun_len = power_len; | |
158 | ||
159 | pblk->ppaf.sec_offset = 0; | |
160 | pblk->ppaf.pln_offset = ppaf.sect_len; | |
161 | pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len; | |
162 | pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len; | |
163 | pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len; | |
164 | pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len; | |
165 | pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1; | |
166 | pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) << | |
167 | pblk->ppaf.pln_offset; | |
168 | pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) << | |
169 | pblk->ppaf.ch_offset; | |
170 | pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) << | |
171 | pblk->ppaf.lun_offset; | |
172 | pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) << | |
173 | pblk->ppaf.pg_offset; | |
174 | pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) << | |
175 | pblk->ppaf.blk_offset; | |
176 | ||
177 | pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len; | |
178 | ||
179 | return 0; | |
180 | } | |
181 | ||
182 | static int pblk_init_global_caches(struct pblk *pblk) | |
183 | { | |
184 | char cache_name[PBLK_CACHE_NAME_LEN]; | |
185 | ||
186 | down_write(&pblk_lock); | |
187 | pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws", | |
188 | sizeof(struct pblk_line_ws), 0, 0, NULL); | |
189 | if (!pblk_blk_ws_cache) { | |
190 | up_write(&pblk_lock); | |
191 | return -ENOMEM; | |
192 | } | |
193 | ||
194 | pblk_rec_cache = kmem_cache_create("pblk_rec", | |
195 | sizeof(struct pblk_rec_ctx), 0, 0, NULL); | |
196 | if (!pblk_rec_cache) { | |
197 | kmem_cache_destroy(pblk_blk_ws_cache); | |
198 | up_write(&pblk_lock); | |
199 | return -ENOMEM; | |
200 | } | |
201 | ||
202 | pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size, | |
203 | 0, 0, NULL); | |
204 | if (!pblk_r_rq_cache) { | |
205 | kmem_cache_destroy(pblk_blk_ws_cache); | |
206 | kmem_cache_destroy(pblk_rec_cache); | |
207 | up_write(&pblk_lock); | |
208 | return -ENOMEM; | |
209 | } | |
210 | ||
211 | pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, | |
212 | 0, 0, NULL); | |
213 | if (!pblk_w_rq_cache) { | |
214 | kmem_cache_destroy(pblk_blk_ws_cache); | |
215 | kmem_cache_destroy(pblk_rec_cache); | |
216 | kmem_cache_destroy(pblk_r_rq_cache); | |
217 | up_write(&pblk_lock); | |
218 | return -ENOMEM; | |
219 | } | |
220 | ||
221 | snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s", | |
222 | pblk->disk->disk_name); | |
223 | pblk_line_meta_cache = kmem_cache_create(cache_name, | |
224 | pblk->lm.sec_bitmap_len, 0, 0, NULL); | |
225 | if (!pblk_line_meta_cache) { | |
226 | kmem_cache_destroy(pblk_blk_ws_cache); | |
227 | kmem_cache_destroy(pblk_rec_cache); | |
228 | kmem_cache_destroy(pblk_r_rq_cache); | |
229 | kmem_cache_destroy(pblk_w_rq_cache); | |
230 | up_write(&pblk_lock); | |
231 | return -ENOMEM; | |
232 | } | |
233 | up_write(&pblk_lock); | |
234 | ||
235 | return 0; | |
236 | } | |
237 | ||
238 | static int pblk_core_init(struct pblk *pblk) | |
239 | { | |
240 | struct nvm_tgt_dev *dev = pblk->dev; | |
241 | struct nvm_geo *geo = &dev->geo; | |
242 | int max_write_ppas; | |
243 | int mod; | |
244 | ||
245 | pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE); | |
246 | max_write_ppas = pblk->min_write_pgs * geo->nr_luns; | |
247 | pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ? | |
248 | max_write_ppas : nvm_max_phys_sects(dev); | |
249 | pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * | |
250 | geo->nr_planes * geo->nr_luns; | |
251 | ||
252 | if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { | |
253 | pr_err("pblk: cannot support device max_phys_sect\n"); | |
254 | return -EINVAL; | |
255 | } | |
256 | ||
257 | div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod); | |
258 | if (mod) { | |
259 | pr_err("pblk: bad configuration of sectors/pages\n"); | |
260 | return -EINVAL; | |
261 | } | |
262 | ||
263 | if (pblk_init_global_caches(pblk)) | |
264 | return -ENOMEM; | |
265 | ||
266 | pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); | |
267 | if (!pblk->page_pool) | |
268 | return -ENOMEM; | |
269 | ||
270 | pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns, | |
271 | pblk_blk_ws_cache); | |
272 | if (!pblk->line_ws_pool) | |
273 | goto free_page_pool; | |
274 | ||
275 | pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); | |
276 | if (!pblk->rec_pool) | |
277 | goto free_blk_ws_pool; | |
278 | ||
279 | pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache); | |
280 | if (!pblk->r_rq_pool) | |
281 | goto free_rec_pool; | |
282 | ||
283 | pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache); | |
284 | if (!pblk->w_rq_pool) | |
285 | goto free_r_rq_pool; | |
286 | ||
287 | pblk->line_meta_pool = | |
288 | mempool_create_slab_pool(16, pblk_line_meta_cache); | |
289 | if (!pblk->line_meta_pool) | |
290 | goto free_w_rq_pool; | |
291 | ||
292 | pblk->kw_wq = alloc_workqueue("pblk-aux-wq", | |
293 | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | |
294 | if (!pblk->kw_wq) | |
295 | goto free_line_meta_pool; | |
296 | ||
297 | if (pblk_set_ppaf(pblk)) | |
298 | goto free_kw_wq; | |
299 | ||
300 | if (pblk_rwb_init(pblk)) | |
301 | goto free_kw_wq; | |
302 | ||
303 | INIT_LIST_HEAD(&pblk->compl_list); | |
304 | return 0; | |
305 | ||
306 | free_kw_wq: | |
307 | destroy_workqueue(pblk->kw_wq); | |
308 | free_line_meta_pool: | |
309 | mempool_destroy(pblk->line_meta_pool); | |
310 | free_w_rq_pool: | |
311 | mempool_destroy(pblk->w_rq_pool); | |
312 | free_r_rq_pool: | |
313 | mempool_destroy(pblk->r_rq_pool); | |
314 | free_rec_pool: | |
315 | mempool_destroy(pblk->rec_pool); | |
316 | free_blk_ws_pool: | |
317 | mempool_destroy(pblk->line_ws_pool); | |
318 | free_page_pool: | |
319 | mempool_destroy(pblk->page_pool); | |
320 | return -ENOMEM; | |
321 | } | |
322 | ||
323 | static void pblk_core_free(struct pblk *pblk) | |
324 | { | |
325 | if (pblk->kw_wq) | |
326 | destroy_workqueue(pblk->kw_wq); | |
327 | ||
328 | mempool_destroy(pblk->page_pool); | |
329 | mempool_destroy(pblk->line_ws_pool); | |
330 | mempool_destroy(pblk->rec_pool); | |
331 | mempool_destroy(pblk->r_rq_pool); | |
332 | mempool_destroy(pblk->w_rq_pool); | |
333 | mempool_destroy(pblk->line_meta_pool); | |
334 | ||
335 | kmem_cache_destroy(pblk_blk_ws_cache); | |
336 | kmem_cache_destroy(pblk_rec_cache); | |
337 | kmem_cache_destroy(pblk_r_rq_cache); | |
338 | kmem_cache_destroy(pblk_w_rq_cache); | |
339 | kmem_cache_destroy(pblk_line_meta_cache); | |
340 | } | |
341 | ||
342 | static void pblk_luns_free(struct pblk *pblk) | |
343 | { | |
344 | kfree(pblk->luns); | |
345 | } | |
346 | ||
347 | static void pblk_lines_free(struct pblk *pblk) | |
348 | { | |
349 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
350 | struct pblk_line *line; | |
351 | int i; | |
352 | ||
353 | spin_lock(&l_mg->free_lock); | |
354 | for (i = 0; i < l_mg->nr_lines; i++) { | |
355 | line = &pblk->lines[i]; | |
356 | ||
357 | pblk_line_free(pblk, line); | |
358 | kfree(line->blk_bitmap); | |
359 | kfree(line->erase_bitmap); | |
360 | } | |
361 | spin_unlock(&l_mg->free_lock); | |
362 | } | |
363 | ||
364 | static void pblk_line_meta_free(struct pblk *pblk) | |
365 | { | |
366 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
367 | int i; | |
368 | ||
369 | kfree(l_mg->bb_template); | |
370 | kfree(l_mg->bb_aux); | |
371 | ||
372 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
373 | pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); | |
374 | pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); | |
375 | } | |
376 | ||
377 | kfree(pblk->lines); | |
378 | } | |
379 | ||
380 | static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) | |
381 | { | |
382 | struct nvm_geo *geo = &dev->geo; | |
383 | struct ppa_addr ppa; | |
384 | u8 *blks; | |
385 | int nr_blks, ret; | |
386 | ||
387 | nr_blks = geo->blks_per_lun * geo->plane_mode; | |
388 | blks = kmalloc(nr_blks, GFP_KERNEL); | |
389 | if (!blks) | |
390 | return -ENOMEM; | |
391 | ||
392 | ppa.ppa = 0; | |
393 | ppa.g.ch = rlun->bppa.g.ch; | |
394 | ppa.g.lun = rlun->bppa.g.lun; | |
395 | ||
396 | ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); | |
397 | if (ret) | |
398 | goto out; | |
399 | ||
400 | nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); | |
401 | if (nr_blks < 0) { | |
402 | kfree(blks); | |
403 | ret = nr_blks; | |
404 | } | |
405 | ||
406 | rlun->bb_list = blks; | |
407 | ||
408 | out: | |
409 | return ret; | |
410 | } | |
411 | ||
412 | static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line) | |
413 | { | |
414 | struct pblk_line_meta *lm = &pblk->lm; | |
415 | struct pblk_lun *rlun; | |
416 | int bb_cnt = 0; | |
417 | int i; | |
418 | ||
419 | line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); | |
420 | if (!line->blk_bitmap) | |
421 | return -ENOMEM; | |
422 | ||
423 | line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); | |
424 | if (!line->erase_bitmap) { | |
425 | kfree(line->blk_bitmap); | |
426 | return -ENOMEM; | |
427 | } | |
428 | ||
429 | for (i = 0; i < lm->blk_per_line; i++) { | |
430 | rlun = &pblk->luns[i]; | |
431 | if (rlun->bb_list[line->id] == NVM_BLK_T_FREE) | |
432 | continue; | |
433 | ||
434 | set_bit(i, line->blk_bitmap); | |
435 | bb_cnt++; | |
436 | } | |
437 | ||
438 | return bb_cnt; | |
439 | } | |
440 | ||
441 | static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) | |
442 | { | |
443 | struct nvm_tgt_dev *dev = pblk->dev; | |
444 | struct nvm_geo *geo = &dev->geo; | |
445 | struct pblk_lun *rlun; | |
446 | int i, ret; | |
447 | ||
448 | /* TODO: Implement unbalanced LUN support */ | |
449 | if (geo->luns_per_chnl < 0) { | |
450 | pr_err("pblk: unbalanced LUN config.\n"); | |
451 | return -EINVAL; | |
452 | } | |
453 | ||
454 | pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL); | |
455 | if (!pblk->luns) | |
456 | return -ENOMEM; | |
457 | ||
458 | for (i = 0; i < geo->nr_luns; i++) { | |
459 | /* Stripe across channels */ | |
460 | int ch = i % geo->nr_chnls; | |
461 | int lun_raw = i / geo->nr_chnls; | |
462 | int lunid = lun_raw + ch * geo->luns_per_chnl; | |
463 | ||
464 | rlun = &pblk->luns[i]; | |
465 | rlun->bppa = luns[lunid]; | |
466 | ||
467 | sema_init(&rlun->wr_sem, 1); | |
468 | ||
469 | ret = pblk_bb_discovery(dev, rlun); | |
470 | if (ret) { | |
471 | while (--i >= 0) | |
472 | kfree(pblk->luns[i].bb_list); | |
473 | return ret; | |
474 | } | |
475 | } | |
476 | ||
477 | return 0; | |
478 | } | |
479 | ||
480 | static int pblk_lines_configure(struct pblk *pblk, int flags) | |
481 | { | |
482 | struct pblk_line *line = NULL; | |
483 | int ret = 0; | |
484 | ||
485 | if (!(flags & NVM_TARGET_FACTORY)) { | |
486 | line = pblk_recov_l2p(pblk); | |
487 | if (IS_ERR(line)) { | |
488 | pr_err("pblk: could not recover l2p table\n"); | |
489 | ret = -EFAULT; | |
490 | } | |
491 | } | |
492 | ||
493 | if (!line) { | |
494 | /* Configure next line for user data */ | |
495 | line = pblk_line_get_first_data(pblk); | |
496 | if (!line) { | |
497 | pr_err("pblk: line list corrupted\n"); | |
498 | ret = -EFAULT; | |
499 | } | |
500 | } | |
501 | ||
502 | return ret; | |
503 | } | |
504 | ||
505 | /* See comment over struct line_emeta definition */ | |
506 | static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm) | |
507 | { | |
508 | return (sizeof(struct line_emeta) + | |
509 | ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) + | |
510 | (pblk->l_mg.nr_lines * sizeof(u32)) + | |
511 | lm->blk_bitmap_len); | |
512 | } | |
513 | ||
514 | static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) | |
515 | { | |
516 | struct nvm_tgt_dev *dev = pblk->dev; | |
517 | struct nvm_geo *geo = &dev->geo; | |
518 | sector_t provisioned; | |
519 | ||
520 | pblk->over_pct = 20; | |
521 | ||
522 | provisioned = nr_free_blks; | |
523 | provisioned *= (100 - pblk->over_pct); | |
524 | sector_div(provisioned, 100); | |
525 | ||
526 | /* Internally pblk manages all free blocks, but all calculations based | |
527 | * on user capacity consider only provisioned blocks | |
528 | */ | |
529 | pblk->rl.total_blocks = nr_free_blks; | |
530 | pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk; | |
531 | pblk->capacity = provisioned * geo->sec_per_blk; | |
532 | atomic_set(&pblk->rl.free_blocks, nr_free_blks); | |
533 | } | |
534 | ||
535 | static int pblk_lines_init(struct pblk *pblk) | |
536 | { | |
537 | struct nvm_tgt_dev *dev = pblk->dev; | |
538 | struct nvm_geo *geo = &dev->geo; | |
539 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
540 | struct pblk_line_meta *lm = &pblk->lm; | |
541 | struct pblk_line *line; | |
542 | unsigned int smeta_len, emeta_len; | |
543 | long nr_bad_blks, nr_meta_blks, nr_free_blks; | |
544 | int bb_distance; | |
545 | int i; | |
546 | int ret = 0; | |
547 | ||
548 | lm->sec_per_line = geo->sec_per_blk * geo->nr_luns; | |
549 | lm->blk_per_line = geo->nr_luns; | |
550 | lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); | |
551 | lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); | |
552 | lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); | |
553 | lm->high_thrs = lm->sec_per_line / 2; | |
554 | lm->mid_thrs = lm->sec_per_line / 4; | |
555 | ||
556 | /* Calculate necessary pages for smeta. See comment over struct | |
557 | * line_smeta definition | |
558 | */ | |
559 | lm->smeta_len = sizeof(struct line_smeta) + | |
560 | PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; | |
561 | ||
562 | i = 1; | |
563 | add_smeta_page: | |
564 | lm->smeta_sec = i * geo->sec_per_pl; | |
565 | lm->smeta_len = lm->smeta_sec * geo->sec_size; | |
566 | ||
567 | smeta_len = sizeof(struct line_smeta) + | |
568 | PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; | |
569 | if (smeta_len > lm->smeta_len) { | |
570 | i++; | |
571 | goto add_smeta_page; | |
572 | } | |
573 | ||
574 | /* Calculate necessary pages for emeta. See comment over struct | |
575 | * line_emeta definition | |
576 | */ | |
577 | i = 1; | |
578 | add_emeta_page: | |
579 | lm->emeta_sec = i * geo->sec_per_pl; | |
580 | lm->emeta_len = lm->emeta_sec * geo->sec_size; | |
581 | ||
582 | emeta_len = calc_emeta_len(pblk, lm); | |
583 | if (emeta_len > lm->emeta_len) { | |
584 | i++; | |
585 | goto add_emeta_page; | |
586 | } | |
587 | lm->emeta_bb = geo->nr_luns - i; | |
588 | ||
589 | nr_meta_blks = (lm->smeta_sec + lm->emeta_sec + | |
590 | (geo->sec_per_blk / 2)) / geo->sec_per_blk; | |
591 | lm->min_blk_line = nr_meta_blks + 1; | |
592 | ||
593 | l_mg->nr_lines = geo->blks_per_lun; | |
594 | l_mg->log_line = l_mg->data_line = NULL; | |
595 | l_mg->l_seq_nr = l_mg->d_seq_nr = 0; | |
596 | l_mg->nr_free_lines = 0; | |
597 | bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); | |
598 | ||
599 | /* smeta is always small enough to fit on a kmalloc memory allocation, | |
600 | * emeta depends on the number of LUNs allocated to the pblk instance | |
601 | */ | |
602 | l_mg->smeta_alloc_type = PBLK_KMALLOC_META; | |
603 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
604 | l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL); | |
605 | if (!l_mg->sline_meta[i].meta) | |
606 | while (--i >= 0) { | |
607 | kfree(l_mg->sline_meta[i].meta); | |
608 | ret = -ENOMEM; | |
609 | goto fail; | |
610 | } | |
611 | } | |
612 | ||
613 | if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) { | |
614 | l_mg->emeta_alloc_type = PBLK_VMALLOC_META; | |
615 | ||
616 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
617 | l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len); | |
618 | if (!l_mg->eline_meta[i].meta) | |
619 | while (--i >= 0) { | |
620 | vfree(l_mg->eline_meta[i].meta); | |
621 | ret = -ENOMEM; | |
622 | goto fail; | |
623 | } | |
624 | } | |
625 | } else { | |
626 | l_mg->emeta_alloc_type = PBLK_KMALLOC_META; | |
627 | ||
628 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
629 | l_mg->eline_meta[i].meta = | |
630 | kmalloc(lm->emeta_len, GFP_KERNEL); | |
631 | if (!l_mg->eline_meta[i].meta) | |
632 | while (--i >= 0) { | |
633 | kfree(l_mg->eline_meta[i].meta); | |
634 | ret = -ENOMEM; | |
635 | goto fail; | |
636 | } | |
637 | } | |
638 | } | |
639 | ||
640 | l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); | |
641 | if (!l_mg->bb_template) | |
642 | goto fail_free_meta; | |
643 | ||
644 | l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); | |
645 | if (!l_mg->bb_aux) | |
646 | goto fail_free_bb_template; | |
647 | ||
648 | bb_distance = (geo->nr_luns) * geo->sec_per_pl; | |
649 | for (i = 0; i < lm->sec_per_line; i += bb_distance) | |
650 | bitmap_set(l_mg->bb_template, i, geo->sec_per_pl); | |
651 | ||
652 | INIT_LIST_HEAD(&l_mg->free_list); | |
653 | INIT_LIST_HEAD(&l_mg->corrupt_list); | |
654 | INIT_LIST_HEAD(&l_mg->bad_list); | |
655 | INIT_LIST_HEAD(&l_mg->gc_full_list); | |
656 | INIT_LIST_HEAD(&l_mg->gc_high_list); | |
657 | INIT_LIST_HEAD(&l_mg->gc_mid_list); | |
658 | INIT_LIST_HEAD(&l_mg->gc_low_list); | |
659 | INIT_LIST_HEAD(&l_mg->gc_empty_list); | |
660 | ||
661 | l_mg->gc_lists[0] = &l_mg->gc_high_list; | |
662 | l_mg->gc_lists[1] = &l_mg->gc_mid_list; | |
663 | l_mg->gc_lists[2] = &l_mg->gc_low_list; | |
664 | ||
665 | spin_lock_init(&l_mg->free_lock); | |
666 | spin_lock_init(&l_mg->gc_lock); | |
667 | ||
668 | pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), | |
669 | GFP_KERNEL); | |
670 | if (!pblk->lines) | |
671 | goto fail_free_bb_aux; | |
672 | ||
673 | nr_free_blks = 0; | |
674 | for (i = 0; i < l_mg->nr_lines; i++) { | |
675 | line = &pblk->lines[i]; | |
676 | ||
677 | line->pblk = pblk; | |
678 | line->id = i; | |
679 | line->type = PBLK_LINETYPE_FREE; | |
680 | line->state = PBLK_LINESTATE_FREE; | |
681 | line->gc_group = PBLK_LINEGC_NONE; | |
682 | spin_lock_init(&line->lock); | |
683 | ||
684 | nr_bad_blks = pblk_bb_line(pblk, line); | |
685 | if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) | |
686 | goto fail_free_lines; | |
687 | ||
688 | line->blk_in_line = lm->blk_per_line - nr_bad_blks; | |
689 | if (line->blk_in_line < lm->min_blk_line) { | |
690 | line->state = PBLK_LINESTATE_BAD; | |
691 | list_add_tail(&line->list, &l_mg->bad_list); | |
692 | continue; | |
693 | } | |
694 | ||
695 | nr_free_blks += line->blk_in_line; | |
696 | ||
697 | l_mg->nr_free_lines++; | |
698 | list_add_tail(&line->list, &l_mg->free_list); | |
699 | } | |
700 | ||
701 | pblk_set_provision(pblk, nr_free_blks); | |
702 | ||
703 | sema_init(&pblk->erase_sem, 1); | |
704 | ||
705 | /* Cleanup per-LUN bad block lists - managed within lines on run-time */ | |
706 | for (i = 0; i < geo->nr_luns; i++) | |
707 | kfree(pblk->luns[i].bb_list); | |
708 | ||
709 | return 0; | |
710 | fail_free_lines: | |
711 | kfree(pblk->lines); | |
712 | fail_free_bb_aux: | |
713 | kfree(l_mg->bb_aux); | |
714 | fail_free_bb_template: | |
715 | kfree(l_mg->bb_template); | |
716 | fail_free_meta: | |
717 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
718 | pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); | |
719 | pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); | |
720 | } | |
721 | fail: | |
722 | for (i = 0; i < geo->nr_luns; i++) | |
723 | kfree(pblk->luns[i].bb_list); | |
724 | ||
725 | return ret; | |
726 | } | |
727 | ||
728 | static int pblk_writer_init(struct pblk *pblk) | |
729 | { | |
730 | setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk); | |
731 | mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); | |
732 | ||
733 | pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t"); | |
734 | if (IS_ERR(pblk->writer_ts)) { | |
735 | pr_err("pblk: could not allocate writer kthread\n"); | |
736 | return 1; | |
737 | } | |
738 | ||
739 | return 0; | |
740 | } | |
741 | ||
742 | static void pblk_writer_stop(struct pblk *pblk) | |
743 | { | |
744 | if (pblk->writer_ts) | |
745 | kthread_stop(pblk->writer_ts); | |
746 | del_timer(&pblk->wtimer); | |
747 | } | |
748 | ||
/* Free every resource owned by the instance and then the instance itself.
 * Called from pblk_exit() after pblk_tear_down().
 */
static void pblk_free(struct pblk *pblk)
{
	/* LUNs, per-line state and line metadata */
	pblk_luns_free(pblk);
	pblk_lines_free(pblk);
	pblk_line_meta_free(pblk);

	/* Pools, caches and the translation map */
	pblk_core_free(pblk);
	pblk_l2p_free(pblk);

	kfree(pblk);
}
759 | ||
760 | static void pblk_tear_down(struct pblk *pblk) | |
761 | { | |
762 | pblk_flush_writer(pblk); | |
763 | pblk_writer_stop(pblk); | |
764 | pblk_rb_sync_l2p(&pblk->rwb); | |
765 | pblk_recov_pad(pblk); | |
766 | pblk_rwb_free(pblk); | |
767 | pblk_rl_free(&pblk->rl); | |
768 | ||
769 | pr_debug("pblk: consistent tear down\n"); | |
770 | } | |
771 | ||
772 | static void pblk_exit(void *private) | |
773 | { | |
774 | struct pblk *pblk = private; | |
775 | ||
776 | down_write(&pblk_lock); | |
777 | pblk_gc_exit(pblk); | |
778 | pblk_tear_down(pblk); | |
779 | pblk_free(pblk); | |
780 | up_write(&pblk_lock); | |
781 | } | |
782 | ||
783 | static sector_t pblk_capacity(void *private) | |
784 | { | |
785 | struct pblk *pblk = private; | |
786 | ||
787 | return pblk->capacity * NR_PHY_IN_LOG; | |
788 | } | |
789 | ||
790 | static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, | |
791 | int flags) | |
792 | { | |
793 | struct nvm_geo *geo = &dev->geo; | |
794 | struct request_queue *bqueue = dev->q; | |
795 | struct request_queue *tqueue = tdisk->queue; | |
796 | struct pblk *pblk; | |
797 | int ret; | |
798 | ||
799 | if (dev->identity.dom & NVM_RSP_L2P) { | |
800 | pr_err("pblk: device-side L2P table not supported. (%x)\n", | |
801 | dev->identity.dom); | |
802 | return ERR_PTR(-EINVAL); | |
803 | } | |
804 | ||
805 | pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL); | |
806 | if (!pblk) | |
807 | return ERR_PTR(-ENOMEM); | |
808 | ||
809 | pblk->dev = dev; | |
810 | pblk->disk = tdisk; | |
811 | ||
812 | spin_lock_init(&pblk->trans_lock); | |
813 | spin_lock_init(&pblk->lock); | |
814 | ||
815 | if (flags & NVM_TARGET_FACTORY) | |
816 | pblk_setup_uuid(pblk); | |
817 | ||
818 | #ifdef CONFIG_NVM_DEBUG | |
819 | atomic_long_set(&pblk->inflight_writes, 0); | |
820 | atomic_long_set(&pblk->padded_writes, 0); | |
821 | atomic_long_set(&pblk->padded_wb, 0); | |
822 | atomic_long_set(&pblk->nr_flush, 0); | |
823 | atomic_long_set(&pblk->req_writes, 0); | |
824 | atomic_long_set(&pblk->sub_writes, 0); | |
825 | atomic_long_set(&pblk->sync_writes, 0); | |
826 | atomic_long_set(&pblk->compl_writes, 0); | |
827 | atomic_long_set(&pblk->inflight_reads, 0); | |
828 | atomic_long_set(&pblk->sync_reads, 0); | |
829 | atomic_long_set(&pblk->recov_writes, 0); | |
830 | atomic_long_set(&pblk->recov_writes, 0); | |
831 | atomic_long_set(&pblk->recov_gc_writes, 0); | |
832 | #endif | |
833 | ||
834 | atomic_long_set(&pblk->read_failed, 0); | |
835 | atomic_long_set(&pblk->read_empty, 0); | |
836 | atomic_long_set(&pblk->read_high_ecc, 0); | |
837 | atomic_long_set(&pblk->read_failed_gc, 0); | |
838 | atomic_long_set(&pblk->write_failed, 0); | |
839 | atomic_long_set(&pblk->erase_failed, 0); | |
840 | ||
841 | ret = pblk_luns_init(pblk, dev->luns); | |
842 | if (ret) { | |
843 | pr_err("pblk: could not initialize luns\n"); | |
844 | goto fail; | |
845 | } | |
846 | ||
847 | ret = pblk_lines_init(pblk); | |
848 | if (ret) { | |
849 | pr_err("pblk: could not initialize lines\n"); | |
850 | goto fail_free_luns; | |
851 | } | |
852 | ||
853 | ret = pblk_core_init(pblk); | |
854 | if (ret) { | |
855 | pr_err("pblk: could not initialize core\n"); | |
856 | goto fail_free_line_meta; | |
857 | } | |
858 | ||
859 | ret = pblk_l2p_init(pblk); | |
860 | if (ret) { | |
861 | pr_err("pblk: could not initialize maps\n"); | |
862 | goto fail_free_core; | |
863 | } | |
864 | ||
865 | ret = pblk_lines_configure(pblk, flags); | |
866 | if (ret) { | |
867 | pr_err("pblk: could not configure lines\n"); | |
868 | goto fail_free_l2p; | |
869 | } | |
870 | ||
871 | ret = pblk_writer_init(pblk); | |
872 | if (ret) { | |
873 | pr_err("pblk: could not initialize write thread\n"); | |
874 | goto fail_free_lines; | |
875 | } | |
876 | ||
877 | ret = pblk_gc_init(pblk); | |
878 | if (ret) { | |
879 | pr_err("pblk: could not initialize gc\n"); | |
880 | goto fail_stop_writer; | |
881 | } | |
882 | ||
883 | /* inherit the size from the underlying device */ | |
884 | blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); | |
885 | blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); | |
886 | ||
887 | blk_queue_write_cache(tqueue, true, false); | |
888 | ||
889 | tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size; | |
890 | tqueue->limits.discard_alignment = 0; | |
891 | blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); | |
892 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); | |
893 | ||
894 | pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n", | |
895 | geo->nr_luns, pblk->l_mg.nr_lines, | |
896 | (unsigned long long)pblk->rl.nr_secs, | |
897 | pblk->rwb.nr_entries); | |
898 | ||
899 | wake_up_process(pblk->writer_ts); | |
900 | return pblk; | |
901 | ||
902 | fail_stop_writer: | |
903 | pblk_writer_stop(pblk); | |
904 | fail_free_lines: | |
905 | pblk_lines_free(pblk); | |
906 | fail_free_l2p: | |
907 | pblk_l2p_free(pblk); | |
908 | fail_free_core: | |
909 | pblk_core_free(pblk); | |
910 | fail_free_line_meta: | |
911 | pblk_line_meta_free(pblk); | |
912 | fail_free_luns: | |
913 | pblk_luns_free(pblk); | |
914 | fail: | |
915 | kfree(pblk); | |
916 | return ERR_PTR(ret); | |
917 | } | |
918 | ||
919 | /* physical block device target */ | |
920 | static struct nvm_tgt_type tt_pblk = { | |
921 | .name = "pblk", | |
922 | .version = {1, 0, 0}, | |
923 | ||
924 | .make_rq = pblk_make_rq, | |
925 | .capacity = pblk_capacity, | |
926 | ||
927 | .init = pblk_init, | |
928 | .exit = pblk_exit, | |
929 | ||
930 | .sysfs_init = pblk_sysfs_init, | |
931 | .sysfs_exit = pblk_sysfs_exit, | |
932 | }; | |
933 | ||
934 | static int __init pblk_module_init(void) | |
935 | { | |
936 | return nvm_register_tgt_type(&tt_pblk); | |
937 | } | |
938 | ||
939 | static void pblk_module_exit(void) | |
940 | { | |
941 | nvm_unregister_tgt_type(&tt_pblk); | |
942 | } | |
943 | ||
/* Module entry/exit hooks and metadata */
module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");