git.proxmox.com Git mirror: mirror_ubuntu-artful-kernel.git
lightnvm: pblk: redesign GC algorithm
drivers/lightnvm/pblk-core.c
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-core.c - pblk's core functionality
 *
 */

#include "pblk.h"

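/* An erase failure marks the block bad: account the failure, drop the block
 * from the line's good-block count, flag it in blk_bitmap, and defer the
 * device-side bad-block table update to workqueue context (see
 * pblk_line_mark_bb below), since it cannot be done from the completion path.
 */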
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
			 struct ppa_addr *ppa)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int pos = pblk_dev_ppa_to_pos(geo, *ppa);

	pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
	atomic_long_inc(&pblk->erase_failed);

	atomic_dec(&line->blk_in_line);
	if (test_and_set_bit(pos, line->blk_bitmap))
		pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
							line->id, pos);

	pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
}

static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct pblk_line *line;

	line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
	atomic_dec(&line->left_seblks);

	if (rqd->error) {
		struct ppa_addr *ppa;

		ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
		if (!ppa)
			return;

		*ppa = rqd->ppa_addr;
		pblk_mark_bb(pblk, line, ppa);
	}
}

/* Erase completion assumes that only one block is erased at a time */
static void pblk_end_io_erase(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;

	__pblk_end_io_erase(pblk, rqd);
	mempool_free(rqd, pblk->g_rq_pool);
}

void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
			   u64 paddr)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list = NULL;
	/* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
	 * table is modified with reclaimed sectors, a check is done to ensure
	 * that newer updates are not overwritten.
	 */
	spin_lock(&line->lock);
	if (line->state == PBLK_LINESTATE_GC ||
					line->state == PBLK_LINESTATE_FREE) {
		spin_unlock(&line->lock);
		return;
	}

	if (test_and_set_bit(paddr, line->invalid_bitmap)) {
		WARN_ONCE(1, "pblk: double invalidate\n");
		spin_unlock(&line->lock);
		return;
	}
	le32_add_cpu(line->vsc, -1);

	if (line->state == PBLK_LINESTATE_CLOSED)
		move_list = pblk_line_gc_list(pblk, line);
	spin_unlock(&line->lock);

	if (move_list) {
		spin_lock(&l_mg->gc_lock);
		spin_lock(&line->lock);
		/* Prevent moving a line that has just been chosen for GC */
		if (line->state == PBLK_LINESTATE_GC ||
					line->state == PBLK_LINESTATE_FREE) {
			spin_unlock(&line->lock);
			spin_unlock(&l_mg->gc_lock);
			return;
		}
		spin_unlock(&line->lock);

		list_move_tail(&line->list, move_list);
		spin_unlock(&l_mg->gc_lock);
	}
}

void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
{
	struct pblk_line *line;
	u64 paddr;
	int line_id;

#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a device address */
	BUG_ON(pblk_addr_in_cache(ppa));
	BUG_ON(pblk_ppa_empty(ppa));
#endif

	line_id = pblk_tgt_ppa_to_line(ppa);
	line = &pblk->lines[line_id];
	paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);

	__pblk_map_invalidate(pblk, line, paddr);
}

static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
				  unsigned int nr_secs)
{
	sector_t lba;

	spin_lock(&pblk->trans_lock);
	for (lba = slba; lba < slba + nr_secs; lba++) {
		struct ppa_addr ppa;

		ppa = pblk_trans_map_get(pblk, lba);

		if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
			pblk_map_invalidate(pblk, ppa);

		pblk_ppa_set_empty(&ppa);
		pblk_trans_map_set(pblk, lba, ppa);
	}
	spin_unlock(&pblk->trans_lock);
}

struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
{
	mempool_t *pool;
	struct nvm_rq *rqd;
	int rq_size;

	if (rw == WRITE) {
		pool = pblk->w_rq_pool;
		rq_size = pblk_w_rq_size;
	} else {
		pool = pblk->g_rq_pool;
		rq_size = pblk_g_rq_size;
	}

	rqd = mempool_alloc(pool, GFP_KERNEL);
	memset(rqd, 0, rq_size);

	return rqd;
}

void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
{
	mempool_t *pool;

	if (rw == WRITE)
		pool = pblk->w_rq_pool;
	else
		pool = pblk->g_rq_pool;

	mempool_free(rqd, pool);
}

void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
			 int nr_pages)
{
	struct bio_vec bv;
	int i;

	WARN_ON(off + nr_pages != bio->bi_vcnt);

	bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
	for (i = off; i < nr_pages + off; i++) {
		bv = bio->bi_io_vec[i];
		mempool_free(bv.bv_page, pblk->page_pool);
	}
}

int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
		       int nr_pages)
{
	struct request_queue *q = pblk->dev->q;
	struct page *page;
	int i, ret;

	for (i = 0; i < nr_pages; i++) {
		page = mempool_alloc(pblk->page_pool, flags);
		if (!page)
			goto err;

		ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
		if (ret != PBLK_EXPOSED_PAGE_SIZE) {
			pr_err("pblk: could not add page to bio\n");
			mempool_free(page, pblk->page_pool);
			goto err;
		}
	}

	return 0;
err:
	pblk_bio_free_pages(pblk, bio, 0, i);
	return -1;
}

static void pblk_write_kick(struct pblk *pblk)
{
	wake_up_process(pblk->writer_ts);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
}

void pblk_write_timer_fn(unsigned long data)
{
	struct pblk *pblk = (struct pblk *)data;

	/* kick the write thread every tick to flush outstanding data */
	pblk_write_kick(pblk);
}

void pblk_write_should_kick(struct pblk *pblk)
{
	unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);

	if (secs_avail >= pblk->min_write_pgs)
		pblk_write_kick(pblk);
}

void pblk_end_bio_sync(struct bio *bio)
{
	struct completion *waiting = bio->bi_private;

	complete(waiting);
}

void pblk_end_io_sync(struct nvm_rq *rqd)
{
	struct completion *waiting = rqd->private;

	complete(waiting);
}

void pblk_flush_writer(struct pblk *pblk)
{
	struct bio *bio;
	int ret;
	DECLARE_COMPLETION_ONSTACK(wait);

	bio = bio_alloc(GFP_KERNEL, 1);
	if (!bio)
		return;

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_PREFLUSH);
	bio->bi_private = &wait;
	bio->bi_end_io = pblk_end_bio_sync;

	ret = pblk_write_to_cache(pblk, bio, 0);
	if (ret == NVM_IO_OK) {
		if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
			pr_err("pblk: flush cache timed out\n");
		}
	} else if (ret != NVM_IO_DONE) {
		pr_err("pblk: tear down bio failed\n");
	}

	if (bio->bi_status)
		pr_err("pblk: flush sync write failed (%u)\n", bio->bi_status);

	bio_put(bio);
}

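/* Classify a line into a GC list based on its valid sector count (vsc):
 *
 *   vsc == 0                      -> gc_full_list  (no data left to move)
 *   0 < vsc < high_thrs           -> gc_high_list  (few valid sectors, so
 *                                                   cheapest to reclaim)
 *   high_thrs <= vsc < mid_thrs   -> gc_mid_list
 *   mid_thrs <= vsc < sec_in_line -> gc_low_list
 *   vsc == sec_in_line            -> gc_empty_list (all sectors still valid)
 *   vsc > sec_in_line             -> corrupt_list  (accounting error)
 *
 * Returns the list the line should move to, or NULL if it is already on the
 * right one. The caller holds line->lock.
 */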
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list = NULL;
	int vsc = le32_to_cpu(*line->vsc);

	lockdep_assert_held(&line->lock);

	if (!vsc) {
		if (line->gc_group != PBLK_LINEGC_FULL) {
			line->gc_group = PBLK_LINEGC_FULL;
			move_list = &l_mg->gc_full_list;
		}
	} else if (vsc < lm->high_thrs) {
		if (line->gc_group != PBLK_LINEGC_HIGH) {
			line->gc_group = PBLK_LINEGC_HIGH;
			move_list = &l_mg->gc_high_list;
		}
	} else if (vsc < lm->mid_thrs) {
		if (line->gc_group != PBLK_LINEGC_MID) {
			line->gc_group = PBLK_LINEGC_MID;
			move_list = &l_mg->gc_mid_list;
		}
	} else if (vsc < line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_LOW) {
			line->gc_group = PBLK_LINEGC_LOW;
			move_list = &l_mg->gc_low_list;
		}
	} else if (vsc == line->sec_in_line) {
		if (line->gc_group != PBLK_LINEGC_EMPTY) {
			line->gc_group = PBLK_LINEGC_EMPTY;
			move_list = &l_mg->gc_empty_list;
		}
	} else {
		line->state = PBLK_LINESTATE_CORRUPT;
		line->gc_group = PBLK_LINEGC_NONE;
		move_list = &l_mg->corrupt_list;
		pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
						line->id, vsc,
						line->sec_in_line,
						lm->high_thrs, lm->mid_thrs);
	}

	return move_list;
}

void pblk_discard(struct pblk *pblk, struct bio *bio)
{
	sector_t slba = pblk_get_lba(bio);
	sector_t nr_secs = pblk_get_secs(bio);

	pblk_invalidate_range(pblk, slba, nr_secs);
}

struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
{
	struct ppa_addr ppa;

	spin_lock(&pblk->trans_lock);
	ppa = pblk_trans_map_get(pblk, lba);
	spin_unlock(&pblk->trans_lock);

	return ppa;
}

void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
{
	atomic_long_inc(&pblk->write_failed);
#ifdef CONFIG_NVM_DEBUG
	pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
}

void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
{
	/* Empty page read is not necessarily an error (e.g., L2P recovery) */
	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
		atomic_long_inc(&pblk->read_empty);
		return;
	}

	switch (rqd->error) {
	case NVM_RSP_WARN_HIGHECC:
		atomic_long_inc(&pblk->read_high_ecc);
		break;
	case NVM_RSP_ERR_FAILECC:
	case NVM_RSP_ERR_FAILCRC:
		atomic_long_inc(&pblk->read_failed);
		break;
	default:
		pr_err("pblk: unknown read error:%d\n", rqd->error);
	}
#ifdef CONFIG_NVM_DEBUG
	pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
}

void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
{
	pblk->sec_per_write = sec_per_write;
}

int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct nvm_tgt_dev *dev = pblk->dev;

#ifdef CONFIG_NVM_DEBUG
	struct ppa_addr *ppa_list;

	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
	if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
		WARN_ON(1);
		return -EINVAL;
	}

	if (rqd->opcode == NVM_OP_PWRITE) {
		struct pblk_line *line;
		struct ppa_addr ppa;
		int i;

		for (i = 0; i < rqd->nr_ppas; i++) {
			ppa = ppa_list[i];
			line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];

			spin_lock(&line->lock);
			if (line->state != PBLK_LINESTATE_OPEN) {
				pr_err("pblk: bad ppa: line:%d,state:%d\n",
							line->id, line->state);
				WARN_ON(1);
				spin_unlock(&line->lock);
				return -EINVAL;
			}
			spin_unlock(&line->lock);
		}
	}
#endif
	return nvm_submit_io(dev, rqd);
}

struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
			      unsigned int nr_secs, unsigned int len,
			      gfp_t gfp_mask)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	void *kaddr = data;
	struct page *page;
	struct bio *bio;
	int i, ret;

	if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
		return bio_map_kern(dev->q, kaddr, len, gfp_mask);

	bio = bio_kmalloc(gfp_mask, nr_secs);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nr_secs; i++) {
		page = vmalloc_to_page(kaddr);
		if (!page) {
			pr_err("pblk: could not map vmalloc bio\n");
			bio_put(bio);
			bio = ERR_PTR(-ENOMEM);
			goto out;
		}

		ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
		if (ret != PAGE_SIZE) {
			pr_err("pblk: could not add page to bio\n");
			bio_put(bio);
			bio = ERR_PTR(-ENOMEM);
			goto out;
		}

		kaddr += PAGE_SIZE;
	}
out:
	return bio;
}

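/* Pick how many sectors to submit in the next write: the full write size
 * (sec_per_write) when enough data is buffered, otherwise the largest
 * multiple of the minimum write size (min_write_pgs), and a single minimum
 * write only when a flush forces out a partial buffer. With illustrative
 * values min = 4 and max = 8: avail = 11 -> 8; avail = 6 -> 4; avail = 2
 * with no flush -> 0 (keep buffering); avail = 2 with a flush pending -> 4
 * (padding of the tail is left to the write path).
 */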
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
		   unsigned long secs_to_flush)
{
	int max = pblk->sec_per_write;
	int min = pblk->min_write_pgs;
	int secs_to_sync = 0;

	if (secs_avail >= max)
		secs_to_sync = max;
	else if (secs_avail >= min)
		secs_to_sync = min * (secs_avail / min);
	else if (secs_to_flush)
		secs_to_sync = min;

	return secs_to_sync;
}

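/* Rewind the line's write pointer after a failed write: return cur_sec to the
 * start of the last nr_secs allocated sectors and clear their map bits so
 * they can be handed out again.
 */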
void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;

	addr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	line->cur_sec = addr - nr_secs;

	for (i = 0; i < nr_secs; i++)
		WARN_ON(!test_and_clear_bit(line->cur_sec + i,
							line->map_bitmap));
}

u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;
	int i;

	lockdep_assert_held(&line->lock);

	/* logic error: ppa out-of-bounds. Prevent generating bad address */
	if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
		WARN(1, "pblk: page allocation out of bounds\n");
		nr_secs = pblk->lm.sec_per_line - line->cur_sec;
	}

	line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	for (i = 0; i < nr_secs; i++, line->cur_sec++)
		WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));

	return addr;
}

u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
	u64 addr;

	/* Lock needed in case a write fails and a recovery needs to remap
	 * failed write buffer entries
	 */
	spin_lock(&line->lock);
	addr = __pblk_alloc_page(pblk, line, nr_secs);
	line->left_msecs -= nr_secs;
	WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
	spin_unlock(&line->lock);

	return addr;
}

u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
{
	u64 paddr;

	spin_lock(&line->lock);
	paddr = find_next_zero_bit(line->map_bitmap,
					pblk->lm.sec_per_line, line->cur_sec);
	spin_unlock(&line->lock);

	return paddr;
}

/*
 * Submit emeta to one LUN in the line at a time to avoid a deadlock when
 * taking the per-LUN semaphore.
 */
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
				     void *emeta_buf, u64 paddr, int dir)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	void *ppa_list, *meta_list;
	struct bio *bio;
	struct nvm_rq rqd;
	dma_addr_t dma_ppa_list, dma_meta_list;
	int min = pblk->min_write_pgs;
	int left_ppas = lm->emeta_sec[0];
	int id = line->id;
	int rq_ppas, rq_len;
	int cmd_op, bio_op;
	int i, j;
	int ret;
	DECLARE_COMPLETION_ONSTACK(wait);

	if (dir == WRITE) {
		bio_op = REQ_OP_WRITE;
		cmd_op = NVM_OP_PWRITE;
	} else if (dir == READ) {
		bio_op = REQ_OP_READ;
		cmd_op = NVM_OP_PREAD;
	} else
		return -EINVAL;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&dma_meta_list);
	if (!meta_list)
		return -ENOMEM;

	ppa_list = meta_list + pblk_dma_meta_size;
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;

next_rq:
	memset(&rqd, 0, sizeof(struct nvm_rq));

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	rq_len = rq_ppas * geo->sec_size;

	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto free_rqd_dma;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, bio_op, 0);

	rqd.bio = bio;
	rqd.meta_list = meta_list;
	rqd.ppa_list = ppa_list;
	rqd.dma_meta_list = dma_meta_list;
	rqd.dma_ppa_list = dma_ppa_list;
	rqd.opcode = cmd_op;
	rqd.nr_ppas = rq_ppas;
	rqd.end_io = pblk_end_io_sync;
	rqd.private = &wait;

	if (dir == WRITE) {
		struct pblk_sec_meta *meta_list = rqd.meta_list;

		rqd.flags = pblk_set_progr_mode(pblk, WRITE);
		for (i = 0; i < rqd.nr_ppas; ) {
			spin_lock(&line->lock);
			paddr = __pblk_alloc_page(pblk, line, min);
			spin_unlock(&line->lock);
			for (j = 0; j < min; j++, i++, paddr++) {
				meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
				rqd.ppa_list[i] =
					addr_to_gen_ppa(pblk, paddr, id);
			}
		}
	} else {
		for (i = 0; i < rqd.nr_ppas; ) {
			struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
			int pos = pblk_dev_ppa_to_pos(geo, ppa);
			int read_type = PBLK_READ_RANDOM;

			if (pblk_io_aligned(pblk, rq_ppas))
				read_type = PBLK_READ_SEQUENTIAL;
			rqd.flags = pblk_set_read_mode(pblk, read_type);

			while (test_bit(pos, line->blk_bitmap)) {
				paddr += min;
				if (pblk_boundary_paddr_checks(pblk, paddr)) {
					pr_err("pblk: corrupt emeta line:%d\n",
								line->id);
					bio_put(bio);
					ret = -EINTR;
					goto free_rqd_dma;
				}

				ppa = addr_to_gen_ppa(pblk, paddr, id);
				pos = pblk_dev_ppa_to_pos(geo, ppa);
			}

			if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
				pr_err("pblk: corrupt emeta line:%d\n",
								line->id);
				bio_put(bio);
				ret = -EINTR;
				goto free_rqd_dma;
			}

			for (j = 0; j < min; j++, i++, paddr++)
				rqd.ppa_list[i] =
					addr_to_gen_ppa(pblk, paddr, line->id);
		}
	}

	ret = pblk_submit_io(pblk, &rqd);
	if (ret) {
		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
		bio_put(bio);
		goto free_rqd_dma;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: emeta I/O timed out\n");
	}
	reinit_completion(&wait);

	if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
		bio_put(bio);

	if (rqd.error) {
		if (dir == WRITE)
			pblk_log_write_err(pblk, &rqd);
		else
			pblk_log_read_err(pblk, &rqd);
	}

	emeta_buf += rq_len;
	left_ppas -= rq_ppas;
	if (left_ppas)
		goto next_rq;
free_rqd_dma:
	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
	return ret;
}

u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	int bit;

	/* This usually only happens on bad lines */
	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	if (bit >= lm->blk_per_line)
		return -1;

	return bit * geo->sec_per_pl;
}

static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
				     u64 paddr, int dir)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_meta *lm = &pblk->lm;
	struct bio *bio;
	struct nvm_rq rqd;
	__le64 *lba_list = NULL;
	int i, ret;
	int cmd_op, bio_op;
	int flags;
	DECLARE_COMPLETION_ONSTACK(wait);

	if (dir == WRITE) {
		bio_op = REQ_OP_WRITE;
		cmd_op = NVM_OP_PWRITE;
		flags = pblk_set_progr_mode(pblk, WRITE);
		lba_list = emeta_to_lbas(pblk, line->emeta->buf);
	} else if (dir == READ) {
		bio_op = REQ_OP_READ;
		cmd_op = NVM_OP_PREAD;
		flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	} else
		return -EINVAL;

	memset(&rqd, 0, sizeof(struct nvm_rq));

	rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&rqd.dma_meta_list);
	if (!rqd.meta_list)
		return -ENOMEM;

	rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
	rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;

	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto free_ppa_list;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, bio_op, 0);

	rqd.bio = bio;
	rqd.opcode = cmd_op;
	rqd.flags = flags;
	rqd.nr_ppas = lm->smeta_sec;
	rqd.end_io = pblk_end_io_sync;
	rqd.private = &wait;

	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
		struct pblk_sec_meta *meta_list = rqd.meta_list;

		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);

		if (dir == WRITE) {
			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);

			meta_list[i].lba = lba_list[paddr] = addr_empty;
		}
	}

	/*
	 * This I/O is sent by the write thread when a line is replaced. Since
	 * the write thread is the only one sending write and erase commands,
	 * there is no need to take the LUN semaphore.
	 */
	ret = pblk_submit_io(pblk, &rqd);
	if (ret) {
		pr_err("pblk: smeta I/O submission failed: %d\n", ret);
		bio_put(bio);
		goto free_ppa_list;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: smeta I/O timed out\n");
	}

	if (rqd.error) {
		if (dir == WRITE)
			pblk_log_write_err(pblk, &rqd);
		else
			pblk_log_read_err(pblk, &rqd);
	}

free_ppa_list:
	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);

	return ret;
}

int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
{
	u64 bpaddr = pblk_line_smeta_start(pblk, line);

	return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}

int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
			 void *emeta_buf)
{
	return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
						line->emeta_ssec, READ);
}

static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
			    struct ppa_addr ppa)
{
	rqd->opcode = NVM_OP_ERASE;
	rqd->ppa_addr = ppa;
	rqd->nr_ppas = 1;
	rqd->flags = pblk_set_progr_mode(pblk, ERASE);
	rqd->bio = NULL;
}

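/* Erase a single block synchronously. The async erase completion logic is
 * reused by calling __pblk_end_io_erase by hand once the request completes
 * (or the submission fails), so bad-block accounting happens in both paths.
 */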
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
	struct nvm_rq rqd;
	int ret;
	DECLARE_COMPLETION_ONSTACK(wait);

	memset(&rqd, 0, sizeof(struct nvm_rq));

	pblk_setup_e_rq(pblk, &rqd, ppa);

	rqd.end_io = pblk_end_io_sync;
	rqd.private = &wait;

	/* The write thread schedules erases so that it minimizes disturbances
	 * with writes. Thus, there is no need to take the LUN semaphore.
	 */
	ret = pblk_submit_io(pblk, &rqd);
	if (ret) {
		struct nvm_tgt_dev *dev = pblk->dev;
		struct nvm_geo *geo = &dev->geo;

		pr_err("pblk: could not sync erase line:%d,blk:%d\n",
					pblk_dev_ppa_to_line(ppa),
					pblk_dev_ppa_to_pos(geo, ppa));

		rqd.error = ret;
		goto out;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: sync erase timed out\n");
	}

out:
	rqd.private = pblk;
	__pblk_end_io_erase(pblk, &rqd);

	return 0;
}

int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct ppa_addr ppa;
	int bit = -1;

	/* Erase only good blocks, one at a time */
	do {
		spin_lock(&line->lock);
		bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
								bit + 1);
		if (bit >= lm->blk_per_line) {
			spin_unlock(&line->lock);
			break;
		}

		ppa = pblk->luns[bit].bppa; /* set ch and lun */
		ppa.g.blk = line->id;

		atomic_dec(&line->left_eblks);
		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
		spin_unlock(&line->lock);

		if (pblk_blk_erase_sync(pblk, ppa)) {
			pr_err("pblk: failed to erase line %d\n", line->id);
			return -ENOMEM;
		}
	} while (1);

	return 0;
}

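/* Reserve one of the PBLK_DATA_LINES per-line metadata buffers for this line.
 * Called with l_mg->free_lock held; if all meta buffers are in use, the lock
 * is dropped while waiting for one to be released and then retaken.
 */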
static void pblk_line_setup_metadata(struct pblk_line *line,
				     struct pblk_line_mgmt *l_mg,
				     struct pblk_line_meta *lm)
{
	int meta_line;

retry_meta:
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	if (meta_line == PBLK_DATA_LINES) {
		spin_unlock(&l_mg->free_lock);
		io_schedule();
		spin_lock(&l_mg->free_lock);
		goto retry_meta;
	}

	set_bit(meta_line, &l_mg->meta_bitmap);
	line->meta_line = meta_line;

	line->smeta = l_mg->sline_meta[meta_line];
	line->emeta = l_mg->eline_meta[meta_line];

	memset(line->smeta, 0, lm->smeta_len);
	memset(line->emeta->buf, 0, lm->emeta_len[0]);

	line->emeta->mem = 0;
	atomic_set(&line->emeta->sync, 0);
}

/* For now lines are always assumed full lines. Thus, smeta former and current
 * lun bitmaps are omitted.
 */
static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
				   struct pblk_line *cur)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
	int nr_blk_line;

	/* After erasing the line, new bad blocks might appear and we risk
	 * having an invalid line
	 */
	nr_blk_line = lm->blk_per_line -
			bitmap_weight(line->blk_bitmap, lm->blk_per_line);
	if (nr_blk_line < lm->min_blk_line) {
		spin_lock(&l_mg->free_lock);
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);
		spin_unlock(&l_mg->free_lock);

		pr_debug("pblk: line %d is bad\n", line->id);

		return 0;
	}

	/* Run-time metadata */
	line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);

	/* Mark LUNs allocated in this line (all for now) */
	bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);

	smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
	memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
	smeta_buf->header.id = cpu_to_le32(line->id);
	smeta_buf->header.type = cpu_to_le16(line->type);
	smeta_buf->header.version = cpu_to_le16(1);

	/* Start metadata */
	smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);

	/* Fill metadata among lines */
	if (cur) {
		memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
		smeta_buf->prev_id = cpu_to_le32(cur->id);
		cur->emeta->buf->next_id = cpu_to_le32(line->id);
	} else {
		smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
	}

	/* All smeta must be set at this point */
	smeta_buf->header.crc = cpu_to_le32(
			pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
	smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));

	/* End metadata */
	memcpy(&emeta_buf->header, &smeta_buf->header,
						sizeof(struct line_header));
	emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
	emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
	emeta_buf->nr_valid_lbas = cpu_to_le64(0);
	emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
	emeta_buf->crc = cpu_to_le32(0);
	emeta_buf->prev_id = smeta_buf->prev_id;

	return 1;
}

/* Map out bad blocks and the smeta/emeta regions on the line bitmaps, and
 * compute how many sectors are left in the line for user data.
 */
static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
			     int init)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int nr_bb = 0;
	u64 off;
	int bit = -1;

	line->sec_in_line = lm->sec_per_line;

	/* Capture bad block information on line mapping bitmaps */
	while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
					bit + 1)) < lm->blk_per_line) {
		off = bit * geo->sec_per_pl;
		bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
							lm->sec_per_line);
		bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
							lm->sec_per_line);
		line->sec_in_line -= geo->sec_per_blk;
		if (bit >= lm->emeta_bb)
			nr_bb++;
	}

	/* Mark smeta metadata sectors as bad sectors */
	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	off = bit * geo->sec_per_pl;
retry_smeta:
	bitmap_set(line->map_bitmap, off, lm->smeta_sec);
	line->sec_in_line -= lm->smeta_sec;
	line->smeta_ssec = off;
	line->cur_sec = off + lm->smeta_sec;

	if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
		pr_debug("pblk: line smeta I/O failed. Retry\n");
		off += geo->sec_per_pl;
		goto retry_smeta;
	}

	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);

	/* Mark emeta metadata sectors as bad sectors. We need to consider bad
	 * blocks to make sure that there are enough sectors to store emeta
	 */
	bit = lm->sec_per_line;
	off = lm->sec_per_line - lm->emeta_sec[0];
	bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
	while (nr_bb) {
		off -= geo->sec_per_pl;
		if (!test_bit(off, line->invalid_bitmap)) {
			bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
			nr_bb--;
		}
	}

	line->sec_in_line -= lm->emeta_sec[0];
	line->emeta_ssec = off;
	line->nr_valid_lbas = 0;
	line->left_msecs = line->sec_in_line;
	*line->vsc = cpu_to_le32(line->sec_in_line);

	if (lm->sec_per_line - line->sec_in_line !=
		bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);
		pr_err("pblk: unexpected line %d is bad\n", line->id);

		return 0;
	}

	return 1;
}

static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	int blk_in_line = atomic_read(&line->blk_in_line);

	line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
	if (!line->map_bitmap)
		return -ENOMEM;
	memset(line->map_bitmap, 0, lm->sec_bitmap_len);

	/* invalid_bitmap is special since it is used when the line is closed.
	 * No need to zero it; it will be initialized using bb info from
	 * map_bitmap
	 */
	line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
	if (!line->invalid_bitmap) {
		mempool_free(line->map_bitmap, pblk->line_meta_pool);
		return -ENOMEM;
	}

	spin_lock(&line->lock);
	if (line->state != PBLK_LINESTATE_FREE) {
		spin_unlock(&line->lock);
		WARN(1, "pblk: corrupted line state\n");
		return -EINTR;
	}
	line->state = PBLK_LINESTATE_OPEN;

	atomic_set(&line->left_eblks, blk_in_line);
	atomic_set(&line->left_seblks, blk_in_line);

	line->meta_distance = lm->meta_distance;
	spin_unlock(&line->lock);

	/* Bad blocks do not need to be erased */
	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);

	kref_init(&line->ref);

	return 0;
}

int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int ret;

	spin_lock(&l_mg->free_lock);
	l_mg->data_line = line;
	list_del(&line->list);

	ret = pblk_line_prepare(pblk, line);
	if (ret) {
		list_add(&line->list, &l_mg->free_list);
		spin_unlock(&l_mg->free_lock);
		return ret;
	}
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_dec(&pblk->rl, line);

	if (!pblk_line_init_bb(pblk, line, 0)) {
		list_add(&line->list, &l_mg->free_list);
		return -EINTR;
	}

	return 0;
}

void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
{
	mempool_free(line->map_bitmap, pblk->line_meta_pool);
	line->map_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;
}

struct pblk_line *pblk_line_get(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line = NULL;
	int bit;

	lockdep_assert_held(&l_mg->free_lock);

retry_get:
	if (list_empty(&l_mg->free_list)) {
		pr_err("pblk: no free lines\n");
		goto out;
	}

	line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
	list_del(&line->list);
	l_mg->nr_free_lines--;

	bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
	if (unlikely(bit >= lm->blk_per_line)) {
		spin_lock(&line->lock);
		line->state = PBLK_LINESTATE_BAD;
		spin_unlock(&line->lock);

		list_add_tail(&line->list, &l_mg->bad_list);

		pr_debug("pblk: line %d is bad\n", line->id);
		goto retry_get;
	}

	if (pblk_line_prepare(pblk, line)) {
		pr_err("pblk: failed to prepare line %d\n", line->id);
		list_add(&line->list, &l_mg->free_list);
		l_mg->nr_free_lines++;
		return NULL;
	}

out:
	return line;
}

static struct pblk_line *pblk_line_retry(struct pblk *pblk,
					 struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *retry_line;

	spin_lock(&l_mg->free_lock);
	retry_line = pblk_line_get(pblk);
	if (!retry_line) {
		l_mg->data_line = NULL;
		spin_unlock(&l_mg->free_lock);
		return NULL;
	}

	retry_line->smeta = line->smeta;
	retry_line->emeta = line->emeta;
	retry_line->meta_line = line->meta_line;

	pblk_line_free(pblk, line);
	l_mg->data_line = retry_line;
	spin_unlock(&l_mg->free_lock);

	if (pblk_line_erase(pblk, retry_line)) {
		spin_lock(&l_mg->free_lock);
		l_mg->data_line = NULL;
		spin_unlock(&l_mg->free_lock);
		return NULL;
	}

	pblk_rl_free_lines_dec(&pblk->rl, retry_line);

	return retry_line;
}

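/* Bring up the first data line at target creation: take a line from the free
 * list, reserve the next line for preparation, erase the line, and write its
 * start metadata. Metadata or bad-block setup failures fall back to another
 * free line via pblk_line_retry.
 */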
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int is_next = 0;

	spin_lock(&l_mg->free_lock);
	line = pblk_line_get(pblk);
	if (!line) {
		spin_unlock(&l_mg->free_lock);
		return NULL;
	}

	line->seq_nr = l_mg->d_seq_nr++;
	line->type = PBLK_LINETYPE_DATA;
	l_mg->data_line = line;

	pblk_line_setup_metadata(line, l_mg, &pblk->lm);

	/* Allocate next line for preparation */
	l_mg->data_next = pblk_line_get(pblk);
	if (l_mg->data_next) {
		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
		l_mg->data_next->type = PBLK_LINETYPE_DATA;
		is_next = 1;
	}
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_dec(&pblk->rl, line);
	if (is_next)
		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);

	if (pblk_line_erase(pblk, line))
		return NULL;

retry_setup:
	if (!pblk_line_init_metadata(pblk, line, NULL)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;

		goto retry_setup;
	}

	if (!pblk_line_init_bb(pblk, line, 1)) {
		line = pblk_line_retry(pblk, line);
		if (!line)
			return NULL;

		goto retry_setup;
	}

	return line;
}

struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *cur, *new;
	unsigned int left_seblks;
	int is_next = 0;

	cur = l_mg->data_line;
	new = l_mg->data_next;
	if (!new)
		return NULL;
	l_mg->data_line = new;

retry_line:
	left_seblks = atomic_read(&new->left_seblks);
	if (left_seblks) {
		/* If line is not fully erased, erase it */
		if (atomic_read(&new->left_eblks)) {
			if (pblk_line_erase(pblk, new))
				return NULL;
		} else {
			io_schedule();
		}
		goto retry_line;
	}

	spin_lock(&l_mg->free_lock);
	/* Allocate next line for preparation */
	l_mg->data_next = pblk_line_get(pblk);
	if (l_mg->data_next) {
		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
		l_mg->data_next->type = PBLK_LINETYPE_DATA;
		is_next = 1;
	}

	pblk_line_setup_metadata(new, l_mg, &pblk->lm);
	spin_unlock(&l_mg->free_lock);

	if (is_next)
		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);

retry_setup:
	if (!pblk_line_init_metadata(pblk, new, cur)) {
		new = pblk_line_retry(pblk, new);
		if (!new)
			return NULL;

		goto retry_setup;
	}

	if (!pblk_line_init_bb(pblk, new, 1)) {
		new = pblk_line_retry(pblk, new);
		if (!new)
			return NULL;

		goto retry_setup;
	}

	return new;
}

void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
{
	if (line->map_bitmap)
		mempool_free(line->map_bitmap, pblk->line_meta_pool);
	if (line->invalid_bitmap)
		mempool_free(line->invalid_bitmap, pblk->line_meta_pool);

	*line->vsc = cpu_to_le32(EMPTY_ENTRY);

	line->map_bitmap = NULL;
	line->invalid_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;
}

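/* Drop the last reference on a line that GC has fully moved: free its
 * bitmaps, return it to the free list, and credit the rate limiter so user
 * writes regain the freed capacity.
 */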
void pblk_line_put(struct kref *ref)
{
	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
	struct pblk *pblk = line->pblk;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;

	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_GC);
	line->state = PBLK_LINESTATE_FREE;
	line->gc_group = PBLK_LINEGC_NONE;
	pblk_line_free(pblk, line);
	spin_unlock(&line->lock);

	spin_lock(&l_mg->free_lock);
	list_add_tail(&line->list, &l_mg->free_list);
	l_mg->nr_free_lines++;
	spin_unlock(&l_mg->free_lock);

	pblk_rl_free_lines_inc(&pblk->rl, line);
}

int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
{
	struct nvm_rq *rqd;
	int err;

	rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
	memset(rqd, 0, pblk_g_rq_size);

	pblk_setup_e_rq(pblk, rqd, ppa);

	rqd->end_io = pblk_end_io_erase;
	rqd->private = pblk;

	/* The write thread schedules erases so that it minimizes disturbances
	 * with writes. Thus, there is no need to take the LUN semaphore.
	 */
	err = pblk_submit_io(pblk, rqd);
	if (err) {
		struct nvm_tgt_dev *dev = pblk->dev;
		struct nvm_geo *geo = &dev->geo;

		pr_err("pblk: could not async erase line:%d,blk:%d\n",
					pblk_dev_ppa_to_line(ppa),
					pblk_dev_ppa_to_pos(geo, ppa));
	}

	return err;
}

struct pblk_line *pblk_line_get_data(struct pblk *pblk)
{
	return pblk->l_mg.data_line;
}

/* For now, always erase next line */
struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
	return pblk->l_mg.data_next;
}

int pblk_line_is_full(struct pblk_line *line)
{
	return (line->left_msecs == 0);
}

void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct list_head *move_list;

	WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
				"pblk: corrupt closed line %d\n", line->id);

	spin_lock(&l_mg->free_lock);
	WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
	spin_unlock(&l_mg->free_lock);

	spin_lock(&l_mg->gc_lock);
	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_OPEN);
	line->state = PBLK_LINESTATE_CLOSED;
	move_list = pblk_line_gc_list(pblk, line);

	list_add_tail(&line->list, move_list);

	mempool_free(line->map_bitmap, pblk->line_meta_pool);
	line->map_bitmap = NULL;
	line->smeta = NULL;
	line->emeta = NULL;

	spin_unlock(&line->lock);
	spin_unlock(&l_mg->gc_lock);

	pblk_gc_should_kick(pblk);
}

void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	/* No need for an exact vsc value; avoid a big line lock and take an
	 * approximation instead
	 */
	memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
	memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);

	emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
	emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));

	spin_lock(&l_mg->close_lock);
	spin_lock(&line->lock);
	list_add_tail(&line->list, &l_mg->emeta_list);
	spin_unlock(&line->lock);
	spin_unlock(&l_mg->close_lock);
}

void pblk_line_close_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line *line = line_ws->line;

	pblk_line_close(pblk, line);
	mempool_free(line_ws, pblk->line_ws_pool);
}

void pblk_line_mark_bb(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct ppa_addr *ppa = line_ws->priv;
	int ret;

	ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
	if (ret) {
		struct pblk_line *line;
		int pos;

		line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
		pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);

		pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
				line->id, pos);
	}

	kfree(ppa);
	mempool_free(line_ws, pblk->line_ws_pool);
}

void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
		      void (*work)(struct work_struct *))
{
	struct pblk_line_ws *line_ws;

	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
	if (!line_ws)
		return;

	line_ws->pblk = pblk;
	line_ws->line = line;
	line_ws->priv = priv;

	INIT_WORK(&line_ws->ws, work);
	queue_work(pblk->kw_wq, &line_ws->ws);
}

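/* Take the write semaphore for the (single) LUN addressed by this request.
 * The acquired LUN is recorded in lun_bitmap; the matching pblk_up_rq call
 * releases every LUN set in the bitmap and frees it, so ownership of the
 * bitmap passes to the request until completion.
 */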
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
		  unsigned long *lun_bitmap)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
	int ret;

	/*
	 * Only send one inflight I/O per LUN. Since we map at a page
	 * granularity, all ppas in the I/O will map to the same LUN
	 */
#ifdef CONFIG_NVM_DEBUG
	int i;

	for (i = 1; i < nr_ppas; i++)
		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
				ppa_list[0].g.ch != ppa_list[i].g.ch);
#endif
	/* If the LUN has been locked for this same request, do not attempt to
	 * lock it again
	 */
	if (test_and_set_bit(pos, lun_bitmap))
		return;

	rlun = &pblk->luns[pos];
	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
	if (ret) {
		switch (ret) {
		case -ETIME:
			pr_err("pblk: lun semaphore timed out\n");
			break;
		case -EINTR:
			pr_err("pblk: lun semaphore interrupted\n");
			break;
		}
	}
}

void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
		unsigned long *lun_bitmap)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int nr_luns = geo->nr_luns;
	int bit = -1;

	while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
		rlun = &pblk->luns[bit];
		up(&rlun->wr_sem);
	}

	kfree(lun_bitmap);
}

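/* Point an LBA at a new ppa. If the LBA was previously mapped to a device
 * address, that old sector is invalidated so its line's valid sector count
 * drops and GC can reclaim it.
 */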
void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
{
	struct ppa_addr l2p_ppa;

	/* logic error: lba out-of-bounds. Ignore update */
	if (!(lba < pblk->rl.nr_secs)) {
		WARN(1, "pblk: corrupted L2P map request\n");
		return;
	}

	spin_lock(&pblk->trans_lock);
	l2p_ppa = pblk_trans_map_get(pblk, lba);

	if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
		pblk_map_invalidate(pblk, l2p_ppa);

	pblk_trans_map_set(pblk, lba, ppa);
	spin_unlock(&pblk->trans_lock);
}

void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
{
#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a cache address */
	BUG_ON(!pblk_addr_in_cache(ppa));
	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
#endif

	pblk_update_map(pblk, lba, ppa);
}

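/* L2P update for sectors rewritten by GC. The LBA is remapped to the cached
 * GC copy (and 1 returned) only if the entry still points to the line being
 * collected; if the host wrote the LBA in the meantime (entry now in cache,
 * empty, or on another line), the stale GC copy is dropped and 0 returned.
 */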
int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
		       struct pblk_line *gc_line)
{
	struct ppa_addr l2p_ppa;
	int ret = 1;

#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a cache address */
	BUG_ON(!pblk_addr_in_cache(ppa));
	BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
#endif

	/* logic error: lba out-of-bounds. Ignore update */
	if (!(lba < pblk->rl.nr_secs)) {
		WARN(1, "pblk: corrupted L2P map request\n");
		return 0;
	}

	spin_lock(&pblk->trans_lock);
	l2p_ppa = pblk_trans_map_get(pblk, lba);

	/* Prevent updated entries from being overwritten by GC */
	if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
				pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
		ret = 0;
		goto out;
	}

	pblk_trans_map_set(pblk, lba, ppa);
out:
	spin_unlock(&pblk->trans_lock);
	return ret;
}

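/* L2P update run at write completion, when cached entries reach the device.
 * entry_line is the cacheline the entry pointed to when it was submitted; if
 * the L2P no longer matches it, the host overwrote the LBA while the write
 * was in flight and the just-written device sector is invalidated instead.
 */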
void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
			 struct ppa_addr entry_line)
{
	struct ppa_addr l2p_line;

#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a device address */
	BUG_ON(pblk_addr_in_cache(ppa));
#endif
	/* Invalidate and discard padded entries */
	if (lba == ADDR_EMPTY) {
#ifdef CONFIG_NVM_DEBUG
		atomic_long_inc(&pblk->padded_wb);
#endif
		pblk_map_invalidate(pblk, ppa);
		return;
	}

	/* logic error: lba out-of-bounds. Ignore update */
	if (!(lba < pblk->rl.nr_secs)) {
		WARN(1, "pblk: corrupted L2P map request\n");
		return;
	}

	spin_lock(&pblk->trans_lock);
	l2p_line = pblk_trans_map_get(pblk, lba);

	/* Do not update L2P if the cacheline has been updated. In this case,
	 * the mapped ppa must be invalidated
	 */
	if (l2p_line.ppa != entry_line.ppa) {
		if (!pblk_ppa_empty(ppa))
			pblk_map_invalidate(pblk, ppa);
		goto out;
	}

#ifdef CONFIG_NVM_DEBUG
	WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
#endif

	pblk_trans_map_set(pblk, lba, ppa);
out:
	spin_unlock(&pblk->trans_lock);
}

void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
			 sector_t blba, int nr_secs)
{
	int i;

	spin_lock(&pblk->trans_lock);
	for (i = 0; i < nr_secs; i++)
		ppas[i] = pblk_trans_map_get(pblk, blba + i);
	spin_unlock(&pblk->trans_lock);
}

void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
			  u64 *lba_list, int nr_secs)
{
	sector_t lba;
	int i;

	spin_lock(&pblk->trans_lock);
	for (i = 0; i < nr_secs; i++) {
		lba = lba_list[i];
		if (lba == ADDR_EMPTY) {
			ppas[i].ppa = ADDR_EMPTY;
		} else {
			/* logic error: lba out-of-bounds. Ignore update */
			if (!(lba < pblk->rl.nr_secs)) {
				WARN(1, "pblk: corrupted L2P map request\n");
				continue;
			}
			ppas[i] = pblk_trans_map_get(pblk, lba);
		}
	}
	spin_unlock(&pblk->trans_lock);
}