]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - drivers/lightnvm/rrpc.c
nvme: LightNVM support
[mirror_ubuntu-zesty-kernel.git] / drivers / lightnvm / rrpc.c
CommitLineData
ae1519ec
MB
1/*
2 * Copyright (C) 2015 IT University of Copenhagen
3 * Initial release: Matias Bjorling <m@bjorling.me>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License version
7 * 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs.
15 */
16
17#include "rrpc.h"
18
/* Slab caches behind the gcb and request mempools; shared by all targets. */
static struct kmem_cache *rrpc_gcb_cache;
static struct kmem_cache *rrpc_rq_cache;
/* Taken for writing while the two caches above are being created. */
static DECLARE_RWSEM(rrpc_lock);

/* Defined further down; the GC path needs it before its definition. */
static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
				struct nvm_rq *rqd, unsigned long flags);
24
/*
 * Iterate over every LUN of an rrpc target. @i runs from 0 up to
 * (rrpc)->nr_luns - 1 and @rlun points at (rrpc)->luns[i] in the loop body.
 * Every argument is parenthesised so that arbitrary lvalue expressions can
 * be passed without precedence surprises.
 */
#define rrpc_for_each_lun(rrpc, rlun, i) \
	for ((i) = 0, (rlun) = &(rrpc)->luns[0]; \
		(i) < (rrpc)->nr_luns; (i)++, (rlun) = &(rrpc)->luns[(i)])
28
29static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
30{
31 struct rrpc_block *rblk = a->rblk;
32 unsigned int pg_offset;
33
34 lockdep_assert_held(&rrpc->rev_lock);
35
36 if (a->addr == ADDR_EMPTY || !rblk)
37 return;
38
39 spin_lock(&rblk->lock);
40
41 div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset);
42 WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
43 rblk->nr_invalid_pages++;
44
45 spin_unlock(&rblk->lock);
46
47 rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
48}
49
50static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
51 unsigned len)
52{
53 sector_t i;
54
55 spin_lock(&rrpc->rev_lock);
56 for (i = slba; i < slba + len; i++) {
57 struct rrpc_addr *gp = &rrpc->trans_map[i];
58
59 rrpc_page_invalidate(rrpc, gp);
60 gp->rblk = NULL;
61 }
62 spin_unlock(&rrpc->rev_lock);
63}
64
65static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
66 sector_t laddr, unsigned int pages)
67{
68 struct nvm_rq *rqd;
69 struct rrpc_inflight_rq *inf;
70
71 rqd = mempool_alloc(rrpc->rq_pool, GFP_ATOMIC);
72 if (!rqd)
73 return ERR_PTR(-ENOMEM);
74
75 inf = rrpc_get_inflight_rq(rqd);
76 if (rrpc_lock_laddr(rrpc, laddr, pages, inf)) {
77 mempool_free(rqd, rrpc->rq_pool);
78 return NULL;
79 }
80
81 return rqd;
82}
83
84static void rrpc_inflight_laddr_release(struct rrpc *rrpc, struct nvm_rq *rqd)
85{
86 struct rrpc_inflight_rq *inf = rrpc_get_inflight_rq(rqd);
87
88 rrpc_unlock_laddr(rrpc, inf);
89
90 mempool_free(rqd, rrpc->rq_pool);
91}
92
93static void rrpc_discard(struct rrpc *rrpc, struct bio *bio)
94{
95 sector_t slba = bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
96 sector_t len = bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE;
97 struct nvm_rq *rqd;
98
99 do {
100 rqd = rrpc_inflight_laddr_acquire(rrpc, slba, len);
101 schedule();
102 } while (!rqd);
103
104 if (IS_ERR(rqd)) {
105 pr_err("rrpc: unable to acquire inflight IO\n");
106 bio_io_error(bio);
107 return;
108 }
109
110 rrpc_invalidate_range(rrpc, slba, len);
111 rrpc_inflight_laddr_release(rrpc, rqd);
112}
113
114static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
115{
116 return (rblk->next_page == rrpc->dev->pgs_per_blk);
117}
118
119static sector_t block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
120{
121 struct nvm_block *blk = rblk->parent;
122
123 return blk->id * rrpc->dev->pgs_per_blk;
124}
125
126static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev,
127 sector_t addr)
128{
129 struct ppa_addr paddr;
130
131 paddr.ppa = addr;
132 return __linear_to_generic_addr(dev, paddr);
133}
134
135/* requires lun->lock taken */
136static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *rblk)
137{
138 struct rrpc *rrpc = rlun->rrpc;
139
140 BUG_ON(!rblk);
141
142 if (rlun->cur) {
143 spin_lock(&rlun->cur->lock);
144 WARN_ON(!block_is_full(rrpc, rlun->cur));
145 spin_unlock(&rlun->cur->lock);
146 }
147 rlun->cur = rblk;
148}
149
150static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
151 unsigned long flags)
152{
153 struct nvm_block *blk;
154 struct rrpc_block *rblk;
155
156 blk = nvm_get_blk(rrpc->dev, rlun->parent, 0);
157 if (!blk)
158 return NULL;
159
160 rblk = &rlun->blocks[blk->id];
161 blk->priv = rblk;
162
163 bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk);
164 rblk->next_page = 0;
165 rblk->nr_invalid_pages = 0;
166 atomic_set(&rblk->data_cmnt_size, 0);
167
168 return rblk;
169}
170
171static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
172{
173 nvm_put_blk(rrpc->dev, rblk->parent);
174}
175
176static struct rrpc_lun *get_next_lun(struct rrpc *rrpc)
177{
178 int next = atomic_inc_return(&rrpc->next_lun);
179
180 return &rrpc->luns[next % rrpc->nr_luns];
181}
182
183static void rrpc_gc_kick(struct rrpc *rrpc)
184{
185 struct rrpc_lun *rlun;
186 unsigned int i;
187
188 for (i = 0; i < rrpc->nr_luns; i++) {
189 rlun = &rrpc->luns[i];
190 queue_work(rrpc->krqd_wq, &rlun->ws_gc);
191 }
192}
193
194/*
195 * timed GC every interval.
196 */
197static void rrpc_gc_timer(unsigned long data)
198{
199 struct rrpc *rrpc = (struct rrpc *)data;
200
201 rrpc_gc_kick(rrpc);
202 mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));
203}
204
205static void rrpc_end_sync_bio(struct bio *bio)
206{
207 struct completion *waiting = bio->bi_private;
208
209 if (bio->bi_error)
210 pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
211
212 complete(waiting);
213}
214
215/*
216 * rrpc_move_valid_pages -- migrate live data off the block
217 * @rrpc: the 'rrpc' structure
218 * @block: the block from which to migrate live pages
219 *
220 * Description:
221 * GC algorithms may call this function to migrate remaining live
222 * pages off the block prior to erasing it. This function blocks
223 * further execution until the operation is complete.
224 */
225static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
226{
227 struct request_queue *q = rrpc->dev->q;
228 struct rrpc_rev_addr *rev;
229 struct nvm_rq *rqd;
230 struct bio *bio;
231 struct page *page;
232 int slot;
233 int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
234 sector_t phys_addr;
235 DECLARE_COMPLETION_ONSTACK(wait);
236
237 if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
238 return 0;
239
240 bio = bio_alloc(GFP_NOIO, 1);
241 if (!bio) {
242 pr_err("nvm: could not alloc bio to gc\n");
243 return -ENOMEM;
244 }
245
246 page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
247
248 while ((slot = find_first_zero_bit(rblk->invalid_pages,
249 nr_pgs_per_blk)) < nr_pgs_per_blk) {
250
251 /* Lock laddr */
252 phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;
253
254try:
255 spin_lock(&rrpc->rev_lock);
256 /* Get logical address from physical to logical table */
257 rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
258 /* already updated by previous regular write */
259 if (rev->addr == ADDR_EMPTY) {
260 spin_unlock(&rrpc->rev_lock);
261 continue;
262 }
263
264 rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
265 if (IS_ERR_OR_NULL(rqd)) {
266 spin_unlock(&rrpc->rev_lock);
267 schedule();
268 goto try;
269 }
270
271 spin_unlock(&rrpc->rev_lock);
272
273 /* Perform read to do GC */
274 bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
275 bio->bi_rw = READ;
276 bio->bi_private = &wait;
277 bio->bi_end_io = rrpc_end_sync_bio;
278
279 /* TODO: may fail when EXP_PG_SIZE > PAGE_SIZE */
280 bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
281
282 if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
283 pr_err("rrpc: gc read failed.\n");
284 rrpc_inflight_laddr_release(rrpc, rqd);
285 goto finished;
286 }
287 wait_for_completion_io(&wait);
288
289 bio_reset(bio);
290 reinit_completion(&wait);
291
292 bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
293 bio->bi_rw = WRITE;
294 bio->bi_private = &wait;
295 bio->bi_end_io = rrpc_end_sync_bio;
296
297 bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
298
299 /* turn the command around and write the data back to a new
300 * address
301 */
302 if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
303 pr_err("rrpc: gc write failed.\n");
304 rrpc_inflight_laddr_release(rrpc, rqd);
305 goto finished;
306 }
307 wait_for_completion_io(&wait);
308
309 rrpc_inflight_laddr_release(rrpc, rqd);
310
311 bio_reset(bio);
312 }
313
314finished:
315 mempool_free(page, rrpc->page_pool);
316 bio_put(bio);
317
318 if (!bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) {
319 pr_err("nvm: failed to garbage collect block\n");
320 return -EIO;
321 }
322
323 return 0;
324}
325
326static void rrpc_block_gc(struct work_struct *work)
327{
328 struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
329 ws_gc);
330 struct rrpc *rrpc = gcb->rrpc;
331 struct rrpc_block *rblk = gcb->rblk;
332 struct nvm_dev *dev = rrpc->dev;
333
334 pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
335
336 if (rrpc_move_valid_pages(rrpc, rblk))
337 goto done;
338
339 nvm_erase_blk(dev, rblk->parent);
340 rrpc_put_blk(rrpc, rblk);
341done:
342 mempool_free(gcb, rrpc->gcb_pool);
343}
344
345/* the block with highest number of invalid pages, will be in the beginning
346 * of the list
347 */
348static struct rrpc_block *rblock_max_invalid(struct rrpc_block *ra,
349 struct rrpc_block *rb)
350{
351 if (ra->nr_invalid_pages == rb->nr_invalid_pages)
352 return ra;
353
354 return (ra->nr_invalid_pages < rb->nr_invalid_pages) ? rb : ra;
355}
356
357/* linearly find the block with highest number of invalid pages
358 * requires lun->lock
359 */
360static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun)
361{
362 struct list_head *prio_list = &rlun->prio_list;
363 struct rrpc_block *rblock, *max;
364
365 BUG_ON(list_empty(prio_list));
366
367 max = list_first_entry(prio_list, struct rrpc_block, prio);
368 list_for_each_entry(rblock, prio_list, prio)
369 max = rblock_max_invalid(max, rblock);
370
371 return max;
372}
373
374static void rrpc_lun_gc(struct work_struct *work)
375{
376 struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc);
377 struct rrpc *rrpc = rlun->rrpc;
378 struct nvm_lun *lun = rlun->parent;
379 struct rrpc_block_gc *gcb;
380 unsigned int nr_blocks_need;
381
382 nr_blocks_need = rrpc->dev->blks_per_lun / GC_LIMIT_INVERSE;
383
384 if (nr_blocks_need < rrpc->nr_luns)
385 nr_blocks_need = rrpc->nr_luns;
386
387 spin_lock(&lun->lock);
388 while (nr_blocks_need > lun->nr_free_blocks &&
389 !list_empty(&rlun->prio_list)) {
390 struct rrpc_block *rblock = block_prio_find_max(rlun);
391 struct nvm_block *block = rblock->parent;
392
393 if (!rblock->nr_invalid_pages)
394 break;
395
396 list_del_init(&rblock->prio);
397
398 BUG_ON(!block_is_full(rrpc, rblock));
399
400 pr_debug("rrpc: selected block '%lu' for GC\n", block->id);
401
402 gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
403 if (!gcb)
404 break;
405
406 gcb->rrpc = rrpc;
407 gcb->rblk = rblock;
408 INIT_WORK(&gcb->ws_gc, rrpc_block_gc);
409
410 queue_work(rrpc->kgc_wq, &gcb->ws_gc);
411
412 nr_blocks_need--;
413 }
414 spin_unlock(&lun->lock);
415
416 /* TODO: Hint that request queue can be started again */
417}
418
419static void rrpc_gc_queue(struct work_struct *work)
420{
421 struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
422 ws_gc);
423 struct rrpc *rrpc = gcb->rrpc;
424 struct rrpc_block *rblk = gcb->rblk;
425 struct nvm_lun *lun = rblk->parent->lun;
426 struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
427
428 spin_lock(&rlun->lock);
429 list_add_tail(&rblk->prio, &rlun->prio_list);
430 spin_unlock(&rlun->lock);
431
432 mempool_free(gcb, rrpc->gcb_pool);
433 pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
434 rblk->parent->id);
435}
436
437static const struct block_device_operations rrpc_fops = {
438 .owner = THIS_MODULE,
439};
440
441static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
442{
443 unsigned int i;
444 struct rrpc_lun *rlun, *max_free;
445
446 if (!is_gc)
447 return get_next_lun(rrpc);
448
449 /* during GC, we don't care about RR, instead we want to make
450 * sure that we maintain evenness between the block luns.
451 */
452 max_free = &rrpc->luns[0];
453 /* prevent GC-ing lun from devouring pages of a lun with
454 * little free blocks. We don't take the lock as we only need an
455 * estimate.
456 */
457 rrpc_for_each_lun(rrpc, rlun, i) {
458 if (rlun->parent->nr_free_blocks >
459 max_free->parent->nr_free_blocks)
460 max_free = rlun;
461 }
462
463 return max_free;
464}
465
466static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
467 struct rrpc_block *rblk, sector_t paddr)
468{
469 struct rrpc_addr *gp;
470 struct rrpc_rev_addr *rev;
471
472 BUG_ON(laddr >= rrpc->nr_pages);
473
474 gp = &rrpc->trans_map[laddr];
475 spin_lock(&rrpc->rev_lock);
476 if (gp->rblk)
477 rrpc_page_invalidate(rrpc, gp);
478
479 gp->addr = paddr;
480 gp->rblk = rblk;
481
482 rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
483 rev->addr = laddr;
484 spin_unlock(&rrpc->rev_lock);
485
486 return gp;
487}
488
489static sector_t rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
490{
491 sector_t addr = ADDR_EMPTY;
492
493 spin_lock(&rblk->lock);
494 if (block_is_full(rrpc, rblk))
495 goto out;
496
497 addr = block_to_addr(rrpc, rblk) + rblk->next_page;
498
499 rblk->next_page++;
500out:
501 spin_unlock(&rblk->lock);
502 return addr;
503}
504
505/* Simple round-robin Logical to physical address translation.
506 *
507 * Retrieve the mapping using the active append point. Then update the ap for
508 * the next write to the disk.
509 *
510 * Returns rrpc_addr with the physical address and block. Remember to return to
511 * rrpc->addr_cache when request is finished.
512 */
513static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr,
514 int is_gc)
515{
516 struct rrpc_lun *rlun;
517 struct rrpc_block *rblk;
518 struct nvm_lun *lun;
519 sector_t paddr;
520
521 rlun = rrpc_get_lun_rr(rrpc, is_gc);
522 lun = rlun->parent;
523
524 if (!is_gc && lun->nr_free_blocks < rrpc->nr_luns * 4)
525 return NULL;
526
527 spin_lock(&rlun->lock);
528
529 rblk = rlun->cur;
530retry:
531 paddr = rrpc_alloc_addr(rrpc, rblk);
532
533 if (paddr == ADDR_EMPTY) {
534 rblk = rrpc_get_blk(rrpc, rlun, 0);
535 if (rblk) {
536 rrpc_set_lun_cur(rlun, rblk);
537 goto retry;
538 }
539
540 if (is_gc) {
541 /* retry from emergency gc block */
542 paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur);
543 if (paddr == ADDR_EMPTY) {
544 rblk = rrpc_get_blk(rrpc, rlun, 1);
545 if (!rblk) {
546 pr_err("rrpc: no more blocks");
547 goto err;
548 }
549
550 rlun->gc_cur = rblk;
551 paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur);
552 }
553 rblk = rlun->gc_cur;
554 }
555 }
556
557 spin_unlock(&rlun->lock);
558 return rrpc_update_map(rrpc, laddr, rblk, paddr);
559err:
560 spin_unlock(&rlun->lock);
561 return NULL;
562}
563
564static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
565{
566 struct rrpc_block_gc *gcb;
567
568 gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
569 if (!gcb) {
570 pr_err("rrpc: unable to queue block for gc.");
571 return;
572 }
573
574 gcb->rrpc = rrpc;
575 gcb->rblk = rblk;
576
577 INIT_WORK(&gcb->ws_gc, rrpc_gc_queue);
578 queue_work(rrpc->kgc_wq, &gcb->ws_gc);
579}
580
581static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
582 sector_t laddr, uint8_t npages)
583{
584 struct rrpc_addr *p;
585 struct rrpc_block *rblk;
586 struct nvm_lun *lun;
587 int cmnt_size, i;
588
589 for (i = 0; i < npages; i++) {
590 p = &rrpc->trans_map[laddr + i];
591 rblk = p->rblk;
592 lun = rblk->parent->lun;
593
594 cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
595 if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
596 rrpc_run_gc(rrpc, rblk);
597 }
598}
599
600static int rrpc_end_io(struct nvm_rq *rqd, int error)
601{
602 struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
603 struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
604 uint8_t npages = rqd->nr_pages;
605 sector_t laddr = rrpc_get_laddr(rqd->bio) - npages;
606
607 if (bio_data_dir(rqd->bio) == WRITE)
608 rrpc_end_io_write(rrpc, rrqd, laddr, npages);
609
610 if (rrqd->flags & NVM_IOTYPE_GC)
611 return 0;
612
613 rrpc_unlock_rq(rrpc, rqd);
614 bio_put(rqd->bio);
615
616 if (npages > 1)
617 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
618 if (rqd->metadata)
619 nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata);
620
621 mempool_free(rqd, rrpc->rq_pool);
622
623 return 0;
624}
625
626static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
627 struct nvm_rq *rqd, unsigned long flags, int npages)
628{
629 struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
630 struct rrpc_addr *gp;
631 sector_t laddr = rrpc_get_laddr(bio);
632 int is_gc = flags & NVM_IOTYPE_GC;
633 int i;
634
635 if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
636 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
637 return NVM_IO_REQUEUE;
638 }
639
640 for (i = 0; i < npages; i++) {
641 /* We assume that mapping occurs at 4KB granularity */
642 BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_pages));
643 gp = &rrpc->trans_map[laddr + i];
644
645 if (gp->rblk) {
646 rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
647 gp->addr);
648 } else {
649 BUG_ON(is_gc);
650 rrpc_unlock_laddr(rrpc, r);
651 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
652 rqd->dma_ppa_list);
653 return NVM_IO_DONE;
654 }
655 }
656
657 rqd->opcode = NVM_OP_HBREAD;
658
659 return NVM_IO_OK;
660}
661
662static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
663 unsigned long flags)
664{
665 struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
666 int is_gc = flags & NVM_IOTYPE_GC;
667 sector_t laddr = rrpc_get_laddr(bio);
668 struct rrpc_addr *gp;
669
670 if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
671 return NVM_IO_REQUEUE;
672
673 BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_pages));
674 gp = &rrpc->trans_map[laddr];
675
676 if (gp->rblk) {
677 rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
678 } else {
679 BUG_ON(is_gc);
680 rrpc_unlock_rq(rrpc, rqd);
681 return NVM_IO_DONE;
682 }
683
684 rqd->opcode = NVM_OP_HBREAD;
685 rrqd->addr = gp;
686
687 return NVM_IO_OK;
688}
689
690static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
691 struct nvm_rq *rqd, unsigned long flags, int npages)
692{
693 struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
694 struct rrpc_addr *p;
695 sector_t laddr = rrpc_get_laddr(bio);
696 int is_gc = flags & NVM_IOTYPE_GC;
697 int i;
698
699 if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
700 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
701 return NVM_IO_REQUEUE;
702 }
703
704 for (i = 0; i < npages; i++) {
705 /* We assume that mapping occurs at 4KB granularity */
706 p = rrpc_map_page(rrpc, laddr + i, is_gc);
707 if (!p) {
708 BUG_ON(is_gc);
709 rrpc_unlock_laddr(rrpc, r);
710 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
711 rqd->dma_ppa_list);
712 rrpc_gc_kick(rrpc);
713 return NVM_IO_REQUEUE;
714 }
715
716 rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
717 p->addr);
718 }
719
720 rqd->opcode = NVM_OP_HBWRITE;
721
722 return NVM_IO_OK;
723}
724
725static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
726 struct nvm_rq *rqd, unsigned long flags)
727{
728 struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
729 struct rrpc_addr *p;
730 int is_gc = flags & NVM_IOTYPE_GC;
731 sector_t laddr = rrpc_get_laddr(bio);
732
733 if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
734 return NVM_IO_REQUEUE;
735
736 p = rrpc_map_page(rrpc, laddr, is_gc);
737 if (!p) {
738 BUG_ON(is_gc);
739 rrpc_unlock_rq(rrpc, rqd);
740 rrpc_gc_kick(rrpc);
741 return NVM_IO_REQUEUE;
742 }
743
744 rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, p->addr);
745 rqd->opcode = NVM_OP_HBWRITE;
746 rrqd->addr = p;
747
748 return NVM_IO_OK;
749}
750
751static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio,
752 struct nvm_rq *rqd, unsigned long flags, uint8_t npages)
753{
754 if (npages > 1) {
755 rqd->ppa_list = nvm_dev_dma_alloc(rrpc->dev, GFP_KERNEL,
756 &rqd->dma_ppa_list);
757 if (!rqd->ppa_list) {
758 pr_err("rrpc: not able to allocate ppa list\n");
759 return NVM_IO_ERR;
760 }
761
762 if (bio_rw(bio) == WRITE)
763 return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags,
764 npages);
765
766 return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages);
767 }
768
769 if (bio_rw(bio) == WRITE)
770 return rrpc_write_rq(rrpc, bio, rqd, flags);
771
772 return rrpc_read_rq(rrpc, bio, rqd, flags);
773}
774
775static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
776 struct nvm_rq *rqd, unsigned long flags)
777{
778 int err;
779 struct rrpc_rq *rrq = nvm_rq_to_pdu(rqd);
780 uint8_t nr_pages = rrpc_get_pages(bio);
781 int bio_size = bio_sectors(bio) << 9;
782
783 if (bio_size < rrpc->dev->sec_size)
784 return NVM_IO_ERR;
785 else if (bio_size > rrpc->dev->max_rq_size)
786 return NVM_IO_ERR;
787
788 err = rrpc_setup_rq(rrpc, bio, rqd, flags, nr_pages);
789 if (err)
790 return err;
791
792 bio_get(bio);
793 rqd->bio = bio;
794 rqd->ins = &rrpc->instance;
795 rqd->nr_pages = nr_pages;
796 rrq->flags = flags;
797
798 err = nvm_submit_io(rrpc->dev, rqd);
799 if (err) {
800 pr_err("rrpc: I/O submission failed: %d\n", err);
801 return NVM_IO_ERR;
802 }
803
804 return NVM_IO_OK;
805}
806
807static void rrpc_make_rq(struct request_queue *q, struct bio *bio)
808{
809 struct rrpc *rrpc = q->queuedata;
810 struct nvm_rq *rqd;
811 int err;
812
813 if (bio->bi_rw & REQ_DISCARD) {
814 rrpc_discard(rrpc, bio);
815 return;
816 }
817
818 rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
819 if (!rqd) {
820 pr_err_ratelimited("rrpc: not able to queue bio.");
821 bio_io_error(bio);
822 return;
823 }
824 memset(rqd, 0, sizeof(struct nvm_rq));
825
826 err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE);
827 switch (err) {
828 case NVM_IO_OK:
829 return;
830 case NVM_IO_ERR:
831 bio_io_error(bio);
832 break;
833 case NVM_IO_DONE:
834 bio_endio(bio);
835 break;
836 case NVM_IO_REQUEUE:
837 spin_lock(&rrpc->bio_lock);
838 bio_list_add(&rrpc->requeue_bios, bio);
839 spin_unlock(&rrpc->bio_lock);
840 queue_work(rrpc->kgc_wq, &rrpc->ws_requeue);
841 break;
842 }
843
844 mempool_free(rqd, rrpc->rq_pool);
845}
846
847static void rrpc_requeue(struct work_struct *work)
848{
849 struct rrpc *rrpc = container_of(work, struct rrpc, ws_requeue);
850 struct bio_list bios;
851 struct bio *bio;
852
853 bio_list_init(&bios);
854
855 spin_lock(&rrpc->bio_lock);
856 bio_list_merge(&bios, &rrpc->requeue_bios);
857 bio_list_init(&rrpc->requeue_bios);
858 spin_unlock(&rrpc->bio_lock);
859
860 while ((bio = bio_list_pop(&bios)))
861 rrpc_make_rq(rrpc->disk->queue, bio);
862}
863
864static void rrpc_gc_free(struct rrpc *rrpc)
865{
866 struct rrpc_lun *rlun;
867 int i;
868
869 if (rrpc->krqd_wq)
870 destroy_workqueue(rrpc->krqd_wq);
871
872 if (rrpc->kgc_wq)
873 destroy_workqueue(rrpc->kgc_wq);
874
875 if (!rrpc->luns)
876 return;
877
878 for (i = 0; i < rrpc->nr_luns; i++) {
879 rlun = &rrpc->luns[i];
880
881 if (!rlun->blocks)
882 break;
883 vfree(rlun->blocks);
884 }
885}
886
887static int rrpc_gc_init(struct rrpc *rrpc)
888{
889 rrpc->krqd_wq = alloc_workqueue("rrpc-lun", WQ_MEM_RECLAIM|WQ_UNBOUND,
890 rrpc->nr_luns);
891 if (!rrpc->krqd_wq)
892 return -ENOMEM;
893
894 rrpc->kgc_wq = alloc_workqueue("rrpc-bg", WQ_MEM_RECLAIM, 1);
895 if (!rrpc->kgc_wq)
896 return -ENOMEM;
897
898 setup_timer(&rrpc->gc_timer, rrpc_gc_timer, (unsigned long)rrpc);
899
900 return 0;
901}
902
903static void rrpc_map_free(struct rrpc *rrpc)
904{
905 vfree(rrpc->rev_trans_map);
906 vfree(rrpc->trans_map);
907}
908
909static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
910{
911 struct rrpc *rrpc = (struct rrpc *)private;
912 struct nvm_dev *dev = rrpc->dev;
913 struct rrpc_addr *addr = rrpc->trans_map + slba;
914 struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
915 sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
916 u64 elba = slba + nlb;
917 u64 i;
918
919 if (unlikely(elba > dev->total_pages)) {
920 pr_err("nvm: L2P data from device is out of bounds!\n");
921 return -EINVAL;
922 }
923
924 for (i = 0; i < nlb; i++) {
925 u64 pba = le64_to_cpu(entries[i]);
926 /* LNVM treats address-spaces as silos, LBA and PBA are
927 * equally large and zero-indexed.
928 */
929 if (unlikely(pba >= max_pages && pba != U64_MAX)) {
930 pr_err("nvm: L2P data entry is out of bounds!\n");
931 return -EINVAL;
932 }
933
934 /* Address zero is a special one. The first page on a disk is
935 * protected. As it often holds internal device boot
936 * information.
937 */
938 if (!pba)
939 continue;
940
941 addr[i].addr = pba;
942 raddr[pba].addr = slba + i;
943 }
944
945 return 0;
946}
947
948static int rrpc_map_init(struct rrpc *rrpc)
949{
950 struct nvm_dev *dev = rrpc->dev;
951 sector_t i;
952 int ret;
953
954 rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_pages);
955 if (!rrpc->trans_map)
956 return -ENOMEM;
957
958 rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
959 * rrpc->nr_pages);
960 if (!rrpc->rev_trans_map)
961 return -ENOMEM;
962
963 for (i = 0; i < rrpc->nr_pages; i++) {
964 struct rrpc_addr *p = &rrpc->trans_map[i];
965 struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
966
967 p->addr = ADDR_EMPTY;
968 r->addr = ADDR_EMPTY;
969 }
970
971 if (!dev->ops->get_l2p_tbl)
972 return 0;
973
974 /* Bring up the mapping table from device */
975 ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
976 rrpc_l2p_update, rrpc);
977 if (ret) {
978 pr_err("nvm: rrpc: could not read L2P table.\n");
979 return -EINVAL;
980 }
981
982 return 0;
983}
984
985
986/* Minimum pages needed within a lun */
987#define PAGE_POOL_SIZE 16
988#define ADDR_POOL_SIZE 64
989
990static int rrpc_core_init(struct rrpc *rrpc)
991{
992 down_write(&rrpc_lock);
993 if (!rrpc_gcb_cache) {
994 rrpc_gcb_cache = kmem_cache_create("rrpc_gcb",
995 sizeof(struct rrpc_block_gc), 0, 0, NULL);
996 if (!rrpc_gcb_cache) {
997 up_write(&rrpc_lock);
998 return -ENOMEM;
999 }
1000
1001 rrpc_rq_cache = kmem_cache_create("rrpc_rq",
1002 sizeof(struct nvm_rq) + sizeof(struct rrpc_rq),
1003 0, 0, NULL);
1004 if (!rrpc_rq_cache) {
1005 kmem_cache_destroy(rrpc_gcb_cache);
1006 up_write(&rrpc_lock);
1007 return -ENOMEM;
1008 }
1009 }
1010 up_write(&rrpc_lock);
1011
1012 rrpc->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
1013 if (!rrpc->page_pool)
1014 return -ENOMEM;
1015
1016 rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->nr_luns,
1017 rrpc_gcb_cache);
1018 if (!rrpc->gcb_pool)
1019 return -ENOMEM;
1020
1021 rrpc->rq_pool = mempool_create_slab_pool(64, rrpc_rq_cache);
1022 if (!rrpc->rq_pool)
1023 return -ENOMEM;
1024
1025 spin_lock_init(&rrpc->inflights.lock);
1026 INIT_LIST_HEAD(&rrpc->inflights.reqs);
1027
1028 return 0;
1029}
1030
1031static void rrpc_core_free(struct rrpc *rrpc)
1032{
1033 mempool_destroy(rrpc->page_pool);
1034 mempool_destroy(rrpc->gcb_pool);
1035 mempool_destroy(rrpc->rq_pool);
1036}
1037
1038static void rrpc_luns_free(struct rrpc *rrpc)
1039{
1040 kfree(rrpc->luns);
1041}
1042
1043static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
1044{
1045 struct nvm_dev *dev = rrpc->dev;
1046 struct rrpc_lun *rlun;
1047 int i, j;
1048
1049 spin_lock_init(&rrpc->rev_lock);
1050
1051 rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
1052 GFP_KERNEL);
1053 if (!rrpc->luns)
1054 return -ENOMEM;
1055
1056 /* 1:1 mapping */
1057 for (i = 0; i < rrpc->nr_luns; i++) {
1058 struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
1059
1060 if (dev->pgs_per_blk >
1061 MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
1062 pr_err("rrpc: number of pages per block too high.");
1063 goto err;
1064 }
1065
1066 rlun = &rrpc->luns[i];
1067 rlun->rrpc = rrpc;
1068 rlun->parent = lun;
1069 INIT_LIST_HEAD(&rlun->prio_list);
1070 INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
1071 spin_lock_init(&rlun->lock);
1072
1073 rrpc->total_blocks += dev->blks_per_lun;
1074 rrpc->nr_pages += dev->sec_per_lun;
1075
1076 rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
1077 rrpc->dev->blks_per_lun);
1078 if (!rlun->blocks)
1079 goto err;
1080
1081 for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
1082 struct rrpc_block *rblk = &rlun->blocks[j];
1083 struct nvm_block *blk = &lun->blocks[j];
1084
1085 rblk->parent = blk;
1086 INIT_LIST_HEAD(&rblk->prio);
1087 spin_lock_init(&rblk->lock);
1088 }
1089 }
1090
1091 return 0;
1092err:
1093 return -ENOMEM;
1094}
1095
/*
 * Tear down a target: GC workqueues and block arrays, translation maps,
 * mempools, the LUN array and finally the rrpc structure itself.
 */
static void rrpc_free(struct rrpc *rrpc)
{
	/* rrpc_gc_free() walks rrpc->luns, so it runs before rrpc_luns_free() */
	rrpc_gc_free(rrpc);
	rrpc_map_free(rrpc);
	rrpc_core_free(rrpc);
	rrpc_luns_free(rrpc);

	kfree(rrpc);
}
1105
1106static void rrpc_exit(void *private)
1107{
1108 struct rrpc *rrpc = private;
1109
1110 del_timer(&rrpc->gc_timer);
1111
1112 flush_workqueue(rrpc->krqd_wq);
1113 flush_workqueue(rrpc->kgc_wq);
1114
1115 rrpc_free(rrpc);
1116}
1117
1118static sector_t rrpc_capacity(void *private)
1119{
1120 struct rrpc *rrpc = private;
1121 struct nvm_dev *dev = rrpc->dev;
1122 sector_t reserved, provisioned;
1123
1124 /* cur, gc, and two emergency blocks for each lun */
1125 reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4;
1126 provisioned = rrpc->nr_pages - reserved;
1127
1128 if (reserved > rrpc->nr_pages) {
1129 pr_err("rrpc: not enough space available to expose storage.\n");
1130 return 0;
1131 }
1132
1133 sector_div(provisioned, 10);
1134 return provisioned * 9 * NR_PHY_IN_LOG;
1135}
1136
1137/*
1138 * Looks up the logical address from reverse trans map and check if its valid by
1139 * comparing the logical to physical address with the physical address.
1140 * Returns 0 on free, otherwise 1 if in use
1141 */
1142static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
1143{
1144 struct nvm_dev *dev = rrpc->dev;
1145 int offset;
1146 struct rrpc_addr *laddr;
1147 sector_t paddr, pladdr;
1148
1149 for (offset = 0; offset < dev->pgs_per_blk; offset++) {
1150 paddr = block_to_addr(rrpc, rblk) + offset;
1151
1152 pladdr = rrpc->rev_trans_map[paddr].addr;
1153 if (pladdr == ADDR_EMPTY)
1154 continue;
1155
1156 laddr = &rrpc->trans_map[pladdr];
1157
1158 if (paddr == laddr->addr) {
1159 laddr->rblk = rblk;
1160 } else {
1161 set_bit(offset, rblk->invalid_pages);
1162 rblk->nr_invalid_pages++;
1163 }
1164 }
1165}
1166
1167static int rrpc_blocks_init(struct rrpc *rrpc)
1168{
1169 struct rrpc_lun *rlun;
1170 struct rrpc_block *rblk;
1171 int lun_iter, blk_iter;
1172
1173 for (lun_iter = 0; lun_iter < rrpc->nr_luns; lun_iter++) {
1174 rlun = &rrpc->luns[lun_iter];
1175
1176 for (blk_iter = 0; blk_iter < rrpc->dev->blks_per_lun;
1177 blk_iter++) {
1178 rblk = &rlun->blocks[blk_iter];
1179 rrpc_block_map_update(rrpc, rblk);
1180 }
1181 }
1182
1183 return 0;
1184}
1185
1186static int rrpc_luns_configure(struct rrpc *rrpc)
1187{
1188 struct rrpc_lun *rlun;
1189 struct rrpc_block *rblk;
1190 int i;
1191
1192 for (i = 0; i < rrpc->nr_luns; i++) {
1193 rlun = &rrpc->luns[i];
1194
1195 rblk = rrpc_get_blk(rrpc, rlun, 0);
1196 if (!rblk)
1197 return -EINVAL;
1198
1199 rrpc_set_lun_cur(rlun, rblk);
1200
1201 /* Emergency gc block */
1202 rblk = rrpc_get_blk(rrpc, rlun, 1);
1203 if (!rblk)
1204 return -EINVAL;
1205 rlun->gc_cur = rblk;
1206 }
1207
1208 return 0;
1209}
1210
1211static struct nvm_tgt_type tt_rrpc;
1212
1213static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
1214 int lun_begin, int lun_end)
1215{
1216 struct request_queue *bqueue = dev->q;
1217 struct request_queue *tqueue = tdisk->queue;
1218 struct rrpc *rrpc;
1219 int ret;
1220
1221 if (!(dev->identity.dom & NVM_RSP_L2P)) {
1222 pr_err("nvm: rrpc: device does not support l2p (%x)\n",
1223 dev->identity.dom);
1224 return ERR_PTR(-EINVAL);
1225 }
1226
1227 rrpc = kzalloc(sizeof(struct rrpc), GFP_KERNEL);
1228 if (!rrpc)
1229 return ERR_PTR(-ENOMEM);
1230
1231 rrpc->instance.tt = &tt_rrpc;
1232 rrpc->dev = dev;
1233 rrpc->disk = tdisk;
1234
1235 bio_list_init(&rrpc->requeue_bios);
1236 spin_lock_init(&rrpc->bio_lock);
1237 INIT_WORK(&rrpc->ws_requeue, rrpc_requeue);
1238
1239 rrpc->nr_luns = lun_end - lun_begin + 1;
1240
1241 /* simple round-robin strategy */
1242 atomic_set(&rrpc->next_lun, -1);
1243
1244 ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
1245 if (ret) {
1246 pr_err("nvm: rrpc: could not initialize luns\n");
1247 goto err;
1248 }
1249
1250 rrpc->poffset = dev->sec_per_lun * lun_begin;
1251 rrpc->lun_offset = lun_begin;
1252
1253 ret = rrpc_core_init(rrpc);
1254 if (ret) {
1255 pr_err("nvm: rrpc: could not initialize core\n");
1256 goto err;
1257 }
1258
1259 ret = rrpc_map_init(rrpc);
1260 if (ret) {
1261 pr_err("nvm: rrpc: could not initialize maps\n");
1262 goto err;
1263 }
1264
1265 ret = rrpc_blocks_init(rrpc);
1266 if (ret) {
1267 pr_err("nvm: rrpc: could not initialize state for blocks\n");
1268 goto err;
1269 }
1270
1271 ret = rrpc_luns_configure(rrpc);
1272 if (ret) {
1273 pr_err("nvm: rrpc: not enough blocks available in LUNs.\n");
1274 goto err;
1275 }
1276
1277 ret = rrpc_gc_init(rrpc);
1278 if (ret) {
1279 pr_err("nvm: rrpc: could not initialize gc\n");
1280 goto err;
1281 }
1282
1283 /* inherit the size from the underlying device */
1284 blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
1285 blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
1286
1287 pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n",
1288 rrpc->nr_luns, (unsigned long long)rrpc->nr_pages);
1289
1290 mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));
1291
1292 return rrpc;
1293err:
1294 rrpc_free(rrpc);
1295 return ERR_PTR(ret);
1296}
1297
1298/* round robin, page-based FTL, and cost-based GC */
1299static struct nvm_tgt_type tt_rrpc = {
1300 .name = "rrpc",
1301 .version = {1, 0, 0},
1302
1303 .make_rq = rrpc_make_rq,
1304 .capacity = rrpc_capacity,
1305 .end_io = rrpc_end_io,
1306
1307 .init = rrpc_init,
1308 .exit = rrpc_exit,
1309};
1310
1311static int __init rrpc_module_init(void)
1312{
1313 return nvm_register_target(&tt_rrpc);
1314}
1315
1316static void rrpc_module_exit(void)
1317{
1318 nvm_unregister_target(&tt_rrpc);
1319}
1320
1321module_init(rrpc_module_init);
1322module_exit(rrpc_module_exit);
1323MODULE_LICENSE("GPL v2");
1324MODULE_DESCRIPTION("Block-Device Target for Open-Channel SSDs");