/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-write.c - pblk's write path from write buffer to media
 */

#include "pblk.h"
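
/* Per-line accounting of synced sectors; once every sector on the line has
 * been persisted, queue the work that closes the line.
 */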
static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
{
#ifdef CONFIG_NVM_DEBUG
	atomic_long_inc(&pblk->sync_writes);
#endif

	/* Counter protected by rb sync lock */
	line->left_ssecs--;
	if (!line->left_ssecs)
		pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
}
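
/* Complete the entries covered by a finished write request: account the sync
 * on each entry's line, end all user bios chained to the entry, advance the
 * write buffer's sync pointer, and release the request's resources.
 */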
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
				    struct pblk_c_ctx *c_ctx)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct bio *original_bio;
	unsigned long ret;
	int i;

	for (i = 0; i < c_ctx->nr_valid; i++) {
		struct pblk_w_ctx *w_ctx;
		struct ppa_addr p;
		struct pblk_line *line;

		w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);

		p = rqd->ppa_list[i];
		line = &pblk->lines[pblk_dev_ppa_to_line(p)];
		pblk_sync_line(pblk, line);

		while ((original_bio = bio_list_pop(&w_ctx->bios)))
			bio_endio(original_bio);
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
#endif

	ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);

	if (rqd->meta_list)
		nvm_dev_dma_free(dev->parent, rqd->meta_list,
				 rqd->dma_meta_list);

	bio_put(rqd->bio);
	pblk_free_rqd(pblk, rqd, WRITE);

	return ret;
}
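
/* Complete a request that was parked on the completion list waiting for its
 * turn in write buffer order.
 */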
static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
					   struct nvm_rq *rqd,
					   struct pblk_c_ctx *c_ctx)
{
	list_del(&c_ctx->list);
	return pblk_end_w_bio(pblk, rqd, c_ctx);
}
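
/* Completions must be processed in the order their entries sit in the write
 * buffer. If this request is not at the current sync position, park it on
 * pblk->compl_list; otherwise complete it and drain any queued requests that
 * have become contiguous with the sync pointer.
 */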
static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
				struct pblk_c_ctx *c_ctx)
{
	struct pblk_c_ctx *c, *r;
	unsigned long flags;
	unsigned long pos;

#ifdef CONFIG_NVM_DEBUG
	atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
#endif

	pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);

	pos = pblk_rb_sync_init(&pblk->rwb, &flags);
	if (pos == c_ctx->sentry) {
		pos = pblk_end_w_bio(pblk, rqd, c_ctx);

retry:
		list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
			rqd = nvm_rq_from_c_ctx(c);
			if (c->sentry == pos) {
				pos = pblk_end_queued_w_bio(pblk, rqd, c);
				goto retry;
			}
		}
	} else {
		WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
		list_add_tail(&c_ctx->list, &pblk->compl_list);
	}
	pblk_rb_sync_end(&pblk->rwb, &flags);
}

/* When a write fails, we are not sure whether the block has grown bad or a page
 * range is more susceptible to write errors. If a high number of pages fail, we
 * assume that the block is bad and we mark it accordingly. In all cases, we
 * remap and resubmit the failed entries as fast as possible; if a flush is
 * waiting on a completion, the whole stack would stall otherwise.
 */
static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
{
	void *comp_bits = &rqd->ppa_status;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	struct pblk_rec_ctx *recovery;
	struct ppa_addr *ppa_list = rqd->ppa_list;
	int nr_ppas = rqd->nr_ppas;
	unsigned int c_entries;
	int bit, ret;

	if (unlikely(nr_ppas == 1))
		ppa_list = &rqd->ppa_addr;

	recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
	if (!recovery) {
		pr_err("pblk: could not allocate recovery context\n");
		return;
	}
	INIT_LIST_HEAD(&recovery->failed);

	bit = -1;
	while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
		struct pblk_rb_entry *entry;
		struct ppa_addr ppa;

		/* Logic error: a failed bit must map to a buffered entry */
		if (bit > c_ctx->nr_valid) {
			WARN_ONCE(1, "pblk: corrupted write request\n");
			mempool_free(recovery, pblk->rec_pool);
			goto out;
		}

		ppa = ppa_list[bit];
		entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
		if (!entry) {
			pr_err("pblk: could not scan entry on write failure\n");
			mempool_free(recovery, pblk->rec_pool);
			goto out;
		}

		/* The list is filled first and emptied afterwards. No need for
		 * protecting it with a lock
		 */
		list_add_tail(&entry->index, &recovery->failed);
	}

	c_entries = find_first_bit(comp_bits, nr_ppas);
	ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
	if (ret) {
		pr_err("pblk: could not recover from write failure\n");
		mempool_free(recovery, pblk->rec_pool);
		goto out;
	}

	INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
	queue_work(pblk->kw_wq, &recovery->ws_rec);

out:
	pblk_complete_write(pblk, rqd, c_ctx);
}
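
/* End I/O callback for write requests, installed by pblk_alloc_w_rq() */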
static void pblk_end_io_write(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);

	if (rqd->error) {
		pblk_log_write_err(pblk, rqd);
		return pblk_end_w_fail(pblk, rqd);
	}
#ifdef CONFIG_NVM_DEBUG
	else
		WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
#endif

	pblk_complete_write(pblk, rqd, c_ctx);
}
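
/* Fill in the common fields of a write request and allocate its DMA-able
 * metadata region; for multi-sector requests the PPA list lives in the same
 * allocation, right after the metadata.
 */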
static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
			   unsigned int nr_secs)
{
	struct nvm_tgt_dev *dev = pblk->dev;

	/* Setup write request */
	rqd->opcode = NVM_OP_PWRITE;
	rqd->nr_ppas = nr_secs;
	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
	rqd->private = pblk;
	rqd->end_io = pblk_end_io_write;

	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&rqd->dma_meta_list);
	if (!rqd->meta_list)
		return -ENOMEM;

	if (unlikely(nr_secs == 1))
		return 0;

	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;

	return 0;
}
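
/* Map a write request onto physical addresses. If the next data line still
 * has blocks pending erase, map through the erase-aware path; should the
 * asynchronous erase submission fail, roll back the erase accounting for
 * that block.
 */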
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
			   struct pblk_c_ctx *c_ctx)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *e_line = pblk_line_get_data_next(pblk);
	struct ppa_addr erase_ppa;
	unsigned int valid = c_ctx->nr_valid;
	unsigned int padded = c_ctx->nr_padded;
	unsigned int nr_secs = valid + padded;
	unsigned long *lun_bitmap;
	int ret = 0;

	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
	if (!lun_bitmap) {
		ret = -ENOMEM;
		goto out;
	}
	c_ctx->lun_bitmap = lun_bitmap;

	ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
	if (ret) {
		kfree(lun_bitmap);
		goto out;
	}

	ppa_set_empty(&erase_ppa);
	if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
		pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
	else
		pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
							valid, &erase_ppa);

out:
	if (unlikely(e_line && !ppa_empty(erase_ppa))) {
		if (pblk_blk_erase_async(pblk, erase_ppa)) {
			struct nvm_tgt_dev *dev = pblk->dev;
			struct nvm_geo *geo = &dev->geo;
			int bit;

			/* Erase submission failed: restore erase accounting */
			atomic_inc(&e_line->left_eblks);
			bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
			up(&pblk->erase_sem);
		}
	}

	return ret;
}
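
/* Set up a write request on the recovery path; unlike the normal write path,
 * the number of sectors is already fixed in rqd->nr_ppas.
 */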
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
			struct pblk_c_ctx *c_ctx)
{
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned long *lun_bitmap;
	int ret;

	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
	if (!lun_bitmap)
		return -ENOMEM;

	c_ctx->lun_bitmap = lun_bitmap;

	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
	if (ret)
		return ret;

	pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);

	rqd->ppa_status = (u64)0;
	rqd->flags = pblk_set_progr_mode(pblk, WRITE);

	return ret;
}
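
/* Decide how many sectors to write out for the available data, honoring the
 * device's minimal write constraint and any pending flush point.
 */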
static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
				  unsigned int secs_to_flush)
{
	int secs_to_sync;

	secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);

#ifdef CONFIG_NVM_DEBUG
	if ((!secs_to_sync && secs_to_flush)
			|| (secs_to_sync < 0)
			|| (secs_to_sync > secs_avail && !secs_to_flush)) {
		pr_err("pblk: bad sector calculation (a:%d,s:%d,f:%d)\n",
				secs_avail, secs_to_sync, secs_to_flush);
	}
#endif

	return secs_to_sync;
}
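
/* Form a write request from buffered entries and submit it to the media.
 * Returns 0 if a request was submitted, 1 if there was nothing to do or the
 * submission failed and the caller should back off.
 */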
static int pblk_submit_write(struct pblk *pblk)
{
	struct bio *bio;
	struct nvm_rq *rqd;
	struct pblk_c_ctx *c_ctx;
	unsigned int pgs_read;
	unsigned int secs_avail, secs_to_sync, secs_to_com;
	unsigned int secs_to_flush;
	unsigned long pos;
	int err;

	/* If there are no sectors in the cache, flushes (bios without data)
	 * will be cleared on the cache threads
	 */
	secs_avail = pblk_rb_read_count(&pblk->rwb);
	if (!secs_avail)
		return 1;

	secs_to_flush = pblk_rb_sync_point_count(&pblk->rwb);
	if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
		return 1;

	rqd = pblk_alloc_rqd(pblk, WRITE);
	if (IS_ERR(rqd)) {
		pr_err("pblk: cannot allocate write req.\n");
		return 1;
	}
	c_ctx = nvm_rq_to_pdu(rqd);

	bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
	if (!bio) {
		pr_err("pblk: cannot allocate write bio\n");
		goto fail_free_rqd;
	}
	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	rqd->bio = bio;

	secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
	if (secs_to_sync > pblk->max_write_pgs) {
		pr_err("pblk: bad buffer sync calculation\n");
		goto fail_put_bio;
	}

	secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
	pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);

	pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
						secs_to_sync, secs_avail);
	if (!pgs_read) {
		pr_err("pblk: corrupted write bio\n");
		goto fail_put_bio;
	}

	if (c_ctx->nr_padded)
		if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
			goto fail_put_bio;

	/* Assign lbas to ppas and populate request structure */
	err = pblk_setup_w_rq(pblk, rqd, c_ctx);
	if (err) {
		pr_err("pblk: could not setup write request\n");
		goto fail_free_bio;
	}

	err = pblk_submit_io(pblk, rqd);
	if (err) {
		pr_err("pblk: I/O submission failed: %d\n", err);
		goto fail_free_bio;
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(secs_to_sync, &pblk->sub_writes);
#endif

	return 0;

fail_free_bio:
	if (c_ctx->nr_padded)
		pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
fail_put_bio:
	bio_put(bio);
fail_free_rqd:
	pblk_free_rqd(pblk, rqd, WRITE);

	return 1;
}
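
/* Write thread: drain the write buffer, sleeping whenever there is not
 * enough buffered data to form a request.
 */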
int pblk_write_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_submit_write(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}