/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "spdk/likely.h"
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk/string.h"
#include "spdk_internal/log.h"

#include "ftl_debug.h"
#include "ftl_reloc.h"
struct ftl_wptr {
	/* Owner device */
	struct spdk_ftl_dev		*dev;

	/* Band currently being written to */
	struct ftl_band			*band;

	/* Current logical block's offset */
	uint64_t			offset;

	/* Current PPA */
	struct ftl_ppa			ppa;

	/* Current erase block */
	struct ftl_chunk		*chunk;

	/* Pending IO queue */
	TAILQ_HEAD(, ftl_io)		pending_queue;

	/* List link */
	LIST_ENTRY(ftl_wptr)		list_entry;

	/*
	 * If setup in direct mode, there will be no offset or band state update after IO.
	 * The PPA is not assigned by wptr, and is instead taken directly from the request.
	 */
	bool				direct_mode;
};
struct ftl_flush {
	/* Owner device */
	struct spdk_ftl_dev		*dev;

	/* Number of batches to wait for */
	size_t				num_req;

	/* Callback */
	struct {
		spdk_ftl_fn		fn;
		void			*ctx;
	} cb;

	/* Batch bitmap */
	struct spdk_bit_array		*bmap;

	/* List link */
	LIST_ENTRY(ftl_flush)		list_entry;
};
static int
ftl_rwb_flags_from_io(const struct ftl_io *io)
{
	int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD;
	return io->flags & valid_flags;
}
static int
ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry)
{
	return entry->flags & FTL_IO_WEAK;
}
static void
ftl_wptr_free(struct ftl_wptr *wptr)
{
	free(wptr);
}
static void
ftl_remove_wptr(struct ftl_wptr *wptr)
{
	LIST_REMOVE(wptr, list_entry);
	ftl_wptr_free(wptr);
}
static void
ftl_io_cmpl_cb(void *arg, const struct spdk_nvme_cpl *status)
{
	struct ftl_io *io = arg;

	if (spdk_nvme_cpl_is_error(status)) {
		ftl_io_process_error(io, status);
	}

	ftl_trace_completion(io->dev, io, FTL_TRACE_COMPLETION_DISK);

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}
static void
ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band)
{
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			break;
		}
	}

	/* If the band already has the high_prio flag set, other writes must */
	/* have failed earlier, so it's already taken care of. */
	if (band->high_prio) {
		assert(wptr == NULL);
		return;
	}

	ftl_band_write_failed(band);
	ftl_remove_wptr(wptr);
}
static struct ftl_wptr *
ftl_wptr_from_band(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			return wptr;
		}
	}

	return NULL;
}
static void
ftl_md_write_fail(struct ftl_io *io, int status)
{
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	char buf[128];

	wptr = ftl_wptr_from_band(band);

	SPDK_ERRLOG("Metadata write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(wptr->ppa, buf, sizeof(buf)), status);

	ftl_halt_writes(io->dev, band);
}
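
/*
 * Completion path for band metadata writes: on success the band is moved to its
 * next state and, once it reaches CLOSED, the user blocks it held are credited
 * back to the non-volatile cache accounting (clamped to the cache bdev size) and
 * the write pointer is removed.
 */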
static void
ftl_md_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	struct ftl_wptr *wptr;
	struct spdk_bdev *bdev;

	wptr = ftl_wptr_from_band(io->band);

	if (status) {
		ftl_md_write_fail(io, status);
		return;
	}

	ftl_band_set_next_state(io->band);
	if (io->band->state == FTL_BAND_STATE_CLOSED) {
		if (nv_cache->bdev_desc) {
			bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);

			pthread_spin_lock(&nv_cache->lock);
			nv_cache->num_available += ftl_band_user_lbks(io->band);

			if (spdk_unlikely(nv_cache->num_available > spdk_bdev_get_num_blocks(bdev))) {
				nv_cache->num_available = spdk_bdev_get_num_blocks(bdev);
			}
			pthread_spin_unlock(&nv_cache->lock);
		}

		ftl_remove_wptr(wptr);
	}
}
static int
ftl_ppa_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	size_t lbk_cnt, max_lbks;

	assert(ftl_io_mode_ppa(io));
	assert(io->iov_pos < io->iov_cnt);

	*ppa = ftl_band_next_xfer_ppa(io->band, io->ppa, io->pos);

	assert(!ftl_ppa_invalid(*ppa));

	/* Metadata has to be read in the way it's written (jumping across */
	/* the chunks in xfer_size increments) */
	if (io->flags & FTL_IO_MD) {
		max_lbks = dev->xfer_size - (ppa->lbk % dev->xfer_size);
		lbk_cnt = spdk_min(ftl_io_iovec_len_left(io), max_lbks);
		assert(ppa->lbk / dev->xfer_size == (ppa->lbk + lbk_cnt - 1) / dev->xfer_size);
	} else {
		lbk_cnt = ftl_io_iovec_len_left(io);
	}

	return lbk_cnt;
}
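
/*
 * Band state transitions driven by the write pointer: OPENING -> OPEN while the
 * head metadata is written, FULL once user data fills the band, and CLOSING ->
 * CLOSED after the tail metadata has been persisted.
 */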
static int
ftl_wptr_close_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	ftl_band_set_state(band, FTL_BAND_STATE_CLOSING);
	band->tail_md_ppa = wptr->ppa;

	return ftl_band_write_tail_md(band, ftl_md_write_cb);
}
static int
ftl_wptr_open_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	assert(ftl_band_chunk_is_first(band, wptr->chunk));
	assert(band->lba_map.num_vld == 0);

	ftl_band_clear_lba_map(band);

	assert(band->state == FTL_BAND_STATE_PREP);
	ftl_band_set_state(band, FTL_BAND_STATE_OPENING);

	return ftl_band_write_head_md(band, ftl_md_write_cb);
}
static int
ftl_submit_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_band *band = io->band;
	struct ftl_ppa ppa = io->ppa;
	struct ftl_chunk *chunk;
	uint64_t ppa_packed;
	int rc = 0;
	size_t i;

	for (i = 0; i < io->lbk_cnt; ++i) {
		if (i != 0) {
			chunk = ftl_band_next_chunk(band, ftl_band_chunk_from_ppa(band, ppa));
			assert(chunk->state == FTL_CHUNK_STATE_CLOSED ||
			       chunk->state == FTL_CHUNK_STATE_VACANT);
			ppa = chunk->start_ppa;
		}

		assert(ppa.lbk == 0);
		ppa_packed = ftl_ppa_addr_pack(dev, ppa);

		ftl_trace_submission(dev, io, ppa, 1);
		rc = spdk_nvme_ocssd_ns_cmd_vector_reset(dev->ns, ftl_get_write_qpair(dev),
				&ppa_packed, 1, NULL, ftl_io_cmpl_cb, io);
		if (spdk_unlikely(rc)) {
			ftl_io_fail(io, rc);
			SPDK_ERRLOG("Vector reset failed with status: %d\n", rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, 1);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}
static void
_ftl_io_erase(void *ctx)
{
	ftl_io_erase((struct ftl_io *)ctx);
}
static int
ftl_check_core_thread(const struct spdk_ftl_dev *dev)
{
	return dev->core_thread.thread == spdk_get_thread();
}
static int
ftl_check_read_thread(const struct spdk_ftl_dev *dev)
{
	return dev->read_thread.thread == spdk_get_thread();
}
static int
ftl_io_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_core_thread(dev)) {
		return ftl_submit_erase(io);
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_erase, io);
	return 0;
}
static struct ftl_band *
ftl_next_write_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	band = LIST_FIRST(&dev->free_bands);
	if (!band) {
		return NULL;
	}

	assert(band->state == FTL_BAND_STATE_FREE);

	if (ftl_band_erase(band)) {
		/* TODO: handle erase failure */
		return NULL;
	}

	return band;
}
static struct ftl_band *
ftl_next_wptr_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (!dev->next_band) {
		band = ftl_next_write_band(dev);
	} else {
		assert(dev->next_band->state == FTL_BAND_STATE_PREP);
		band = dev->next_band;
		dev->next_band = NULL;
	}

	return band;
}
static struct ftl_wptr *
ftl_wptr_init(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	wptr = calloc(1, sizeof(*wptr));
	if (!wptr) {
		return NULL;
	}

	wptr->dev = dev;
	wptr->band = band;
	wptr->chunk = CIRCLEQ_FIRST(&band->chunks);
	wptr->ppa = wptr->chunk->start_ppa;
	TAILQ_INIT(&wptr->pending_queue);

	return wptr;
}
static int
ftl_add_direct_wptr(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	assert(band->state == FTL_BAND_STATE_OPEN);

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	wptr->direct_mode = true;

	if (ftl_band_alloc_lba_map(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: direct band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}
static void
ftl_close_direct_wptr(struct ftl_band *band)
{
	struct ftl_wptr *wptr = ftl_wptr_from_band(band);

	assert(wptr->direct_mode);
	assert(band->state == FTL_BAND_STATE_CLOSED);

	ftl_band_release_lba_map(band);

	ftl_remove_wptr(wptr);
}
int
ftl_band_set_direct_access(struct ftl_band *band, bool access)
{
	if (access) {
		return ftl_add_direct_wptr(band);
	}

	ftl_close_direct_wptr(band);
	return 0;
}
static int
ftl_add_wptr(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	struct ftl_wptr *wptr;

	band = ftl_next_wptr_band(dev);
	if (!band) {
		return -1;
	}

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	if (ftl_band_write_prep(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}
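
/*
 * The write pointer moves in xfer_size increments, marking the current chunk
 * busy and jumping to the next operational chunk. Once the configured band
 * threshold (conf->band_thld) is crossed, the next band is picked and erased
 * ahead of time so it is ready when the current one fills up.
 */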
static void
ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
{
	struct ftl_band *band = wptr->band;
	struct spdk_ftl_dev *dev = wptr->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t next_thld;

	if (spdk_unlikely(wptr->direct_mode)) {
		return;
	}

	wptr->offset += xfer_size;
	next_thld = (ftl_band_num_usable_lbks(band) * conf->band_thld) / 100;

	if (ftl_band_full(band, wptr->offset)) {
		ftl_band_set_state(band, FTL_BAND_STATE_FULL);
	}

	wptr->chunk->busy = true;
	wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size);
	wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk);

	assert(!ftl_ppa_invalid(wptr->ppa));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: grp:%d, pu:%d chunk:%d, lbk:%u\n",
		      wptr->ppa.grp, wptr->ppa.pu, wptr->ppa.chk, wptr->ppa.lbk);

	if (wptr->offset >= next_thld && !dev->next_band) {
		dev->next_band = ftl_next_write_band(dev);
	}
}
static size_t
ftl_wptr_user_lbks_left(const struct ftl_wptr *wptr)
{
	return ftl_band_user_lbks_left(wptr->band, wptr->offset);
}
static int
ftl_wptr_ready(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	/* TODO: add handling of empty bands */

	if (spdk_unlikely(!ftl_chunk_is_writable(wptr->chunk))) {
		/* Erasing band may fail after it was assigned to wptr. */
		if (spdk_unlikely(wptr->chunk->state == FTL_CHUNK_STATE_BAD)) {
			ftl_wptr_advance(wptr, wptr->dev->xfer_size);
		}
		return 0;
	}

	/* If we're in the process of writing metadata, wait till it is */
	/* completed. */
	/* TODO: we should probably change bands once we're writing tail md */
	if (ftl_band_state_changing(band)) {
		return 0;
	}

	if (band->state == FTL_BAND_STATE_FULL) {
		if (ftl_wptr_close_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	if (band->state != FTL_BAND_STATE_OPEN) {
		if (ftl_wptr_open_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	return 1;
}
static const struct spdk_ftl_limit *
ftl_get_limit(const struct spdk_ftl_dev *dev, int type)
{
	assert(type < SPDK_FTL_LIMIT_MAX);
	return &dev->conf.defrag.limits[type];
}
static bool
ftl_cache_lba_valid(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	struct ftl_ppa ppa;

	/* If the LBA is invalid don't bother checking the md and l2p */
	if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) {
		return false;
	}

	ppa = ftl_l2p_get(dev, entry->lba);
	if (!(ftl_ppa_cached(ppa) && ppa.offset == entry->pos)) {
		return false;
	}

	return true;
}
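
/*
 * Evicting a write buffer entry: under the entry lock, if the L2P still points
 * at this cache position, it is rewritten with the entry's on-disk PPA before
 * the cache status bit is cleared.
 */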
static void
ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	pthread_spin_lock(&entry->lock);

	if (!ftl_rwb_entry_valid(entry)) {
		goto unlock;
	}

	/* If the l2p wasn't updated and still points at the entry, fill it with the */
	/* on-disk PPA and clear the cache status bit. Otherwise, skip the l2p update */
	/* and just clear the cache status. */
	if (!ftl_cache_lba_valid(dev, entry)) {
		goto clear;
	}

	ftl_l2p_set(dev, entry->lba, entry->ppa);
clear:
	ftl_rwb_entry_invalidate(entry);
unlock:
	pthread_spin_unlock(&entry->lock);
}
static struct ftl_rwb_entry *
ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags)
{
	struct ftl_rwb_entry *entry;

	entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags));
	if (!entry) {
		return NULL;
	}

	ftl_evict_cache_entry(dev, entry);

	entry->flags = flags;
	return entry;
}
static void
ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size)
{
	struct ftl_rwb_entry *entry;
	int flags = FTL_IO_PAD | FTL_IO_INTERNAL;

	for (size_t i = 0; i < size; ++i) {
		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			break;
		}

		entry->lba = FTL_LBA_INVALID;
		entry->ppa = ftl_to_ppa(FTL_PPA_INVALID);
		memset(entry->data, 0, FTL_BLOCK_SIZE);
		ftl_rwb_push(entry);
	}
}
static void
ftl_remove_free_bands(struct spdk_ftl_dev *dev)
{
	while (!LIST_EMPTY(&dev->free_bands)) {
		LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry);
	}

	dev->next_band = NULL;
}
static void
ftl_wptr_process_shutdown(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) +
		      ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER);
	size_t num_active = dev->xfer_size * ftl_rwb_get_active_batches(dev->rwb);
	size_t band_length, rwb_free_space, pad_length;

	num_active = num_active ? num_active : dev->xfer_size;
	if (size >= num_active) {
		return;
	}

	/* If we reach this point we need to remove free bands */
	/* and pad current wptr band to the end */
	if (ftl_rwb_get_active_batches(dev->rwb) <= 1) {
		ftl_remove_free_bands(dev);
	}

	band_length = ftl_wptr_user_lbks_left(wptr);
	rwb_free_space = ftl_rwb_size(dev->rwb) - size;
	pad_length = spdk_min(band_length, rwb_free_space);

	/* Pad write buffer until band is full */
	ftl_rwb_pad(dev, pad_length);
}
static int
ftl_shutdown_complete(struct spdk_ftl_dev *dev)
{
	return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) &&
	       LIST_EMPTY(&dev->wptr_list);
}
void
ftl_apply_limits(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit;
	struct ftl_stats *stats = &dev->stats;
	size_t rwb_limit[FTL_RWB_TYPE_MAX];
	int i;

	ftl_rwb_get_limits(dev->rwb, rwb_limit);

	/* Clear existing limit */
	dev->limit = SPDK_FTL_LIMIT_MAX;

	for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) {
		limit = ftl_get_limit(dev, i);

		if (dev->num_free <= limit->thld) {
			rwb_limit[FTL_RWB_TYPE_USER] =
				(limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100;
			stats->limits[i]++;
			dev->limit = i;
			goto apply;
		}
	}

	/* Clear the limits, since we don't need to apply them anymore */
	rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb);
apply:
	ftl_trace_limits(dev, rwb_limit, dev->num_free);
	ftl_rwb_set_limits(dev->rwb, rwb_limit);
}
static int
ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band = ftl_band_from_ppa(dev, ppa);
	struct ftl_lba_map *lba_map = &band->lba_map;
	uint64_t offset;

	offset = ftl_band_lbkoff_from_ppa(band, ppa);

	/* The bit might be already cleared if two writes are scheduled to the */
	/* same LBA at the same time */
	if (spdk_bit_array_get(lba_map->vld, offset)) {
		assert(lba_map->num_vld > 0);
		spdk_bit_array_clear(lba_map->vld, offset);
		lba_map->num_vld--;
		return 1;
	}

	return 0;
}
int
ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band;
	int rc;

	assert(!ftl_ppa_cached(ppa));
	band = ftl_band_from_ppa(dev, ppa);

	pthread_spin_lock(&band->lba_map.lock);
	rc = ftl_invalidate_addr_unlocked(dev, ppa);
	pthread_spin_unlock(&band->lba_map.lock);

	return rc;
}
static int
ftl_read_retry(int rc)
{
	return rc == -EAGAIN;
}
static int
ftl_read_canceled(int rc)
{
	return rc == -EFAULT || rc == 0;
}
static void
ftl_add_to_retry_queue(struct ftl_io *io)
{
	if (!(io->flags & FTL_IO_RETRY)) {
		io->flags |= FTL_IO_RETRY;
		TAILQ_INSERT_TAIL(&io->dev->retry_queue, io, retry_entry);
	}
}
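
/*
 * Read served from the write buffer: the entry lock is taken and the L2P is
 * re-checked, since the entry could have been written out and reused for a
 * different LBA between the lookup and the copy.
 */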
static int
ftl_ppa_cache_read(struct ftl_io *io, uint64_t lba,
		   struct ftl_ppa ppa, void *buf)
{
	struct ftl_rwb *rwb = io->dev->rwb;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa nppa;
	int rc = 0;

	entry = ftl_rwb_entry_from_offset(rwb, ppa.offset);
	pthread_spin_lock(&entry->lock);

	nppa = ftl_l2p_get(io->dev, lba);
	if (ppa.ppa != nppa.ppa) {
		rc = -1;
		goto out;
	}

	memcpy(buf, entry->data, FTL_BLOCK_SIZE);
out:
	pthread_spin_unlock(&entry->lock);
	return rc;
}
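
/*
 * Resolve the PPA for the next logical block of a read and count how many of
 * the following blocks are physically contiguous, so they can be submitted as
 * a single NVMe read.
 */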
static int
ftl_lba_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa next_ppa;
	size_t i;

	*ppa = ftl_l2p_get(dev, ftl_io_current_lba(io));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read ppa:%lx, lba:%lu\n",
		      ppa->ppa, ftl_io_current_lba(io));

	/* If the PPA is invalid, skip it (the buffer should already be zero'ed) */
	if (ftl_ppa_invalid(*ppa)) {
		return -EFAULT;
	}

	if (ftl_ppa_cached(*ppa)) {
		if (!ftl_ppa_cache_read(io, ftl_io_current_lba(io), *ppa, ftl_io_iovec_addr(io))) {
			return 0;
		}

		/* If the state changed, we have to re-read the l2p */
		return -EAGAIN;
	}

	for (i = 1; i < ftl_io_iovec_len_left(io); ++i) {
		next_ppa = ftl_l2p_get(dev, ftl_io_get_lba(io, io->pos + i));

		if (ftl_ppa_invalid(next_ppa) || ftl_ppa_cached(next_ppa)) {
			break;
		}

		if (ftl_ppa_addr_pack(dev, *ppa) + i != ftl_ppa_addr_pack(dev, next_ppa)) {
			break;
		}
	}

	return i;
}
static int
ftl_submit_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa ppa;
	int rc = 0, lbk_cnt;

	assert(LIST_EMPTY(&io->children));

	while (io->pos < io->lbk_cnt) {
		if (ftl_io_mode_ppa(io)) {
			lbk_cnt = rc = ftl_ppa_read_next_ppa(io, &ppa);
		} else {
			lbk_cnt = rc = ftl_lba_read_next_ppa(io, &ppa);
		}

		/* We might need to retry the read from scratch (e.g. */
		/* because write was under way and completed before */
		/* we could read it from rwb */
		if (ftl_read_retry(rc)) {
			continue;
		}

		/* We don't have to schedule the read, as it was read from cache */
		if (ftl_read_canceled(rc)) {
			ftl_io_advance(io, 1);
			ftl_trace_completion(io->dev, io, rc ? FTL_TRACE_COMPLETION_INVALID :
					     FTL_TRACE_COMPLETION_CACHE);
			rc = 0;
			continue;
		}

		ftl_trace_submission(dev, io, ppa, lbk_cnt);
		rc = spdk_nvme_ns_cmd_read(dev->ns, ftl_get_read_qpair(dev),
					   ftl_io_iovec_addr(io),
					   ftl_ppa_addr_pack(io->dev, ppa), lbk_cnt,
					   ftl_io_cmpl_cb, io, 0);
		if (spdk_unlikely(rc)) {
			if (rc == -ENOMEM) {
				ftl_add_to_retry_queue(io);
			} else {
				ftl_io_fail(io, rc);
			}
			break;
		}

		ftl_io_advance(io, lbk_cnt);
	}

	/* If we didn't have to read anything from the device, */
	/* complete the request right away */
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}
static void
ftl_complete_flush(struct ftl_flush *flush)
{
	assert(flush->num_req == 0);
	LIST_REMOVE(flush, list_entry);

	flush->cb.fn(flush->cb.ctx, 0);

	spdk_bit_array_free(&flush->bmap);
	free(flush);
}
static void
ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch)
{
	struct ftl_flush *flush, *tflush;
	size_t offset;

	LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) {
		offset = ftl_rwb_batch_get_offset(batch);

		if (spdk_bit_array_get(flush->bmap, offset)) {
			spdk_bit_array_clear(flush->bmap, offset);
			if (!(--flush->num_req)) {
				ftl_complete_flush(flush);
			}
		}
	}
}
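
/*
 * Reserve a contiguous range of blocks on the non-volatile cache bdev. The
 * requested size may be trimmed at the end of the device; current_addr wraps
 * back to 0 once the end of the cache is reached.
 */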
static uint64_t
ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	uint64_t num_available, cache_size, cache_addr = FTL_LBA_INVALID;

	cache_size = spdk_bdev_get_num_blocks(bdev);

	pthread_spin_lock(&nv_cache->lock);
	if (spdk_unlikely(nv_cache->num_available == 0)) {
		goto out;
	}

	num_available = spdk_min(nv_cache->num_available, *num_lbks);
	if (spdk_unlikely(nv_cache->current_addr + num_available > cache_size)) {
		*num_lbks = cache_size - nv_cache->current_addr;
	} else {
		*num_lbks = num_available;
	}

	cache_addr = nv_cache->current_addr;
	nv_cache->current_addr += *num_lbks;
	nv_cache->num_available -= *num_lbks;

	if (nv_cache->current_addr == spdk_bdev_get_num_blocks(bdev)) {
		nv_cache->current_addr = 0;
	}
out:
	pthread_spin_unlock(&nv_cache->lock);
	return cache_addr;
}
static struct ftl_io *
ftl_alloc_io_nv_cache(struct ftl_io *parent, size_t num_lbks)
{
	struct ftl_io_init_opts opts = {
		.dev		= parent->dev,
		.parent		= parent,
		.data		= ftl_io_iovec_addr(parent),
		.lbk_cnt	= num_lbks,
		.flags		= FTL_IO_CACHE,
	};

	return ftl_io_init_internal(&opts);
}
static void
ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_io *io = cb_arg;

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->ppa.ppa);
		io->status = -EIO;
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	spdk_bdev_free_io(bdev_io);
}
static void
ftl_submit_nv_cache(void *ctx)
{
	struct ftl_io *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	struct ftl_io_channel *ioch;
	int rc;

	ioch = spdk_io_channel_get_ctx(io->ioch);
	thread = spdk_io_channel_get_thread(io->ioch);

	rc = spdk_bdev_write_blocks(dev->nv_cache.bdev_desc, ioch->cache_ioch,
				    ftl_io_iovec_addr(io), io->ppa.ppa, io->lbk_cnt,
				    ftl_nv_cache_submit_cb, io);
	if (rc == -ENOMEM) {
		spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
		return;
	} else if (rc) {
		SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
			    spdk_strerror(-rc), io->ppa.ppa, io->lbk_cnt);
		ftl_io_fail(io, rc);
		ftl_io_complete(io);
		return;
	}

	ftl_io_advance(io, io->lbk_cnt);
}
static void
_ftl_write_nv_cache(void *ctx)
{
	struct ftl_io *child, *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	size_t num_lbks;

	thread = spdk_io_channel_get_thread(io->ioch);

	while (io->pos < io->lbk_cnt) {
		num_lbks = ftl_io_iovec_len_left(io);

		child = ftl_alloc_io_nv_cache(io, num_lbks);
		if (spdk_unlikely(!child)) {
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			return;
		}

		/* Reserve area on the write buffer cache */
		child->ppa.ppa = ftl_reserve_nv_cache(&dev->nv_cache, &num_lbks);
		if (child->ppa.ppa == FTL_LBA_INVALID) {
			ftl_io_free(child);
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			return;
		}

		/* Shrink the IO if there isn't enough room in the cache to fill the whole iovec */
		if (spdk_unlikely(num_lbks != ftl_io_iovec_len_left(io))) {
			ftl_io_shrink_iovec(child, num_lbks);
		}

		ftl_submit_nv_cache(child);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}
static void
ftl_write_nv_cache(struct ftl_io *parent)
{
	ftl_io_reset(parent);
	parent->flags |= FTL_IO_CACHE;
	_ftl_write_nv_cache(parent);
}
static void
ftl_write_fail(struct ftl_io *io, int status)
{
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;
	char buf[128];

	entry = ftl_rwb_batch_first_entry(batch);

	band = ftl_band_from_ppa(io->dev, entry->ppa);
	SPDK_ERRLOG("Write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(entry->ppa, buf, sizeof(buf)), status);

	/* Close the band, halt wptr and defrag */
	ftl_halt_writes(dev, band);

	ftl_rwb_foreach(entry, batch) {
		/* Invalidate meta set by process_writes() */
		ftl_invalidate_addr(dev, entry->ppa);
	}

	/* Reset the batch back to the RWB to resend it later */
	ftl_rwb_batch_revert(batch);
}
static void
ftl_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct ftl_rwb_entry *entry;

	if (status) {
		ftl_write_fail(io, status);
		return;
	}

	assert(io->lbk_cnt == dev->xfer_size);
	ftl_rwb_foreach(entry, batch) {
		if (!(io->flags & FTL_IO_MD) && !(entry->flags & FTL_IO_PAD)) {
			/* Verify that the LBA is set for user lbks */
			assert(entry->lba != FTL_LBA_INVALID);
		}

		SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lu, lba:%lu\n",
			      entry->ppa.ppa, entry->lba);
	}

	ftl_process_flush(dev, batch);
	ftl_rwb_batch_release(batch);
}
static void
ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry)
{
	if (!ftl_rwb_entry_internal(entry)) {
		dev->stats.write_user++;
	}
	dev->stats.write_total++;
}
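
/*
 * L2P update after a block lands in the write buffer. Three cases are handled:
 * the LBA was unmapped (plain set), the previous location is another write
 * buffer entry (updated under that entry's lock), or the previous location is
 * on disk (old PPA invalidated under the band's LBA map lock, which also
 * guards weak-write validity).
 */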
static void
ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry,
	       struct ftl_ppa ppa)
{
	struct ftl_ppa prev_ppa;
	struct ftl_rwb_entry *prev;
	struct ftl_band *band;
	int valid;

	prev_ppa = ftl_l2p_get(dev, entry->lba);
	if (ftl_ppa_invalid(prev_ppa)) {
		ftl_l2p_set(dev, entry->lba, ppa);
		return;
	}

	/* If the L2P's PPA is different than what we expected we don't need to */
	/* do anything (someone's already overwritten our data). */
	if (ftl_rwb_entry_weak(entry) && !ftl_ppa_cmp(prev_ppa, entry->ppa)) {
		return;
	}

	if (ftl_ppa_cached(prev_ppa)) {
		assert(!ftl_rwb_entry_weak(entry));
		prev = ftl_rwb_entry_from_offset(dev->rwb, prev_ppa.offset);
		pthread_spin_lock(&prev->lock);

		/* Re-read the L2P under the lock to protect against updates */
		/* to this LBA from other threads */
		prev_ppa = ftl_l2p_get(dev, entry->lba);

		/* If the entry is no longer in cache, another write has been */
		/* scheduled in the meantime, so we have to invalidate its LBA */
		if (!ftl_ppa_cached(prev_ppa)) {
			ftl_invalidate_addr(dev, prev_ppa);
		}

		/* If previous entry is part of cache, remove and invalidate it */
		if (ftl_rwb_entry_valid(prev)) {
			ftl_invalidate_addr(dev, prev->ppa);
			ftl_rwb_entry_invalidate(prev);
		}

		ftl_l2p_set(dev, entry->lba, ppa);
		pthread_spin_unlock(&prev->lock);
		return;
	}

	/* Lock the band containing previous PPA. This assures atomic changes to */
	/* the L2P as well as metadata. The valid bits in metadata are used to */
	/* check weak writes validity. */
	band = ftl_band_from_ppa(dev, prev_ppa);
	pthread_spin_lock(&band->lba_map.lock);

	valid = ftl_invalidate_addr_unlocked(dev, prev_ppa);

	/* If the address has been invalidated already, we don't want to update */
	/* the L2P for weak writes, as it means the write is no longer valid. */
	if (!ftl_rwb_entry_weak(entry) || valid) {
		ftl_l2p_set(dev, entry->lba, ppa);
	}

	pthread_spin_unlock(&band->lba_map.lock);
}
static struct ftl_io *
ftl_io_init_child_write(struct ftl_io *parent, struct ftl_ppa ppa,
			void *data, void *md, ftl_io_fn cb)
{
	struct ftl_io *io;
	struct spdk_ftl_dev *dev = parent->dev;
	struct ftl_io_init_opts opts = {
		.dev		= dev,
		.parent		= parent,
		.band		= parent->band,
		.size		= sizeof(struct ftl_io),
		.type		= FTL_IO_WRITE,
		.lbk_cnt	= dev->xfer_size,
		.cb_fn		= cb,
		.data		= data,
		.md		= md,
	};

	io = ftl_io_init_internal(&opts);
	if (!io) {
		return NULL;
	}

	io->ppa = ppa;

	return io;
}
static void
ftl_io_child_write_cb(struct ftl_io *io, void *ctx, int status)
{
	struct ftl_chunk *chunk;

	chunk = ftl_band_chunk_from_ppa(io->band, io->ppa);
	chunk->busy = false;
}
static int
ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io, int lbk_cnt)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io *child;
	struct ftl_ppa ppa;
	int rc;

	if (spdk_likely(!wptr->direct_mode)) {
		ppa = wptr->ppa;
	} else {
		assert(io->flags & FTL_IO_DIRECT_ACCESS);
		assert(io->ppa.chk == wptr->band->id);
		ppa = io->ppa;
	}

	/* Split IO to child requests and release chunk immediately after child is completed */
	child = ftl_io_init_child_write(io, ppa, ftl_io_iovec_addr(io),
					ftl_io_get_md(io), ftl_io_child_write_cb);
	if (!child) {
		return -EAGAIN;
	}

	rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
					    ftl_io_iovec_addr(child), child->md,
					    ftl_ppa_addr_pack(dev, ppa),
					    lbk_cnt, ftl_io_cmpl_cb, child, 0, 0, 0);
	if (rc) {
		ftl_io_fail(child, rc);
		ftl_io_complete(child);
		SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n",
			    rc, ppa.ppa);
		return -EIO;
	}

	ftl_io_inc_req(child);
	ftl_io_advance(child, lbk_cnt);

	return 0;
}
static int
ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	int rc = 0;

	assert(io->lbk_cnt % dev->xfer_size == 0);

	while (io->iov_pos < io->iov_cnt) {
		/* There are no guarantees of the order of completion of NVMe IO submission queue */
		/* so wait until chunk is not busy before submitting another write */
		if (wptr->chunk->busy) {
			TAILQ_INSERT_TAIL(&wptr->pending_queue, io, retry_entry);
			rc = -EAGAIN;
			break;
		}

		rc = ftl_submit_child_write(wptr, io, dev->xfer_size);
		if (spdk_unlikely(rc)) {
			if (rc == -EAGAIN) {
				TAILQ_INSERT_TAIL(&wptr->pending_queue, io, retry_entry);
			} else {
				ftl_io_fail(io, rc);
			}
			break;
		}

		ftl_trace_submission(dev, io, wptr->ppa, dev->xfer_size);
		ftl_wptr_advance(wptr, dev->xfer_size);
	}

	if (ftl_io_done(io)) {
		/* Parent IO will complete after all children are completed */
		ftl_io_complete(io);
	}

	return rc;
}
static void
ftl_flush_pad_batch(struct spdk_ftl_dev *dev)
{
	struct ftl_rwb *rwb = dev->rwb;
	size_t size, num_entries;

	size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) +
	       ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER);

	/* There must be something in the RWB, otherwise the flush */
	/* wouldn't be waiting for anything */
	assert(size > 0);

	/* Only add padding when there's less than xfer size */
	/* entries in the buffer. Otherwise we just have to wait */
	/* for the entries to become ready. */
	num_entries = ftl_rwb_get_active_batches(dev->rwb) * dev->xfer_size;
	if (size < num_entries) {
		ftl_rwb_pad(dev, num_entries - (size % num_entries));
	}
}
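
/*
 * Main per-wptr write path: retry any pended IO first, make sure the band is
 * ready, pop a full batch from the write buffer, stamp each entry with its
 * target PPA and update band metadata, then submit the batch in xfer_size
 * chunks.
 */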
static size_t
ftl_wptr_process_writes(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_rwb_batch *batch;
	struct ftl_rwb_entry *entry;
	struct ftl_io *io;
	struct ftl_ppa ppa, prev_ppa;

	if (spdk_unlikely(!TAILQ_EMPTY(&wptr->pending_queue))) {
		io = TAILQ_FIRST(&wptr->pending_queue);
		TAILQ_REMOVE(&wptr->pending_queue, io, retry_entry);

		if (ftl_submit_write(wptr, io) == -EAGAIN) {
			return 0;
		}
	}

	/* Make sure the band is prepared for writing */
	if (!ftl_wptr_ready(wptr)) {
		return 0;
	}

	if (dev->halt) {
		ftl_wptr_process_shutdown(wptr);
	}

	batch = ftl_rwb_pop(dev->rwb);
	if (!batch) {
		/* If there are queued flush requests we need to pad the RWB to */
		/* force out remaining entries */
		if (!LIST_EMPTY(&dev->flush_list)) {
			ftl_flush_pad_batch(dev);
		}

		return 0;
	}

	io = ftl_io_rwb_init(dev, wptr->band, batch, ftl_write_cb);
	if (!io) {
		goto error;
	}

	ppa = wptr->ppa;
	ftl_rwb_foreach(entry, batch) {
		entry->ppa = ppa;

		if (entry->lba != FTL_LBA_INVALID) {
			pthread_spin_lock(&entry->lock);
			prev_ppa = ftl_l2p_get(dev, entry->lba);

			/* If the l2p was updated in the meantime, don't update band's metadata */
			if (ftl_ppa_cached(prev_ppa) && prev_ppa.offset == entry->pos) {
				/* Setting entry's cache bit needs to be done after metadata */
				/* within the band is updated to make sure that writes */
				/* invalidating the entry clear the metadata as well */
				ftl_band_set_addr(wptr->band, entry->lba, entry->ppa);
				ftl_rwb_entry_set_valid(entry);
			}
			pthread_spin_unlock(&entry->lock);
		}

		ftl_trace_rwb_pop(dev, entry);
		ftl_update_rwb_stats(dev, entry);

		ppa = ftl_band_next_ppa(wptr->band, ppa, 1);
	}

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lx, %lx\n", wptr->ppa.ppa,
		      ftl_ppa_addr_pack(dev, wptr->ppa));

	if (ftl_submit_write(wptr, io)) {
		/* TODO: we need some recovery here */
		assert(0 && "Write submit failed");
		if (ftl_io_done(io)) {
			ftl_io_complete(io);
		}
	}

	return dev->xfer_size;
error:
	ftl_rwb_batch_revert(batch);
	return 0;
}
static void
ftl_process_writes(struct spdk_ftl_dev *dev)
{
	struct ftl_wptr *wptr, *twptr;
	size_t num_active = 0;
	enum ftl_band_state state;

	LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) {
		ftl_wptr_process_writes(wptr);
		state = wptr->band->state;

		if (state != FTL_BAND_STATE_FULL &&
		    state != FTL_BAND_STATE_CLOSING &&
		    state != FTL_BAND_STATE_CLOSED) {
			num_active++;
		}
	}

	if (num_active < 1) {
		ftl_add_wptr(dev);
	}
}
static void
ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io)
{
	struct ftl_band *band;

	memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE);

	if (ftl_rwb_entry_weak(entry)) {
		band = ftl_band_from_ppa(io->dev, io->ppa);
		entry->ppa = ftl_band_next_ppa(band, io->ppa, io->pos);
	}

	entry->trace = io->trace;
	entry->lba = ftl_io_current_lba(io);

	if (entry->md) {
		memcpy(entry->md, &entry->lba, sizeof(entry->lba));
	}
}
static int
ftl_rwb_fill(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa ppa = { .cached = 1 };
	int flags = ftl_rwb_flags_from_io(io);

	while (io->pos < io->lbk_cnt) {
		if (ftl_io_current_lba(io) == FTL_LBA_INVALID) {
			ftl_io_advance(io, 1);
			continue;
		}

		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			return -EAGAIN;
		}

		ftl_rwb_entry_fill(entry, io);

		ppa.offset = entry->pos;

		ftl_trace_rwb_fill(dev, io);
		ftl_update_l2p(dev, entry, ppa);
		ftl_io_advance(io, 1);

		/* Needs to be done after L2P is updated to avoid race with */
		/* write completion callback when it's processed faster than */
		/* L2P is set in update_l2p(). */
		ftl_rwb_push(entry);
	}

	if (ftl_io_done(io)) {
		if (dev->nv_cache.bdev_desc) {
			ftl_write_nv_cache(io);
		} else {
			ftl_io_complete(io);
		}
	}

	return 0;
}
static bool
ftl_dev_needs_defrag(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START);

	if (ftl_reloc_is_halted(dev->reloc)) {
		return false;
	}

	if (dev->df_band) {
		return false;
	}

	if (dev->num_free <= limit->thld) {
		return true;
	}

	return false;
}
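
/*
 * Defrag merit: roughly invalid / (valid + 1), scaled by the band's age, so
 * mostly-invalid and older bands are relocated first. For example, a band with
 * 90% of its blocks invalid scores roughly an order of magnitude higher than
 * one with only half of its blocks invalid, all else being equal.
 */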
static double
ftl_band_calc_merit(struct ftl_band *band, size_t *threshold_valid)
{
	size_t usable, valid, invalid;
	double vld_ratio;

	/* If the band doesn't have any usable lbks it's of no use */
	usable = ftl_band_num_usable_lbks(band);
	if (usable == 0) {
		return 0.0;
	}

	valid = threshold_valid ? (usable - *threshold_valid) : band->lba_map.num_vld;
	invalid = usable - valid;

	/* Add one to avoid division by 0 */
	vld_ratio = (double)invalid / (double)(valid + 1);
	return vld_ratio * ftl_band_age(band);
}
static bool
ftl_band_needs_defrag(struct ftl_band *band, struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t thld_vld;

	/* If we're in dire need of free bands, every band is worth defragging */
	if (ftl_current_limit(dev) == SPDK_FTL_LIMIT_CRIT) {
		return true;
	}

	thld_vld = (ftl_band_num_usable_lbks(band) * conf->defrag.invalid_thld) / 100;

	return band->merit > ftl_band_calc_merit(band, &thld_vld);
}
static struct ftl_band *
ftl_select_defrag_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *mband = NULL;
	double merit = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_CLOSED);
		band->merit = ftl_band_calc_merit(band, NULL);
		if (band->merit > merit) {
			merit = band->merit;
			mband = band;
		}
	}

	if (mband && !ftl_band_needs_defrag(mband, dev)) {
		mband = NULL;
	}

	return mband;
}
static void
ftl_process_relocs(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (ftl_dev_needs_defrag(dev)) {
		band = dev->df_band = ftl_select_defrag_band(dev);

		if (band) {
			ftl_reloc_add(dev->reloc, band, 0, ftl_num_band_lbks(dev), 0);
			ftl_trace_defrag_band(dev, band);
		}
	}

	ftl_reloc(dev->reloc);
}
int
ftl_current_limit(const struct spdk_ftl_dev *dev)
{
	return dev->limit;
}
void
spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs)
{
	attrs->uuid = dev->uuid;
	attrs->lbk_cnt = dev->num_lbas;
	attrs->lbk_size = FTL_BLOCK_SIZE;
	attrs->range = dev->range;
	attrs->cache_bdev_desc = dev->nv_cache.bdev_desc;
	attrs->allow_open_bands = dev->conf.allow_open_bands;
	attrs->num_chunks = dev->geo.num_chk;
	attrs->chunk_size = dev->geo.clba;
}
static void
_ftl_io_write(void *ctx)
{
	ftl_io_write((struct ftl_io *)ctx);
}
static int
ftl_rwb_fill_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_rwb_fill(io);
	if (rc == -EAGAIN) {
		spdk_thread_send_msg(spdk_io_channel_get_thread(io->ioch),
				     _ftl_io_write, io);
		return 0;
	}

	return rc;
}
static int
ftl_submit_write_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_submit_write(ftl_wptr_from_band(io->band), io);
	if (rc == -EAGAIN) {
		/* EAGAIN means that the request was put on the pending queue */
		rc = 0;
	}

	return rc;
}
void
ftl_io_write(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	/* For normal IOs we just need to copy the data onto the rwb */
	if (!(io->flags & FTL_IO_MD)) {
		ftl_io_call_foreach_child(io, ftl_rwb_fill_leaf);
		return;
	}

	/* Metadata has its own buffer, so it doesn't have to be copied, so just */
	/* send it to the core thread and schedule the write immediately */
	if (ftl_check_core_thread(dev)) {
		ftl_io_call_foreach_child(io, ftl_submit_write_leaf);
	} else {
		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io);
	}
}
int
spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	       struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_write(io);

	return 0;
}
static int
ftl_io_read_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_submit_read(io);
	if (rc == -ENOMEM) {
		/* ENOMEM means that the request was put on a pending queue */
		return 0;
	}

	return rc;
}
static void
_ftl_io_read(void *arg)
{
	ftl_io_read((struct ftl_io *)arg);
}
void
ftl_io_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_read_thread(dev)) {
		ftl_io_call_foreach_child(io, ftl_io_read_leaf);
	} else {
		spdk_thread_send_msg(ftl_get_read_thread(dev), _ftl_io_read, io);
	}
}
int
spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	      struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_read(io);

	return 0;
}
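
/*
 * Flush bookkeeping: each flush request records the currently non-empty write
 * buffer batches in a bitmap and is completed from the write completion path
 * once every one of those batches has been written out.
 */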
static struct ftl_flush *
ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;
	struct ftl_rwb *rwb = dev->rwb;

	flush = calloc(1, sizeof(*flush));
	if (!flush) {
		return NULL;
	}

	flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb));
	if (!flush->bmap) {
		free(flush);
		return NULL;
	}

	flush->dev = dev;
	flush->cb.fn = cb_fn;
	flush->cb.ctx = cb_arg;

	return flush;
}
static void
_ftl_flush(void *ctx)
{
	struct ftl_flush *flush = ctx;
	struct spdk_ftl_dev *dev = flush->dev;
	struct ftl_rwb *rwb = dev->rwb;
	struct ftl_rwb_batch *batch;

	/* Attach flush object to all non-empty batches */
	ftl_rwb_foreach_batch(batch, rwb) {
		if (!ftl_rwb_batch_empty(batch)) {
			spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch));
			flush->num_req++;
		}
	}

	LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry);

	/* If the RWB was already empty, the flush can be completed right away */
	if (!flush->num_req) {
		ftl_complete_flush(flush);
	}
}
int
spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;

	if (!dev->initialized) {
		return -EBUSY;
	}

	flush = ftl_flush_init(dev, cb_fn, cb_arg);
	if (!flush) {
		return -ENOMEM;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush);
	return 0;
}
void
ftl_process_anm_event(struct ftl_anm_event *event)
{
	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Unconsumed ANM received for dev: %p...\n", event->dev);
	ftl_anm_event_complete(event);
}
static void
ftl_process_retry_queue(struct spdk_ftl_dev *dev)
{
	struct ftl_io *io;
	int rc;

	while (!TAILQ_EMPTY(&dev->retry_queue)) {
		io = TAILQ_FIRST(&dev->retry_queue);

		/* Retry only if IO is still healthy */
		if (spdk_likely(io->status == 0)) {
			rc = ftl_submit_read(io);
			if (rc == -ENOMEM) {
				break;
			}
		}

		io->flags &= ~FTL_IO_RETRY;
		TAILQ_REMOVE(&dev->retry_queue, io, retry_entry);

		if (ftl_io_done(io)) {
			ftl_io_complete(io);
		}
	}
}
int
ftl_task_read(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_read_qpair(dev);
	size_t num_completed;

	if (ftl_shutdown_complete(dev)) {
		spdk_poller_unregister(&thread->poller);
		return 0;
	}

	num_completed = spdk_nvme_qpair_process_completions(qpair, 0);

	if (num_completed && !TAILQ_EMPTY(&dev->retry_queue)) {
		ftl_process_retry_queue(dev);
	}

	return num_completed;
}
int
ftl_task_core(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_write_qpair(dev);

	if (ftl_shutdown_complete(dev)) {
		spdk_poller_unregister(&thread->poller);
		return 0;
	}

	ftl_process_writes(dev);
	spdk_nvme_qpair_process_completions(qpair, 0);
	ftl_process_relocs(dev);

	return 0;
}
SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE)