4 * Copyright (c) Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include "spdk/stdinc.h"
38 #include "spdk/uuid.h"
39 #include "spdk/thread.h"
40 #include "spdk/util.h"
41 #include "spdk_internal/log.h"
42 #include "spdk/likely.h"
43 #include "spdk/queue.h"
45 #include "spdk/bdev.h"
46 #include "spdk/bdev_zone.h"
50 #include "ftl_trace.h"
52 #ifdef SPDK_CONFIG_PMDK
54 #endif /* SPDK_CONFIG_PMDK */
65 struct ftl_band_flush
;
68 /* Number of writes scheduled directly by the user */
71 /* Total number of writes */
75 struct ftl_trace trace
;
77 /* Number of limits applied */
78 uint64_t limits
[SPDK_FTL_LIMIT_MAX
];
81 struct ftl_global_md
{
83 struct spdk_uuid uuid
;
84 /* Size of the l2p table */
89 /* Write buffer cache bdev */
90 struct spdk_bdev_desc
*bdev_desc
;
92 uint64_t current_addr
;
93 /* Number of available blocks left */
94 uint64_t num_available
;
95 /* Maximum number of blocks */
96 uint64_t num_data_blocks
;
98 * Phase of the current cycle of writes. Each time whole cache area is filled, the phase is
99 * advanced. Current phase is saved in every IO's metadata, as well as in the header saved
100 * in the first sector. By looking at the phase of each block, it's possible to find the
101 * oldest block and replay the order of the writes when recovering the data from the cache.
104 /* Indicates that the data can be written to the cache */
107 struct spdk_mempool
*md_pool
;
108 /* DMA buffer for writing the header */
111 pthread_spinlock_t lock
;
115 /* Queue of write buffer entries, can reach up to xfer_size entries */
116 TAILQ_HEAD(, ftl_wbuf_entry
) entries
;
117 /* Number of entries in the queue above */
118 uint32_t num_entries
;
119 /* Index within spdk_ftl_dev.batch_array */
123 TAILQ_ENTRY(ftl_batch
) tailq
;
126 struct spdk_ftl_dev
{
127 /* Device instance */
128 struct spdk_uuid uuid
;
132 struct spdk_ftl_conf conf
;
134 /* Indicates the device is fully initialized */
136 /* Indicates the device is about to be stopped */
138 /* Indicates the device is about to start stopping - use to handle multiple stop request */
141 /* Underlying device */
142 struct spdk_bdev_desc
*base_bdev_desc
;
144 /* Non-volatile write buffer cache */
145 struct ftl_nv_cache nv_cache
;
147 /* LBA map memory pool */
148 struct spdk_mempool
*lba_pool
;
150 /* LBA map requests pool */
151 struct spdk_mempool
*lba_request_pool
;
153 /* Media management events pool */
154 struct spdk_mempool
*media_events_pool
;
157 struct ftl_stats stats
;
159 /* Current sequence number */
163 struct ftl_band
*bands
;
164 /* Number of operational bands */
166 /* Next write band */
167 struct ftl_band
*next_band
;
169 LIST_HEAD(, ftl_band
) free_bands
;
170 /* Closed bands list */
171 LIST_HEAD(, ftl_band
) shut_bands
;
172 /* Number of free bands */
175 /* List of write pointers */
176 LIST_HEAD(, ftl_wptr
) wptr_list
;
178 /* Logical -> physical table */
180 /* Size of the l2p table */
182 /* Size of pages mmapped for l2p, valid only for mapping on persistent memory */
189 LIST_HEAD(, ftl_flush
) flush_list
;
190 /* List of band flush requests */
191 LIST_HEAD(, ftl_band_flush
) band_flush_list
;
193 /* Device specific md buffer */
194 struct ftl_global_md global_md
;
200 /* Transfer unit size */
203 /* Current user write limit */
206 /* Inflight IO operations */
207 uint32_t num_inflight
;
209 /* Manages data relocation */
210 struct ftl_reloc
*reloc
;
212 /* Thread on which the poller is running */
213 struct spdk_thread
*core_thread
;
215 struct spdk_io_channel
*ioch
;
217 struct spdk_poller
*core_poller
;
219 /* IO channel array provides means for retrieving write buffer entries
220 * from their address stored in L2P. The address is divided into two
221 * parts - IO channel offset poining at specific IO channel (within this
222 * array) and entry offset pointing at specific entry within that IO
225 struct ftl_io_channel
**ioch_array
;
226 TAILQ_HEAD(, ftl_io_channel
) ioch_queue
;
227 uint64_t num_io_channels
;
228 /* Value required to shift address of a write buffer entry to retrieve
229 * the IO channel it's part of. The other part of the address describes
230 * the offset of an entry within the IO channel's entry array.
234 /* Write buffer batches */
235 #define FTL_BATCH_COUNT 4096
236 struct ftl_batch batch_array
[FTL_BATCH_COUNT
];
237 /* Iovec buffer used by batches */
238 struct iovec
*iov_buf
;
239 /* Batch currently being filled */
240 struct ftl_batch
*current_batch
;
241 /* Full and ready to be sent batches. A batch is put on this queue in
242 * case it's already filled, but cannot be sent.
244 TAILQ_HEAD(, ftl_batch
) pending_batches
;
245 TAILQ_HEAD(, ftl_batch
) free_batches
;
248 STAILQ_ENTRY(spdk_ftl_dev
) stailq
;
251 struct ftl_nv_cache_header
{
252 /* Version of the header */
254 /* UUID of the FTL device */
255 struct spdk_uuid uuid
;
256 /* Size of the non-volatile cache (in blocks) */
258 /* Contains the next address to be written after clean shutdown, invalid LBA otherwise */
259 uint64_t current_addr
;
262 /* Checksum of the header, needs to be last element */
264 } __attribute__((packed
));
266 struct ftl_media_event
{
268 struct spdk_ftl_dev
*dev
;
270 struct spdk_bdev_media_event event
;
273 typedef void (*ftl_restore_fn
)(struct ftl_restore
*, int, void *cb_arg
);
275 void ftl_apply_limits(struct spdk_ftl_dev
*dev
);
276 void ftl_io_read(struct ftl_io
*io
);
277 void ftl_io_write(struct ftl_io
*io
);
278 int ftl_flush_wbuf(struct spdk_ftl_dev
*dev
, spdk_ftl_fn cb_fn
, void *cb_arg
);
279 int ftl_current_limit(const struct spdk_ftl_dev
*dev
);
280 int ftl_invalidate_addr(struct spdk_ftl_dev
*dev
, struct ftl_addr addr
);
281 int ftl_task_core(void *ctx
);
282 int ftl_task_read(void *ctx
);
283 void ftl_process_anm_event(struct ftl_anm_event
*event
);
284 size_t ftl_tail_md_num_blocks(const struct spdk_ftl_dev
*dev
);
285 size_t ftl_tail_md_hdr_num_blocks(void);
286 size_t ftl_vld_map_num_blocks(const struct spdk_ftl_dev
*dev
);
287 size_t ftl_lba_map_num_blocks(const struct spdk_ftl_dev
*dev
);
288 size_t ftl_head_md_num_blocks(const struct spdk_ftl_dev
*dev
);
289 int ftl_restore_md(struct spdk_ftl_dev
*dev
, ftl_restore_fn cb
, void *cb_arg
);
290 int ftl_restore_device(struct ftl_restore
*restore
, ftl_restore_fn cb
, void *cb_arg
);
291 void ftl_restore_nv_cache(struct ftl_restore
*restore
, ftl_restore_fn cb
, void *cb_arg
);
292 int ftl_band_set_direct_access(struct ftl_band
*band
, bool access
);
293 bool ftl_addr_is_written(struct ftl_band
*band
, struct ftl_addr addr
);
294 int ftl_flush_active_bands(struct spdk_ftl_dev
*dev
, spdk_ftl_fn cb_fn
, void *cb_arg
);
295 int ftl_nv_cache_write_header(struct ftl_nv_cache
*nv_cache
, bool shutdown
,
296 spdk_bdev_io_completion_cb cb_fn
, void *cb_arg
);
297 int ftl_nv_cache_scrub(struct ftl_nv_cache
*nv_cache
, spdk_bdev_io_completion_cb cb_fn
,
299 void ftl_get_media_events(struct spdk_ftl_dev
*dev
);
300 int ftl_io_channel_poll(void *arg
);
301 void ftl_evict_cache_entry(struct spdk_ftl_dev
*dev
, struct ftl_wbuf_entry
*entry
);
302 struct spdk_io_channel
*ftl_get_io_channel(const struct spdk_ftl_dev
*dev
);
303 struct ftl_io_channel
*ftl_io_channel_get_ctx(struct spdk_io_channel
*ioch
);
/* Wrap a raw 64-bit offset in a struct ftl_addr compound literal. */
#define ftl_to_addr(address) \
        (struct ftl_addr) { .offset = (uint64_t)(address) }

/* Wrap a raw 32-bit offset in the packed representation of ftl_addr. */
#define ftl_to_addr_packed(address) \
        (struct ftl_addr) { .pack.offset = (uint32_t)(address) }
312 static inline struct spdk_thread
*
313 ftl_get_core_thread(const struct spdk_ftl_dev
*dev
)
315 return dev
->core_thread
;
319 ftl_get_num_bands(const struct spdk_ftl_dev
*dev
)
321 return dev
->num_bands
;
325 ftl_get_num_punits(const struct spdk_ftl_dev
*dev
)
327 return spdk_bdev_get_optimal_open_zones(spdk_bdev_desc_get_bdev(dev
->base_bdev_desc
));
/* Total zone count: bands times parallel units.
 * NOTE(review): return-type line missing from the damaged source; size_t
 * assumed — confirm upstream. */
static inline size_t
ftl_get_num_zones(const struct spdk_ftl_dev *dev)
{
        return ftl_get_num_bands(dev) * ftl_get_num_punits(dev);
}
337 ftl_get_num_blocks_in_zone(const struct spdk_ftl_dev
*dev
)
339 return spdk_bdev_get_zone_size(spdk_bdev_desc_get_bdev(dev
->base_bdev_desc
));
/* Blocks per band: parallel units times blocks per zone. */
static inline uint64_t
ftl_get_num_blocks_in_band(const struct spdk_ftl_dev *dev)
{
        return ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev);
}
348 static inline uint64_t
349 ftl_addr_get_zone_slba(const struct spdk_ftl_dev
*dev
, struct ftl_addr addr
)
351 return addr
.offset
-= (addr
.offset
% ftl_get_num_blocks_in_zone(dev
));
354 static inline uint64_t
355 ftl_addr_get_band(const struct spdk_ftl_dev
*dev
, struct ftl_addr addr
)
357 return addr
.offset
/ ftl_get_num_blocks_in_band(dev
);
360 static inline uint64_t
361 ftl_addr_get_punit(const struct spdk_ftl_dev
*dev
, struct ftl_addr addr
)
363 return (addr
.offset
/ ftl_get_num_blocks_in_zone(dev
)) % ftl_get_num_punits(dev
);
366 static inline uint64_t
367 ftl_addr_get_zone_offset(const struct spdk_ftl_dev
*dev
, struct ftl_addr addr
)
369 return addr
.offset
% ftl_get_num_blocks_in_zone(dev
);
/* Size in bytes of a band's valid-block bitmap (one bit per block,
 * rounded up to whole bytes).
 * NOTE(review): return-type line missing from the damaged source; size_t
 * assumed (matches the explicit cast) — confirm upstream. */
static inline size_t
ftl_vld_map_size(const struct spdk_ftl_dev *dev)
{
        return (size_t)spdk_divide_round_up(ftl_get_num_blocks_in_band(dev), CHAR_BIT);
}
379 ftl_addr_packed(const struct spdk_ftl_dev
*dev
)
381 return dev
->addr_len
< 32;
/* Flush one L2P entry to persistent memory (PMDK builds only).
 * NOTE(review): return-type line missing from the damaged source; void
 * assumed — confirm upstream. */
static inline void
ftl_l2p_lba_persist(const struct spdk_ftl_dev *dev, uint64_t lba)
{
#ifdef SPDK_CONFIG_PMDK
        /* Entry width depends on whether the packed (32-bit) layout is in use. */
        size_t ftl_addr_size = ftl_addr_packed(dev) ? 4 : 8;

        pmem_persist((char *)dev->l2p + (lba * ftl_addr_size), ftl_addr_size);
#else /* SPDK_CONFIG_PMDK */
        SPDK_ERRLOG("Libpmem not available, cannot flush l2p to pmem\n");
        /* NOTE(review): one line appears missing here in the damaged source
         * (likely an assert) — confirm the non-PMDK fallback upstream. */
#endif /* SPDK_CONFIG_PMDK */
}
397 ftl_addr_invalid(struct ftl_addr addr
)
399 return addr
.offset
== ftl_to_addr(FTL_ADDR_INVALID
).offset
;
403 ftl_addr_cached(struct ftl_addr addr
)
405 return !ftl_addr_invalid(addr
) && addr
.cached
;
408 static inline struct ftl_addr
409 ftl_addr_to_packed(const struct spdk_ftl_dev
*dev
, struct ftl_addr addr
)
411 struct ftl_addr p
= {};
413 if (ftl_addr_invalid(addr
)) {
414 p
= ftl_to_addr_packed(FTL_ADDR_INVALID
);
415 } else if (ftl_addr_cached(addr
)) {
417 p
.pack
.cache_offset
= (uint32_t) addr
.cache_offset
;
419 p
.pack
.offset
= (uint32_t) addr
.offset
;
425 static inline struct ftl_addr
426 ftl_addr_from_packed(const struct spdk_ftl_dev
*dev
, struct ftl_addr p
)
428 struct ftl_addr addr
= {};
430 if (p
.pack
.offset
== (uint32_t)FTL_ADDR_INVALID
) {
431 addr
= ftl_to_addr(FTL_ADDR_INVALID
);
432 } else if (p
.pack
.cached
) {
434 addr
.cache_offset
= p
.pack
.cache_offset
;
/*
 * Atomic L2P table accessors. The table is a flat array of either 32-bit
 * (packed) or 64-bit entries; `bits` selects the element width via token
 * pasting. Sequentially-consistent ordering is used for both loads and
 * stores.
 */
#define _ftl_l2p_set(l2p, off, val, bits) \
        __atomic_store_n(((uint##bits##_t *)(l2p)) + (off), val, __ATOMIC_SEQ_CST)

#define _ftl_l2p_set32(l2p, off, val) \
        _ftl_l2p_set(l2p, off, val, 32)

#define _ftl_l2p_set64(l2p, off, val) \
        _ftl_l2p_set(l2p, off, val, 64)

#define _ftl_l2p_get(l2p, off, bits) \
        __atomic_load_n(((uint##bits##_t *)(l2p)) + (off), __ATOMIC_SEQ_CST)

#define _ftl_l2p_get32(l2p, off) \
        _ftl_l2p_get(l2p, off, 32)

#define _ftl_l2p_get64(l2p, off) \
        _ftl_l2p_get(l2p, off, 64)

/* Two addresses are equal iff their raw offsets match. */
#define ftl_addr_cmp(p1, p2) \
        ((p1).offset == (p2).offset)
464 ftl_l2p_set(struct spdk_ftl_dev
*dev
, uint64_t lba
, struct ftl_addr addr
)
466 assert(dev
->num_lbas
> lba
);
468 if (ftl_addr_packed(dev
)) {
469 _ftl_l2p_set32(dev
->l2p
, lba
, ftl_addr_to_packed(dev
, addr
).offset
);
471 _ftl_l2p_set64(dev
->l2p
, lba
, addr
.offset
);
474 if (dev
->l2p_pmem_len
!= 0) {
475 ftl_l2p_lba_persist(dev
, lba
);
479 static inline struct ftl_addr
480 ftl_l2p_get(struct spdk_ftl_dev
*dev
, uint64_t lba
)
482 assert(dev
->num_lbas
> lba
);
484 if (ftl_addr_packed(dev
)) {
485 return ftl_addr_from_packed(dev
, ftl_to_addr_packed(
486 _ftl_l2p_get32(dev
->l2p
, lba
)));
488 return ftl_to_addr(_ftl_l2p_get64(dev
->l2p
, lba
));
493 ftl_dev_has_nv_cache(const struct spdk_ftl_dev
*dev
)
495 return dev
->nv_cache
.bdev_desc
!= NULL
;
/* On-disk header format version of the non-volatile cache. */
#define FTL_NV_CACHE_HEADER_VERSION     (1)
/* First data block; block 0 holds the header. */
#define FTL_NV_CACHE_DATA_OFFSET        (1)
/* The write phase is stored in the top bits of each block's packed LBA. */
#define FTL_NV_CACHE_PHASE_OFFSET       (62)
#define FTL_NV_CACHE_PHASE_COUNT        (4)
#define FTL_NV_CACHE_PHASE_MASK         (3ULL << FTL_NV_CACHE_PHASE_OFFSET)
/* Invalid-LBA sentinel with the phase bits cleared. */
#define FTL_NV_CACHE_LBA_INVALID        (FTL_LBA_INVALID & ~FTL_NV_CACHE_PHASE_MASK)
/* Valid phases are 1..3; 0 means the block was never written.
 * NOTE(review): return-type line missing from the damaged source; bool
 * assumed — confirm upstream. */
static inline bool
ftl_nv_cache_phase_is_valid(unsigned int phase)
{
        return phase > 0 && phase <= 3;
}
/* Advance to the next phase in the 1 -> 2 -> 3 -> 1 cycle. */
static inline unsigned int
ftl_nv_cache_next_phase(unsigned int current)
{
        static const unsigned int phases[] = { 0, 2, 3, 1 };
        assert(ftl_nv_cache_phase_is_valid(current));
        return phases[current];
}
/* Step back to the previous phase in the 1 -> 2 -> 3 -> 1 cycle. */
static inline unsigned int
ftl_nv_cache_prev_phase(unsigned int current)
{
        static const unsigned int phases[] = { 0, 3, 1, 2 };
        assert(ftl_nv_cache_phase_is_valid(current));
        return phases[current];
}
527 static inline uint64_t
528 ftl_nv_cache_pack_lba(uint64_t lba
, unsigned int phase
)
530 assert(ftl_nv_cache_phase_is_valid(phase
));
531 return (lba
& ~FTL_NV_CACHE_PHASE_MASK
) | ((uint64_t)phase
<< FTL_NV_CACHE_PHASE_OFFSET
);
535 ftl_nv_cache_unpack_lba(uint64_t in_lba
, uint64_t *out_lba
, unsigned int *phase
)
537 *out_lba
= in_lba
& ~FTL_NV_CACHE_PHASE_MASK
;
538 *phase
= (in_lba
& FTL_NV_CACHE_PHASE_MASK
) >> FTL_NV_CACHE_PHASE_OFFSET
;
540 /* If the phase is invalid the block wasn't written yet, so treat the LBA as invalid too */
541 if (!ftl_nv_cache_phase_is_valid(*phase
) || *out_lba
== FTL_NV_CACHE_LBA_INVALID
) {
542 *out_lba
= FTL_LBA_INVALID
;
547 ftl_is_append_supported(const struct spdk_ftl_dev
*dev
)
549 return dev
->conf
.use_append
;
552 #endif /* FTL_CORE_H */