/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef FTL_CORE_H
#define FTL_CORE_H

#include "spdk/stdinc.h"
#include "spdk/uuid.h"
#include "spdk/thread.h"
#include "spdk/util.h"
#include "spdk_internal/log.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/ftl.h"
#include "spdk/bdev.h"
#include "spdk/bdev_zone.h"

#include "ftl_addr.h"
#include "ftl_io.h"
#include "ftl_trace.h"

#ifdef SPDK_CONFIG_PMDK
#include "libpmem.h"
#endif /* SPDK_CONFIG_PMDK */

struct spdk_ftl_dev;
struct ftl_band;
struct ftl_zone;
struct ftl_io;
struct ftl_restore;
struct ftl_wptr;
struct ftl_flush;
struct ftl_reloc;
struct ftl_anm_event;
struct ftl_band_flush;

struct ftl_stats {
        /* Number of writes scheduled directly by the user */
        uint64_t write_user;

        /* Total number of writes */
        uint64_t write_total;

        /* Traces */
        struct ftl_trace trace;

        /* Number of limits applied */
        uint64_t limits[SPDK_FTL_LIMIT_MAX];
};

struct ftl_global_md {
        /* Device instance */
        struct spdk_uuid uuid;
        /* Size of the l2p table */
        uint64_t num_lbas;
};

struct ftl_nv_cache {
        /* Write buffer cache bdev */
        struct spdk_bdev_desc *bdev_desc;
        /* Write pointer */
        uint64_t current_addr;
        /* Number of available blocks left */
        uint64_t num_available;
        /* Maximum number of blocks */
        uint64_t num_data_blocks;
        /*
         * Phase of the current cycle of writes. Each time the whole cache area is filled, the
         * phase is advanced. The current phase is saved in every IO's metadata, as well as in
         * the header saved in the first sector. By looking at the phase of each block, it's
         * possible to find the oldest block and replay the order of the writes when recovering
         * the data from the cache.
         */
        unsigned int phase;
        /* Indicates that the data can be written to the cache */
        bool ready;
        /* Metadata pool */
        struct spdk_mempool *md_pool;
        /* DMA buffer for writing the header */
        void *dma_buf;
        /* Cache lock */
        pthread_spinlock_t lock;
};

struct ftl_batch {
        /* Queue of write buffer entries, can reach up to xfer_size entries */
        TAILQ_HEAD(, ftl_wbuf_entry) entries;
        /* Number of entries in the queue above */
        uint32_t num_entries;
        /* Index within spdk_ftl_dev.batch_array */
        uint32_t index;
        struct iovec *iov;
        void *metadata;
        TAILQ_ENTRY(ftl_batch) tailq;
};

struct spdk_ftl_dev {
        /* Device instance */
        struct spdk_uuid uuid;
        /* Device name */
        char *name;
        /* Configuration */
        struct spdk_ftl_conf conf;

        /* Indicates the device is fully initialized */
        int initialized;
        /* Indicates the device is about to be stopped */
        int halt;
        /* Indicates the device is about to start stopping - used to handle multiple stop requests */
        bool halt_started;

        /* Underlying device */
        struct spdk_bdev_desc *base_bdev_desc;

        /* Non-volatile write buffer cache */
        struct ftl_nv_cache nv_cache;

        /* LBA map memory pool */
        struct spdk_mempool *lba_pool;

        /* LBA map requests pool */
        struct spdk_mempool *lba_request_pool;

        /* Media management events pool */
        struct spdk_mempool *media_events_pool;

        /* Statistics */
        struct ftl_stats stats;

        /* Current sequence number */
        uint64_t seq;

        /* Array of bands */
        struct ftl_band *bands;
        /* Number of operational bands */
        size_t num_bands;
        /* Next write band */
        struct ftl_band *next_band;
        /* Free band list */
        LIST_HEAD(, ftl_band) free_bands;
        /* Closed bands list */
        LIST_HEAD(, ftl_band) shut_bands;
        /* Number of free bands */
        size_t num_free;

        /* List of write pointers */
        LIST_HEAD(, ftl_wptr) wptr_list;

        /* Logical -> physical table */
        void *l2p;
        /* Size of the l2p table */
        uint64_t num_lbas;
        /* Size of pages mmapped for l2p, valid only for mapping on persistent memory */
        size_t l2p_pmem_len;

        /* Address size */
        size_t addr_len;

        /* Flush list */
        LIST_HEAD(, ftl_flush) flush_list;
        /* List of band flush requests */
        LIST_HEAD(, ftl_band_flush) band_flush_list;

        /* Device specific md buffer */
        struct ftl_global_md global_md;

        /* Metadata size */
        size_t md_size;
        void *md_buf;

        /* Transfer unit size */
        size_t xfer_size;

        /* Current user write limit */
        int limit;

        /* Inflight IO operations */
        uint32_t num_inflight;

        /* Manages data relocation */
        struct ftl_reloc *reloc;

        /* Thread on which the poller is running */
        struct spdk_thread *core_thread;
        /* IO channel */
        struct spdk_io_channel *ioch;
        /* Poller */
        struct spdk_poller *core_poller;

        /* The IO channel array provides a means of retrieving write buffer entries
         * from their addresses stored in the L2P. The address is divided into two
         * parts - an IO channel offset pointing at a specific IO channel (within this
         * array) and an entry offset pointing at a specific entry within that IO
         * channel. See the illustrative sketch following this structure definition.
         */
        struct ftl_io_channel **ioch_array;
        TAILQ_HEAD(, ftl_io_channel) ioch_queue;
        uint64_t num_io_channels;
        /* Value required to shift address of a write buffer entry to retrieve
         * the IO channel it's part of. The other part of the address describes
         * the offset of an entry within the IO channel's entry array.
         */
        uint64_t ioch_shift;

        /* Write buffer batches */
#define FTL_BATCH_COUNT 4096
        struct ftl_batch batch_array[FTL_BATCH_COUNT];
        /* Iovec buffer used by batches */
        struct iovec *iov_buf;
        /* Batch currently being filled */
        struct ftl_batch *current_batch;
        /* Full and ready to be sent batches. A batch is put on this queue in
         * case it's already filled, but cannot be sent.
         */
        TAILQ_HEAD(, ftl_batch) pending_batches;
        TAILQ_HEAD(, ftl_batch) free_batches;

        /* Entry on the global devices list */
        STAILQ_ENTRY(spdk_ftl_dev) stailq;
};
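
/*
 * Illustrative sketch (not part of the driver): given a write buffer entry
 * address recovered from the L2P, the ioch_array/ioch_shift fields above
 * imply a decoding of the following form. This assumes the IO channel index
 * occupies the high bits, as the ioch_shift comment suggests; the
 * authoritative encoding is wherever the address is produced.
 *
 *     uint64_t ioch_index  = entry_addr >> dev->ioch_shift;
 *     uint64_t entry_index = entry_addr & ((1ULL << dev->ioch_shift) - 1);
 *     struct ftl_io_channel *ioch = dev->ioch_array[ioch_index];
 */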

struct ftl_nv_cache_header {
        /* Version of the header */
        uint32_t version;
        /* UUID of the FTL device */
        struct spdk_uuid uuid;
        /* Size of the non-volatile cache (in blocks) */
        uint64_t size;
        /* Contains the next address to be written after clean shutdown, invalid LBA otherwise */
        uint64_t current_addr;
        /* Current phase */
        uint8_t phase;
        /* Checksum of the header, needs to be last element */
        uint32_t checksum;
} __attribute__((packed));

struct ftl_media_event {
        /* Owner */
        struct spdk_ftl_dev *dev;
        /* Media event */
        struct spdk_bdev_media_event event;
};

typedef void (*ftl_restore_fn)(struct ftl_restore *, int, void *cb_arg);

void ftl_apply_limits(struct spdk_ftl_dev *dev);
void ftl_io_read(struct ftl_io *io);
void ftl_io_write(struct ftl_io *io);
int ftl_flush_wbuf(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg);
int ftl_current_limit(const struct spdk_ftl_dev *dev);
int ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr);
int ftl_task_core(void *ctx);
int ftl_task_read(void *ctx);
void ftl_process_anm_event(struct ftl_anm_event *event);
size_t ftl_tail_md_num_blocks(const struct spdk_ftl_dev *dev);
size_t ftl_tail_md_hdr_num_blocks(void);
size_t ftl_vld_map_num_blocks(const struct spdk_ftl_dev *dev);
size_t ftl_lba_map_num_blocks(const struct spdk_ftl_dev *dev);
size_t ftl_head_md_num_blocks(const struct spdk_ftl_dev *dev);
int ftl_restore_md(struct spdk_ftl_dev *dev, ftl_restore_fn cb, void *cb_arg);
int ftl_restore_device(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg);
void ftl_restore_nv_cache(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg);
int ftl_band_set_direct_access(struct ftl_band *band, bool access);
bool ftl_addr_is_written(struct ftl_band *band, struct ftl_addr addr);
int ftl_flush_active_bands(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg);
int ftl_nv_cache_write_header(struct ftl_nv_cache *nv_cache, bool shutdown,
                              spdk_bdev_io_completion_cb cb_fn, void *cb_arg);
int ftl_nv_cache_scrub(struct ftl_nv_cache *nv_cache, spdk_bdev_io_completion_cb cb_fn,
                       void *cb_arg);
void ftl_get_media_events(struct spdk_ftl_dev *dev);
int ftl_io_channel_poll(void *arg);
void ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_wbuf_entry *entry);
struct spdk_io_channel *ftl_get_io_channel(const struct spdk_ftl_dev *dev);
struct ftl_io_channel *ftl_io_channel_get_ctx(struct spdk_io_channel *ioch);

#define ftl_to_addr(address) \
        (struct ftl_addr) { .offset = (uint64_t)(address) }

#define ftl_to_addr_packed(address) \
        (struct ftl_addr) { .pack.offset = (uint32_t)(address) }

static inline struct spdk_thread *
ftl_get_core_thread(const struct spdk_ftl_dev *dev)
{
        return dev->core_thread;
}

static inline size_t
ftl_get_num_bands(const struct spdk_ftl_dev *dev)
{
        return dev->num_bands;
}

static inline size_t
ftl_get_num_punits(const struct spdk_ftl_dev *dev)
{
        return spdk_bdev_get_optimal_open_zones(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
}

static inline size_t
ftl_get_num_zones(const struct spdk_ftl_dev *dev)
{
        return ftl_get_num_bands(dev) * ftl_get_num_punits(dev);
}

static inline size_t
ftl_get_num_blocks_in_zone(const struct spdk_ftl_dev *dev)
{
        return spdk_bdev_get_zone_size(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
}

static inline uint64_t
ftl_get_num_blocks_in_band(const struct spdk_ftl_dev *dev)
{
        return ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev);
}

static inline uint64_t
ftl_addr_get_zone_slba(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return addr.offset - (addr.offset % ftl_get_num_blocks_in_zone(dev));
}

static inline uint64_t
ftl_addr_get_band(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return addr.offset / ftl_get_num_blocks_in_band(dev);
}

static inline uint64_t
ftl_addr_get_punit(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return (addr.offset / ftl_get_num_blocks_in_zone(dev)) % ftl_get_num_punits(dev);
}

static inline uint64_t
ftl_addr_get_zone_offset(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return addr.offset % ftl_get_num_blocks_in_zone(dev);
}
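
/*
 * Worked example (illustrative numbers only): with 4 punits and 1024 blocks
 * per zone, a band spans 4 * 1024 = 4096 blocks, and an address with
 * offset 5000 decodes as:
 *
 *     band        = 5000 / 4096       = 1
 *     punit       = (5000 / 1024) % 4 = 0
 *     zone offset = 5000 % 1024       = 904
 */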

static inline size_t
ftl_vld_map_size(const struct spdk_ftl_dev *dev)
{
        return (size_t)spdk_divide_round_up(ftl_get_num_blocks_in_band(dev), CHAR_BIT);
}

static inline int
ftl_addr_packed(const struct spdk_ftl_dev *dev)
{
        return dev->addr_len < 32;
}
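
/*
 * When the physical address fits in under 32 bits, the L2P keeps 4-byte
 * packed entries (see the 32-bit paths in ftl_l2p_set()/ftl_l2p_get() and
 * the 4-vs-8-byte stride in ftl_l2p_lba_persist() below); otherwise full
 * 8-byte ftl_addr values are stored.
 */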

static inline void
ftl_l2p_lba_persist(const struct spdk_ftl_dev *dev, uint64_t lba)
{
#ifdef SPDK_CONFIG_PMDK
        size_t ftl_addr_size = ftl_addr_packed(dev) ? 4 : 8;
        pmem_persist((char *)dev->l2p + (lba * ftl_addr_size), ftl_addr_size);
#else /* SPDK_CONFIG_PMDK */
        SPDK_ERRLOG("Libpmem not available, cannot flush l2p to pmem\n");
        assert(0);
#endif /* SPDK_CONFIG_PMDK */
}

static inline int
ftl_addr_invalid(struct ftl_addr addr)
{
        return addr.offset == ftl_to_addr(FTL_ADDR_INVALID).offset;
}

static inline int
ftl_addr_cached(struct ftl_addr addr)
{
        return !ftl_addr_invalid(addr) && addr.cached;
}

static inline struct ftl_addr
ftl_addr_to_packed(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        struct ftl_addr p = {};

        if (ftl_addr_invalid(addr)) {
                p = ftl_to_addr_packed(FTL_ADDR_INVALID);
        } else if (ftl_addr_cached(addr)) {
                p.pack.cached = 1;
                p.pack.cache_offset = (uint32_t)addr.cache_offset;
        } else {
                p.pack.offset = (uint32_t)addr.offset;
        }

        return p;
}

static inline struct ftl_addr
ftl_addr_from_packed(const struct spdk_ftl_dev *dev, struct ftl_addr p)
{
        struct ftl_addr addr = {};

        if (p.pack.offset == (uint32_t)FTL_ADDR_INVALID) {
                addr = ftl_to_addr(FTL_ADDR_INVALID);
        } else if (p.pack.cached) {
                addr.cached = 1;
                addr.cache_offset = p.pack.cache_offset;
        } else {
                addr = p;
        }

        return addr;
}

#define _ftl_l2p_set(l2p, off, val, bits) \
        __atomic_store_n(((uint##bits##_t *)(l2p)) + (off), val, __ATOMIC_SEQ_CST)

#define _ftl_l2p_set32(l2p, off, val) \
        _ftl_l2p_set(l2p, off, val, 32)

#define _ftl_l2p_set64(l2p, off, val) \
        _ftl_l2p_set(l2p, off, val, 64)

#define _ftl_l2p_get(l2p, off, bits) \
        __atomic_load_n(((uint##bits##_t *)(l2p)) + (off), __ATOMIC_SEQ_CST)

#define _ftl_l2p_get32(l2p, off) \
        _ftl_l2p_get(l2p, off, 32)

#define _ftl_l2p_get64(l2p, off) \
        _ftl_l2p_get(l2p, off, 64)

#define ftl_addr_cmp(p1, p2) \
        ((p1).offset == (p2).offset)

static inline void
ftl_l2p_set(struct spdk_ftl_dev *dev, uint64_t lba, struct ftl_addr addr)
{
        assert(dev->num_lbas > lba);

        if (ftl_addr_packed(dev)) {
                _ftl_l2p_set32(dev->l2p, lba, ftl_addr_to_packed(dev, addr).offset);
        } else {
                _ftl_l2p_set64(dev->l2p, lba, addr.offset);
        }

        if (dev->l2p_pmem_len != 0) {
                ftl_l2p_lba_persist(dev, lba);
        }
}

static inline struct ftl_addr
ftl_l2p_get(struct spdk_ftl_dev *dev, uint64_t lba)
{
        assert(dev->num_lbas > lba);

        if (ftl_addr_packed(dev)) {
                return ftl_addr_from_packed(dev, ftl_to_addr_packed(
                                _ftl_l2p_get32(dev->l2p, lba)));
        } else {
                return ftl_to_addr(_ftl_l2p_get64(dev->l2p, lba));
        }
}
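
/*
 * Typical lookup flow (illustrative sketch, not a definitive code path),
 * using only the helpers defined in this header:
 *
 *     struct ftl_addr addr = ftl_l2p_get(dev, lba);
 *
 *     if (ftl_addr_invalid(addr)) {
 *             ... LBA has no mapping yet ...
 *     } else if (ftl_addr_cached(addr)) {
 *             ... data still lives in a write buffer entry ...
 *     } else {
 *             ... addr.offset names a block on the base bdev ...
 *     }
 */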

static inline bool
ftl_dev_has_nv_cache(const struct spdk_ftl_dev *dev)
{
        return dev->nv_cache.bdev_desc != NULL;
}

#define FTL_NV_CACHE_HEADER_VERSION (1)
#define FTL_NV_CACHE_DATA_OFFSET (1)
#define FTL_NV_CACHE_PHASE_OFFSET (62)
#define FTL_NV_CACHE_PHASE_COUNT (4)
#define FTL_NV_CACHE_PHASE_MASK (3ULL << FTL_NV_CACHE_PHASE_OFFSET)
#define FTL_NV_CACHE_LBA_INVALID (FTL_LBA_INVALID & ~FTL_NV_CACHE_PHASE_MASK)

static inline bool
ftl_nv_cache_phase_is_valid(unsigned int phase)
{
        return phase > 0 && phase <= 3;
}
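
/*
 * Valid phases cycle 1 -> 2 -> 3 -> 1; phase 0 is never used for written
 * data. The lookup tables below are indexed by the current phase and map
 * it to its successor/predecessor in that cycle.
 */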

static inline unsigned int
ftl_nv_cache_next_phase(unsigned int current)
{
        static const unsigned int phases[] = { 0, 2, 3, 1 };
        assert(ftl_nv_cache_phase_is_valid(current));
        return phases[current];
}

static inline unsigned int
ftl_nv_cache_prev_phase(unsigned int current)
{
        static const unsigned int phases[] = { 0, 3, 1, 2 };
        assert(ftl_nv_cache_phase_is_valid(current));
        return phases[current];
}

static inline uint64_t
ftl_nv_cache_pack_lba(uint64_t lba, unsigned int phase)
{
        assert(ftl_nv_cache_phase_is_valid(phase));
        return (lba & ~FTL_NV_CACHE_PHASE_MASK) | ((uint64_t)phase << FTL_NV_CACHE_PHASE_OFFSET);
}
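
/*
 * Worked example (illustrative values): with FTL_NV_CACHE_PHASE_OFFSET = 62
 * the phase occupies the top two bits of the stored LBA, so packing
 * lba = 0x1000 with phase = 2 yields
 *
 *     0x1000 | (2ULL << 62) = 0x8000000000001000
 *
 * and ftl_nv_cache_unpack_lba() recovers lba = 0x1000, phase = 2.
 */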

static inline void
ftl_nv_cache_unpack_lba(uint64_t in_lba, uint64_t *out_lba, unsigned int *phase)
{
        *out_lba = in_lba & ~FTL_NV_CACHE_PHASE_MASK;
        *phase = (in_lba & FTL_NV_CACHE_PHASE_MASK) >> FTL_NV_CACHE_PHASE_OFFSET;

        /* If the phase is invalid the block wasn't written yet, so treat the LBA as invalid too */
        if (!ftl_nv_cache_phase_is_valid(*phase) || *out_lba == FTL_NV_CACHE_LBA_INVALID) {
                *out_lba = FTL_LBA_INVALID;
        }
}

static inline bool
ftl_is_append_supported(const struct spdk_ftl_dev *dev)
{
        return dev->conf.use_append;
}

#endif /* FTL_CORE_H */