]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/ftl/ftl_init.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / lib / ftl / ftl_init.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "spdk/stdinc.h"
35 #include "spdk/nvme.h"
36 #include "spdk/io_channel.h"
37 #include "spdk/bdev_module.h"
38 #include "spdk_internal/log.h"
39 #include "spdk/ftl.h"
40 #include "ftl_core.h"
41 #include "ftl_anm.h"
42 #include "ftl_io.h"
43 #include "ftl_reloc.h"
44 #include "ftl_rwb.h"
45 #include "ftl_band.h"
46 #include "ftl_debug.h"
47
48 #define FTL_CORE_RING_SIZE 4096
49 #define FTL_INIT_TIMEOUT 30
50 #define FTL_NSID 1
51
/* True when the closed ranges [s1, e1] and [s2, e2] overlap. */
#define ftl_range_intersect(s1, e1, s2, e2) \
	((s1) <= (e2) && (s2) <= (e1))

/* Completion context for admin commands that are polled to completion
 * synchronously (see ftl_admin_cb / ftl_retrieve_chunk_info). */
struct ftl_admin_cmpl {
	/* Copy of the NVMe completion entry; valid once complete != 0 */
	struct spdk_nvme_cpl status;

	/* Set to 1 by ftl_admin_cb when the command finishes */
	int complete;
};
60
/* Registry of all initialized FTL devices; guarded by g_ftl_queue_lock.
 * Used to detect overlapping parallel-unit ranges on the same controller. */
static STAILQ_HEAD(, spdk_ftl_dev) g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Default configuration applied when the caller passes no spdk_ftl_conf
 * (see spdk_ftl_conf_init_defaults / spdk_ftl_dev_init). */
static const struct spdk_ftl_conf g_default_conf = {
	.defrag = {
		.limits = {
			/* 5 free bands / 0 % host writes */
			[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5, .limit = 0 },
			/* 10 free bands / 5 % host writes */
			[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
			/* 20 free bands / 40 % host writes */
			[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
			/* 40 free bands / 100 % host writes - defrag starts running */
			[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
		},
		/* 10 percent valid lbks */
		.invalid_thld = 10,
	},
	/* 20% spare lbks */
	.lba_rsvd = 20,
	/* 6M write buffer */
	.rwb_size = 6 * 1024 * 1024,
	/* 90% band fill threshold */
	.band_thld = 90,
	/* Max 32 IO depth per band relocate */
	.max_reloc_qdepth = 32,
	/* Max 3 active band relocates */
	.max_active_relocs = 3,
	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
	.user_io_pool_size = 2048,
	/* Number of interleaving units per ws_opt */
	/* 1 for default and 3 for 3D TLC NAND */
	.num_interleave_units = 1,
	/*
	 * If clear ftl will return error when restoring after a dirty shutdown
	 * If set, last band will be padded, ftl will restore based only on closed bands - this
	 * will result in lost data after recovery.
	 */
	.allow_open_bands = false,
};

/* Forward declaration: synchronous teardown used on init failure paths. */
static void ftl_dev_free_sync(struct spdk_ftl_dev *dev);
102
103 static void
104 ftl_admin_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
105 {
106 struct ftl_admin_cmpl *cmpl = ctx;
107
108 cmpl->complete = 1;
109 cmpl->status = *cpl;
110 }
111
/*
 * Allocate the band's validity bitmap (one bit per logical block in the
 * band) and reset its metadata to a clean state.
 * Returns 0 on success, -ENOMEM if the bitmap allocation fails.
 */
static int
ftl_band_init_md(struct ftl_band *band)
{
	struct ftl_lba_map *lba_map = &band->lba_map;

	lba_map->vld = spdk_bit_array_create(ftl_num_band_lbks(band->dev));
	if (!lba_map->vld) {
		return -ENOMEM;
	}

	/* NOTE(review): pthread_spin_init() return value is ignored here —
	 * presumably it cannot fail with PTHREAD_PROCESS_PRIVATE; confirm. */
	pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
	ftl_band_md_clear(band);
	return 0;
}
126
127 static int
128 ftl_check_conf(const struct spdk_ftl_conf *conf,
129 const struct spdk_ocssd_geometry_data *geo)
130 {
131 size_t i;
132
133 if (conf->defrag.invalid_thld >= 100) {
134 return -1;
135 }
136 if (conf->lba_rsvd >= 100) {
137 return -1;
138 }
139 if (conf->lba_rsvd == 0) {
140 return -1;
141 }
142 if (conf->rwb_size == 0) {
143 return -1;
144 }
145 if (conf->rwb_size % FTL_BLOCK_SIZE != 0) {
146 return -1;
147 }
148 if (geo->ws_opt % conf->num_interleave_units != 0) {
149 return -1;
150 }
151
152 for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
153 if (conf->defrag.limits[i].limit > 100) {
154 return -1;
155 }
156 }
157
158 return 0;
159 }
160
/*
 * Validate init options against the retrieved geometry and reject a
 * parallel-unit range that overlaps an already-registered device on the
 * same controller (matching transport ID).
 * Returns 0 when acceptable, -1 otherwise.
 */
static int
ftl_check_init_opts(const struct spdk_ftl_dev_init_opts *opts,
		    const struct spdk_ocssd_geometry_data *geo)
{
	struct spdk_ftl_dev *dev;
	size_t num_punits = geo->num_pu * geo->num_grp;
	int rc = 0;

	if (opts->range.begin > opts->range.end || opts->range.end >= num_punits) {
		return -1;
	}

	if (ftl_check_conf(opts->conf, geo)) {
		return -1;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);

	STAILQ_FOREACH(dev, &g_ftl_queue, stailq) {
		/* Different controller - ranges cannot clash */
		if (spdk_nvme_transport_id_compare(&dev->trid, &opts->trid)) {
			continue;
		}

		if (ftl_range_intersect(opts->range.begin, opts->range.end,
					dev->range.begin, dev->range.end)) {
			rc = -1;
			goto out;
		}
	}

out:
	pthread_mutex_unlock(&g_ftl_queue_lock);
	return rc;
}
195
/*
 * Synchronously read num_entries chunk information entries starting at the
 * chunk addressed by ppa from the OCSSD chunk info log page.
 * Busy-polls admin completions on the calling thread until done (TODO in
 * ftl_dev_retrieve_geo notes the same loop lacks a timeout).
 * Returns 0 on success, -1 on submission failure or error completion.
 */
int
ftl_retrieve_chunk_info(struct spdk_ftl_dev *dev, struct ftl_ppa ppa,
			struct spdk_ocssd_chunk_information_entry *info,
			unsigned int num_entries)
{
	/* volatile: written from the admin completion callback while this
	 * thread spins on cmpl.complete */
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);
	/* Log page is laid out grp-major, then pu, then chunk */
	uint64_t offset = (ppa.grp * dev->geo.num_pu + ppa.pu) *
			  dev->geo.num_chk + ppa.chk;

	if (spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, nsid,
					     info, num_entries * sizeof(*info),
					     offset * sizeof(*info),
					     ftl_admin_cb, (void *)&cmpl)) {
		return -1;
	}

	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	return 0;
}
225
/*
 * Fetch chunk information for every chunk of a single parallel unit into
 * info[0..num_chk), issuing the log page reads in FTL_BLOCK_SIZE-sized
 * batches. Returns 0 on success, -1 if any batch fails.
 */
static int
ftl_retrieve_punit_chunk_info(struct spdk_ftl_dev *dev, const struct ftl_punit *punit,
			      struct spdk_ocssd_chunk_information_entry *info)
{
	uint32_t i = 0;
	/* Number of entries that fit in one block-sized transfer */
	unsigned int num_entries = FTL_BLOCK_SIZE / sizeof(*info);
	struct ftl_ppa chunk_ppa = punit->start_ppa;

	for (i = 0; i < dev->geo.num_chk; i += num_entries, chunk_ppa.chk += num_entries) {
		/* Clamp the final (possibly partial) batch */
		if (num_entries > dev->geo.num_chk - i) {
			num_entries = dev->geo.num_chk - i;
		}

		if (ftl_retrieve_chunk_info(dev, chunk_ppa, &info[i], num_entries)) {
			return -1;
		}
	}

	return 0;
}
246
247 static unsigned char
248 ftl_get_chunk_state(const struct spdk_ocssd_chunk_information_entry *info)
249 {
250 if (info->cs.free) {
251 return FTL_CHUNK_STATE_FREE;
252 }
253
254 if (info->cs.open) {
255 return FTL_CHUNK_STATE_OPEN;
256 }
257
258 if (info->cs.closed) {
259 return FTL_CHUNK_STATE_CLOSED;
260 }
261
262 if (info->cs.offline) {
263 return FTL_CHUNK_STATE_BAD;
264 }
265
266 assert(0 && "Invalid block state");
267 return FTL_CHUNK_STATE_BAD;
268 }
269
/*
 * Drop bands that ended up with zero usable chunks (all chunks bad) from
 * the shut_bands list and from the device's band count, so later passes
 * never pick them.
 */
static void
ftl_remove_empty_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *temp_band;

	/* Remove band from shut_bands list to prevent further processing */
	/* if all blocks on this band are bad */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->num_chunks) {
			dev->num_bands--;
			LIST_REMOVE(band, list_entry);
		}
	}
}
284
/*
 * Build the band array: allocate one band per chunk index, link all bands
 * onto shut_bands (in index order), then query each parallel unit's chunk
 * info log and attach the per-punit chunk descriptors to their bands,
 * skipping offline chunks. Bands left with no chunks are removed.
 *
 * On failure the partially-initialized dev->bands array is left in place;
 * callers rely on ftl_dev_free_sync() for cleanup.
 * Returns 0 on success, -1 / error code otherwise.
 */
static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct spdk_ocssd_chunk_information_entry *info;
	struct ftl_band *band, *pband;
	struct ftl_punit *punit;
	struct ftl_chunk *chunk;
	unsigned int i, j;
	char buf[128];
	int rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->num_bands = ftl_dev_num_bands(dev);
	dev->bands = calloc(ftl_dev_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	/* Scratch buffer reused for every punit's chunk info log */
	info = calloc(dev->geo.num_chk, sizeof(*info));
	if (!info) {
		return -1;
	}

	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		/* Keep shut_bands ordered by band id: insert each new band
		 * after the previously inserted one (pband is always set once
		 * the list is non-empty) */
		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->chunks);
		/* One chunk slot per parallel unit */
		band->chunk_buf = calloc(ftl_dev_num_punits(dev), sizeof(*band->chunk_buf));
		if (!band->chunk_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			goto out;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			goto out;
		}
	}

	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
		punit = &dev->punits[i];

		rc = ftl_retrieve_punit_chunk_info(dev, punit, info);
		if (rc) {
			SPDK_ERRLOG("Failed to retrieve bbt for @ppa: %s [%lu]\n",
				    ftl_ppa2str(punit->start_ppa, buf, sizeof(buf)),
				    ftl_ppa_addr_pack(dev, punit->start_ppa));
			goto out;
		}

		/* Band j owns chunk j of every punit; info[j] is that chunk's
		 * log entry for the current punit */
		for (j = 0; j < ftl_dev_num_bands(dev); ++j) {
			band = &dev->bands[j];
			chunk = &band->chunk_buf[i];
			chunk->pos = i;
			chunk->state = ftl_get_chunk_state(&info[j]);
			chunk->punit = punit;
			chunk->start_ppa = punit->start_ppa;
			chunk->start_ppa.chk = band->id;

			/* Offline chunks stay in chunk_buf but never join the
			 * band's active chunk list */
			if (chunk->state != FTL_CHUNK_STATE_BAD) {
				band->num_chunks++;
				CIRCLEQ_INSERT_TAIL(&band->chunks, chunk, circleq);
			}
		}
	}

	ftl_remove_empty_bands(dev);
out:
	free(info);
	return rc;
}
371
372 static int
373 ftl_dev_init_punits(struct spdk_ftl_dev *dev)
374 {
375 unsigned int i, punit;
376
377 dev->punits = calloc(ftl_dev_num_punits(dev), sizeof(*dev->punits));
378 if (!dev->punits) {
379 return -1;
380 }
381
382 for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
383 dev->punits[i].dev = dev;
384 punit = dev->range.begin + i;
385
386 dev->punits[i].start_ppa.ppa = 0;
387 dev->punits[i].start_ppa.grp = punit % dev->geo.num_grp;
388 dev->punits[i].start_ppa.pu = punit / dev->geo.num_grp;
389 }
390
391 return 0;
392 }
393
/*
 * Issue the OCSSD geometry admin command synchronously and derive the
 * device's PPA address format (bit offsets/masks for lbk, chk, pu, grp
 * fields, packed in that order from bit 0) and transfer size from it.
 * Returns 0 on success, -1 on submission failure or error completion.
 */
static int
ftl_dev_retrieve_geo(struct spdk_ftl_dev *dev)
{
	/* volatile: written by ftl_admin_cb while this thread polls */
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);

	if (spdk_nvme_ocssd_ctrlr_cmd_geometry(dev->ctrlr, nsid, &dev->geo, sizeof(dev->geo),
					       ftl_admin_cb, (void *)&cmpl)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		return -1;
	}

	/* TODO: add a timeout */
	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	/* TODO: add sanity checks for the geo */
	dev->ppa_len = dev->geo.lbaf.grp_len +
		       dev->geo.lbaf.pu_len +
		       dev->geo.lbaf.chk_len +
		       dev->geo.lbaf.lbk_len;

	/* Field layout (LSB first): lbk | chk | pu | grp */
	dev->ppaf.lbk_offset = 0;
	dev->ppaf.lbk_mask   = (1 << dev->geo.lbaf.lbk_len) - 1;
	dev->ppaf.chk_offset = dev->ppaf.lbk_offset + dev->geo.lbaf.lbk_len;
	dev->ppaf.chk_mask   = (1 << dev->geo.lbaf.chk_len) - 1;
	dev->ppaf.pu_offset  = dev->ppaf.chk_offset + dev->geo.lbaf.chk_len;
	dev->ppaf.pu_mask    = (1 << dev->geo.lbaf.pu_len) - 1;
	dev->ppaf.grp_offset = dev->ppaf.pu_offset + dev->geo.lbaf.pu_len;
	dev->ppaf.grp_mask   = (1 << dev->geo.lbaf.grp_len) - 1;

	/* We're using optimal write size as our xfer size */
	dev->xfer_size = dev->geo.ws_opt;

	return 0;
}
437
438 static int
439 ftl_dev_nvme_init(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
440 {
441 uint32_t block_size;
442
443 dev->ctrlr = opts->ctrlr;
444
445 if (spdk_nvme_ctrlr_get_num_ns(dev->ctrlr) != 1) {
446 SPDK_ERRLOG("Unsupported number of namespaces\n");
447 return -1;
448 }
449
450 dev->ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, FTL_NSID);
451 dev->trid = opts->trid;
452 dev->md_size = spdk_nvme_ns_get_md_size(dev->ns);
453
454 block_size = spdk_nvme_ns_get_extended_sector_size(dev->ns);
455 if (block_size != FTL_BLOCK_SIZE) {
456 SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
457 return -1;
458 }
459
460 if (dev->md_size % sizeof(uint32_t) != 0) {
461 /* Metadata pointer must be dword aligned */
462 SPDK_ERRLOG("Unsupported metadata size (%zu)\n", dev->md_size);
463 return -1;
464 }
465
466 return 0;
467 }
468
469 static int
470 ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *bdev_desc)
471 {
472 struct spdk_bdev *bdev;
473
474 if (!bdev_desc) {
475 return 0;
476 }
477
478 bdev = spdk_bdev_desc_get_bdev(bdev_desc);
479 SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
480 spdk_bdev_get_name(bdev));
481
482 if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
483 SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
484 return -1;
485 }
486
487 /* The cache needs to be capable of storing at least two full bands. This requirement comes
488 * from the fact that cache works as a protection against power loss, so before the data
489 * inside the cache can be overwritten, the band it's stored on has to be closed.
490 */
491 if (spdk_bdev_get_num_blocks(bdev) < ftl_num_band_lbks(dev) * 2) {
492 SPDK_ERRLOG("Insufficient number of blocks for write buffer cache(%"PRIu64"\n",
493 spdk_bdev_get_num_blocks(bdev));
494 return -1;
495 }
496
497 if (pthread_spin_init(&dev->nv_cache.lock, PTHREAD_PROCESS_PRIVATE)) {
498 SPDK_ERRLOG("Failed to initialize cache lock\n");
499 return -1;
500 }
501
502 dev->nv_cache.bdev_desc = bdev_desc;
503 dev->nv_cache.current_addr = 0;
504 dev->nv_cache.num_available = spdk_bdev_get_num_blocks(bdev);
505
506 return 0;
507 }
508
/* Public API: copy the library's default configuration into *conf. */
void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
	*conf = g_default_conf;
}
514
/*
 * Mempool object constructor for LBA map requests: pre-allocates the
 * per-request segment bitmap (one bit per FTL_NUM_LBA_IN_BLOCK-sized
 * segment of a band).
 * NOTE(review): spdk_bit_array_create() can return NULL on allocation
 * failure; the result is not checked here — presumably callers tolerate
 * or never hit a NULL segments bitmap. Confirm.
 */
static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;
	struct spdk_ftl_dev *dev = opaque;

	request->segments = spdk_bit_array_create(spdk_divide_round_up(
				    ftl_num_band_lbks(dev), FTL_NUM_LBA_IN_BLOCK));
}
524
/*
 * Create the two LBA-map related mempools, both named after the device:
 *  - lba_pool: buffers for band LBA maps (core thread only)
 *  - lba_request_pool: per-relocation map read requests, sized for the
 *    maximum number of concurrent relocations
 * Returns 0 on success, -ENAMETOOLONG / -ENOMEM on failure; partially
 * created pools are released by ftl_dev_free_sync().
 */
static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
	char pool_name[POOL_NAME_LEN];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lba-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* We need to reserve at least 2 buffers for band close / open sequence
	 * alone, plus additional (8) buffers for handling write errors.
	 * TODO: This memory pool is utilized only by core thread - it introduce
	 * unnecessary overhead and should be replaced by different data structure.
	 */
	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
					    ftl_lba_map_pool_elem_size(dev),
					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->lba_pool) {
		return -ENOMEM;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lbareq-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* Each element gets its segments bitmap via ftl_lba_map_request_ctor */
	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
			dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
			sizeof(struct ftl_lba_map_request),
			SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
			SPDK_ENV_SOCKET_ID_ANY,
			ftl_lba_map_request_ctor,
			dev);
	if (!dev->lba_request_pool) {
		return -ENOMEM;
	}

	return 0;
}
568
/* Initialize the write-pointer and flush tracking lists to empty. */
static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
	LIST_INIT(&dev->wptr_list);
	LIST_INIT(&dev->flush_list);
}
575
576 static size_t
577 ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
578 {
579 struct ftl_band *band;
580 size_t seq = 0;
581
582 LIST_FOREACH(band, &dev->shut_bands, list_entry) {
583 if (band->seq > seq) {
584 seq = band->seq;
585 }
586 }
587
588 return seq;
589 }
590
/*
 * Runs on the core thread (posted by ftl_init_bands_state): seed the
 * device sequence counter, move every band with no valid data to the
 * FREE state, resume relocation, and reset limit statistics.
 */
static void
_ftl_init_bands_state(void *ctx)
{
	struct ftl_band *band, *temp_band;
	struct spdk_ftl_dev *dev = ctx;

	dev->seq = ftl_dev_band_max_seq(dev);

	/* SAFE variant: ftl_band_set_state() may unlink the band */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->lba_map.num_vld) {
			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
		}
	}

	ftl_reloc_resume(dev->reloc);
	/* Clear the limit applications as they're incremented incorrectly by */
	/* the initialization code */
	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}
610
611 static int
612 ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
613 {
614 struct ftl_band *band;
615 int cnt = 0;
616
617 LIST_FOREACH(band, &dev->shut_bands, list_entry) {
618 if (band->num_chunks && !band->lba_map.num_vld) {
619 cnt++;
620 }
621 }
622 return cnt;
623 }
624
/*
 * Verify at least one free band exists, then hand final band state setup
 * (_ftl_init_bands_state) off to the core thread.
 * Returns 0 on success, -1 when no free bands are available.
 */
static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
	/* TODO: Should we abort initialization or expose read only device */
	/* if there is no free bands? */
	/* If we abort initialization should we depend on condition that */
	/* we have no free bands or should we have some minimal number of */
	/* free bands? */
	if (!ftl_init_num_free_bands(dev)) {
		return -1;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
	return 0;
}
640
/*
 * Runs on the target SPDK thread (posted by ftl_dev_init_thread):
 * registers the thread's poller and, on the core thread only, hooks the
 * device into ANM event processing.
 */
static void
_ftl_dev_init_thread(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;

	thread->poller = spdk_poller_register(thread->poller_fn, thread, thread->period_us);
	if (!thread->poller) {
		/* No error propagation path from a thread message - treat as fatal */
		SPDK_ERRLOG("Unable to register poller\n");
		assert(0);
	}

	if (spdk_get_thread() == ftl_get_core_thread(dev)) {
		ftl_anm_register_device(dev, ftl_process_anm_event);
	}
}
657
/*
 * Set up one FTL worker thread record: bind it to an SPDK thread,
 * allocate its dedicated NVMe IO qpair, and post the poller registration
 * to run on that thread (_ftl_dev_init_thread).
 * Returns 0 on success, -1 if the qpair cannot be allocated.
 */
static int
ftl_dev_init_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread,
		    struct spdk_thread *spdk_thread, spdk_poller_fn fn, uint64_t period_us)
{
	thread->dev = dev;
	thread->poller_fn = fn;
	thread->thread = spdk_thread;
	thread->period_us = period_us;

	thread->qpair = spdk_nvme_ctrlr_alloc_io_qpair(dev->ctrlr, NULL, 0);
	if (!thread->qpair) {
		SPDK_ERRLOG("Unable to initialize qpair\n");
		return -1;
	}

	spdk_thread_send_msg(spdk_thread, _ftl_dev_init_thread, thread);
	return 0;
}
676
/*
 * Initialize both FTL threads (core and read) from the init options.
 * Both thread handles must be supplied by the caller.
 * Returns 0 on success, -1 on any failure.
 */
static int
ftl_dev_init_threads(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	if (!opts->core_thread || !opts->read_thread) {
		return -1;
	}

	if (ftl_dev_init_thread(dev, &dev->core_thread, opts->core_thread, ftl_task_core, 0)) {
		SPDK_ERRLOG("Unable to initialize core thread\n");
		return -1;
	}

	if (ftl_dev_init_thread(dev, &dev->read_thread, opts->read_thread, ftl_task_read, 0)) {
		SPDK_ERRLOG("Unable to initialize read thread\n");
		return -1;
	}

	return 0;
}
696
/*
 * Release a worker thread's qpair and clear its references.
 * The poller must already have been unregistered (see ftl_halt_poller).
 */
static void
ftl_dev_free_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread)
{
	assert(thread->poller == NULL);

	spdk_nvme_ctrlr_free_io_qpair(thread->qpair);
	thread->thread = NULL;
	thread->qpair = NULL;
}
706
/*
 * Allocate the logical-to-physical address table and fill it with
 * invalid-PPA markers. Entry width is 4 bytes when the packed PPA fits
 * in 32 bits, 8 bytes otherwise. dev->num_lbas must be set beforehand.
 * Returns 0 on success, -1 on invalid state or allocation failure.
 */
static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
	size_t addr_size;
	uint64_t i;

	if (dev->num_lbas == 0) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
		return -1;
	}

	if (dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
		return -1;
	}

	addr_size = dev->ppa_len >= 32 ? 8 : 4;
	dev->l2p = malloc(dev->num_lbas * addr_size);
	if (!dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
		return -1;
	}

	/* Every LBA starts unmapped */
	for (i = 0; i < dev->num_lbas; ++i) {
		ftl_l2p_set(dev, i, ftl_to_ppa(FTL_PPA_INVALID));
	}

	return 0;
}
736
/*
 * Final step of initialization: publish the device on the global queue,
 * mark it initialized, and invoke the user's init callback exactly once
 * (the callback pointers are cleared afterwards).
 */
static void
ftl_init_complete(struct spdk_ftl_dev *dev)
{
	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	dev->initialized = 1;

	if (dev->init_cb) {
		dev->init_cb(dev, dev->init_arg, 0);
	}

	dev->init_cb = NULL;
	dev->init_arg = NULL;
}
753
/*
 * Fresh-device (SPDK_FTL_MODE_CREATE) setup: generate a new UUID, size
 * the user-visible LBA space from the usable band capacity minus the
 * configured spare percentage, allocate the L2P, and kick off band state
 * initialization. Completes init via ftl_init_complete() on success.
 * Returns 0 on success, -1 on failure.
 */
static int
ftl_setup_initial_state(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t i;

	spdk_uuid_generate(&dev->uuid);

	dev->num_lbas = 0;
	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
		dev->num_lbas += ftl_band_num_usable_lbks(&dev->bands[i]);
	}

	/* Hold back lba_rsvd percent as over-provisioning */
	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;

	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Unable to init l2p table\n");
		return -1;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		return -1;
	}

	ftl_init_complete(dev);
	return 0;
}
782
/* Saved user init callback + argument, carried through the async device
 * teardown performed on an initialization failure (see ftl_init_fail). */
struct ftl_init_fail_ctx {
	spdk_ftl_init_fn cb;
	void *arg;
};
787
788 static void
789 ftl_init_fail_cb(void *ctx, int status)
790 {
791 struct ftl_init_fail_ctx *fail_cb = ctx;
792
793 fail_cb->cb(NULL, fail_cb->arg, -ENODEV);
794 free(fail_cb);
795 }
796
/*
 * Asynchronous-stage init failure path: stash the user's init callback,
 * then free the (partially initialized, already running) device through
 * the normal spdk_ftl_dev_free() shutdown; ftl_init_fail_cb reports the
 * failure once teardown completes.
 */
static void
ftl_init_fail(struct spdk_ftl_dev *dev)
{
	struct ftl_init_fail_ctx *fail_cb;

	fail_cb = malloc(sizeof(*fail_cb));
	if (!fail_cb) {
		/* Nothing sane to do: the caller never gets notified */
		SPDK_ERRLOG("Unable to allocate context to free the device\n");
		return;
	}

	fail_cb->cb = dev->init_cb;
	fail_cb->arg = dev->init_arg;
	/* Ensure spdk_ftl_dev_free() doesn't bail out with -EBUSY */
	dev->halt_cb = NULL;

	if (spdk_ftl_dev_free(dev, ftl_init_fail_cb, fail_cb)) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}
817
/*
 * Completion callback for the data-restoration phase: on success finish
 * band state init and complete device initialization; on any failure
 * tear the device down via ftl_init_fail().
 */
static void
ftl_restore_device_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		goto error;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		goto error;
	}

	ftl_init_complete(dev);
	return;
error:
	ftl_init_fail(dev);
}
836
/*
 * Completion callback for the metadata-restoration phase: with metadata
 * available (dev->num_lbas now known), allocate the L2P and chain into
 * device data restoration. Any failure tears the device down via
 * ftl_init_fail().
 */
static void
ftl_restore_md_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
		goto error;
	}

	/* After the metadata is read it should be possible to allocate the L2P */
	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Failed to allocate the L2P\n");
		goto error;
	}

	if (ftl_restore_device(restore, ftl_restore_device_cb)) {
		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
		goto error;
	}

	return;
error:
	ftl_init_fail(dev);
}
860
/*
 * Begin asynchronous state restoration for an existing device (no
 * SPDK_FTL_MODE_CREATE): adopt the caller-provided UUID and start the
 * metadata restore chain (ftl_restore_md_cb -> ftl_restore_device_cb).
 * Returns 0 when restoration was started, -1 otherwise.
 */
static int
ftl_restore_state(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	dev->uuid = opts->uuid;

	if (ftl_restore_md(dev, ftl_restore_md_cb)) {
		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
		return -1;
	}

	return 0;
}
873
/*
 * Per-thread IO channel constructor (registered via
 * spdk_io_device_register): creates the channel's IO object mempool and,
 * when a write-buffer cache bdev is attached, acquires a bdev channel
 * for it. Returns 0 on success, -1 on failure (partial state is undone).
 */
static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
	struct spdk_ftl_dev *dev = io_device;
	struct ftl_io_channel *ioch = ctx;
	char mempool_name[32];

	/* Pool name is made unique per channel via the channel pointer */
	snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
	ioch->cache_ioch = NULL;
	ioch->dev = dev;
	ioch->elem_size = sizeof(struct ftl_md_io);
	ioch->io_pool = spdk_mempool_create(mempool_name,
					    dev->conf.user_io_pool_size,
					    ioch->elem_size,
					    0,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!ioch->io_pool) {
		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
		return -1;
	}

	if (dev->nv_cache.bdev_desc) {
		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
		if (!ioch->cache_ioch) {
			SPDK_ERRLOG("Failed to create cache IO channel\n");
			spdk_mempool_free(ioch->io_pool);
			return -1;
		}
	}

	return 0;
}
906
907 static void
908 ftl_io_channel_destroy_cb(void *io_device, void *ctx)
909 {
910 struct ftl_io_channel *ioch = ctx;
911
912 spdk_mempool_free(ioch->io_pool);
913
914 if (ioch->cache_ioch) {
915 spdk_put_io_channel(ioch->cache_ioch);
916 }
917 }
918
/*
 * Register the device as an SPDK io_device and acquire the device's own
 * IO channel. Returns 0 on success; on failure unregisters the device
 * and returns -1.
 */
static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
				sizeof(struct ftl_io_channel),
				NULL);

	dev->ioch = spdk_get_io_channel(dev);
	if (!dev->ioch) {
		spdk_io_device_unregister(dev, NULL);
		return -1;
	}

	return 0;
}
934
/*
 * Public API: allocate and initialize an FTL device.
 *
 * Synchronous failures (before any async work is outstanding) are cleaned
 * up with ftl_dev_free_sync() and reported via the return value; once
 * threads/restoration are running, failures go through ftl_init_fail()
 * and are reported through the user callback instead. The callback fires
 * exactly once with the final status.
 *
 * Returns 0 when initialization was started (result delivered via cb),
 * or a negative errno on immediate failure.
 */
int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	TAILQ_INIT(&dev->retry_queue);
	dev->conf = *opts.conf;
	dev->init_cb = cb;
	dev->init_arg = cb_arg;
	dev->range = opts.range;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_nvme_init(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize NVMe structures\n");
		goto fail_sync;
	}

	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
	/* so we don't have to clean up in each of the init functions. */
	if (ftl_dev_retrieve_geo(dev)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		goto fail_sync;
	}

	/* Options can only be validated once the geometry is known */
	if (ftl_check_init_opts(&opts, &dev->geo)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_dev_init_punits(dev)) {
		SPDK_ERRLOG("Unable to initialize LUNs\n");
		goto fail_sync;
	}

	if (ftl_init_lba_map_pools(dev)) {
		SPDK_ERRLOG("Unable to init LBA map pools\n");
		goto fail_sync;
	}

	ftl_init_wptr_list(dev);

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev_desc)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->rwb = ftl_rwb_init(&dev->conf, dev->geo.ws_opt, dev->md_size, ftl_dev_num_punits(dev));
	if (!dev->rwb) {
		SPDK_ERRLOG("Unable to initialize rwb structures\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	/* From here on pollers may be running - failures must use the
	 * asynchronous teardown path */
	if (ftl_dev_init_threads(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize device threads\n");
		goto fail_sync;
	}

	if (opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(dev)) {
			SPDK_ERRLOG("Failed to setup initial state of the device\n");
			goto fail_async;
		}
	} else {
		if (ftl_restore_state(dev, &opts)) {
			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
			goto fail_async;
		}
	}

	return 0;
fail_sync:
	/* NOTE(review): -ENOMEM is returned for every synchronous failure,
	 * not only allocation failures */
	ftl_dev_free_sync(dev);
	return -ENOMEM;
fail_async:
	ftl_init_fail(dev);
	return 0;
}
1044
1045 static void
1046 _ftl_halt_defrag(void *arg)
1047 {
1048 ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
1049 }
1050
/* Mempool object destructor: release the per-request segment bitmap
 * allocated by ftl_lba_map_request_ctor. */
static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;

	spdk_bit_array_free(&request->segments);
}
1058
/*
 * Synchronously release every resource a device may own, tolerating
 * partially-initialized state (used both on init failure and as the last
 * step of a clean shutdown). Also unlinks the device from the global
 * registry if it was published. Safe to call with dev == NULL.
 */
static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_dev *iter;
	size_t i;

	if (!dev) {
		return;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
		if (iter == dev) {
			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
			break;
		}
	}
	pthread_mutex_unlock(&g_ftl_queue_lock);

	/* All write pointers must already be retired */
	assert(LIST_EMPTY(&dev->wptr_list));

	ftl_dev_dump_bands(dev);
	ftl_dev_dump_stats(dev);

	if (dev->ioch) {
		spdk_put_io_channel(dev->ioch);
		spdk_io_device_unregister(dev, NULL);
	}

	if (dev->bands) {
		for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
			free(dev->bands[i].chunk_buf);
			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
		}
	}

	spdk_mempool_free(dev->lba_pool);
	if (dev->lba_request_pool) {
		/* Free each element's ctor-allocated bitmap before the pool */
		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
	}
	spdk_mempool_free(dev->lba_request_pool);

	ftl_rwb_free(dev->rwb);
	ftl_reloc_free(dev->reloc);

	free(dev->name);
	free(dev->punits);
	free(dev->bands);
	free(dev->l2p);
	free(dev);
}
1110
/*
 * Shutdown progress poller: once both worker pollers have unregistered
 * themselves, finish teardown (free threads, detach ANM, free the device)
 * and invoke the user halt callback.
 * The callback/arg are captured BEFORE ftl_dev_free_sync() frees dev.
 */
static int
ftl_halt_poller(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;
	spdk_ftl_fn halt_cb = dev->halt_cb;
	void *halt_arg = dev->halt_arg;

	if (!dev->core_thread.poller && !dev->read_thread.poller) {
		spdk_poller_unregister(&dev->halt_poller);

		ftl_dev_free_thread(dev, &dev->read_thread);
		ftl_dev_free_thread(dev, &dev->core_thread);

		ftl_anm_unregister_device(dev);
		ftl_dev_free_sync(dev);

		if (halt_cb) {
			halt_cb(halt_arg, 0);
		}
	}

	return 0;
}
1134
/*
 * Core-thread message: stop defrag and start polling (every 100us) for
 * the worker pollers to drain so teardown can complete (ftl_halt_poller).
 */
static void
ftl_add_halt_poller(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;

	_ftl_halt_defrag(dev);

	assert(!dev->halt_poller);
	dev->halt_poller = spdk_poller_register(ftl_halt_poller, dev, 100);
}
1145
/*
 * Public API: begin asynchronous device shutdown. Sets the halt flag,
 * disables rwb interleaving, and posts the halt sequence to the core
 * thread; cb fires when teardown completes.
 * Returns 0 when shutdown was started, -EBUSY if one is already pending.
 */
int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_fn cb, void *cb_arg)
{
	if (dev->halt_cb) {
		return -EBUSY;
	}

	dev->halt_cb = cb;
	dev->halt_arg = cb_arg;
	dev->halt = 1;

	ftl_rwb_disable_interleaving(dev->rwb);

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, dev);
	return 0;
}
1162
/* Public API: module-level init - starts the ANM event thread; cb fires
 * when ready. */
int
spdk_ftl_module_init(const struct ftl_module_init_opts *opts, spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_init(opts->anm_thread, cb, cb_arg);
}

/* Public API: module-level teardown - releases the ANM machinery; cb
 * fires when done. */
int
spdk_ftl_module_fini(spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_free(cb, cb_arg);
}

/* Register the "ftl_init" trace flag used by SPDK_INFOLOG/SPDK_DEBUGLOG above */
SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)