]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/ftl/ftl_init.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / lib / ftl / ftl_init.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "spdk/stdinc.h"
35 #include "spdk/nvme.h"
36 #include "spdk/io_channel.h"
37 #include "spdk/bdev_module.h"
38 #include "spdk_internal/log.h"
39 #include "spdk/ftl.h"
40 #include "ftl_core.h"
41 #include "ftl_anm.h"
42 #include "ftl_io.h"
43 #include "ftl_reloc.h"
44 #include "ftl_rwb.h"
45 #include "ftl_band.h"
46 #include "ftl_debug.h"
47
48 #define FTL_CORE_RING_SIZE 4096
49 #define FTL_INIT_TIMEOUT 30
50 #define FTL_NSID 1
51
/* True when the closed ranges [s1, e1] and [s2, e2] overlap. */
#define ftl_range_intersect(s1, e1, s2, e2) \
	((s1) <= (e2) && (s2) <= (e1))

/* Completion context for admin commands that are polled to completion
 * synchronously (see ftl_admin_cb / ftl_retrieve_chunk_info). */
struct ftl_admin_cmpl {
	/* Copy of the NVMe completion entry; valid once complete != 0 */
	struct spdk_nvme_cpl status;

	/* Set to 1 by ftl_admin_cb when the command finishes */
	int complete;
};
60
/* Registry of all initialized FTL devices; guarded by g_ftl_queue_lock.
 * Used to detect overlapping parallel-unit ranges on the same controller. */
static STAILQ_HEAD(, spdk_ftl_dev) g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
static pthread_mutex_t g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Default configuration applied when the caller passes no spdk_ftl_conf
 * (see spdk_ftl_conf_init_defaults / spdk_ftl_dev_init). */
static const struct spdk_ftl_conf g_default_conf = {
	.defrag = {
		.limits = {
			/* 5 free bands / 0 % host writes */
			[SPDK_FTL_LIMIT_CRIT]  = { .thld = 5, .limit = 0 },
			/* 10 free bands / 5 % host writes */
			[SPDK_FTL_LIMIT_HIGH]  = { .thld = 10, .limit = 5 },
			/* 20 free bands / 40 % host writes */
			[SPDK_FTL_LIMIT_LOW]   = { .thld = 20, .limit = 40 },
			/* 40 free bands / 100 % host writes - defrag starts running */
			[SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
		},
		/* 10 percent valid lbks */
		.invalid_thld = 10,
	},
	/* 20% spare lbks */
	.lba_rsvd = 20,
	/* 6M write buffer */
	.rwb_size = 6 * 1024 * 1024,
	/* 90% band fill threshold */
	.band_thld = 90,
	/* Max 32 IO depth per band relocate */
	.max_reloc_qdepth = 32,
	/* Max 3 active band relocates */
	.max_active_relocs = 3,
	/* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
	.user_io_pool_size = 2048,
	/* Number of interleaving units per ws_opt */
	/* 1 for default and 3 for 3D TLC NAND */
	.num_interleave_units = 1,
	/*
	 * If clear ftl will return error when restoring after a dirty shutdown
	 * If set, last band will be padded, ftl will restore based only on closed bands - this
	 * will result in lost data after recovery.
	 */
	.allow_open_bands = false,
};

/* Forward declaration: synchronous teardown used on init failure paths. */
static void ftl_dev_free_sync(struct spdk_ftl_dev *dev);
102
103 static void
104 ftl_admin_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
105 {
106 struct ftl_admin_cmpl *cmpl = ctx;
107
108 cmpl->complete = 1;
109 cmpl->status = *cpl;
110 }
111
/*
 * Allocate the band's validity bitmap (one bit per logical block in the
 * band) and reset its metadata to a clean state.
 * Returns 0 on success, -ENOMEM if the bitmap allocation fails.
 */
static int
ftl_band_init_md(struct ftl_band *band)
{
	struct ftl_lba_map *lba_map = &band->lba_map;

	lba_map->vld = spdk_bit_array_create(ftl_num_band_lbks(band->dev));
	if (!lba_map->vld) {
		return -ENOMEM;
	}

	/* NOTE(review): pthread_spin_init() return value is ignored here —
	 * presumably it cannot fail with PTHREAD_PROCESS_PRIVATE; confirm. */
	pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
	ftl_band_md_clear(band);
	return 0;
}
126
127 static int
128 ftl_check_conf(const struct spdk_ftl_conf *conf,
129 const struct spdk_ocssd_geometry_data *geo)
130 {
131 size_t i;
132
133 if (conf->defrag.invalid_thld >= 100) {
134 return -1;
135 }
136 if (conf->lba_rsvd >= 100) {
137 return -1;
138 }
139 if (conf->lba_rsvd == 0) {
140 return -1;
141 }
142 if (conf->rwb_size == 0) {
143 return -1;
144 }
145 if (conf->rwb_size % FTL_BLOCK_SIZE != 0) {
146 return -1;
147 }
148 if (geo->ws_opt % conf->num_interleave_units != 0) {
149 return -1;
150 }
151
152 for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
153 if (conf->defrag.limits[i].limit > 100) {
154 return -1;
155 }
156 }
157
158 return 0;
159 }
160
/*
 * Validate init options against the retrieved geometry and reject a
 * parallel-unit range that overlaps an already-registered device on the
 * same controller (matching transport ID).
 * Returns 0 when acceptable, -1 otherwise.
 */
static int
ftl_check_init_opts(const struct spdk_ftl_dev_init_opts *opts,
		    const struct spdk_ocssd_geometry_data *geo)
{
	struct spdk_ftl_dev *dev;
	size_t num_punits = geo->num_pu * geo->num_grp;
	int rc = 0;

	if (opts->range.begin > opts->range.end || opts->range.end >= num_punits) {
		return -1;
	}

	if (ftl_check_conf(opts->conf, geo)) {
		return -1;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);

	STAILQ_FOREACH(dev, &g_ftl_queue, stailq) {
		/* Different controller - ranges cannot clash */
		if (spdk_nvme_transport_id_compare(&dev->trid, &opts->trid)) {
			continue;
		}

		if (ftl_range_intersect(opts->range.begin, opts->range.end,
					dev->range.begin, dev->range.end)) {
			rc = -1;
			goto out;
		}
	}

out:
	pthread_mutex_unlock(&g_ftl_queue_lock);
	return rc;
}
195
/*
 * Synchronously read num_entries chunk information entries starting at the
 * chunk addressed by ppa from the OCSSD chunk info log page.
 * Busy-polls admin completions on the calling thread until done (TODO in
 * ftl_dev_retrieve_geo notes the same loop lacks a timeout).
 * Returns 0 on success, -1 on submission failure or error completion.
 */
int
ftl_retrieve_chunk_info(struct spdk_ftl_dev *dev, struct ftl_ppa ppa,
			struct spdk_ocssd_chunk_information_entry *info,
			unsigned int num_entries)
{
	/* volatile: written from the admin completion callback while this
	 * thread spins on cmpl.complete */
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);
	/* Log page is laid out grp-major, then pu, then chunk */
	uint64_t offset = (ppa.grp * dev->geo.num_pu + ppa.pu) *
			  dev->geo.num_chk + ppa.chk;

	if (spdk_nvme_ctrlr_cmd_get_log_page(dev->ctrlr, SPDK_OCSSD_LOG_CHUNK_INFO, nsid,
					     info, num_entries * sizeof(*info),
					     offset * sizeof(*info),
					     ftl_admin_cb, (void *)&cmpl)) {
		return -1;
	}

	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	return 0;
}
225
/*
 * Fetch chunk information for every chunk of a single parallel unit into
 * info[0..num_chk), issuing the log page reads in FTL_BLOCK_SIZE-sized
 * batches. Returns 0 on success, -1 if any batch fails.
 */
static int
ftl_retrieve_punit_chunk_info(struct spdk_ftl_dev *dev, const struct ftl_punit *punit,
			      struct spdk_ocssd_chunk_information_entry *info)
{
	uint32_t i = 0;
	/* Number of entries that fit in one block-sized transfer */
	unsigned int num_entries = FTL_BLOCK_SIZE / sizeof(*info);
	struct ftl_ppa chunk_ppa = punit->start_ppa;

	for (i = 0; i < dev->geo.num_chk; i += num_entries, chunk_ppa.chk += num_entries) {
		/* Clamp the final (possibly partial) batch */
		if (num_entries > dev->geo.num_chk - i) {
			num_entries = dev->geo.num_chk - i;
		}

		if (ftl_retrieve_chunk_info(dev, chunk_ppa, &info[i], num_entries)) {
			return -1;
		}
	}

	return 0;
}
246
247 static unsigned char
248 ftl_get_chunk_state(const struct spdk_ocssd_chunk_information_entry *info)
249 {
250 if (info->cs.free) {
251 return FTL_CHUNK_STATE_FREE;
252 }
253
254 if (info->cs.open) {
255 return FTL_CHUNK_STATE_OPEN;
256 }
257
258 if (info->cs.closed) {
259 return FTL_CHUNK_STATE_CLOSED;
260 }
261
262 if (info->cs.offline) {
263 return FTL_CHUNK_STATE_BAD;
264 }
265
266 assert(0 && "Invalid block state");
267 return FTL_CHUNK_STATE_BAD;
268 }
269
/*
 * Drop bands that ended up with zero usable chunks (all chunks bad) from
 * the shut_bands list and from the device's band count, so later passes
 * never pick them.
 */
static void
ftl_remove_empty_bands(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *temp_band;

	/* Remove band from shut_bands list to prevent further processing */
	/* if all blocks on this band are bad */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->num_chunks) {
			dev->num_bands--;
			LIST_REMOVE(band, list_entry);
		}
	}
}
284
/*
 * Build the band array: allocate one band per chunk index, link all bands
 * onto shut_bands (in index order), then query each parallel unit's chunk
 * info log and attach the per-punit chunk descriptors to their bands,
 * skipping offline chunks. Bands left with no chunks are removed.
 *
 * On failure the partially-initialized dev->bands array is left in place;
 * callers rely on ftl_dev_free_sync() for cleanup.
 * Returns 0 on success, -1 / error code otherwise.
 */
static int
ftl_dev_init_bands(struct spdk_ftl_dev *dev)
{
	struct spdk_ocssd_chunk_information_entry *info;
	struct ftl_band *band, *pband;
	struct ftl_punit *punit;
	struct ftl_chunk *chunk;
	unsigned int i, j;
	char buf[128];
	int rc = 0;

	LIST_INIT(&dev->free_bands);
	LIST_INIT(&dev->shut_bands);

	dev->num_free = 0;
	dev->num_bands = ftl_dev_num_bands(dev);
	dev->bands = calloc(ftl_dev_num_bands(dev), sizeof(*dev->bands));
	if (!dev->bands) {
		return -1;
	}

	/* Scratch buffer reused for every punit's chunk info log */
	info = calloc(dev->geo.num_chk, sizeof(*info));
	if (!info) {
		return -1;
	}

	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
		band = &dev->bands[i];
		band->id = i;
		band->dev = dev;
		band->state = FTL_BAND_STATE_CLOSED;

		/* Keep shut_bands ordered by band id: insert each new band
		 * after the previously inserted one (pband is always set once
		 * the list is non-empty) */
		if (LIST_EMPTY(&dev->shut_bands)) {
			LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
		} else {
			LIST_INSERT_AFTER(pband, band, list_entry);
		}
		pband = band;

		CIRCLEQ_INIT(&band->chunks);
		/* One chunk slot per parallel unit */
		band->chunk_buf = calloc(ftl_dev_num_punits(dev), sizeof(*band->chunk_buf));
		if (!band->chunk_buf) {
			SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
			rc = -1;
			goto out;
		}

		rc = ftl_band_init_md(band);
		if (rc) {
			SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
			goto out;
		}
	}

	for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
		punit = &dev->punits[i];

		rc = ftl_retrieve_punit_chunk_info(dev, punit, info);
		if (rc) {
			SPDK_ERRLOG("Failed to retrieve bbt for @ppa: %s [%lu]\n",
				    ftl_ppa2str(punit->start_ppa, buf, sizeof(buf)),
				    ftl_ppa_addr_pack(dev, punit->start_ppa));
			goto out;
		}

		/* Band j owns chunk j of every punit; info[j] is that chunk's
		 * log entry for the current punit */
		for (j = 0; j < ftl_dev_num_bands(dev); ++j) {
			band = &dev->bands[j];
			chunk = &band->chunk_buf[i];
			chunk->pos = i;
			chunk->state = ftl_get_chunk_state(&info[j]);
			chunk->punit = punit;
			chunk->start_ppa = punit->start_ppa;
			chunk->start_ppa.chk = band->id;

			/* Offline chunks stay in chunk_buf but never join the
			 * band's active chunk list */
			if (chunk->state != FTL_CHUNK_STATE_BAD) {
				band->num_chunks++;
				CIRCLEQ_INSERT_TAIL(&band->chunks, chunk, circleq);
			}
		}
	}

	ftl_remove_empty_bands(dev);
out:
	free(info);
	return rc;
}
371
372 static int
373 ftl_dev_init_punits(struct spdk_ftl_dev *dev)
374 {
375 unsigned int i, punit;
376
377 dev->punits = calloc(ftl_dev_num_punits(dev), sizeof(*dev->punits));
378 if (!dev->punits) {
379 return -1;
380 }
381
382 for (i = 0; i < ftl_dev_num_punits(dev); ++i) {
383 dev->punits[i].dev = dev;
384 punit = dev->range.begin + i;
385
386 dev->punits[i].start_ppa.ppa = 0;
387 dev->punits[i].start_ppa.grp = punit % dev->geo.num_grp;
388 dev->punits[i].start_ppa.pu = punit / dev->geo.num_grp;
389 }
390
391 return 0;
392 }
393
/*
 * Issue the OCSSD geometry admin command synchronously and derive the
 * device's PPA address format (bit offsets/masks for lbk, chk, pu, grp
 * fields, packed in that order from bit 0) and transfer size from it.
 * Returns 0 on success, -1 on submission failure or error completion.
 */
static int
ftl_dev_retrieve_geo(struct spdk_ftl_dev *dev)
{
	/* volatile: written by ftl_admin_cb while this thread polls */
	volatile struct ftl_admin_cmpl cmpl = {};
	uint32_t nsid = spdk_nvme_ns_get_id(dev->ns);

	if (spdk_nvme_ocssd_ctrlr_cmd_geometry(dev->ctrlr, nsid, &dev->geo, sizeof(dev->geo),
					       ftl_admin_cb, (void *)&cmpl)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		return -1;
	}

	/* TODO: add a timeout */
	while (!cmpl.complete) {
		spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
	}

	if (spdk_nvme_cpl_is_error(&cmpl.status)) {
		SPDK_ERRLOG("Unexpected status code: [%d], status code type: [%d]\n",
			    cmpl.status.status.sc, cmpl.status.status.sct);
		return -1;
	}

	/* TODO: add sanity checks for the geo */
	dev->ppa_len = dev->geo.lbaf.grp_len +
		       dev->geo.lbaf.pu_len +
		       dev->geo.lbaf.chk_len +
		       dev->geo.lbaf.lbk_len;

	/* Field layout (LSB first): lbk | chk | pu | grp */
	dev->ppaf.lbk_offset = 0;
	dev->ppaf.lbk_mask   = (1 << dev->geo.lbaf.lbk_len) - 1;
	dev->ppaf.chk_offset = dev->ppaf.lbk_offset + dev->geo.lbaf.lbk_len;
	dev->ppaf.chk_mask   = (1 << dev->geo.lbaf.chk_len) - 1;
	dev->ppaf.pu_offset  = dev->ppaf.chk_offset + dev->geo.lbaf.chk_len;
	dev->ppaf.pu_mask    = (1 << dev->geo.lbaf.pu_len) - 1;
	dev->ppaf.grp_offset = dev->ppaf.pu_offset + dev->geo.lbaf.pu_len;
	dev->ppaf.grp_mask   = (1 << dev->geo.lbaf.grp_len) - 1;

	/* We're using optimal write size as our xfer size */
	dev->xfer_size = dev->geo.ws_opt;

	return 0;
}
437
438 static int
439 ftl_dev_nvme_init(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
440 {
441 uint32_t block_size;
442
443 dev->ctrlr = opts->ctrlr;
444
445 if (spdk_nvme_ctrlr_get_num_ns(dev->ctrlr) != 1) {
446 SPDK_ERRLOG("Unsupported number of namespaces\n");
447 return -1;
448 }
449
450 dev->ns = spdk_nvme_ctrlr_get_ns(dev->ctrlr, FTL_NSID);
451 dev->trid = opts->trid;
452 dev->md_size = spdk_nvme_ns_get_md_size(dev->ns);
453
454 block_size = spdk_nvme_ns_get_extended_sector_size(dev->ns);
455 if (block_size != FTL_BLOCK_SIZE) {
456 SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
457 return -1;
458 }
459
460 if (dev->md_size % sizeof(uint32_t) != 0) {
461 /* Metadata pointer must be dword aligned */
462 SPDK_ERRLOG("Unsupported metadata size (%zu)\n", dev->md_size);
463 return -1;
464 }
465
466 return 0;
467 }
468
469 static int
470 ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, struct spdk_bdev_desc *bdev_desc)
471 {
472 struct spdk_bdev *bdev;
473
474 if (!bdev_desc) {
475 return 0;
476 }
477
478 bdev = spdk_bdev_desc_get_bdev(bdev_desc);
479 SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
480 spdk_bdev_get_name(bdev));
481
482 if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
483 SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
484 return -1;
485 }
486
487 /* The cache needs to be capable of storing at least two full bands. This requirement comes
488 * from the fact that cache works as a protection against power loss, so before the data
489 * inside the cache can be overwritten, the band it's stored on has to be closed.
490 */
491 if (spdk_bdev_get_num_blocks(bdev) < ftl_num_band_lbks(dev) * 2) {
492 SPDK_ERRLOG("Insufficient number of blocks for write buffer cache(%"PRIu64"\n",
493 spdk_bdev_get_num_blocks(bdev));
494 return -1;
495 }
496
497 if (pthread_spin_init(&dev->nv_cache.lock, PTHREAD_PROCESS_PRIVATE)) {
498 SPDK_ERRLOG("Failed to initialize cache lock\n");
499 return -1;
500 }
501
502 dev->nv_cache.bdev_desc = bdev_desc;
503 dev->nv_cache.current_addr = 0;
504 dev->nv_cache.num_available = spdk_bdev_get_num_blocks(bdev);
505
506 return 0;
507 }
508
/* Public API: copy the library's default configuration into *conf. */
void
spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
{
	*conf = g_default_conf;
}
514
/*
 * Mempool object constructor for LBA map requests: pre-allocates the
 * per-request segment bitmap (one bit per FTL_NUM_LBA_IN_BLOCK-sized
 * segment of a band).
 * NOTE(review): spdk_bit_array_create() can return NULL on allocation
 * failure; the result is not checked here — presumably callers tolerate
 * or never hit a NULL segments bitmap. Confirm.
 */
static void
ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;
	struct spdk_ftl_dev *dev = opaque;

	request->segments = spdk_bit_array_create(spdk_divide_round_up(
				    ftl_num_band_lbks(dev), FTL_NUM_LBA_IN_BLOCK));
}
524
/*
 * Create the two LBA-map related mempools, both named after the device:
 *  - lba_pool: buffers for band LBA maps (core thread only)
 *  - lba_request_pool: per-relocation map read requests, sized for the
 *    maximum number of concurrent relocations
 * Returns 0 on success, -ENAMETOOLONG / -ENOMEM on failure; partially
 * created pools are released by ftl_dev_free_sync().
 */
static int
ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
{
#define POOL_NAME_LEN 128
	char pool_name[POOL_NAME_LEN];
	int rc;

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lba-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* We need to reserve at least 2 buffers for band close / open sequence
	 * alone, plus additional (8) buffers for handling write errors.
	 * TODO: This memory pool is utilized only by core thread - it introduce
	 * unnecessary overhead and should be replaced by different data structure.
	 */
	dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
					    ftl_lba_map_pool_elem_size(dev),
					    SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!dev->lba_pool) {
		return -ENOMEM;
	}

	rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ocssd-lbareq-pool");
	if (rc < 0 || rc >= POOL_NAME_LEN) {
		return -ENAMETOOLONG;
	}

	/* Each element gets its segments bitmap via ftl_lba_map_request_ctor */
	dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
			dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
			sizeof(struct ftl_lba_map_request),
			SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
			SPDK_ENV_SOCKET_ID_ANY,
			ftl_lba_map_request_ctor,
			dev);
	if (!dev->lba_request_pool) {
		return -ENOMEM;
	}

	return 0;
}
568
/* Initialize the write-pointer and flush tracking lists to empty. */
static void
ftl_init_wptr_list(struct spdk_ftl_dev *dev)
{
	LIST_INIT(&dev->wptr_list);
	LIST_INIT(&dev->flush_list);
}
575
576 static size_t
577 ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
578 {
579 struct ftl_band *band;
580 size_t seq = 0;
581
582 LIST_FOREACH(band, &dev->shut_bands, list_entry) {
583 if (band->seq > seq) {
584 seq = band->seq;
585 }
586 }
587
588 return seq;
589 }
590
/*
 * Runs on the core thread (posted by ftl_init_bands_state): seed the
 * device sequence counter, move every band with no valid data to the
 * FREE state, resume relocation, and reset limit statistics.
 */
static void
_ftl_init_bands_state(void *ctx)
{
	struct ftl_band *band, *temp_band;
	struct spdk_ftl_dev *dev = ctx;

	dev->seq = ftl_dev_band_max_seq(dev);

	/* SAFE variant: ftl_band_set_state() may unlink the band */
	LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
		if (!band->lba_map.num_vld) {
			ftl_band_set_state(band, FTL_BAND_STATE_FREE);
		}
	}

	ftl_reloc_resume(dev->reloc);
	/* Clear the limit applications as they're incremented incorrectly by */
	/* the initialization code */
	memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
}
610
611 static int
612 ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
613 {
614 struct ftl_band *band;
615 int cnt = 0;
616
617 LIST_FOREACH(band, &dev->shut_bands, list_entry) {
618 if (band->num_chunks && !band->lba_map.num_vld) {
619 cnt++;
620 }
621 }
622 return cnt;
623 }
624
/*
 * Verify at least one free band exists, then hand final band state setup
 * (_ftl_init_bands_state) off to the core thread.
 * Returns 0 on success, -1 when no free bands are available.
 */
static int
ftl_init_bands_state(struct spdk_ftl_dev *dev)
{
	/* TODO: Should we abort initialization or expose read only device */
	/* if there is no free bands? */
	/* If we abort initialization should we depend on condition that */
	/* we have no free bands or should we have some minimal number of */
	/* free bands? */
	if (!ftl_init_num_free_bands(dev)) {
		return -1;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
	return 0;
}
640
/*
 * Runs on the target SPDK thread (posted by ftl_dev_init_thread):
 * registers the thread's poller and, on the core thread only, hooks the
 * device into ANM event processing.
 */
static void
_ftl_dev_init_thread(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;

	thread->poller = spdk_poller_register(thread->poller_fn, thread, thread->period_us);
	if (!thread->poller) {
		/* No error propagation path from a thread message - treat as fatal */
		SPDK_ERRLOG("Unable to register poller\n");
		assert(0);
	}

	if (spdk_get_thread() == ftl_get_core_thread(dev)) {
		ftl_anm_register_device(dev, ftl_process_anm_event);
	}
}
657
/*
 * Set up one FTL worker thread record: bind it to an SPDK thread,
 * allocate its dedicated NVMe IO qpair, and post the poller registration
 * to run on that thread (_ftl_dev_init_thread).
 * Returns 0 on success, -1 if the qpair cannot be allocated.
 */
static int
ftl_dev_init_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread,
		    struct spdk_thread *spdk_thread, spdk_poller_fn fn, uint64_t period_us)
{
	thread->dev = dev;
	thread->poller_fn = fn;
	thread->thread = spdk_thread;
	thread->period_us = period_us;

	thread->qpair = spdk_nvme_ctrlr_alloc_io_qpair(dev->ctrlr, NULL, 0);
	if (!thread->qpair) {
		SPDK_ERRLOG("Unable to initialize qpair\n");
		return -1;
	}

	spdk_thread_send_msg(spdk_thread, _ftl_dev_init_thread, thread);
	return 0;
}
676
/*
 * Initialize both FTL threads (core and read) from the init options.
 * Both thread handles must be supplied by the caller.
 * Returns 0 on success, -1 on any failure.
 */
static int
ftl_dev_init_threads(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	if (!opts->core_thread || !opts->read_thread) {
		return -1;
	}

	if (ftl_dev_init_thread(dev, &dev->core_thread, opts->core_thread, ftl_task_core, 0)) {
		SPDK_ERRLOG("Unable to initialize core thread\n");
		return -1;
	}

	if (ftl_dev_init_thread(dev, &dev->read_thread, opts->read_thread, ftl_task_read, 0)) {
		SPDK_ERRLOG("Unable to initialize read thread\n");
		return -1;
	}

	return 0;
}
696
/*
 * Release a worker thread's qpair and clear its references.
 * The poller must already have been unregistered (see ftl_halt_poller).
 */
static void
ftl_dev_free_thread(struct spdk_ftl_dev *dev, struct ftl_thread *thread)
{
	assert(thread->poller == NULL);

	spdk_nvme_ctrlr_free_io_qpair(thread->qpair);
	thread->thread = NULL;
	thread->qpair = NULL;
}
706
/*
 * Allocate the logical-to-physical address table and fill it with
 * invalid-PPA markers. Entry width is 4 bytes when the packed PPA fits
 * in 32 bits, 8 bytes otherwise. dev->num_lbas must be set beforehand.
 * Returns 0 on success, -1 on invalid state or allocation failure.
 */
static int
ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
{
	size_t addr_size;
	uint64_t i;

	if (dev->num_lbas == 0) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Invalid l2p table size\n");
		return -1;
	}

	if (dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "L2p table already allocated\n");
		return -1;
	}

	addr_size = dev->ppa_len >= 32 ? 8 : 4;
	dev->l2p = malloc(dev->num_lbas * addr_size);
	if (!dev->l2p) {
		SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Failed to allocate l2p table\n");
		return -1;
	}

	/* Every LBA starts unmapped */
	for (i = 0; i < dev->num_lbas; ++i) {
		ftl_l2p_set(dev, i, ftl_to_ppa(FTL_PPA_INVALID));
	}

	return 0;
}
736
/*
 * Final step of initialization: publish the device on the global queue,
 * mark it initialized, and invoke the user's init callback exactly once
 * (the callback pointers are cleared afterwards).
 */
static void
ftl_init_complete(struct spdk_ftl_dev *dev)
{
	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
	pthread_mutex_unlock(&g_ftl_queue_lock);

	dev->initialized = 1;

	if (dev->init_cb) {
		dev->init_cb(dev, dev->init_arg, 0);
	}

	dev->init_cb = NULL;
	dev->init_arg = NULL;
}
753
/*
 * Fresh-device (SPDK_FTL_MODE_CREATE) setup: generate a new UUID, size
 * the user-visible LBA space from the usable band capacity minus the
 * configured spare percentage, allocate the L2P, and kick off band state
 * initialization. Completes init via ftl_init_complete() on success.
 * Returns 0 on success, -1 on failure.
 */
static int
ftl_setup_initial_state(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t i;

	spdk_uuid_generate(&dev->uuid);

	dev->num_lbas = 0;
	for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
		dev->num_lbas += ftl_band_num_usable_lbks(&dev->bands[i]);
	}

	/* Hold back lba_rsvd percent as over-provisioning */
	dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;

	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Unable to init l2p table\n");
		return -1;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		return -1;
	}

	ftl_init_complete(dev);
	return 0;
}
782
/* Saved user init callback + argument, carried through the async device
 * teardown performed on an initialization failure (see ftl_init_fail). */
struct ftl_init_fail_ctx {
	spdk_ftl_init_fn cb;
	void *arg;
};
787
788 static void
789 ftl_init_fail_cb(void *ctx, int status)
790 {
791 struct ftl_init_fail_ctx *fail_cb = ctx;
792
793 fail_cb->cb(NULL, fail_cb->arg, -ENODEV);
794 free(fail_cb);
795 }
796
/*
 * Asynchronous-stage init failure path: stash the user's init callback,
 * then free the (partially initialized, already running) device through
 * the normal spdk_ftl_dev_free() shutdown; ftl_init_fail_cb reports the
 * failure once teardown completes.
 */
static void
ftl_init_fail(struct spdk_ftl_dev *dev)
{
	struct ftl_init_fail_ctx *fail_cb;

	fail_cb = malloc(sizeof(*fail_cb));
	if (!fail_cb) {
		/* Nothing sane to do: the caller never gets notified */
		SPDK_ERRLOG("Unable to allocate context to free the device\n");
		return;
	}

	fail_cb->cb = dev->init_cb;
	fail_cb->arg = dev->init_arg;
	/* Ensure spdk_ftl_dev_free() doesn't bail out with -EBUSY */
	dev->halt_cb = NULL;

	if (spdk_ftl_dev_free(dev, ftl_init_fail_cb, fail_cb)) {
		SPDK_ERRLOG("Unable to free the device\n");
		assert(0);
	}
}
817
/*
 * Completion callback for the data-restoration phase: on success finish
 * band state init and complete device initialization; on any failure
 * tear the device down via ftl_init_fail().
 */
static void
ftl_restore_device_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the device from the SSD\n");
		goto error;
	}

	if (ftl_init_bands_state(dev)) {
		SPDK_ERRLOG("Unable to finish the initialization\n");
		goto error;
	}

	ftl_init_complete(dev);
	return;
error:
	ftl_init_fail(dev);
}
836
/*
 * Completion callback for the metadata-restoration phase: with metadata
 * available (dev->num_lbas now known), allocate the L2P and chain into
 * device data restoration. Any failure tears the device down via
 * ftl_init_fail().
 */
static void
ftl_restore_md_cb(struct spdk_ftl_dev *dev, struct ftl_restore *restore, int status)
{
	if (status) {
		SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
		goto error;
	}

	/* After the metadata is read it should be possible to allocate the L2P */
	if (ftl_dev_l2p_alloc(dev)) {
		SPDK_ERRLOG("Failed to allocate the L2P\n");
		goto error;
	}

	if (ftl_restore_device(restore, ftl_restore_device_cb)) {
		SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
		goto error;
	}

	return;
error:
	ftl_init_fail(dev);
}
860
/*
 * Begin asynchronous state restoration for an existing device (no
 * SPDK_FTL_MODE_CREATE): adopt the caller-provided UUID and start the
 * metadata restore chain (ftl_restore_md_cb -> ftl_restore_device_cb).
 * Returns 0 when restoration was started, -1 otherwise.
 */
static int
ftl_restore_state(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
{
	dev->uuid = opts->uuid;

	if (ftl_restore_md(dev, ftl_restore_md_cb)) {
		SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
		return -1;
	}

	return 0;
}
873
/*
 * Per-thread IO channel constructor (registered via
 * spdk_io_device_register): creates the channel's IO object mempool and,
 * when a write-buffer cache bdev is attached, acquires a bdev channel
 * for it. Returns 0 on success, -1 on failure (partial state is undone).
 */
static int
ftl_io_channel_create_cb(void *io_device, void *ctx)
{
	struct spdk_ftl_dev *dev = io_device;
	struct ftl_io_channel *ioch = ctx;
	char mempool_name[32];

	/* Pool name is made unique per channel via the channel pointer */
	snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
	ioch->cache_ioch = NULL;
	ioch->dev = dev;
	ioch->elem_size = sizeof(struct ftl_md_io);
	ioch->io_pool = spdk_mempool_create(mempool_name,
					    dev->conf.user_io_pool_size,
					    ioch->elem_size,
					    0,
					    SPDK_ENV_SOCKET_ID_ANY);
	if (!ioch->io_pool) {
		SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
		return -1;
	}

	if (dev->nv_cache.bdev_desc) {
		ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
		if (!ioch->cache_ioch) {
			SPDK_ERRLOG("Failed to create cache IO channel\n");
			spdk_mempool_free(ioch->io_pool);
			return -1;
		}
	}

	return 0;
}
906
907 static void
908 ftl_io_channel_destroy_cb(void *io_device, void *ctx)
909 {
910 struct ftl_io_channel *ioch = ctx;
911
912 spdk_mempool_free(ioch->io_pool);
913
914 if (ioch->cache_ioch) {
915 spdk_put_io_channel(ioch->cache_ioch);
916 }
917 }
918
/*
 * Register the device as an SPDK io_device and acquire the device's own
 * IO channel. Returns 0 on success; on failure unregisters the device
 * and returns -1.
 */
static int
ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
{
	spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
				sizeof(struct ftl_io_channel),
				NULL);

	dev->ioch = spdk_get_io_channel(dev);
	if (!dev->ioch) {
		spdk_io_device_unregister(dev, NULL);
		return -1;
	}

	return 0;
}
934
/*
 * Public API: allocate and initialize an FTL device.
 *
 * Synchronous failures (before any async work is outstanding) are cleaned
 * up with ftl_dev_free_sync() and reported via the return value; once
 * threads/restoration are running, failures go through ftl_init_fail()
 * and are reported through the user callback instead. The callback fires
 * exactly once with the final status.
 *
 * Returns 0 when initialization was started (result delivered via cb),
 * or a negative errno on immediate failure.
 */
int
spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb, void *cb_arg)
{
	struct spdk_ftl_dev *dev;
	struct spdk_ftl_dev_init_opts opts = *_opts;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return -ENOMEM;
	}

	if (!opts.conf) {
		opts.conf = &g_default_conf;
	}

	TAILQ_INIT(&dev->retry_queue);
	dev->conf = *opts.conf;
	dev->init_cb = cb;
	dev->init_arg = cb_arg;
	dev->range = opts.range;
	dev->limit = SPDK_FTL_LIMIT_MAX;

	dev->name = strdup(opts.name);
	if (!dev->name) {
		SPDK_ERRLOG("Unable to set device name\n");
		goto fail_sync;
	}

	if (ftl_dev_nvme_init(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize NVMe structures\n");
		goto fail_sync;
	}

	/* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
	/* so we don't have to clean up in each of the init functions. */
	if (ftl_dev_retrieve_geo(dev)) {
		SPDK_ERRLOG("Unable to retrieve geometry\n");
		goto fail_sync;
	}

	/* Options can only be validated once the geometry is known */
	if (ftl_check_init_opts(&opts, &dev->geo)) {
		SPDK_ERRLOG("Invalid device configuration\n");
		goto fail_sync;
	}

	if (ftl_dev_init_punits(dev)) {
		SPDK_ERRLOG("Unable to initialize LUNs\n");
		goto fail_sync;
	}

	if (ftl_init_lba_map_pools(dev)) {
		SPDK_ERRLOG("Unable to init LBA map pools\n");
		goto fail_sync;
	}

	ftl_init_wptr_list(dev);

	if (ftl_dev_init_bands(dev)) {
		SPDK_ERRLOG("Unable to initialize band array\n");
		goto fail_sync;
	}

	if (ftl_dev_init_nv_cache(dev, opts.cache_bdev_desc)) {
		SPDK_ERRLOG("Unable to initialize persistent cache\n");
		goto fail_sync;
	}

	dev->rwb = ftl_rwb_init(&dev->conf, dev->geo.ws_opt, dev->md_size, ftl_dev_num_punits(dev));
	if (!dev->rwb) {
		SPDK_ERRLOG("Unable to initialize rwb structures\n");
		goto fail_sync;
	}

	dev->reloc = ftl_reloc_init(dev);
	if (!dev->reloc) {
		SPDK_ERRLOG("Unable to initialize reloc structures\n");
		goto fail_sync;
	}

	if (ftl_dev_init_io_channel(dev)) {
		SPDK_ERRLOG("Unable to initialize IO channels\n");
		goto fail_sync;
	}

	/* From here on pollers may be running - failures must use the
	 * asynchronous teardown path */
	if (ftl_dev_init_threads(dev, &opts)) {
		SPDK_ERRLOG("Unable to initialize device threads\n");
		goto fail_sync;
	}

	if (opts.mode & SPDK_FTL_MODE_CREATE) {
		if (ftl_setup_initial_state(dev)) {
			SPDK_ERRLOG("Failed to setup initial state of the device\n");
			goto fail_async;
		}
	} else {
		if (ftl_restore_state(dev, &opts)) {
			SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
			goto fail_async;
		}
	}

	return 0;
fail_sync:
	/* NOTE(review): -ENOMEM is returned for every synchronous failure,
	 * not only allocation failures */
	ftl_dev_free_sync(dev);
	return -ENOMEM;
fail_async:
	ftl_init_fail(dev);
	return 0;
}
1044
1045 static void
1046 _ftl_halt_defrag(void *arg)
1047 {
1048 ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
1049 }
1050
/* Mempool object destructor: release the per-request segment bitmap
 * allocated by ftl_lba_map_request_ctor. */
static void
ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
{
	struct ftl_lba_map_request *request = obj;

	spdk_bit_array_free(&request->segments);
}
1058
/*
 * Synchronously release every resource a device may own, tolerating
 * partially-initialized state (used both on init failure and as the last
 * step of a clean shutdown). Also unlinks the device from the global
 * registry if it was published. Safe to call with dev == NULL.
 */
static void
ftl_dev_free_sync(struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_dev *iter;
	size_t i;

	if (!dev) {
		return;
	}

	pthread_mutex_lock(&g_ftl_queue_lock);
	STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
		if (iter == dev) {
			STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
			break;
		}
	}
	pthread_mutex_unlock(&g_ftl_queue_lock);

	/* All write pointers must already be retired */
	assert(LIST_EMPTY(&dev->wptr_list));

	ftl_dev_dump_bands(dev);
	ftl_dev_dump_stats(dev);

	if (dev->ioch) {
		spdk_put_io_channel(dev->ioch);
		spdk_io_device_unregister(dev, NULL);
	}

	if (dev->bands) {
		for (i = 0; i < ftl_dev_num_bands(dev); ++i) {
			free(dev->bands[i].chunk_buf);
			spdk_bit_array_free(&dev->bands[i].lba_map.vld);
		}
	}

	spdk_mempool_free(dev->lba_pool);
	if (dev->lba_request_pool) {
		/* Free each element's ctor-allocated bitmap before the pool */
		spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
	}
	spdk_mempool_free(dev->lba_request_pool);

	ftl_rwb_free(dev->rwb);
	ftl_reloc_free(dev->reloc);

	free(dev->name);
	free(dev->punits);
	free(dev->bands);
	free(dev->l2p);
	free(dev);
}
1110
/*
 * Shutdown progress poller: once both worker pollers have unregistered
 * themselves, finish teardown (free threads, detach ANM, free the device)
 * and invoke the user halt callback.
 * The callback/arg are captured BEFORE ftl_dev_free_sync() frees dev.
 */
static int
ftl_halt_poller(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;
	spdk_ftl_fn halt_cb = dev->halt_cb;
	void *halt_arg = dev->halt_arg;

	if (!dev->core_thread.poller && !dev->read_thread.poller) {
		spdk_poller_unregister(&dev->halt_poller);

		ftl_dev_free_thread(dev, &dev->read_thread);
		ftl_dev_free_thread(dev, &dev->core_thread);

		ftl_anm_unregister_device(dev);
		ftl_dev_free_sync(dev);

		if (halt_cb) {
			halt_cb(halt_arg, 0);
		}
	}

	return 0;
}
1134
/*
 * Core-thread message: stop defrag and start polling (every 100us) for
 * the worker pollers to drain so teardown can complete (ftl_halt_poller).
 */
static void
ftl_add_halt_poller(void *ctx)
{
	struct spdk_ftl_dev *dev = ctx;

	_ftl_halt_defrag(dev);

	assert(!dev->halt_poller);
	dev->halt_poller = spdk_poller_register(ftl_halt_poller, dev, 100);
}
1145
/*
 * Public API: begin asynchronous device shutdown. Sets the halt flag,
 * disables rwb interleaving, and posts the halt sequence to the core
 * thread; cb fires when teardown completes.
 * Returns 0 when shutdown was started, -EBUSY if one is already pending.
 */
int
spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_fn cb, void *cb_arg)
{
	if (dev->halt_cb) {
		return -EBUSY;
	}

	dev->halt_cb = cb;
	dev->halt_arg = cb_arg;
	dev->halt = 1;

	ftl_rwb_disable_interleaving(dev->rwb);

	spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, dev);
	return 0;
}
1162
/* Public API: module-level init - starts the ANM event thread; cb fires
 * when ready. */
int
spdk_ftl_module_init(const struct ftl_module_init_opts *opts, spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_init(opts->anm_thread, cb, cb_arg);
}

/* Public API: module-level teardown - releases the ANM machinery; cb
 * fires when done. */
int
spdk_ftl_module_fini(spdk_ftl_fn cb, void *cb_arg)
{
	return ftl_anm_free(cb, cb_arg);
}

/* Register the "ftl_init" trace flag used by SPDK_INFOLOG/SPDK_DEBUGLOG above */
SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)