/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef FTL_CORE_H
#define FTL_CORE_H

#include "spdk/stdinc.h"
#include "spdk/uuid.h"
#include "spdk/thread.h"
#include "spdk/util.h"
#include "spdk_internal/log.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/ftl.h"
#include "spdk/bdev.h"
#include "spdk/bdev_zone.h"

#include "ftl_addr.h"
#include "ftl_io.h"
#include "ftl_trace.h"

#ifdef SPDK_CONFIG_PMDK
#include "libpmem.h"
#endif /* SPDK_CONFIG_PMDK */

struct spdk_ftl_dev;
struct ftl_band;
struct ftl_zone;
struct ftl_io;
struct ftl_restore;
struct ftl_wptr;
struct ftl_flush;
struct ftl_reloc;
struct ftl_anm_event;
struct ftl_band_flush;

struct ftl_stats {
        /* Number of writes scheduled directly by the user */
        uint64_t write_user;

        /* Total number of writes */
        uint64_t write_total;

        /* Traces */
        struct ftl_trace trace;

        /* Number of limits applied */
        uint64_t limits[SPDK_FTL_LIMIT_MAX];
};

struct ftl_global_md {
        /* Device instance */
        struct spdk_uuid uuid;
        /* Size of the l2p table */
        uint64_t num_lbas;
};

struct ftl_nv_cache {
        /* Write buffer cache bdev */
        struct spdk_bdev_desc *bdev_desc;
        /* Write pointer */
        uint64_t current_addr;
        /* Number of available blocks left */
        uint64_t num_available;
        /* Maximum number of blocks */
        uint64_t num_data_blocks;
        /*
         * Phase of the current cycle of writes. Each time the whole cache area is filled, the
         * phase is advanced. The current phase is saved in every IO's metadata, as well as in
         * the header saved in the first sector. By looking at the phase of each block, it's
         * possible to find the oldest block and replay the order of the writes when recovering
         * the data from the cache.
         */
        unsigned int phase;
        /* Indicates that the data can be written to the cache */
        bool ready;
        /* Metadata pool */
        struct spdk_mempool *md_pool;
        /* DMA buffer for writing the header */
        void *dma_buf;
        /* Cache lock */
        pthread_spinlock_t lock;
};

struct ftl_batch {
        /* Queue of write buffer entries, can reach up to xfer_size entries */
        TAILQ_HEAD(, ftl_wbuf_entry) entries;
        /* Number of entries in the queue above */
        uint32_t num_entries;
        /* Index within spdk_ftl_dev.batch_array */
        uint32_t index;
        struct iovec *iov;
        void *metadata;
        TAILQ_ENTRY(ftl_batch) tailq;
};

struct spdk_ftl_dev {
        /* Device instance */
        struct spdk_uuid uuid;
        /* Device name */
        char *name;
        /* Configuration */
        struct spdk_ftl_conf conf;

        /* Indicates the device is fully initialized */
        int initialized;
        /* Indicates the device is about to be stopped */
        int halt;
        /* Indicates the device is about to start stopping - used to handle multiple stop requests */
        bool halt_started;

        /* Underlying device */
        struct spdk_bdev_desc *base_bdev_desc;

        /* Non-volatile write buffer cache */
        struct ftl_nv_cache nv_cache;

        /* LBA map memory pool */
        struct spdk_mempool *lba_pool;

        /* LBA map requests pool */
        struct spdk_mempool *lba_request_pool;

        /* Media management events pool */
        struct spdk_mempool *media_events_pool;

        /* Statistics */
        struct ftl_stats stats;

        /* Current sequence number */
        uint64_t seq;

        /* Array of bands */
        struct ftl_band *bands;
        /* Number of operational bands */
        size_t num_bands;
        /* Next write band */
        struct ftl_band *next_band;
        /* Free band list */
        LIST_HEAD(, ftl_band) free_bands;
        /* Closed bands list */
        LIST_HEAD(, ftl_band) shut_bands;
        /* Number of free bands */
        size_t num_free;

        /* List of write pointers */
        LIST_HEAD(, ftl_wptr) wptr_list;

        /* Logical -> physical table */
        void *l2p;
        /* Size of the l2p table */
        uint64_t num_lbas;
        /* Size of pages mmapped for l2p, valid only for mapping on persistent memory */
        size_t l2p_pmem_len;

        /* Address size */
        size_t addr_len;

        /* Flush list */
        LIST_HEAD(, ftl_flush) flush_list;
        /* List of band flush requests */
        LIST_HEAD(, ftl_band_flush) band_flush_list;

        /* Device specific md buffer */
        struct ftl_global_md global_md;

        /* Metadata size */
        size_t md_size;
        void *md_buf;

        /* Transfer unit size */
        size_t xfer_size;

        /* Current user write limit */
        int limit;

        /* Inflight IO operations */
        uint32_t num_inflight;

        /* Manages data relocation */
        struct ftl_reloc *reloc;

        /* Thread on which the poller is running */
        struct spdk_thread *core_thread;
        /* IO channel */
        struct spdk_io_channel *ioch;
        /* Poller */
        struct spdk_poller *core_poller;

        /* The IO channel array provides a means of retrieving write buffer entries
         * from their addresses stored in the L2P. The address is divided into two
         * parts - an IO channel offset pointing at a specific IO channel (within this
         * array) and an entry offset pointing at a specific entry within that IO
         * channel. See the illustrative sketch following this structure definition.
         */
        struct ftl_io_channel **ioch_array;
        TAILQ_HEAD(, ftl_io_channel) ioch_queue;
        uint64_t num_io_channels;
        /* Value required to shift address of a write buffer entry to retrieve
         * the IO channel it's part of. The other part of the address describes
         * the offset of an entry within the IO channel's entry array.
         */
        uint64_t ioch_shift;

        /* Write buffer batches */
#define FTL_BATCH_COUNT 4096
        struct ftl_batch batch_array[FTL_BATCH_COUNT];
        /* Iovec buffer used by batches */
        struct iovec *iov_buf;
        /* Batch currently being filled */
        struct ftl_batch *current_batch;
        /* Full and ready to be sent batches. A batch is put on this queue in
         * case it's already filled, but cannot be sent.
         */
        TAILQ_HEAD(, ftl_batch) pending_batches;
        TAILQ_HEAD(, ftl_batch) free_batches;

        /* Entry on the global devices list */
        STAILQ_ENTRY(spdk_ftl_dev) stailq;
};
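
/*
 * Illustrative sketch (not part of the driver): given a write buffer entry
 * address recovered from the L2P, the ioch_array/ioch_shift fields above
 * imply a decoding of the following form. This assumes the IO channel index
 * occupies the high bits, as the ioch_shift comment suggests; the
 * authoritative encoding is wherever the address is produced.
 *
 *     uint64_t ioch_index  = entry_addr >> dev->ioch_shift;
 *     uint64_t entry_index = entry_addr & ((1ULL << dev->ioch_shift) - 1);
 *     struct ftl_io_channel *ioch = dev->ioch_array[ioch_index];
 */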

struct ftl_nv_cache_header {
        /* Version of the header */
        uint32_t version;
        /* UUID of the FTL device */
        struct spdk_uuid uuid;
        /* Size of the non-volatile cache (in blocks) */
        uint64_t size;
        /* Contains the next address to be written after clean shutdown, invalid LBA otherwise */
        uint64_t current_addr;
        /* Current phase */
        uint8_t phase;
        /* Checksum of the header, needs to be last element */
        uint32_t checksum;
} __attribute__((packed));

struct ftl_media_event {
        /* Owner */
        struct spdk_ftl_dev *dev;
        /* Media event */
        struct spdk_bdev_media_event event;
};

typedef void (*ftl_restore_fn)(struct ftl_restore *, int, void *cb_arg);

void ftl_apply_limits(struct spdk_ftl_dev *dev);
void ftl_io_read(struct ftl_io *io);
void ftl_io_write(struct ftl_io *io);
int ftl_flush_wbuf(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg);
int ftl_current_limit(const struct spdk_ftl_dev *dev);
int ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr);
int ftl_task_core(void *ctx);
int ftl_task_read(void *ctx);
void ftl_process_anm_event(struct ftl_anm_event *event);
size_t ftl_tail_md_num_blocks(const struct spdk_ftl_dev *dev);
size_t ftl_tail_md_hdr_num_blocks(void);
size_t ftl_vld_map_num_blocks(const struct spdk_ftl_dev *dev);
size_t ftl_lba_map_num_blocks(const struct spdk_ftl_dev *dev);
size_t ftl_head_md_num_blocks(const struct spdk_ftl_dev *dev);
int ftl_restore_md(struct spdk_ftl_dev *dev, ftl_restore_fn cb, void *cb_arg);
int ftl_restore_device(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg);
void ftl_restore_nv_cache(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg);
int ftl_band_set_direct_access(struct ftl_band *band, bool access);
bool ftl_addr_is_written(struct ftl_band *band, struct ftl_addr addr);
int ftl_flush_active_bands(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg);
int ftl_nv_cache_write_header(struct ftl_nv_cache *nv_cache, bool shutdown,
                              spdk_bdev_io_completion_cb cb_fn, void *cb_arg);
int ftl_nv_cache_scrub(struct ftl_nv_cache *nv_cache, spdk_bdev_io_completion_cb cb_fn,
                       void *cb_arg);
void ftl_get_media_events(struct spdk_ftl_dev *dev);
int ftl_io_channel_poll(void *arg);
void ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_wbuf_entry *entry);
struct spdk_io_channel *ftl_get_io_channel(const struct spdk_ftl_dev *dev);
struct ftl_io_channel *ftl_io_channel_get_ctx(struct spdk_io_channel *ioch);

#define ftl_to_addr(address) \
        (struct ftl_addr) { .offset = (uint64_t)(address) }

#define ftl_to_addr_packed(address) \
        (struct ftl_addr) { .pack.offset = (uint32_t)(address) }

static inline struct spdk_thread *
ftl_get_core_thread(const struct spdk_ftl_dev *dev)
{
        return dev->core_thread;
}

static inline size_t
ftl_get_num_bands(const struct spdk_ftl_dev *dev)
{
        return dev->num_bands;
}

static inline size_t
ftl_get_num_punits(const struct spdk_ftl_dev *dev)
{
        return spdk_bdev_get_optimal_open_zones(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
}

static inline size_t
ftl_get_num_zones(const struct spdk_ftl_dev *dev)
{
        return ftl_get_num_bands(dev) * ftl_get_num_punits(dev);
}

static inline size_t
ftl_get_num_blocks_in_zone(const struct spdk_ftl_dev *dev)
{
        return spdk_bdev_get_zone_size(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
}

static inline uint64_t
ftl_get_num_blocks_in_band(const struct spdk_ftl_dev *dev)
{
        return ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev);
}

static inline uint64_t
ftl_addr_get_zone_slba(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return addr.offset - (addr.offset % ftl_get_num_blocks_in_zone(dev));
}

static inline uint64_t
ftl_addr_get_band(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return addr.offset / ftl_get_num_blocks_in_band(dev);
}

static inline uint64_t
ftl_addr_get_punit(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return (addr.offset / ftl_get_num_blocks_in_zone(dev)) % ftl_get_num_punits(dev);
}

static inline uint64_t
ftl_addr_get_zone_offset(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        return addr.offset % ftl_get_num_blocks_in_zone(dev);
}
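
/*
 * Worked example (illustrative numbers only): with 4 punits and 1024 blocks
 * per zone, a band spans 4 * 1024 = 4096 blocks, and an address with
 * offset 5000 decodes as:
 *
 *     band        = 5000 / 4096       = 1
 *     punit       = (5000 / 1024) % 4 = 0
 *     zone offset = 5000 % 1024       = 904
 */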

static inline size_t
ftl_vld_map_size(const struct spdk_ftl_dev *dev)
{
        return (size_t)spdk_divide_round_up(ftl_get_num_blocks_in_band(dev), CHAR_BIT);
}

static inline int
ftl_addr_packed(const struct spdk_ftl_dev *dev)
{
        return dev->addr_len < 32;
}
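
/*
 * When the physical address fits in under 32 bits, the L2P keeps 4-byte
 * packed entries (see the 32-bit paths in ftl_l2p_set()/ftl_l2p_get() and
 * the 4-vs-8-byte stride in ftl_l2p_lba_persist() below); otherwise full
 * 8-byte ftl_addr values are stored.
 */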

static inline void
ftl_l2p_lba_persist(const struct spdk_ftl_dev *dev, uint64_t lba)
{
#ifdef SPDK_CONFIG_PMDK
        size_t ftl_addr_size = ftl_addr_packed(dev) ? 4 : 8;
        pmem_persist((char *)dev->l2p + (lba * ftl_addr_size), ftl_addr_size);
#else /* SPDK_CONFIG_PMDK */
        SPDK_ERRLOG("Libpmem not available, cannot flush l2p to pmem\n");
        assert(0);
#endif /* SPDK_CONFIG_PMDK */
}

static inline int
ftl_addr_invalid(struct ftl_addr addr)
{
        return addr.offset == ftl_to_addr(FTL_ADDR_INVALID).offset;
}

static inline int
ftl_addr_cached(struct ftl_addr addr)
{
        return !ftl_addr_invalid(addr) && addr.cached;
}

static inline struct ftl_addr
ftl_addr_to_packed(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
        struct ftl_addr p = {};

        if (ftl_addr_invalid(addr)) {
                p = ftl_to_addr_packed(FTL_ADDR_INVALID);
        } else if (ftl_addr_cached(addr)) {
                p.pack.cached = 1;
                p.pack.cache_offset = (uint32_t)addr.cache_offset;
        } else {
                p.pack.offset = (uint32_t)addr.offset;
        }

        return p;
}

static inline struct ftl_addr
ftl_addr_from_packed(const struct spdk_ftl_dev *dev, struct ftl_addr p)
{
        struct ftl_addr addr = {};

        if (p.pack.offset == (uint32_t)FTL_ADDR_INVALID) {
                addr = ftl_to_addr(FTL_ADDR_INVALID);
        } else if (p.pack.cached) {
                addr.cached = 1;
                addr.cache_offset = p.pack.cache_offset;
        } else {
                addr = p;
        }

        return addr;
}

#define _ftl_l2p_set(l2p, off, val, bits) \
        __atomic_store_n(((uint##bits##_t *)(l2p)) + (off), val, __ATOMIC_SEQ_CST)

#define _ftl_l2p_set32(l2p, off, val) \
        _ftl_l2p_set(l2p, off, val, 32)

#define _ftl_l2p_set64(l2p, off, val) \
        _ftl_l2p_set(l2p, off, val, 64)

#define _ftl_l2p_get(l2p, off, bits) \
        __atomic_load_n(((uint##bits##_t *)(l2p)) + (off), __ATOMIC_SEQ_CST)

#define _ftl_l2p_get32(l2p, off) \
        _ftl_l2p_get(l2p, off, 32)

#define _ftl_l2p_get64(l2p, off) \
        _ftl_l2p_get(l2p, off, 64)

#define ftl_addr_cmp(p1, p2) \
        ((p1).offset == (p2).offset)

static inline void
ftl_l2p_set(struct spdk_ftl_dev *dev, uint64_t lba, struct ftl_addr addr)
{
        assert(dev->num_lbas > lba);

        if (ftl_addr_packed(dev)) {
                _ftl_l2p_set32(dev->l2p, lba, ftl_addr_to_packed(dev, addr).offset);
        } else {
                _ftl_l2p_set64(dev->l2p, lba, addr.offset);
        }

        if (dev->l2p_pmem_len != 0) {
                ftl_l2p_lba_persist(dev, lba);
        }
}

static inline struct ftl_addr
ftl_l2p_get(struct spdk_ftl_dev *dev, uint64_t lba)
{
        assert(dev->num_lbas > lba);

        if (ftl_addr_packed(dev)) {
                return ftl_addr_from_packed(dev, ftl_to_addr_packed(
                                _ftl_l2p_get32(dev->l2p, lba)));
        } else {
                return ftl_to_addr(_ftl_l2p_get64(dev->l2p, lba));
        }
}
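
/*
 * Typical lookup flow (illustrative sketch, not a definitive code path),
 * using only the helpers defined in this header:
 *
 *     struct ftl_addr addr = ftl_l2p_get(dev, lba);
 *
 *     if (ftl_addr_invalid(addr)) {
 *             ... LBA has no mapping yet ...
 *     } else if (ftl_addr_cached(addr)) {
 *             ... data still lives in a write buffer entry ...
 *     } else {
 *             ... addr.offset names a block on the base bdev ...
 *     }
 */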

static inline bool
ftl_dev_has_nv_cache(const struct spdk_ftl_dev *dev)
{
        return dev->nv_cache.bdev_desc != NULL;
}

#define FTL_NV_CACHE_HEADER_VERSION (1)
#define FTL_NV_CACHE_DATA_OFFSET (1)
#define FTL_NV_CACHE_PHASE_OFFSET (62)
#define FTL_NV_CACHE_PHASE_COUNT (4)
#define FTL_NV_CACHE_PHASE_MASK (3ULL << FTL_NV_CACHE_PHASE_OFFSET)
#define FTL_NV_CACHE_LBA_INVALID (FTL_LBA_INVALID & ~FTL_NV_CACHE_PHASE_MASK)

static inline bool
ftl_nv_cache_phase_is_valid(unsigned int phase)
{
        return phase > 0 && phase <= 3;
}
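
/*
 * Valid phases cycle 1 -> 2 -> 3 -> 1; phase 0 is never used for written
 * data. The lookup tables below are indexed by the current phase and map
 * it to its successor/predecessor in that cycle.
 */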

static inline unsigned int
ftl_nv_cache_next_phase(unsigned int current)
{
        static const unsigned int phases[] = { 0, 2, 3, 1 };
        assert(ftl_nv_cache_phase_is_valid(current));
        return phases[current];
}

static inline unsigned int
ftl_nv_cache_prev_phase(unsigned int current)
{
        static const unsigned int phases[] = { 0, 3, 1, 2 };
        assert(ftl_nv_cache_phase_is_valid(current));
        return phases[current];
}

static inline uint64_t
ftl_nv_cache_pack_lba(uint64_t lba, unsigned int phase)
{
        assert(ftl_nv_cache_phase_is_valid(phase));
        return (lba & ~FTL_NV_CACHE_PHASE_MASK) | ((uint64_t)phase << FTL_NV_CACHE_PHASE_OFFSET);
}
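
/*
 * Worked example (illustrative values): with FTL_NV_CACHE_PHASE_OFFSET = 62
 * the phase occupies the top two bits of the stored LBA, so packing
 * lba = 0x1000 with phase = 2 yields
 *
 *     0x1000 | (2ULL << 62) = 0x8000000000001000
 *
 * and ftl_nv_cache_unpack_lba() recovers lba = 0x1000, phase = 2.
 */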

static inline void
ftl_nv_cache_unpack_lba(uint64_t in_lba, uint64_t *out_lba, unsigned int *phase)
{
        *out_lba = in_lba & ~FTL_NV_CACHE_PHASE_MASK;
        *phase = (in_lba & FTL_NV_CACHE_PHASE_MASK) >> FTL_NV_CACHE_PHASE_OFFSET;

        /* If the phase is invalid the block wasn't written yet, so treat the LBA as invalid too */
        if (!ftl_nv_cache_phase_is_valid(*phase) || *out_lba == FTL_NV_CACHE_LBA_INVALID) {
                *out_lba = FTL_LBA_INVALID;
        }
}

static inline bool
ftl_is_append_supported(const struct spdk_ftl_dev *dev)
{
        return dev->conf.use_append;
}

#endif /* FTL_CORE_H */