]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/include/spdk/bdev_module.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / include / spdk / bdev_module.h
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /** \file
35 * Block Device Module Interface
36 *
37 * For information on how to write a bdev module, see @ref bdev_module.
38 */
39
40 #ifndef SPDK_BDEV_MODULE_H
41 #define SPDK_BDEV_MODULE_H
42
43 #include "spdk/stdinc.h"
44
45 #include "spdk/bdev.h"
46 #include "spdk/bdev_zone.h"
47 #include "spdk/queue.h"
48 #include "spdk/scsi_spec.h"
49 #include "spdk/thread.h"
50 #include "spdk/util.h"
51 #include "spdk/uuid.h"
52
53 /** Block device module: callbacks and settings through which a module plugs into the bdev layer. */
54 struct spdk_bdev_module {
55 /**
56 * Initialization function for the module. Called by the spdk
57 * application during startup.
58 *
59 * Modules are required to define this function.
60 */
61 int (*module_init)(void);
62
63 /**
64 * Optional callback for modules that require notification of when
65 * the bdev subsystem has completed initialization.
66 *
67 * Modules are not required to define this function.
68 */
69 void (*init_complete)(void);
70
71 /**
72 * Optional callback for modules that require notification of when
73 * the bdev subsystem is starting the fini process.
74 *
75 * Modules are not required to define this function.
76 */
77 void (*fini_start)(void);
78
79 /**
80 * Finish function for the module. Called by the spdk application
81 * after all bdevs for all modules have been unregistered. This allows
82 * the module to do any final cleanup before the SPDK application exits.
83 *
84 * Modules are not required to define this function.
85 */
86 void (*module_fini)(void);
87
88 /**
89 * Function called to return a text string representing the
90 * module's configuration options for inclusion in a configuration file.
91 */
92 void (*config_text)(FILE *fp);
93
94 /**
95 * Function called to return a text string representing the module-level
96 * JSON RPCs required to regenerate the current configuration. This will
97 * include module-level configuration options, or methods to construct
98 * bdevs when one RPC may generate multiple bdevs (for example, an NVMe
99 * controller with multiple namespaces).
100 *
101 * Per-bdev JSON RPCs (where one "construct" RPC always creates one bdev)
102 * may be implemented here, or by the bdev's write_config_json function -
103 * but not both. Bdev module implementers may choose which mechanism to
104 * use based on the module's design.
105 *
106 * \return 0 on success or Bdev specific negative error code.
107 */
108 int (*config_json)(struct spdk_json_write_ctx *w);
109
110 /** Name of the module being defined. */
111 const char *name;
112
113 /**
114 * Returns the allocation size required for the backend for uses such as local
115 * command structs, local SGL, iovecs, or other user context.
116 */
117 int (*get_ctx_size)(void);
118
119 /**
120 * First notification that a bdev should be examined by a virtual bdev module.
121 * Virtual bdev modules may use this to examine newly-added bdevs and automatically
122 * create their own vbdevs, but no I/O to device can be sent to bdev at this point.
123 * Only vbdevs based on config files can be created here. This callback must make
124 * its decision to claim the bdev synchronously.
125 * It must also call spdk_bdev_module_examine_done() before returning. If the module
126 * needs to perform asynchronous operations such as I/O after claiming the bdev,
127 * it may define an examine_disk callback. The examine_disk callback will then
128 * be called immediately after the examine_config callback returns.
129 */
130 void (*examine_config)(struct spdk_bdev *bdev);
131
132 /**
133 * Second notification that a bdev should be examined by a virtual bdev module.
134 * Virtual bdev modules may use this to examine newly-added bdevs and automatically
135 * create their own vbdevs. This callback may use I/O operations and finish asynchronously.
136 */
137 void (*examine_disk)(struct spdk_bdev *bdev);
138
139 /**
140 * Denotes if the module_init function may complete asynchronously. If set to true,
141 * the module initialization has to be explicitly completed by calling
142 * spdk_bdev_module_init_done().
143 */
144 bool async_init;
145
146 /**
147 * Denotes if the module_fini function may complete asynchronously.
148 * If set to true finishing has to be explicitly completed by calling
149 * spdk_bdev_module_finish_done().
150 */
151 bool async_fini;
152
153 /**
154 * Fields that are used by the internal bdev subsystem. Bdev modules
155 * must not read or write to these fields.
156 */
157 struct __bdev_module_internal_fields {
158 /**
159 * Count of bdev inits/examinations in progress. Used by generic bdev
160 * layer and must not be modified by bdev modules.
161 *
162 * \note Used internally by bdev subsystem, don't change this value in bdev module.
163 */
164 uint32_t action_in_progress;
165
/** List linkage; presumably for the list of registered modules (see spdk_bdev_module_list_add()). */
166 TAILQ_ENTRY(spdk_bdev_module) tailq;
167 } internal;
168 };
169
/**
 * Callback passed to spdk_bdev_unregister(), called when the unregister is complete.
 *
 * \param cb_arg Argument that was supplied alongside the callback.
 * \param rc Result code; presumably the error code reported for the bdev's destruct
 * path (see spdk_bdev_destruct_done()) - TODO confirm.
 */
170 typedef void (*spdk_bdev_unregister_cb)(void *cb_arg, int rc);
171
172 /**
173 * Function table for a block device backend.
174 *
175 * The backend block device function table provides a set of APIs to allow
176 * communication with a backend. The main commands are read/write API
177 * calls for I/O via submit_request.
178 */
179 struct spdk_bdev_fn_table {
180 /**
 * Destroy the backend block device object.
 *
 * A bdev with a synchronous destruct path should return 0. A bdev with an
 * asynchronous destruct path should return 1 and call spdk_bdev_destruct_done()
 * at the conclusion of that path.
 */
181 int (*destruct)(void *ctx);
182
183 /** Process the IO. */
184 void (*submit_request)(struct spdk_io_channel *ch, struct spdk_bdev_io *);
185
186 /** Check if the block device supports a specific I/O type. */
187 bool (*io_type_supported)(void *ctx, enum spdk_bdev_io_type);
188
189 /** Get an I/O channel for the specific bdev for the calling thread. */
190 struct spdk_io_channel *(*get_io_channel)(void *ctx);
191
192 /**
193 * Output driver-specific information to a JSON stream. Optional - may be NULL.
194 *
195 * The JSON write context will be initialized with an open object, so the bdev
196 * driver should write a name (based on the driver name) followed by a JSON value
197 * (most likely another nested object).
198 */
199 int (*dump_info_json)(void *ctx, struct spdk_json_write_ctx *w);
200
201 /**
202 * Output bdev-specific RPC configuration to a JSON stream. Optional - may be NULL.
203 *
204 * This function should only be implemented for bdevs which can be configured
205 * independently of other bdevs. For example, RPCs to create a bdev for an NVMe
206 * namespace may not be generated by this function, since enumerating an NVMe
207 * namespace requires attaching to an NVMe controller, and that controller may
208 * contain multiple namespaces. The spdk_bdev_module's config_json function should
209 * be used instead for these cases.
210 *
211 * The JSON write context will be initialized with an open object, so the bdev
212 * driver should write all data necessary to recreate this bdev by invoking
213 * the constructor method. No other data should be written.
214 */
215 void (*write_config_json)(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w);
216
217 /** Get spin-time per I/O channel in microseconds.
218 * Optional - may be NULL.
219 */
220 uint64_t (*get_spin_time)(struct spdk_io_channel *ch);
221 };
222
223 /**
 * bdev I/O completion status.
 *
 * Negative values are the non-success completions, 0 means the I/O is still
 * pending and 1 means it succeeded.
 */
224 enum spdk_bdev_io_status {
225 SPDK_BDEV_IO_STATUS_ABORTED = -7,
226 SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED = -6,
227 SPDK_BDEV_IO_STATUS_MISCOMPARE = -5,
228 /**
229 * NOMEM should be returned when a bdev module cannot start an I/O because of
230 * some lack of resources. It may not be returned for RESET I/O. I/O completed
231 * with NOMEM status will be retried after some I/O from the same channel have
232 * completed.
233 */
234 SPDK_BDEV_IO_STATUS_NOMEM = -4,
235 SPDK_BDEV_IO_STATUS_SCSI_ERROR = -3,
236 SPDK_BDEV_IO_STATUS_NVME_ERROR = -2,
237 SPDK_BDEV_IO_STATUS_FAILED = -1,
238 SPDK_BDEV_IO_STATUS_PENDING = 0,
239 SPDK_BDEV_IO_STATUS_SUCCESS = 1,
240 };
241
/** One alias of a bdev; entries are linked into the aliases list of struct spdk_bdev. */
242 struct spdk_bdev_alias {
/** The alias string. */
243 char *alias;
/** List linkage within the owning bdev's aliases list. */
244 TAILQ_ENTRY(spdk_bdev_alias) tailq;
245 };
246
/** Tail queue head for lists of spdk_bdev_io. */
247 typedef TAILQ_HEAD(, spdk_bdev_io) bdev_io_tailq_t;
/** Singly-linked tail queue head for lists of spdk_bdev_io. */
248 typedef STAILQ_HEAD(, spdk_bdev_io) bdev_io_stailq_t;
/** Tail queue head for lists of lba_range (used for the locked range lists of struct spdk_bdev). */
249 typedef TAILQ_HEAD(, lba_range) lba_range_tailq_t;
250
/**
 * A block device as seen by the bdev layer.
 *
 * Fields outside of \c internal describe the device and its backend; the nested
 * \c internal struct is reserved for the bdev subsystem.
 */
251 struct spdk_bdev {
252 /** User context passed in by the backend */
253 void *ctxt;
254
255 /** Unique name for this block device. */
256 char *name;
257
258 /** Unique aliases for this block device. */
259 TAILQ_HEAD(spdk_bdev_aliases_list, spdk_bdev_alias) aliases;
260
261 /** Unique product name for this kind of block device. */
262 char *product_name;
263
264 /** write cache enabled, not used at the moment */
265 int write_cache;
266
267 /** Size in bytes of a logical block for the backend */
268 uint32_t blocklen;
269
270 /** Number of blocks */
271 uint64_t blockcnt;
272
273 /** Number of blocks required for write */
274 uint32_t write_unit_size;
275
276 /** Atomic compare & write unit */
277 uint16_t acwu;
278
279 /**
280 * Specifies an alignment requirement for data buffers associated with an spdk_bdev_io.
281 * 0 = no alignment requirement
282 * >0 = alignment requirement is 2 ^ required_alignment.
283 * bdev layer will automatically double buffer any spdk_bdev_io that violates this
284 * alignment, before the spdk_bdev_io is submitted to the bdev module.
285 */
286 uint8_t required_alignment;
287
288 /**
289 * Specifies whether the optimal_io_boundary is mandatory or
290 * only advisory. If set to true, the bdev layer will split
291 * READ and WRITE I/O that span the optimal_io_boundary before
292 * submitting them to the bdev module.
293 *
294 * Note that this field cannot be used to force splitting of
295 * UNMAP, WRITE_ZEROES or FLUSH I/O.
296 */
297 bool split_on_optimal_io_boundary;
298
299 /**
300 * Optimal I/O boundary in blocks, or 0 for no value reported.
301 */
302 uint32_t optimal_io_boundary;
303
304 /**
305 * UUID for this bdev.
306 *
307 * Fill with zeroes if no uuid is available. The bdev layer
308 * will automatically populate this if necessary.
309 */
310 struct spdk_uuid uuid;
311
312 /** Size in bytes of the metadata for the backend */
313 uint32_t md_len;
314
315 /**
316 * Specify metadata location and set to true if metadata is interleaved
317 * with block data or false if metadata is separate from block data.
318 *
319 * Note that this field is valid only if there is metadata.
320 */
321 bool md_interleave;
322
323 /**
324 * DIF type for this bdev.
325 *
326 * Note that this field is valid only if there is metadata.
327 */
328 enum spdk_dif_type dif_type;
329
330 /**
331 * DIF location.
332 *
333 * Set to true if DIF is set in the first 8 bytes of metadata or false
334 * if DIF is set in the last 8 bytes of metadata.
335 *
336 * Note that this field is valid only if DIF is enabled.
337 */
338 bool dif_is_head_of_md;
339
340 /**
341 * Specify whether each DIF check type is enabled.
342 */
343 uint32_t dif_check_flags;
344
345 /**
346 * Specify whether bdev is a zoned device.
347 */
348 bool zoned;
349
350 /**
351 * Default size of each zone (in blocks).
352 */
353 uint64_t zone_size;
354
355 /**
356 * Maximum number of open zones.
357 */
358 uint32_t max_open_zones;
359
360 /**
361 * Optimal number of open zones.
362 */
363 uint32_t optimal_open_zones;
364
365 /**
366 * Specifies whether bdev supports media management events.
367 */
368 bool media_events;
369
370 /**
371 * Pointer to the bdev module that registered this bdev.
372 */
373 struct spdk_bdev_module *module;
374
375 /** function table for all LUN ops */
376 const struct spdk_bdev_fn_table *fn_table;
377
378 /** Fields that are used internally by the bdev subsystem. Bdev modules
379 * must not read or write to these fields.
380 */
381 struct __bdev_internal_fields {
382 /** Quality of service parameters */
383 struct spdk_bdev_qos *qos;
384
385 /** True if the state of the QoS is being modified */
386 bool qos_mod_in_progress;
387
388 /** Mutex protecting claimed */
389 pthread_mutex_t mutex;
390
391 /** The bdev status */
392 enum spdk_bdev_status status;
393
394 /**
395 * Pointer to the module that has claimed this bdev for purposes of creating virtual
396 * bdevs on top of it. Set to NULL if the bdev has not been claimed.
397 */
398 struct spdk_bdev_module *claim_module;
399
400 /** Callback function that will be called after bdev destruct is completed. */
401 spdk_bdev_unregister_cb unregister_cb;
402
403 /** Unregister call context */
404 void *unregister_ctx;
405
406 /** List of open descriptors for this block device. */
407 TAILQ_HEAD(, spdk_bdev_desc) open_descs;
408
/** List linkage for this bdev; the owning list is not visible in this header. */
409 TAILQ_ENTRY(spdk_bdev) link;
410
411 /** points to a reset bdev_io if one is in progress. */
412 struct spdk_bdev_io *reset_in_progress;
413
414 /** poller for tracking the queue_depth of a device, NULL if not tracking */
415 struct spdk_poller *qd_poller;
416
417 /** period at which we poll for queue depth information */
418 uint64_t period;
419
420 /** used to aggregate queue depth while iterating across the bdev's open channels */
421 uint64_t temporary_queue_depth;
422
423 /** queue depth as calculated the last time the telemetry poller checked. */
424 uint64_t measured_queue_depth;
425
426 /** most recent value of ticks spent performing I/O. Used to calculate the weighted time doing I/O */
427 uint64_t io_time;
428
429 /** weighted time performing I/O. Equal to measured_queue_depth * period */
430 uint64_t weighted_io_time;
431
432 /** accumulated I/O statistics for previously deleted channels of this bdev */
433 struct spdk_bdev_io_stat stat;
434
435 /** histogram enabled on this bdev */
436 bool histogram_enabled;
/** NOTE(review): presumably true while a histogram enable/disable is underway - confirm. */
437 bool histogram_in_progress;
438
439 /** Currently locked ranges for this bdev. Used to populate new channels. */
440 lba_range_tailq_t locked_ranges;
441
442 /** Pending locked ranges for this bdev. These ranges are not currently
443 * locked due to overlapping with another locked range.
444 */
445 lba_range_tailq_t pending_locked_ranges;
446 } internal;
447 };
448
449 /**
450 * Callback when buffer is allocated for the bdev I/O.
451 *
452 * \param ch The I/O channel the bdev I/O was handled on.
453 * \param bdev_io The bdev I/O
454 * \param success True if buffer is allocated successfully or the bdev I/O has an SGL
455 * assigned already, or false if it failed. A possible reason for failure is that the size
456 * of the buffer to allocate is greater than the permitted maximum.
457 */
458 typedef void (*spdk_bdev_io_get_buf_cb)(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
459 bool success);
460
461 /**
462 * Callback when an auxiliary buffer is allocated for the bdev I/O.
463 *
464 * \param ch The I/O channel the bdev I/O was handled on.
465 * \param bdev_io The bdev I/O
466 * \param aux_buf Pointer to the allocated buffer. NULL if there was a failure, such as
467 * the size of the buffer to allocate being greater than the permitted maximum.
468 */
469 typedef void (*spdk_bdev_io_get_aux_buf_cb)(struct spdk_io_channel *ch,
470 struct spdk_bdev_io *bdev_io, void *aux_buf);
471
/** Number of entries in the child_iov array of struct spdk_bdev_io (used for I/O splitting). */
472 #define BDEV_IO_NUM_CHILD_IOV 32
473
/**
 * A single I/O request on a bdev.
 *
 * The union \c u carries the parameters specific to the kind of request, \c internal is
 * reserved for the bdev subsystem, and \c driver_ctx is the trailing per-I/O context
 * area for the bdev module. No members may follow \c driver_ctx.
 */
474 struct spdk_bdev_io {
475 /** The block device that this I/O belongs to. */
476 struct spdk_bdev *bdev;
477
478 /** Enumerated value representing the I/O type. */
479 uint8_t type;
480
481 /** Number of IO submission retries */
482 uint16_t num_retries;
483
484 /** A single iovec element for use by this bdev_io. */
485 struct iovec iov;
486
487 /** Array of iovecs used for I/O splitting. */
488 struct iovec child_iov[BDEV_IO_NUM_CHILD_IOV];
489
490 union {
491 struct {
492 /** For SG buffer cases, array of iovecs to transfer. */
493 struct iovec *iovs;
494
495 /** For SG buffer cases, number of iovecs in iovec array. */
496 int iovcnt;
497
498 /** For fused operations such as COMPARE_AND_WRITE, array of iovecs
499 * for the second operation.
500 */
501 struct iovec *fused_iovs;
502
503 /** Number of iovecs in fused_iovs. */
504 int fused_iovcnt;
505
506 /** Metadata buffer */
507 void *md_buf;
508
509 /** Total size of data to be transferred, in blocks. */
510 uint64_t num_blocks;
511
512 /** Starting offset (in blocks) of the bdev for this I/O. */
513 uint64_t offset_blocks;
514
515 /** stored user callback in case we split the I/O and use a temporary callback */
516 spdk_bdev_io_completion_cb stored_user_cb;
517
518 /** number of blocks remaining in a split i/o */
519 uint64_t split_remaining_num_blocks;
520
521 /** current offset of the split I/O in the bdev */
522 uint64_t split_current_offset_blocks;
523
524 /** count of outstanding batched split I/Os */
525 uint32_t split_outstanding;
526
527 struct {
528 /** Whether the buffer should be populated with the real data */
529 uint8_t populate : 1;
530
531 /** Whether the buffer should be committed back to disk */
532 uint8_t commit : 1;
533
534 /** True if this request is in the 'start' phase of zcopy. False if in 'end'. */
535 uint8_t start : 1;
536 } zcopy;
537
538 struct {
539 /** The callback argument for the outstanding request which this abort
540 * attempts to cancel.
541 */
542 void *bio_cb_arg;
543 } abort;
544 } bdev;
545 struct {
546 /** Channel reference held while messages for this reset are in progress. */
547 struct spdk_io_channel *ch_ref;
548 } reset;
549 struct {
550 /** The outstanding request matching bio_cb_arg which this abort attempts to cancel. */
551 struct spdk_bdev_io *bio_to_abort;
552 } abort;
553 struct {
554 /** The NVMe command to execute */
555 struct spdk_nvme_cmd cmd;
556
557 /** The data buffer to transfer */
558 void *buf;
559
560 /** The number of bytes to transfer */
561 size_t nbytes;
562
563 /** The metadata buffer to transfer */
564 void *md_buf;
565
566 /** Size of the metadata buffer to transfer */
567 size_t md_len;
568 } nvme_passthru;
569 struct {
570 /** First logical block of a zone */
571 uint64_t zone_id;
572
573 /** Number of zones */
574 uint32_t num_zones;
575
576 /** Used to change zoned device zone state */
577 enum spdk_bdev_zone_action zone_action;
578
579 /** The data buffer */
580 void *buf;
581 } zone_mgmt;
582 } u;
583
584 /** It may be used by modules to put the bdev_io into its own list. */
585 TAILQ_ENTRY(spdk_bdev_io) module_link;
586
587 /**
588 * Fields that are used internally by the bdev subsystem. Bdev modules
589 * must not read or write to these fields.
590 */
591 struct __bdev_io_internal_fields {
592 /** The bdev I/O channel that this was handled on. */
593 struct spdk_bdev_channel *ch;
594
595 /** The bdev I/O channel that this was submitted on. */
596 struct spdk_bdev_channel *io_submit_ch;
597
598 /** The bdev descriptor that was used when submitting this I/O. */
599 struct spdk_bdev_desc *desc;
600
601 /** User function that will be called when this completes */
602 spdk_bdev_io_completion_cb cb;
603
604 /** Context that will be passed to the completion callback */
605 void *caller_ctx;
606
607 /** Current tsc at submit time. Used to calculate latency at completion. */
608 uint64_t submit_tsc;
609
610 /** Error information from a device */
611 union {
/** NVMe error details; presumably valid when status is SPDK_BDEV_IO_STATUS_NVME_ERROR - confirm. */
612 struct {
613 /** NVMe completion queue entry DW0 */
614 uint32_t cdw0;
615 /** NVMe status code type */
616 uint8_t sct;
617 /** NVMe status code */
618 uint8_t sc;
619 } nvme;
620 /** Only valid when status is SPDK_BDEV_IO_STATUS_SCSI_ERROR */
621 struct {
622 /** SCSI status code */
623 uint8_t sc;
624 /** SCSI sense key */
625 uint8_t sk;
626 /** SCSI additional sense code */
627 uint8_t asc;
628 /** SCSI additional sense code qualifier */
629 uint8_t ascq;
630 } scsi;
631 } error;
632
633 /**
634 * Set to true while the bdev module submit_request function is in progress.
635 *
636 * This is used to decide whether spdk_bdev_io_complete() can complete the I/O directly
637 * or if completion must be deferred via an event.
638 */
639 bool in_submit_request;
640
641 /** Status for the IO (presumably an enum spdk_bdev_io_status value held in 8 bits). */
642 int8_t status;
643
644 /** bdev allocated memory associated with this request */
645 void *buf;
646
647 /** requested size of the buffer associated with this I/O */
648 uint64_t buf_len;
649
650 /** if the request is double buffered, store original request iovs here */
651 struct iovec bounce_iov;
652 struct iovec *orig_iovs;
653 int orig_iovcnt;
654 void *orig_md_buf;
655
656 /** Callback for when the aux buf is allocated */
657 spdk_bdev_io_get_aux_buf_cb get_aux_buf_cb;
658
659 /** Callback for when buf is allocated */
660 spdk_bdev_io_get_buf_cb get_buf_cb;
661
662 /** Member used for linking child I/Os together. */
663 TAILQ_ENTRY(spdk_bdev_io) link;
664
665 /** Entry to the list need_buf of struct spdk_bdev. */
666 STAILQ_ENTRY(spdk_bdev_io) buf_link;
667
668 /** Entry to the list io_submitted of struct spdk_bdev_channel */
669 TAILQ_ENTRY(spdk_bdev_io) ch_link;
670
671 /** Enables queuing parent I/O when no bdev_ios available for split children. */
672 struct spdk_bdev_io_wait_entry waitq_entry;
673 } internal;
674
675 /**
676 * Per I/O context for use by the bdev module.
677 */
678 uint8_t driver_ctx[0];
679
680 /* No members may be added after driver_ctx! */
681 };
682
683 /**
684 * Register a new bdev.
685 *
686 * \param bdev Block device to register.
687 *
688 * \return 0 on success.
689 * \return -EINVAL if the bdev name is NULL.
690 * \return -EEXIST if a bdev or bdev alias with the same name already exists.
691 */
692 int spdk_bdev_register(struct spdk_bdev *bdev);
693
694 /**
695 * Start unregistering a bdev. This will notify each currently open descriptor
696 * on this bdev about the hot removal in hopes that the upper layers will stop
697 * using this bdev and manually close all the descriptors with spdk_bdev_close().
698 * The actual bdev unregistration may be deferred until all descriptors are closed.
699 *
700 * \param bdev Block device to unregister.
701 * \param cb_fn Callback function to be called when the unregister is complete.
702 * \param cb_arg Argument to be supplied to cb_fn
703 */
704 void spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg);
705
706 /**
707 * Invokes the unregister callback of a bdev backing a virtual bdev.
708 *
709 * A bdev with an asynchronous destruct path should return 1 from its
710 * destruct function and call this function at the conclusion of that path.
711 * Bdevs with synchronous destruct paths should return 0 from their destruct
712 * path.
713 *
714 * \param bdev Block device that was destroyed.
715 * \param bdeverrno Error code returned from bdev's destruct callback.
716 */
717 void spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno);
718
719 /**
720 * Register a virtual bdev.
721 *
722 * \deprecated This function is deprecated. Users should call spdk_bdev_register() instead.
723 * The bdev layer currently makes no use of the base_bdevs array, so switching
724 * to spdk_bdev_register() results in no loss of functionality.
725 *
726 * \param vbdev Virtual bdev to register.
727 * \param base_bdevs Array of bdevs upon which this vbdev is based.
728 * \param base_bdev_count Number of bdevs in base_bdevs.
729 *
730 * \return 0 on success
731 * \return -EINVAL if the bdev name is NULL.
732 * \return -EEXIST if the bdev already exists.
733 * \return -ENOMEM if allocation of the base_bdevs array or the base bdevs vbdevs array fails.
734 */
735 int spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs,
736 int base_bdev_count);
737
738 /**
739 * Indicate to the bdev layer that the module is done examining a bdev.
740 *
741 * To be called synchronously or asynchronously in response to one of the
742 * module's examine callbacks (examine_config or examine_disk) being called.
743 *
744 * \param module Pointer to the module completing the examination.
745 */
746 void spdk_bdev_module_examine_done(struct spdk_bdev_module *module);
747
748 /**
749 * Indicate to the bdev layer that the module is done initializing.
750 *
751 * To be called synchronously or asynchronously in response to the
752 * module_init function being called. Modules that set async_init must
753 * call this to complete their initialization.
754 * \param module Pointer to the module completing the initialization.
755 */
756 void spdk_bdev_module_init_done(struct spdk_bdev_module *module);
757
758 /**
759 * Indicate to the bdev layer that the module is done cleaning up.
760 *
761 * To be called either synchronously or asynchronously
762 * in response to the module_fini function being called.
763 * Modules that set async_fini use this to signal that finishing is complete.
764 */
765 void spdk_bdev_module_finish_done(void);
766
767 /**
768 * Called by a bdev module to lay an exclusive write claim on a bdev.
769 *
770 * Also upgrades that bdev's descriptor to have write access.
771 *
772 * \param bdev Block device to be claimed.
773 * \param desc Descriptor for the above block device.
774 * \param module Bdev module attempting to claim bdev.
775 *
776 * \return 0 on success
777 * \return -EPERM if the bdev is already claimed by another module.
778 */
779 int spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
780 struct spdk_bdev_module *module);
781
782 /**
783 * Called to release a write claim on a block device.
784 *
785 * \param bdev Block device to be released.
786 */
787 void spdk_bdev_module_release_bdev(struct spdk_bdev *bdev);
788
789 /**
790 * Add an alias to the block device's list of names.
791 * Aliases can be added only to a registered bdev.
792 *
793 * \param bdev Block device to add the alias to.
794 * \param alias Alias to be added to list.
795 *
796 * \return 0 on success
797 * \return -EEXIST if alias already exists as name or alias on any bdev
798 * \return -ENOMEM if memory cannot be allocated to store alias
799 * \return -EINVAL if passed alias is empty
800 */
801 int spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias);
802
803 /**
804 * Remove an alias from the block device's list of names.
805 *
806 * \param bdev Block device to remove the alias from.
807 * \param alias Alias to be deleted from list.
808 * \return 0 on success
809 * \return -ENOENT if alias does not exist
810 */
811 int spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias);
812
813 /**
814 * Remove all aliases from the block device's alias list.
815 *
816 * \param bdev Block device to operate on.
817 */
818 void spdk_bdev_alias_del_all(struct spdk_bdev *bdev);
819
820 /**
821 * Get pointer to block device aliases list.
822 *
823 * \param bdev Block device to query.
824 * \return Pointer to bdev aliases list.
825 */
826 const struct spdk_bdev_aliases_list *spdk_bdev_get_aliases(const struct spdk_bdev *bdev);
827
828 /**
829 * Allocate a buffer for given bdev_io. Allocation will happen
830 * only if the bdev_io has no assigned SGL yet or SGL is not
831 * aligned to \c bdev->required_alignment. If SGL is not aligned,
832 * this call will cause copy from SGL to bounce buffer on write
833 * path or copy from bounce buffer to SGL before completion
834 * callback on read path. The buffer will be freed automatically
835 * on \c spdk_bdev_free_io() call. This call will never fail.
836 * In case of lack of memory given callback \c cb will be deferred
837 * until enough memory is freed.
838 *
839 * \param bdev_io I/O to allocate buffer for.
840 * \param cb callback to be called when the buffer is allocated
841 * or the bdev_io has an SGL assigned already.
842 * \param len size of the buffer to allocate. In case the bdev_io
843 * doesn't have an SGL assigned this field must be no bigger than
844 * \c SPDK_BDEV_LARGE_BUF_MAX_SIZE.
845 */
846 void spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len);
847
848 /**
849 * Allocate an auxiliary buffer for given bdev_io. The length of the
850 * buffer will be the same size as the bdev_io primary buffer. The buffer
851 * must be freed using \c spdk_bdev_io_put_aux_buf() before completing
852 * the associated bdev_io. This call will never fail. In case of lack of
853 * memory given callback \c cb will be deferred until enough memory is freed.
854 *
855 * \param bdev_io I/O to allocate buffer for.
856 * \param cb callback to be called when the buffer is allocated
857 */
858 void spdk_bdev_io_get_aux_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_aux_buf_cb cb);
859
860 /**
861 * Free an auxiliary buffer previously allocated by \c spdk_bdev_io_get_aux_buf().
862 *
863 * \param bdev_io bdev_io specified when the aux_buf was allocated.
864 * \param aux_buf auxiliary buffer to free
865 */
866 void spdk_bdev_io_put_aux_buf(struct spdk_bdev_io *bdev_io, void *aux_buf);
867
868 /**
869 * Set the given buffer as the data buffer described by this bdev_io.
870 *
871 * The portion of the buffer used may be adjusted for memory alignment
872 * purposes.
873 *
874 * \param bdev_io I/O to set the buffer on.
875 * \param buf The buffer to set as the active data buffer.
876 * \param len The length of the buffer.
877 *
878 */
879 void spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len);
880
881 /**
882 * Set the given buffer as the metadata buffer described by this bdev_io.
883 *
884 * \param bdev_io I/O to set the buffer on.
885 * \param md_buf The buffer to set as the active metadata buffer.
886 * \param len The length of the metadata buffer.
887 */
888 void spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len);
889
890 /**
891 * Complete a bdev_io.
892 *
893 * \param bdev_io I/O to complete.
894 * \param status The I/O completion status.
895 */
896 void spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io,
897 enum spdk_bdev_io_status status);
898
899 /**
900 * Complete a bdev_io with an NVMe status code and DW0 completion queue entry.
901 *
902 * \param bdev_io I/O to complete.
903 * \param cdw0 NVMe Completion Queue DW0 value (set to 0 if not applicable)
904 * \param sct NVMe Status Code Type.
905 * \param sc NVMe Status Code.
906 */
907 void spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, uint32_t cdw0, int sct,
908 int sc);
909
910 /**
911 * Complete a bdev_io with a SCSI status code.
912 *
913 * \param bdev_io I/O to complete.
914 * \param sc SCSI Status Code.
915 * \param sk SCSI Sense Key.
916 * \param asc SCSI Additional Sense Code.
917 * \param ascq SCSI Additional Sense Code Qualifier.
918 */
919 void spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
920 enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq);
921
922 /**
923 * Get the thread that the given bdev_io was submitted on.
924 *
925 * \param bdev_io I/O
926 * \return thread that submitted the I/O
927 */
928 struct spdk_thread *spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io);
929
930 /**
931 * Get the bdev module's I/O channel that the given bdev_io was submitted on.
932 *
933 * \param bdev_io I/O
934 * \return the bdev module's I/O channel that the given bdev_io was submitted on.
935 */
936 struct spdk_io_channel *spdk_bdev_io_get_io_channel(struct spdk_bdev_io *bdev_io);
937
938 /**
939 * Resize a bdev.
940 *
941 * Change the number of blocks for the provided block device.
942 * It can only be called on a registered bdev.
943 *
944 * \param bdev Block device to change.
945 * \param size New size of bdev, in blocks.
946 * \return 0 on success, negated errno on failure.
947 */
948 int spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size);
949
950 /**
951 * Translates NVMe status codes to SCSI status information.
952 *
953 * The codes are stored in the user supplied integers.
954 *
955 * \param bdev_io I/O containing status codes to translate.
956 * \param sc SCSI Status Code will be stored here.
957 * \param sk SCSI Sense Key will be stored here.
958 * \param asc SCSI Additional Sense Code will be stored here.
959 * \param ascq SCSI Additional Sense Code Qualifier will be stored here.
960 */
961 void spdk_scsi_nvme_translate(const struct spdk_bdev_io *bdev_io,
962 int *sc, int *sk, int *asc, int *ascq);
963
964 /**
965 * Add the given module to the list of registered modules.
966 * This function should be invoked by referencing the macro
967 * SPDK_BDEV_MODULE_REGISTER in the module's C file.
968 *
969 * \param bdev_module Module to be added.
970 */
971 void spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module);
972
973 /**
974 * Find the registered module with the name pointed to by \c name.
975 *
976 * \param name name of module to be searched for.
977 * \return pointer to module or NULL if no module with \c name exists
978 */
979 struct spdk_bdev_module *spdk_bdev_module_list_find(const char *name);
980
981 static inline struct spdk_bdev_io * /* Map a driver context pointer back to the spdk_bdev_io that contains it. */
982 spdk_bdev_io_from_ctx(void *ctx)
983 {
984 return SPDK_CONTAINEROF(ctx, struct spdk_bdev_io, driver_ctx); /* ctx is treated as the address of an spdk_bdev_io's driver_ctx member; SPDK_CONTAINEROF is defined elsewhere (presumably the usual container-of pointer arithmetic) and nothing is validated here */
985 }
986
987 struct spdk_bdev_part_base;
988
989 /**
990 * Returns a pointer to the spdk_bdev associated with an spdk_bdev_part_base
991 *
992 * \param part_base A pointer to an spdk_bdev_part_base object.
993 *
994 * \return A pointer to the base's spdk_bdev struct.
995 */
996 struct spdk_bdev *spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base);
997
998 /**
999 * Returns the name of the spdk_bdev associated with an spdk_bdev_part_base
1000 *
1001 * \param part_base A pointer to an spdk_bdev_part_base object.
1002 *
1003 * \return A text string representing the name of the base bdev.
1004 */
1005 const char *spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base);
1006
1007 /**
1008 * Returns a pointer to the spdk_bdev_descriptor associated with an spdk_bdev_part_base
1009 *
1010 * \param part_base A pointer to an spdk_bdev_part_base object.
1011 *
1012 * \return A pointer to the base's spdk_bdev_desc struct.
1013 */
1014 struct spdk_bdev_desc *spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base);
1015
1016 /**
1017 * Returns a pointer to the tailq associated with an spdk_bdev_part_base
1018 *
1019 * \param part_base A pointer to an spdk_bdev_part_base object.
1020 *
1021 * \return The head of a tailq of spdk_bdev_part structs registered to the base's module.
1022 */
1023 struct bdev_part_tailq *spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base);
1024
1025 /**
1026 * Returns a pointer to the module level context associated with an spdk_bdev_part_base
1027 *
1028 * \param part_base A pointer to an spdk_bdev_part_base object.
1029 *
1030 * \return A pointer to the module level context registered with the base in spdk_bdev_part_base_construct.
1031 */
1032 void *spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base);
1033
1034 typedef void (*spdk_bdev_part_base_free_fn)(void *ctx); /* Callback type of the free_fn argument to spdk_bdev_part_base_construct; it receives a context pointer and returns nothing. */
1035
1036 struct spdk_bdev_part { /* A logical part created on top of a base; see spdk_bdev_part_construct. */
1037 /* Entry into the module's global list of bdev parts (list head type: struct bdev_part_tailq) */
1038 TAILQ_ENTRY(spdk_bdev_part) tailq;
1039
1040 /**
1041 * Fields that are used internally by part.c. These fields should only be accessed from a module
1042 * through the getters (spdk_bdev_part_get_bdev, spdk_bdev_part_get_base, spdk_bdev_part_get_offset_blocks).
1043 */
1044 struct bdev_part_internal_fields {
1045
1046 /* This part's corresponding bdev object, embedded by value. Not to be confused with the base bdev */
1047 struct spdk_bdev bdev;
1048
1049 /* The base to which this part belongs */
1050 struct spdk_bdev_part_base *base;
1051
1052 /* Number of blocks from the start of the base bdev to the start of this part */
1053 uint64_t offset_blocks;
1054 } internal;
1055 };
1056
1057 struct spdk_bdev_part_channel { /* Channel context type accepted by spdk_bdev_part_submit_request. */
1058 struct spdk_bdev_part *part; /* NOTE(review): presumably the part this channel was created for; confirm in part.c */
1059 struct spdk_io_channel *base_ch; /* NOTE(review): looks like the I/O channel of the base bdev that requests are forwarded on; confirm in part.c */
1060 };
1061
1062 typedef TAILQ_HEAD(bdev_part_tailq, spdk_bdev_part) SPDK_BDEV_PART_TAILQ; /* List head type (struct bdev_part_tailq) for spdk_bdev_part objects, which carry a matching TAILQ_ENTRY in their tailq field. */
1063
1064 /**
1065 * Free the base corresponding to one or more spdk_bdev_part.
1066 *
1067 * \param base The base to free.
1068 */
1069 void spdk_bdev_part_base_free(struct spdk_bdev_part_base *base);
1070
1071 /**
1072 * Free an spdk_bdev_part context.
1073 *
1074 * \param part The part to free.
1075 *
1076 * \return 1 always, to indicate that the operation is asynchronous.
1077 */
1078 int spdk_bdev_part_free(struct spdk_bdev_part *part);
1079
1080 /**
1081 * Calls spdk_bdev_unregister on the bdev for each part associated with base_bdev.
1082 *
1083 * \param part_base The part base object built on top of an spdk_bdev
1084 * \param tailq The list of spdk_bdev_part bdevs associated with this base bdev.
1085 */
1086 void spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base,
1087 struct bdev_part_tailq *tailq);
1088
1089 /**
1090 * Construct a new spdk_bdev_part_base on top of the provided bdev.
1091 *
1092 * \param bdev The spdk_bdev upon which this base will be built.
1093 * \param remove_cb Function to be called upon hotremove of the bdev.
1094 * \param module The module to which this bdev base belongs.
1095 * \param fn_table Function table for communicating with the bdev backend.
1096 * \param tailq The head of the list of all spdk_bdev_part structures registered to this base's module.
1097 * \param free_fn User provided function to free base related context upon bdev removal or shutdown.
1098 * \param ctx Module specific context for this bdev part base.
1099 * \param channel_size Channel size in bytes.
1100 * \param ch_create_cb Called after a new channel is allocated.
1101 * \param ch_destroy_cb Called upon channel deletion.
1102 *
1103 * \return A pointer to the newly constructed spdk_bdev_part_base on success.
1104 * \return NULL on failure, e.g. if the underlying bdev cannot be opened.
1105 */
1106 struct spdk_bdev_part_base *spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
1107 spdk_bdev_remove_cb_t remove_cb,
1108 struct spdk_bdev_module *module,
1109 struct spdk_bdev_fn_table *fn_table,
1110 struct bdev_part_tailq *tailq,
1111 spdk_bdev_part_base_free_fn free_fn,
1112 void *ctx,
1113 uint32_t channel_size,
1114 spdk_io_channel_create_cb ch_create_cb,
1115 spdk_io_channel_destroy_cb ch_destroy_cb);
1116
1117 /**
1118 * Create a logical spdk_bdev_part on top of a base.
1119 *
1120 * \param part The part object allocated by the user.
1121 * \param base The base from which to create the part.
1122 * \param name The name of the new spdk_bdev_part.
1123 * \param offset_blocks The offset into the base bdev at which this part begins.
1124 * \param num_blocks The number of blocks that this part will span.
1125 * \param product_name Unique name for this type of block device.
1126 *
1127 * \return 0 on success.
1128 * \return -1 if the base's underlying bdev cannot be claimed by the current module.
1129 */
1130 int spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
1131 char *name, uint64_t offset_blocks, uint64_t num_blocks,
1132 char *product_name);
1133
1134 /**
1135 * Forwards I/O from an spdk_bdev_part to the underlying base bdev.
1136 *
1137 * This function will apply the offset_blocks the user provided to
1138 * spdk_bdev_part_construct to the I/O. The user should not manually
1139 * apply this offset before submitting any I/O through this function.
1140 *
1141 * \param ch The I/O channel associated with the spdk_bdev_part.
1142 * \param bdev_io The I/O to be submitted to the underlying bdev.
1143 * \return 0 on success or non-zero if submit request failed.
1144 */
1145 int spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io);
1146
1147 /**
1148 * Return a pointer to this part's spdk_bdev.
1149 *
1150 * \param part An spdk_bdev_part object.
1151 *
1152 * \return A pointer to this part's spdk_bdev object.
1153 */
1154 struct spdk_bdev *spdk_bdev_part_get_bdev(struct spdk_bdev_part *part);
1155
1156 /**
1157 * Return a pointer to this part's base.
1158 *
1159 * \param part An spdk_bdev_part object.
1160 *
1161 * \return A pointer to this part's spdk_bdev_part_base object.
1162 */
1163 struct spdk_bdev_part_base *spdk_bdev_part_get_base(struct spdk_bdev_part *part);
1164
1165 /**
1166 * Return a pointer to this part's base bdev.
1167 *
1168 * The return value of this function is equivalent to calling
1169 * spdk_bdev_part_base_get_bdev on this part's base.
1170 *
1171 * \param part An spdk_bdev_part object.
1172 *
1173 * \return A pointer to the bdev belonging to this part's base.
1174 */
1175 struct spdk_bdev *spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part);
1176
1177 /**
1178 * Return this part's offset from the beginning of the base bdev.
1179 *
1180 * This function should not be called in the I/O path. Any block
1181 * translations to I/O will be handled in spdk_bdev_part_submit_request.
1182 *
1183 * \param part An spdk_bdev_part object.
1184 *
1185 * \return the block offset of this part from its underlying bdev.
1186 */
1187 uint64_t spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part);
1188
1189 /**
1190 * Push media management events. To send the notification that new events are
1191 * available, spdk_bdev_notify_media_management needs to be called.
1192 *
1193 * \param bdev Block device
1194 * \param events Array of media events
1195 * \param num_events Size of the events array
1196 *
1197 * \return number of events pushed or negative errno in case of failure
1198 */
1199 int spdk_bdev_push_media_events(struct spdk_bdev *bdev, const struct spdk_bdev_media_event *events,
1200 size_t num_events);
1201
1202 /**
1203 * Send SPDK_BDEV_EVENT_MEDIA_MANAGEMENT to all open descriptors that have
1204 * pending media events.
1205 *
1206 * \param bdev Block device
1207 */
1208 void spdk_bdev_notify_media_management(struct spdk_bdev *bdev);
1209
1210 /*
1211 * Macro used to register module for later initialization. It expands to a static function named _spdk_bdev_module_register_<name>, marked __attribute__((constructor)), whose body calls spdk_bdev_module_list_add(module).
1212 * NOTE: the last line of the macro ends with a line continuation, so the line that follows the macro must stay blank. */
1213 #define SPDK_BDEV_MODULE_REGISTER(name, module) \
1214 static void __attribute__((constructor)) _spdk_bdev_module_register_##name(void) \
1215 { \
1216 spdk_bdev_module_list_add(module); \
1217 } \
1218
1219 #endif /* SPDK_BDEV_MODULE_H */