/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Toomas Soome <tsoome@me.com>
*/
#ifndef _ZIO_H
ZIO_CHECKSUM_FLETCHER_4,
ZIO_CHECKSUM_SHA256,
ZIO_CHECKSUM_ZILOG2,
+ ZIO_CHECKSUM_NOPARITY,
+ ZIO_CHECKSUM_SHA512,
+ ZIO_CHECKSUM_SKEIN,
+ ZIO_CHECKSUM_EDONR,
ZIO_CHECKSUM_FUNCTIONS
};
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100
-enum zio_compress {
- ZIO_COMPRESS_INHERIT = 0,
- ZIO_COMPRESS_ON,
- ZIO_COMPRESS_OFF,
- ZIO_COMPRESS_LZJB,
- ZIO_COMPRESS_EMPTY,
- ZIO_COMPRESS_GZIP_1,
- ZIO_COMPRESS_GZIP_2,
- ZIO_COMPRESS_GZIP_3,
- ZIO_COMPRESS_GZIP_4,
- ZIO_COMPRESS_GZIP_5,
- ZIO_COMPRESS_GZIP_6,
- ZIO_COMPRESS_GZIP_7,
- ZIO_COMPRESS_GZIP_8,
- ZIO_COMPRESS_GZIP_9,
- ZIO_COMPRESS_ZLE,
- ZIO_COMPRESS_LZ4,
- ZIO_COMPRESS_FUNCTIONS
+/* supported encryption algorithms */
+enum zio_encrypt {
+ ZIO_CRYPT_INHERIT = 0,
+ ZIO_CRYPT_ON,
+ ZIO_CRYPT_OFF,
+ ZIO_CRYPT_AES_128_CCM,
+ ZIO_CRYPT_AES_192_CCM,
+ ZIO_CRYPT_AES_256_CCM,
+ ZIO_CRYPT_AES_128_GCM,
+ ZIO_CRYPT_AES_192_GCM,
+ ZIO_CRYPT_AES_256_GCM,
+ ZIO_CRYPT_FUNCTIONS
};
+#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM
+#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF
+
+/* macros defining encryption lengths */
+#define ZIO_OBJSET_MAC_LEN 32
+#define ZIO_DATA_IV_LEN 12
+#define ZIO_DATA_SALT_LEN 8
+#define ZIO_DATA_MAC_LEN 16
+
/*
* The number of "legacy" compression functions which can be set on individual
* objects.
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
(compress) == ZIO_COMPRESS_LZ4 || \
+ (compress) == ZIO_COMPRESS_GZIP_1 || \
+ (compress) == ZIO_COMPRESS_GZIP_2 || \
+ (compress) == ZIO_COMPRESS_GZIP_3 || \
+ (compress) == ZIO_COMPRESS_GZIP_4 || \
+ (compress) == ZIO_COMPRESS_GZIP_5 || \
+ (compress) == ZIO_COMPRESS_GZIP_6 || \
+ (compress) == ZIO_COMPRESS_GZIP_7 || \
+ (compress) == ZIO_COMPRESS_GZIP_8 || \
+ (compress) == ZIO_COMPRESS_GZIP_9 || \
+ (compress) == ZIO_COMPRESS_ZLE || \
(compress) == ZIO_COMPRESS_ON || \
(compress) == ZIO_COMPRESS_OFF)
-/*
- * Default Linux timeout for a sd device.
- */
-#define ZIO_DELAY_MAX (30 * MILLISEC)
-
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
+typedef enum zio_suspend_reason {
+ ZIO_SUSPEND_NONE = 0,
+ ZIO_SUSPEND_IOERR,
+ ZIO_SUSPEND_MMP,
+} zio_suspend_reason_t;
+
enum zio_flag {
/*
* Flags inherited by gang, ddt, and vdev children,
ZIO_FLAG_DONT_CACHE = 1 << 11,
ZIO_FLAG_NODATA = 1 << 12,
ZIO_FLAG_INDUCE_DAMAGE = 1 << 13,
+ ZIO_FLAG_IO_ALLOCATING = 1 << 14,
#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1)
#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1)
/*
* Flags inherited by vdev children.
*/
- ZIO_FLAG_IO_RETRY = 1 << 14, /* must be first for INHERIT */
- ZIO_FLAG_PROBE = 1 << 15,
- ZIO_FLAG_TRYHARD = 1 << 16,
- ZIO_FLAG_OPTIONAL = 1 << 17,
+ ZIO_FLAG_IO_RETRY = 1 << 15, /* must be first for INHERIT */
+ ZIO_FLAG_PROBE = 1 << 16,
+ ZIO_FLAG_TRYHARD = 1 << 17,
+ ZIO_FLAG_OPTIONAL = 1 << 18,
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
* Flags not inherited by any children.
*/
- ZIO_FLAG_DONT_QUEUE = 1 << 18, /* must be first for INHERIT */
- ZIO_FLAG_DONT_PROPAGATE = 1 << 19,
- ZIO_FLAG_IO_BYPASS = 1 << 20,
- ZIO_FLAG_IO_REWRITE = 1 << 21,
- ZIO_FLAG_RAW = 1 << 22,
- ZIO_FLAG_GANG_CHILD = 1 << 23,
- ZIO_FLAG_DDT_CHILD = 1 << 24,
- ZIO_FLAG_GODFATHER = 1 << 25,
- ZIO_FLAG_NOPWRITE = 1 << 26,
- ZIO_FLAG_REEXECUTED = 1 << 27,
- ZIO_FLAG_DELEGATED = 1 << 28,
- ZIO_FLAG_FASTWRITE = 1 << 29,
+ ZIO_FLAG_DONT_QUEUE = 1 << 19, /* must be first for INHERIT */
+ ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
+ ZIO_FLAG_IO_BYPASS = 1 << 21,
+ ZIO_FLAG_IO_REWRITE = 1 << 22,
+ ZIO_FLAG_RAW_COMPRESS = 1 << 23,
+ ZIO_FLAG_RAW_ENCRYPT = 1 << 24,
+ ZIO_FLAG_GANG_CHILD = 1 << 25,
+ ZIO_FLAG_DDT_CHILD = 1 << 26,
+ ZIO_FLAG_GODFATHER = 1 << 27,
+ ZIO_FLAG_NOPWRITE = 1 << 28,
+ ZIO_FLAG_REEXECUTED = 1 << 29,
+ ZIO_FLAG_DELEGATED = 1 << 30,
+ ZIO_FLAG_FASTWRITE = 1 << 31,
};
#define ZIO_FLAG_MUSTSUCCEED 0
+#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
#define ZIO_DDT_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \
#define ZIO_VDEV_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_VDEV_INHERIT) | \
- ZIO_FLAG_CANFAIL)
+ ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_CANFAIL)
+
+#define ZIO_CHILD_BIT(x) (1 << (x))
+#define ZIO_CHILD_BIT_IS_SET(val, x) ((val) & (1 << (x)))
enum zio_child {
ZIO_CHILD_VDEV = 0,
ZIO_CHILD_TYPES
};
+#define ZIO_CHILD_VDEV_BIT ZIO_CHILD_BIT(ZIO_CHILD_VDEV)
+#define ZIO_CHILD_GANG_BIT ZIO_CHILD_BIT(ZIO_CHILD_GANG)
+#define ZIO_CHILD_DDT_BIT ZIO_CHILD_BIT(ZIO_CHILD_DDT)
+#define ZIO_CHILD_LOGICAL_BIT ZIO_CHILD_BIT(ZIO_CHILD_LOGICAL)
+#define ZIO_CHILD_ALL_BITS \
+ (ZIO_CHILD_VDEV_BIT | ZIO_CHILD_GANG_BIT | \
+ ZIO_CHILD_DDT_BIT | ZIO_CHILD_LOGICAL_BIT)
+
enum zio_wait_type {
ZIO_WAIT_READY = 0,
ZIO_WAIT_DONE,
#define ECKSUM EBADE
#define EFRAGS EBADR
+/* Similar for ENOACTIVE */
+#define ENOTACTIVE ENOANO
+
typedef void zio_done_func_t(zio_t *zio);
+extern int zio_dva_throttle_enabled;
extern const char *zio_type_name[ZIO_TYPES];
/*
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
+ boolean_t zp_encrypt;
+ boolean_t zp_byteorder;
+ uint8_t zp_salt[ZIO_DATA_SALT_LEN];
+ uint8_t zp_iv[ZIO_DATA_IV_LEN];
+ uint8_t zp_mac[ZIO_DATA_MAC_LEN];
+ uint32_t zp_zpl_smallblk;
} zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t;
typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
- const void *good_data);
+ const abd_t *good_data);
typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */
struct dnode_phys;
+struct abd;
struct zio_cksum_report {
struct zio_cksum_report *zcr_next;
} zio_gang_node_t;
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
- zio_gang_node_t *gn, void *data);
+ zio_gang_node_t *gn, struct abd *data, uint64_t offset);
-typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);
+typedef void zio_transform_func_t(zio_t *zio, struct abd *data, uint64_t size);
typedef struct zio_transform {
- void *zt_orig_data;
+ struct abd *zt_orig_abd;
uint64_t zt_orig_size;
uint64_t zt_bufsize;
zio_transform_func_t *zt_transform;
struct zio_transform *zt_next;
} zio_transform_t;
-typedef int zio_pipe_stage_t(zio_t *zio);
+typedef zio_t *zio_pipe_stage_t(zio_t *zio);
/*
* The io_reexecute flags are distinct from io_flags because the child must
#define ZIO_REEXECUTE_NOW 0x01
#define ZIO_REEXECUTE_SUSPEND 0x02
+typedef struct zio_alloc_list {
+ list_t zal_list;
+ uint64_t zal_size;
+} zio_alloc_list_t;
+
typedef struct zio_link {
zio_t *zl_parent;
zio_t *zl_child;
blkptr_t io_bp_copy;
list_t io_parent_list;
list_t io_child_list;
- zio_link_t *io_walk_link;
zio_t *io_logical;
zio_transform_t *io_transform_stack;
/* Callback info */
- zio_done_func_t *io_ready;
+ zio_done_func_t *io_ready;
+ zio_done_func_t *io_children_ready;
zio_done_func_t *io_physdone;
zio_done_func_t *io_done;
void *io_private;
int64_t io_prev_space_delta; /* DMU private */
blkptr_t io_bp_orig;
+ /* io_lsize != io_orig_size iff this is a raw write */
+ uint64_t io_lsize;
/* Data represented by this I/O */
- void *io_data;
- void *io_orig_data;
+ struct abd *io_abd;
+ struct abd *io_orig_abd;
uint64_t io_size;
uint64_t io_orig_size;
vdev_t *io_vd;
void *io_vsd;
const zio_vsd_ops_t *io_vsd_ops;
+ metaslab_class_t *io_metaslab_class; /* dva throttle class */
uint64_t io_offset;
hrtime_t io_timestamp; /* submitted at */
+ hrtime_t io_queued_timestamp;
+ hrtime_t io_target_timestamp;
hrtime_t io_delta; /* vdev queue service delta */
- uint64_t io_delay; /* vdev disk service delta (ticks) */
+ hrtime_t io_delay; /* Device access time (disk or */
+ /* file). */
avl_node_t io_queue_node;
avl_node_t io_offset_node;
+ avl_node_t io_alloc_node;
+ zio_alloc_list_t io_alloc_list;
/* Internal pipeline state */
enum zio_flag io_flags;
enum zio_flag io_orig_flags;
enum zio_stage io_orig_stage;
enum zio_stage io_orig_pipeline;
+ enum zio_stage io_pipeline_trace;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
void *io_waiter;
kmutex_t io_lock;
kcondvar_t io_cv;
+ int io_allocator;
/* FMA state */
zio_cksum_report_t *io_cksum_report;
taskq_ent_t io_tqent;
};
+extern int zio_bookmark_compare(const void *, const void *);
+
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);
-extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
- uint64_t size, zio_done_func_t *done, void *private,
+extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
+ struct abd *data, uint64_t lsize, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- void *data, uint64_t size, const zio_prop_t *zp,
- zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
- void *private,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
+ struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
+ zio_done_func_t *ready, zio_done_func_t *children_ready,
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *private, zio_priority_t priority, enum zio_flag flags,
+ const zbookmark_phys_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- void *data, uint64_t size, zio_done_func_t *done, void *private,
+ struct abd *data, uint64_t size, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
- uint64_t size, void *data, int checksum,
+ uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *private, zio_priority_t priority,
enum zio_flag flags, boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
- uint64_t size, void *data, int checksum,
+ uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *private, zio_priority_t priority,
enum zio_flag flags, boolean_t labels);
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
-extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
- uint64_t size, boolean_t use_slog);
-extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
+extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
+ blkptr_t *new_bp, uint64_t size, boolean_t *slog);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
extern void zio_nowait(zio_t *zio);
extern void zio_execute(zio_t *zio);
extern void zio_interrupt(zio_t *zio);
+extern void zio_delay_init(zio_t *zio);
+extern void zio_delay_interrupt(zio_t *zio);
+extern void zio_deadman(zio_t *zio, char *tag);
-extern zio_t *zio_walk_parents(zio_t *cio);
-extern zio_t *zio_walk_children(zio_t *pio);
+extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
+extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
-extern void *zio_buf_alloc_flags(size_t size, int flags);
+
+extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size,
+ uint64_t bufsize, zio_transform_func_t *transform);
+extern void zio_pop_transforms(zio_t *zio);
extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
- uint64_t offset, void *data, uint64_t size, int type,
+ uint64_t offset, struct abd *data, uint64_t size, int type,
zio_priority_t priority, enum zio_flag flags,
zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
- void *data, uint64_t size, int type, zio_priority_t priority,
+ struct abd *data, uint64_t size, zio_type_t type, zio_priority_t priority,
enum zio_flag flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);
+extern void zio_change_priority(zio_t *pio, zio_priority_t priority);
+
extern void zio_checksum_verified(zio_t *zio);
extern int zio_worst_error(int e1, int e2);
extern enum zio_compress zio_compress_select(spa_t *spa,
enum zio_compress child, enum zio_compress parent);
-extern void zio_suspend(spa_t *spa, zio_t *zio);
+extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
extern int zio_resume(spa_t *spa);
extern void zio_resume_wait(spa_t *spa);
struct zinject_record *record);
extern int zio_clear_fault(int id);
extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type);
+extern int zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb,
+ uint64_t type, int error);
extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
+extern int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1,
+ int err2);
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
-extern uint64_t zio_handle_io_delay(zio_t *zio);
+extern hrtime_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
*/
-extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
- uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
+extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+ const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
+ uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
- const void *good_data, const void *bad_data, boolean_t drop_if_identical);
+ const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
-extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report);
extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/* If we have the good data in hand, this function can be used */
-extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
- struct zio *zio, uint64_t offset, uint64_t length,
- const void *good_data, const void *bad_data, struct zio_bad_cksum *info);
+extern int zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
+ const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
+ uint64_t length, const abd_t *good_data, const abd_t *bad_data,
+ struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);