* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
#define _SYS_FS_ZFS_H
#include <sys/time.h>
+#include <sys/zio_priority.h>
#ifdef __cplusplus
extern "C" {
ZFS_TYPE_BOOKMARK = (1 << 4)
} zfs_type_t;
+/*
+ * NB: lzc_dataset_type should be updated whenever a new objset type is added,
+ * if it represents a real type of a dataset that can be created from userland.
+ */
typedef enum dmu_objset_type {
DMU_OST_NONE,
DMU_OST_META,
#define ZFS_TYPE_DATASET \
(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT)
+/*
+ * All of these include the terminating NUL byte.
+ */
#define ZAP_MAXNAMELEN 256
#define ZAP_MAXVALUELEN (1024 * 8)
#define ZAP_OLDMAXVALUELEN 1024
* the property table in module/zcommon/zfs_prop.c.
*/
typedef enum {
- ZFS_PROP_TYPE,
+ ZFS_PROP_BAD = -1,
+ ZFS_PROP_TYPE = 0,
ZFS_PROP_CREATION,
ZFS_PROP_USED,
ZFS_PROP_AVAILABLE,
ZFS_PROP_SNAPDIR,
ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
ZFS_PROP_ACLINHERIT,
- ZFS_PROP_CREATETXG, /* not exposed to the user */
+ ZFS_PROP_CREATETXG,
ZFS_PROP_NAME, /* not exposed to the user */
ZFS_PROP_CANMOUNT,
ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
ZFS_PROP_DEDUP,
ZFS_PROP_MLSLABEL,
ZFS_PROP_SYNC,
+ ZFS_PROP_DNODESIZE,
ZFS_PROP_REFRATIO,
ZFS_PROP_WRITTEN,
ZFS_PROP_CLONES,
ZFS_PROP_LOGICALUSED,
ZFS_PROP_LOGICALREFERENCED,
ZFS_PROP_INCONSISTENT, /* not exposed to the user */
+ ZFS_PROP_VOLMODE,
ZFS_PROP_FILESYSTEM_LIMIT,
ZFS_PROP_SNAPSHOT_LIMIT,
ZFS_PROP_FILESYSTEM_COUNT,
ZFS_PROP_RELATIME,
ZFS_PROP_REDUNDANT_METADATA,
ZFS_PROP_OVERLAY,
+ ZFS_PROP_PREV_SNAP,
+ ZFS_PROP_RECEIVE_RESUME_TOKEN,
ZFS_NUM_PROPS
} zfs_prop_t;
ZFS_PROP_USERQUOTA,
ZFS_PROP_GROUPUSED,
ZFS_PROP_GROUPQUOTA,
+ ZFS_PROP_USEROBJUSED,
+ ZFS_PROP_USEROBJQUOTA,
+ ZFS_PROP_GROUPOBJUSED,
+ ZFS_PROP_GROUPOBJQUOTA,
ZFS_NUM_USERQUOTA_PROPS
} zfs_userquota_prop_t;
ZPOOL_PROP_LEAKED,
ZPOOL_PROP_MAXBLOCKSIZE,
ZPOOL_PROP_TNAME,
+ ZPOOL_PROP_MAXDNODESIZE,
+ ZPOOL_PROP_MULTIHOST,
ZPOOL_NUM_PROPS
} zpool_prop_t;
#define ZPROP_SOURCE_VAL_RECVD "$recvd"
#define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS"
+
/*
* Dataset flag implemented as a special entry in the props zap object
* indicating that the dataset has received properties on or after
ZFS_XATTR_SA = 2
} zfs_xattr_type_t;
+typedef enum { /* NOTE(review): presumably the values of ZFS_PROP_DNODESIZE -- confirm */
+ ZFS_DNSIZE_LEGACY = 0, /* 0 and 1 are selectors, not sizes */
+ ZFS_DNSIZE_AUTO = 1,
+ ZFS_DNSIZE_1K = 1024, /* explicit sizes: value is the size itself (1K == 1024) */
+ ZFS_DNSIZE_2K = 2048,
+ ZFS_DNSIZE_4K = 4096,
+ ZFS_DNSIZE_8K = 8192,
+ ZFS_DNSIZE_16K = 16384
+} zfs_dnsize_type_t;
+
typedef enum {
ZFS_REDUNDANT_METADATA_ALL,
ZFS_REDUNDANT_METADATA_MOST
} zfs_redundant_metadata_type_t;
+typedef enum { /* NOTE(review): presumably the values of ZFS_PROP_VOLMODE -- confirm */
+ ZFS_VOLMODE_DEFAULT = 0,
+ ZFS_VOLMODE_GEOM = 1,
+ ZFS_VOLMODE_DEV = 2,
+ ZFS_VOLMODE_NONE = 3
+} zfs_volmode_t;
+
/*
* On-disk version number.
*/
#define ZPOOL_CONFIG_DTL "DTL"
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
+
+/* container nvlist of extended stats */
+#define ZPOOL_CONFIG_VDEV_STATS_EX "vdev_stats_ex"
+
+/* Active queue read/write stats */
+#define ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE "vdev_sync_r_active_queue"
+#define ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE "vdev_sync_w_active_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE "vdev_async_r_active_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE "vdev_async_w_active_queue"
+#define ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE "vdev_async_scrub_active_queue" /* NOTE(review): key has an "async_" infix the macro name lacks */
+
+/* Queue sizes */
+#define ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE "vdev_sync_r_pend_queue"
+#define ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE "vdev_sync_w_pend_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE "vdev_async_r_pend_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE "vdev_async_w_pend_queue"
+#define ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE "vdev_async_scrub_pend_queue" /* NOTE(review): key has an "async_" infix the macro name lacks */
+
+/* Latency read/write histogram stats */
+#define ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO "vdev_tot_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO "vdev_tot_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO "vdev_disk_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO "vdev_disk_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO "vdev_sync_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO "vdev_sync_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO "vdev_async_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO "vdev_async_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO "vdev_scrub_histo" /* NOTE(review): key omits the "_lat_" infix its siblings use */
+
+/* Request size histograms */
+#define ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO "vdev_sync_ind_r_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO "vdev_sync_ind_w_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO "vdev_async_ind_r_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO "vdev_async_ind_w_histo"
+#define ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO "vdev_ind_scrub_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO "vdev_sync_agg_r_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO "vdev_sync_agg_w_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO "vdev_async_agg_r_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo"
+#define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo"
+
+/* vdev enclosure sysfs path */
+#define ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path"
+
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */
#define ZPOOL_CONFIG_ERRATA "errata" /* not stored on disk */
+#define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top"
+#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
+#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
+#define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
+
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */
- VDEV_AUX_EXTERNAL, /* external diagnosis */
- VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */
+ VDEV_AUX_EXTERNAL, /* external diagnosis or forced fault */
+ VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
+ VDEV_AUX_BAD_ASHIFT, /* vdev ashift is invalid */
+ VDEV_AUX_EXTERNAL_PERSIST, /* persistent forced fault */
+ VDEV_AUX_ACTIVE, /* vdev active on a different host */
} vdev_aux_t;
/*
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
} pool_state_t;
+/*
+ * MMP state. These states describe whether a pool is safe to import and,
+ * if it is not, why it could not be safely imported.
+ */
+typedef enum mmp_state {
+ MMP_STATE_ACTIVE = 0, /* In active use */
+ MMP_STATE_INACTIVE, /* Inactive and safe to import */
+ MMP_STATE_NO_HOSTID /* System hostid is not set */
+} mmp_state_t;
+
/*
* Scan Functions.
*/
POOL_SCAN_FUNCS
} pool_scan_func_t;
+/*
+ * Used to control scrub pause and resume.
+ */
+typedef enum pool_scrub_cmd {
+ POOL_SCRUB_NORMAL = 0,
+ POOL_SCRUB_PAUSE,
+ POOL_SCRUB_FLAGS_END /* NOTE(review): looks like an end marker rather than a command -- confirm */
+} pool_scrub_cmd_t;
+
+
/*
* ZIO types. Needed to interpret vdev statistics below.
*/
/* values not stored on disk */
uint64_t pss_pass_exam; /* examined bytes per scan pass */
uint64_t pss_pass_start; /* start time of a scan pass */
+ uint64_t pss_pass_scrub_pause; /* pause time of a scrub pass */
+ /* cumulative time scrub spent paused, needed for rate calculation */
+ uint64_t pss_pass_scrub_spent_paused;
} pool_scan_stat_t;
typedef enum dsl_scan_state {
uint64_t vs_scan_removing; /* removing? */
uint64_t vs_scan_processed; /* scan processed bytes */
uint64_t vs_fragmentation; /* device fragmentation */
+
} vdev_stat_t;
+/*
+ * Extended stats
+ *
+ * These are stats which aren't included in the original iostat output. For
+ * convenience, they are grouped together in vdev_stat_ex, although each stat
+ * is individually exported as an nvlist.
+ */
+typedef struct vdev_stat_ex {
+ /* Number of ZIOs issued to disk and waiting to finish */
+ uint64_t vsx_active_queue[ZIO_PRIORITY_NUM_QUEUEABLE];
+
+ /* Number of ZIOs pending to be issued to disk */
+ uint64_t vsx_pend_queue[ZIO_PRIORITY_NUM_QUEUEABLE];
+
+ /*
+ * Below are the histograms for various latencies. Buckets are in
+ * units of nanoseconds.
+ */
+
+ /*
+ * 2^37 nanoseconds is roughly 137s. Timeouts will probably start
+ * kicking in before this.
+ */
+#define VDEV_L_HISTO_BUCKETS 37 /* Latency histo buckets */
+#define VDEV_RQ_HISTO_BUCKETS 25 /* Request size histo buckets */
+
+ /* Amount of time in ZIO queue (ns) */
+ uint64_t vsx_queue_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+ [VDEV_L_HISTO_BUCKETS];
+
+ /* Total ZIO latency (ns). Includes queuing and disk access time */
+ uint64_t vsx_total_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS];
+
+ /* Amount of time to read/write the disk (ns) */
+ uint64_t vsx_disk_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS];
+
+ /*
+ * "lookup the bucket for a value" histogram macros.
+ *
+ * HISTO() maps 0 to bucket 0 and any other value to
+ * highbit64(val) - 1, capped at the last bucket (buckets - 1).
+ * Both arguments are parenthesized so that expression arguments
+ * expand with the intended precedence. val is still evaluated more
+ * than once, so do not pass an expression with side effects.
+ */
+#define HISTO(val, buckets) ((val) != 0 ? \
+ MIN(highbit64(val) - 1, (buckets) - 1) : 0)
+#define L_HISTO(a) HISTO(a, VDEV_L_HISTO_BUCKETS)
+#define RQ_HISTO(a) HISTO(a, VDEV_RQ_HISTO_BUCKETS)
+
+ /* Physical IO histogram */
+ uint64_t vsx_ind_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+ [VDEV_RQ_HISTO_BUCKETS];
+
+ /* Delegated (aggregated) physical IO histogram */
+ uint64_t vsx_agg_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+ [VDEV_RQ_HISTO_BUCKETS];
+} vdev_stat_ex_t;
+
/*
* DDT statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
*/
typedef struct ddt_object {
- uint64_t ddo_count; /* number of elments in ddt */
+ uint64_t ddo_count; /* number of elements in ddt */
uint64_t ddo_dspace; /* size of ddt on disk */
uint64_t ddo_mspace; /* size of ddt in-core */
} ddt_object_t;
#define ZVOL_DRIVER "zvol"
#define ZFS_DRIVER "zfs"
#define ZFS_DEV "/dev/zfs"
+#define ZFS_SHARETAB "/etc/dfs/sharetab"
+
+#define ZFS_SUPER_MAGIC 0x2fc12fc1
/* general zvol path */
#define ZVOL_DIR "/dev"
*/
typedef enum zfs_ioc {
/*
- * Illumos - 70/128 numbers reserved.
+ * Illumos - 71/128 numbers reserved.
*/
ZFS_IOC_FIRST = ('Z' << 8),
ZFS_IOC = ZFS_IOC_FIRST,
ZFS_IOC_BOOKMARK,
ZFS_IOC_GET_BOOKMARKS,
ZFS_IOC_DESTROY_BOOKMARKS,
+ ZFS_IOC_RECV_NEW,
+ ZFS_IOC_POOL_SYNC,
/*
* Linux - 3/64 numbers reserved.
/*
* zvol ioctl to get dataset name
*/
-#define BLKZNAME _IOR(0x12, 125, char[ZFS_MAXNAMELEN])
+#define BLKZNAME _IOR(0x12, 125, char[ZFS_MAX_DATASET_NAME_LEN])
/*
* Internal SPA load state. Used by FMA diagnosis engine.
SPA_LOAD_IMPORT, /* import in progress */
SPA_LOAD_TRYIMPORT, /* tryimport in progress */
SPA_LOAD_RECOVER, /* recovery requested */
- SPA_LOAD_ERROR /* load failed */
+ SPA_LOAD_ERROR, /* load failed */
+ SPA_LOAD_CREATE /* creation in progress */
} spa_load_state_t;
/*
#define ZFS_IMPORT_MISSING_LOG 0x4
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
+#define ZFS_IMPORT_SKIP_MMP 0x20
/*
* Sysevent payload members. ZFS will generate the following sysevents with the
* ZFS_EV_POOL_GUID DATA_TYPE_UINT64
* ZFS_EV_VDEV_PATH DATA_TYPE_STRING (optional)
* ZFS_EV_VDEV_GUID DATA_TYPE_UINT64
+ *
+ * ESC_ZFS_HISTORY_EVENT
+ *
+ * ZFS_EV_POOL_NAME DATA_TYPE_STRING
+ * ZFS_EV_POOL_GUID DATA_TYPE_UINT64
+ * ZFS_EV_HIST_TIME DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_CMD DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_WHO DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_ZONE DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_HOST DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_TXG DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_INT_EVENT DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_INT_STR DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_INT_NAME DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_IOCTL DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_DSNAME DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_DSID DATA_TYPE_UINT64 (optional)
+ *
+ * The ZFS_EV_HIST_* members will correspond to the ZPOOL_HIST_* members in the
+ * history log nvlist. The keynames will be free of any spaces or other
+ * characters that could be potentially unexpected to consumers of the
+ * sysevents.
*/
#define ZFS_EV_POOL_NAME "pool_name"
#define ZFS_EV_POOL_GUID "pool_guid"
#define ZFS_EV_VDEV_PATH "vdev_path"
#define ZFS_EV_VDEV_GUID "vdev_guid"
+#define ZFS_EV_HIST_TIME "history_time"
+#define ZFS_EV_HIST_CMD "history_command"
+#define ZFS_EV_HIST_WHO "history_who"
+#define ZFS_EV_HIST_ZONE "history_zone"
+#define ZFS_EV_HIST_HOST "history_hostname"
+#define ZFS_EV_HIST_TXG "history_txg"
+#define ZFS_EV_HIST_INT_EVENT "history_internal_event"
+#define ZFS_EV_HIST_INT_STR "history_internal_str"
+#define ZFS_EV_HIST_INT_NAME "history_internal_name"
+#define ZFS_EV_HIST_IOCTL "history_ioctl"
+#define ZFS_EV_HIST_DSNAME "history_dsname"
+#define ZFS_EV_HIST_DSID "history_dsid"
#ifdef __cplusplus
}