* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
- * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Datto Inc.
*/
#ifndef _SYS_SPA_H
typedef struct zio zio_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
-typedef struct ddt ddt_t;
-typedef struct ddt_entry ddt_entry_t;
typedef struct zbookmark_phys zbookmark_phys_t;
+typedef struct zbookmark_err_phys zbookmark_err_phys_t;
struct bpobj;
struct bplist;
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 0 | pad | vdev1 | GRID | ASIZE |
+ * 0 | pad | vdev1 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 2 | pad | vdev2 | GRID | ASIZE |
+ * 2 | pad | vdev2 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 4 | pad | vdev3 | GRID | ASIZE |
+ * 4 | pad | vdev3 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* LSIZE logical size
* PSIZE physical size (after compression)
* ASIZE allocated size (including RAID-Z parity and gang block headers)
- * GRID RAID-Z layout information (reserved for future use)
* cksum checksum function
* comp compression function
* G gang block indicator
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 0 | vdev1 | GRID | ASIZE |
+ * 0 | vdev1 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 2 | vdev2 | GRID | ASIZE |
+ * 2 | vdev2 | pad | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
#define BPE_NUM_WORDS 14
#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
#define BPE_IS_PAYLOADWORD(bp, wp) \
- ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
+ ((wp) != &(bp)->blk_prop && (wp) != (&(bp)->blk_birth_word[1]))
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[2]; /* Extra space for the future */
- uint64_t blk_phys_birth; /* txg when block was allocated */
- uint64_t blk_birth; /* transaction group at birth */
+ uint64_t blk_birth_word[2];
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
SPA_MINBLOCKSHIFT, 0, x)
-#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
-#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
-
#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, SPA_VDEVBITS)
#define DVA_SET_VDEV(dva, x) \
BF64_SET((dva)->dva_word[0], 32, SPA_VDEVBITS, x)
#define BP_GET_FREE(bp) BF64_GET((bp)->blk_fill, 0, 1)
#define BP_SET_FREE(bp, x) BF64_SET((bp)->blk_fill, 0, 1, x)
-#define BP_PHYSICAL_BIRTH(bp) \
- (BP_IS_EMBEDDED(bp) ? 0 : \
- (bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
+#define BP_GET_LOGICAL_BIRTH(bp) (bp)->blk_birth_word[1]
+#define BP_SET_LOGICAL_BIRTH(bp, x) ((bp)->blk_birth_word[1] = (x))
+
+#define BP_GET_PHYSICAL_BIRTH(bp) (bp)->blk_birth_word[0]
+#define BP_SET_PHYSICAL_BIRTH(bp, x) ((bp)->blk_birth_word[0] = (x))
+
+#define BP_GET_BIRTH(bp) \
+ (BP_IS_EMBEDDED(bp) ? 0 : \
+ BP_GET_PHYSICAL_BIRTH(bp) ? BP_GET_PHYSICAL_BIRTH(bp) : \
+ BP_GET_LOGICAL_BIRTH(bp))
#define BP_SET_BIRTH(bp, logical, physical) \
{ \
ASSERT(!BP_IS_EMBEDDED(bp)); \
- (bp)->blk_birth = (logical); \
- (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
+ BP_SET_LOGICAL_BIRTH(bp, logical); \
+ BP_SET_PHYSICAL_BIRTH(bp, \
+ ((logical) == (physical) ? 0 : (physical))); \
}
#define BP_GET_FILL(bp) \
(dva1)->dva_word[0] == (dva2)->dva_word[0])
#define BP_EQUAL(bp1, bp2) \
- (BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \
- (bp1)->blk_birth == (bp2)->blk_birth && \
+ (BP_GET_BIRTH(bp1) == BP_GET_BIRTH(bp2) && \
+ BP_GET_LOGICAL_BIRTH(bp1) == BP_GET_LOGICAL_BIRTH(bp2) && \
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
(bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \
- (bp)->blk_phys_birth = 0; \
- (bp)->blk_birth = 0; \
+ (bp)->blk_birth_word[0] = 0; \
+ (bp)->blk_birth_word[1] = 0; \
(bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
/*
* This macro allows code sharing between zfs, libzpool, and mdb.
- * 'func' is either snprintf() or mdb_snprintf().
+ * 'func' is either kmem_scnprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
{ \
- static const char *copyname[] = \
+ static const char *const copyname[] = \
{ "zero", "single", "double", "triple" }; \
int len = 0; \
int copies = 0; \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
(u_longlong_t)BP_GET_LSIZE(bp), \
- (u_longlong_t)bp->blk_birth); \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else if (BP_IS_EMBEDDED(bp)) { \
len = func(buf + len, size - len, \
"EMBEDDED [L%llu %s] et=%u %s " \
compress, \
(u_longlong_t)BPE_GET_LSIZE(bp), \
(u_longlong_t)BPE_GET_PSIZE(bp), \
- (u_longlong_t)bp->blk_birth); \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else if (BP_IS_REDACTED(bp)) { \
len += func(buf + len, size - len, \
"REDACTED [L%llu %s] size=%llxL birth=%lluL", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
(u_longlong_t)BP_GET_LSIZE(bp), \
- (u_longlong_t)bp->blk_birth); \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp)); \
} else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
+ ASSERT3S(copies, >, 0); \
if (BP_IS_ENCRYPTED(bp)) { \
len += func(buf + len, size - len, \
"salt=%llx iv=%llx:%llx%c", \
len += func(buf + len, size - len, \
"[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
- "cksum=%llx:%llx:%llx:%llx", \
+ "cksum=%016llx:%016llx:%016llx:%016llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
ws, \
(u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)BP_GET_PSIZE(bp), \
- (u_longlong_t)bp->blk_birth, \
- (u_longlong_t)BP_PHYSICAL_BIRTH(bp), \
+ (u_longlong_t)BP_GET_LOGICAL_BIRTH(bp), \
+ (u_longlong_t)BP_GET_BIRTH(bp), \
(u_longlong_t)BP_GET_FILL(bp), \
ws, \
(u_longlong_t)bp->blk_cksum.zc_word[0], \
* Send TRIM commands in-line during normal pool operation while deleting.
* OFF: no
* ON: yes
- * NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
*/
typedef enum {
SPA_AUTOTRIM_OFF = 0, /* default */
SPA_AUTOTRIM_ON,
-#ifdef IN_FREEBSD_BASE
- SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
-#else
- SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
-#endif
} spa_autotrim_t;
/*
} trim_type_t;
/* state manipulation functions */
-extern int spa_open(const char *pool, spa_t **, void *tag);
-extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
+extern int spa_open(const char *pool, spa_t **, const void *tag);
+extern int spa_open_rewind(const char *pool, spa_t **, const void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
#define SPA_ASYNC_L2CACHE_REBUILD 0x800
#define SPA_ASYNC_L2CACHE_TRIM 0x1000
#define SPA_ASYNC_REBUILD_DONE 0x2000
+#define SPA_ASYNC_DETACH_SPARE 0x4000
/* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
uint64_t rate, boolean_t partial, boolean_t secure, nvlist_t *vdev_errlist);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
-extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
- nvlist_t *props, boolean_t exp);
+extern int spa_vdev_split_mirror(spa_t *spa, const char *newname,
+ nvlist_t *config, nvlist_t *props, boolean_t exp);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
extern void spa_sync_allpools(void);
-extern int zfs_sync_pass_deferred_free;
+extern uint_t zfs_sync_pass_deferred_free;
+
+/* spa sync taskqueues */
+taskq_t *spa_sync_tq_create(spa_t *spa, const char *name);
+void spa_sync_tq_destroy(spa_t *spa);
+void spa_select_allocator(zio_t *zio);
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
#define SPA_CONFIG_UPDATE_POOL 0
#define SPA_CONFIG_UPDATE_VDEVS 1
-extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t);
+extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
extern void spa_config_load(void);
-extern nvlist_t *spa_all_configs(uint64_t *);
+extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
extern spa_t *spa_next(spa_t *prev);
/* Refcount functions */
-extern void spa_open_ref(spa_t *spa, void *tag);
-extern void spa_close(spa_t *spa, void *tag);
-extern void spa_async_close(spa_t *spa, void *tag);
+extern void spa_open_ref(spa_t *spa, const void *tag);
+extern void spa_close(spa_t *spa, const void *tag);
+extern void spa_async_close(spa_t *spa, const void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_NONE 0x00
uint64_t max_txg);
extern int spa_import_progress_set_state(uint64_t pool_guid,
spa_load_state_t spa_load_state);
+extern void spa_import_progress_set_notes(spa_t *spa,
+ const char *fmt, ...) __printflike(2, 3);
+extern void spa_import_progress_set_notes_nolog(spa_t *spa,
+ const char *fmt, ...) __printflike(2, 3);
/* Pool configuration locks */
-extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
+extern int spa_config_tryenter(spa_t *spa, int locks, const void *tag,
+ krw_t rw);
extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
+extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
+ krw_t rw);
extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
extern uint64_t spa_vdev_detach_enter(spa_t *spa, uint64_t guid);
extern uint64_t spa_vdev_config_enter(spa_t *spa);
extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
- int error, char *tag);
+ int error, const char *tag);
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
/* Pool vdev state change lock */
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
extern void spa_altroot(spa_t *, char *, size_t);
-extern int spa_sync_pass(spa_t *spa);
+extern uint32_t spa_sync_pass(spa_t *spa);
extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_load_guid(spa_t *spa);
extern uint64_t spa_deadman_ziotime(spa_t *spa);
extern uint64_t spa_dirty_data(spa_t *spa);
extern spa_autotrim_t spa_get_autotrim(spa_t *spa);
+extern int spa_get_allocator(spa_t *spa);
+extern void spa_set_allocator(spa_t *spa, const char *allocator);
/* Miscellaneous support routines */
extern void spa_load_failed(spa_t *spa, const char *fmt, ...)
/* error handling */
struct zbookmark_phys;
-extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
+extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb,
+ const uint64_t birth);
+extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb,
+ uint64_t birth);
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);
extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd,
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
-extern uint64_t spa_get_errlog_size(spa_t *spa);
+extern uint64_t spa_approx_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
+extern uint64_t spa_get_last_errlog_size(spa_t *spa);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
extern void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj,
dmu_tx_t *tx);
extern void spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx);
-
-/* vdev cache */
-extern void vdev_cache_stat_init(void);
-extern void vdev_cache_stat_fini(void);
+extern int find_top_affected_fs(spa_t *spa, uint64_t head_ds,
+ zbookmark_err_phys_t *zep, uint64_t *top_affected_fs);
+extern int find_birth_txg(struct dsl_dataset *ds, zbookmark_err_phys_t *zep,
+ uint64_t *birth_txg);
+extern void zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep,
+ zbookmark_phys_t *zb);
+extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
/* vdev mirror */
extern void vdev_mirror_stat_init(void);
int param_set_deadman_synctime(ZFS_MODULE_PARAM_ARGS);
int param_set_slop_shift(ZFS_MODULE_PARAM_ARGS);
int param_set_deadman_failmode(ZFS_MODULE_PARAM_ARGS);
+int param_set_active_allocator(ZFS_MODULE_PARAM_ARGS);
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
extern spa_mode_t spa_mode_global;
extern int zfs_deadman_enabled;
-extern unsigned long zfs_deadman_synctime_ms;
-extern unsigned long zfs_deadman_ziotime_ms;
-extern unsigned long zfs_deadman_checktime_ms;
+extern uint64_t zfs_deadman_synctime_ms;
+extern uint64_t zfs_deadman_ziotime_ms;
+extern uint64_t zfs_deadman_checktime_ms;
extern kmem_cache_t *zio_buf_cache[];
extern kmem_cache_t *zio_data_buf_cache[];