]> git.proxmox.com Git - mirror_zfs.git/blame - include/sys/uberblock_impl.h
vdev probe to slow disk can stall mmp write checker
[mirror_zfs.git] / include / sys / uberblock_impl.h
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1d3ba0bf 9 * or https://opensource.org/licenses/CDDL-1.0.
34dc7c2f
BB
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
d2734cce 23 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
34dc7c2f
BB
24 */
25
26#ifndef _SYS_UBERBLOCK_IMPL_H
27#define _SYS_UBERBLOCK_IMPL_H
28
34dc7c2f
BB
29#include <sys/uberblock.h>
30
31#ifdef __cplusplus
32extern "C" {
33#endif
34
35/*
36 * The uberblock version is incremented whenever an incompatible on-disk
37 * format change is made to the SPA, DMU, or ZAP.
38 *
39 * Note: the first two fields should never be moved. When a storage pool
40 * is opened, the uberblock must be read off the disk before the version
41 * can be checked. If the ub_version field is moved, we may not detect
42 * version mismatch. If the ub_magic field is moved, applications that
43 * expect the magic number in the first word won't work.
44 */
/* Value stored in ub_magic of every uberblock ("oo-ba-bloc!"). */
#define	UBERBLOCK_MAGIC		0x00bab10c
/* log2 of the uberblock size bound: 1 << 10, i.e. up to 1K. */
#define	UBERBLOCK_SHIFT		10
/* Value stored in ub_mmp_magic when MMP fields are present ("all-see-all"). */
#define	MMP_MAGIC		0xa11cea11

/*
 * Flag bits kept in the low byte of ub_mmp_config.  Each one marks the
 * corresponding field of ub_mmp_config as holding a meaningful value.
 */
#define	MMP_INTERVAL_VALID_BIT	0x01	/* write interval field */
#define	MMP_SEQ_VALID_BIT	0x02	/* sequence number field */
#define	MMP_FAIL_INT_VALID_BIT	0x04	/* fail intervals field */
52
c3f2f1aa
DB
/*
 * MMP validity predicates.  All take a pointer to an uberblock and
 * evaluate to 0 or 1.
 *
 * MMP_VALID() is true when both magic numbers match, i.e. the uberblock
 * itself is recognized and it carries MMP fields (ub_mmp_magic may be
 * missing in uberblocks read from disk).  The per-field predicates
 * additionally require the matching VALID bit in ub_mmp_config.
 *
 * Note that (ubp) is evaluated more than once.
 */
#define	MMP_VALID(ubp) \
	((ubp)->ub_magic == UBERBLOCK_MAGIC && \
	(ubp)->ub_mmp_magic == MMP_MAGIC)

#define	MMP_INTERVAL_VALID(ubp) \
	(MMP_VALID(ubp) && \
	((ubp)->ub_mmp_config & MMP_INTERVAL_VALID_BIT) != 0)

#define	MMP_SEQ_VALID(ubp) \
	(MMP_VALID(ubp) && \
	((ubp)->ub_mmp_config & MMP_SEQ_VALID_BIT) != 0)

#define	MMP_FAIL_INT_VALID(ubp) \
	(MMP_VALID(ubp) && \
	((ubp)->ub_mmp_config & MMP_FAIL_INT_VALID_BIT) != 0)
61
/*
 * Field extractors for ub_mmp_config.  Each shifts the field down to
 * bit 0 and masks it to its width:
 *
 *	MMP_INTERVAL	bits  8..31 (24 bits)
 *	MMP_SEQ		bits 32..47 (16 bits)
 *	MMP_FAIL_INT	bits 48..63 (16 bits)
 *
 * These do not look at the VALID bits; use the MMP_*_VALID() predicates
 * to find out whether a field is meaningful.
 */
#define	MMP_INTERVAL(ubp)	(((ubp)->ub_mmp_config >> 8) & 0xFFFFFF)
#define	MMP_SEQ(ubp)		(((ubp)->ub_mmp_config >> 32) & 0xFFFF)
#define	MMP_FAIL_INT(ubp)	(((ubp)->ub_mmp_config >> 48) & 0xFFFF)
68
/*
 * Encoders for the fields of ub_mmp_config.  Each macro masks its
 * argument to the width of the field, shifts it into position and ORs in
 * the matching VALID bit, so the three results can be combined with '|'
 * to build a complete ub_mmp_config value:
 *
 *	MMP_INTERVAL_SET	24-bit value placed at bits  8..31
 *	MMP_SEQ_SET		16-bit value placed at bits 32..47
 *	MMP_FAIL_INT_SET	16-bit value placed at bits 48..63
 *
 * The argument is parenthesized before masking.  Without that, an
 * argument containing an operator that binds less tightly than '&'
 * (such as '|' or '?:') would only be partially masked and could spill
 * into a neighbouring field.
 */
#define	MMP_INTERVAL_SET(write) \
	(((uint64_t)((write) & 0xFFFFFF) << 8) | MMP_INTERVAL_VALID_BIT)

#define	MMP_SEQ_SET(seq) \
	(((uint64_t)((seq) & 0xFFFF) << 32) | MMP_SEQ_VALID_BIT)

#define	MMP_FAIL_INT_SET(fail) \
	(((uint64_t)((fail) & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)
34dc7c2f 77
5caeef02
DB
/*
 * RAIDZ expansion reflow information.
 *
 *	64      56      48      40      32      24      16      8       0
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 *	|Scratch |                    Reflow                            |
 *	| State  |                    Offset                            |
 *	+-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * The RRSS_* accessor macros pass (55, 9) to the BF64_* helpers for the
 * scratch state and (0, 55) for the reflow offset.
 */
typedef enum raidz_reflow_scratch_state {
	RRSS_SCRATCH_NOT_IN_USE = 0,
	RRSS_SCRATCH_VALID = 1,
	RRSS_SCRATCH_INVALID_SYNCED = 2,
	RRSS_SCRATCH_INVALID_SYNCED_ON_IMPORT = 3,
	RRSS_SCRATCH_INVALID_SYNCED_REFLOW = 4
} raidz_reflow_scratch_state_t;
94
/*
 * Accessors for the scratch state packed into ub_raidz_reflow_info.
 * (ub) is a pointer to an uberblock; the BF64_* helpers are handed the
 * arguments (55, 9) for this field.
 */
#define	RRSS_GET_STATE(ub) \
	BF64_GET((ub)->ub_raidz_reflow_info, 55, 9)
#define	RRSS_SET_STATE(ub, x) \
	BF64_SET((ub)->ub_raidz_reflow_info, 55, 9, x)

/*
 * Accessors for the reflow offset packed into ub_raidz_reflow_info.
 * The BF64_*_SB helpers are handed (0, 55) together with
 * SPA_MINBLOCKSHIFT as the shift and 0 as the bias.
 */
#define	RRSS_GET_OFFSET(ub) \
	BF64_GET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0)
#define	RRSS_SET_OFFSET(ub, x) \
	BF64_SET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0, x)

/*
 * Replace the whole of ub_raidz_reflow_info: clear the word, then store
 * the offset followed by the state.  Wrapped in do/while (0) so that it
 * behaves as a single statement.
 */
#define	RAIDZ_REFLOW_SET(ub, state, offset) \
do { \
	(ub)->ub_raidz_reflow_info = 0; \
	RRSS_SET_OFFSET(ub, offset); \
	RRSS_SET_STATE(ub, state); \
} while (0)
110
34dc7c2f
BB
111struct uberblock {
112 uint64_t ub_magic; /* UBERBLOCK_MAGIC */
113 uint64_t ub_version; /* SPA_VERSION */
114 uint64_t ub_txg; /* txg of last sync */
115 uint64_t ub_guid_sum; /* sum of all vdev guids */
116 uint64_t ub_timestamp; /* UTC time of last sync */
117 blkptr_t ub_rootbp; /* MOS objset_phys_t */
428870ff
BB
118
119 /* highest SPA_VERSION supported by software that wrote this txg */
120 uint64_t ub_software_version;
379ca9cf
OF
121
122 /* Maybe missing in uberblocks we read, but always written */
123 uint64_t ub_mmp_magic; /* MMP_MAGIC */
060f0226
OF
124 /*
125 * If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off.
126 * Otherwise, nanosec since last MMP write.
127 */
128 uint64_t ub_mmp_delay;
129
130 /*
131 * The ub_mmp_config contains the multihost write interval, multihost
132 * fail intervals, sequence number for sub-second granularity, and
133 * valid bit mask. This layout is as follows:
134 *
135 * 64 56 48 40 32 24 16 8 0
136 * +-------+-------+-------+-------+-------+-------+-------+-------+
137 * 0 | Fail Intervals| Seq | Write Interval (ms) | VALID |
138 * +-------+-------+-------+-------+-------+-------+-------+-------+
139 *
140 * This allows a write_interval of (2^24/1000)s, over 4.5 hours
141 *
142 * VALID Bits:
143 * - 0x01 - Write Interval (ms)
144 * - 0x02 - Sequence number exists
145 * - 0x04 - Fail Intervals
146 * - 0xf8 - Reserved
147 */
148 uint64_t ub_mmp_config;
d2734cce
SD
149
150 /*
151 * ub_checkpoint_txg indicates two things about the current uberblock:
152 *
153 * 1] If it is not zero then this uberblock is a checkpoint. If it is
154 * zero, then this uberblock is not a checkpoint.
155 *
156 * 2] On checkpointed uberblocks, the value of ub_checkpoint_txg is
157 * the ub_txg that the uberblock had at the time we moved it to
158 * the MOS config.
159 *
160 * The field is set when we checkpoint the uberblock and continues to
161 * hold that value even after we've rewound (unlike the ub_txg that
162 * is reset to a higher value).
163 *
164 * Besides checks used to determine whether we are reopening the
165 * pool from a checkpointed uberblock [see spa_ld_select_uberblock()],
166 * the value of the field is used to determine which ZIL blocks have
167 * been allocated according to the ms_sm when we are rewinding to a
493fcce9 168 * checkpoint. Specifically, if logical birth > ub_checkpoint_txg,then
d2734cce
SD
169 * the ZIL block is not allocated [see uses of spa_min_claim_txg()].
170 */
829f9251 171 uint64_t ub_checkpoint_txg;
5caeef02
DB
172
173 uint64_t ub_raidz_reflow_info;
34dc7c2f
BB
174};
175
176#ifdef __cplusplus
177}
178#endif
179
180#endif /* _SYS_UBERBLOCK_IMPL_H */