]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
34dc7c2f BB |
23 | */ |
24 | ||
34dc7c2f BB |
25 | #include <sys/zfs_context.h> |
26 | #include <sys/spa.h> | |
27 | #include <sys/zio.h> | |
28 | #include <sys/zio_checksum.h> | |
428870ff BB |
29 | #include <sys/zil.h> |
30 | #include <zfs_fletcher.h> | |
34dc7c2f BB |
31 | |
/*
 * Checksum vectors.
 *
 * In the SPA, everything is checksummed.  We support checksum vectors
 * for three distinct reasons:
 *
 *   1. Different kinds of data need different levels of protection.
 *	For SPA metadata, we always want a very strong checksum.
 *	For user data, we let users make the trade-off between speed
 *	and checksum strength.
 *
 *   2. Cryptographic hash and MAC algorithms are an area of active research.
 *	It is likely that future hash functions will be at least as strong
 *	as the current best-of-breed, and may be substantially faster as well.
 *	We want the ability to take advantage of these new hashes as soon as
 *	they become available.
 *
 *   3. If someone develops hardware that can compute a strong hash quickly,
 *	we want the ability to take advantage of that hardware.
 *
 * Of course, we don't want a checksum upgrade to invalidate existing
 * data, so we store the checksum *function* in eight bits of the bp.
 * This gives us room for up to 256 different checksum functions.
 *
 * When writing a block, we always checksum it with the latest-and-greatest
 * checksum function of the appropriate strength.  When reading a block,
 * we compare the expected checksum against the actual checksum, which we
 * compute via the checksum function specified by BP_GET_CHECKSUM(bp).
 */
61 | ||
62 | /*ARGSUSED*/ | |
63 | static void | |
64 | zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) | |
65 | { | |
66 | ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); | |
67 | } | |
68 | ||
69 | zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { | |
428870ff BB |
70 | {{NULL, NULL}, 0, 0, 0, "inherit"}, |
71 | {{NULL, NULL}, 0, 0, 0, "on"}, | |
72 | {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, | |
73 | {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, | |
74 | {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, | |
75 | {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, | |
76 | {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, | |
77 | {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, | |
78 | {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, | |
79 | {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, | |
34dc7c2f BB |
80 | }; |
81 | ||
428870ff BB |
82 | enum zio_checksum |
83 | zio_checksum_select(enum zio_checksum child, enum zio_checksum parent) | |
34dc7c2f BB |
84 | { |
85 | ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); | |
86 | ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); | |
87 | ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); | |
88 | ||
89 | if (child == ZIO_CHECKSUM_INHERIT) | |
90 | return (parent); | |
91 | ||
92 | if (child == ZIO_CHECKSUM_ON) | |
93 | return (ZIO_CHECKSUM_ON_VALUE); | |
94 | ||
95 | return (child); | |
96 | } | |
97 | ||
428870ff BB |
98 | enum zio_checksum |
99 | zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, | |
100 | enum zio_checksum parent) | |
101 | { | |
102 | ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); | |
103 | ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); | |
104 | ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); | |
105 | ||
106 | if (child == ZIO_CHECKSUM_INHERIT) | |
107 | return (parent); | |
108 | ||
109 | if (child == ZIO_CHECKSUM_ON) | |
110 | return (spa_dedup_checksum(spa)); | |
111 | ||
112 | if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) | |
113 | return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); | |
114 | ||
115 | ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || | |
116 | (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); | |
117 | ||
118 | return (child); | |
119 | } | |
120 | ||
b128c09f BB |
121 | /* |
122 | * Set the external verifier for a gang block based on <vdev, offset, txg>, | |
123 | * a tuple which is guaranteed to be unique for the life of the pool. | |
124 | */ | |
125 | static void | |
126 | zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) | |
127 | { | |
128 | dva_t *dva = BP_IDENTITY(bp); | |
428870ff | 129 | uint64_t txg = BP_PHYSICAL_BIRTH(bp); |
b128c09f BB |
130 | |
131 | ASSERT(BP_IS_GANG(bp)); | |
132 | ||
133 | ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); | |
134 | } | |
135 | ||
136 | /* | |
137 | * Set the external verifier for a label block based on its offset. | |
138 | * The vdev is implicit, and the txg is unknowable at pool open time -- | |
139 | * hence the logic in vdev_uberblock_load() to find the most recent copy. | |
140 | */ | |
141 | static void | |
142 | zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) | |
143 | { | |
144 | ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); | |
145 | } | |
146 | ||
34dc7c2f BB |
147 | /* |
148 | * Generate the checksum. | |
149 | */ | |
150 | void | |
b128c09f BB |
151 | zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, |
152 | void *data, uint64_t size) | |
34dc7c2f | 153 | { |
b128c09f BB |
154 | blkptr_t *bp = zio->io_bp; |
155 | uint64_t offset = zio->io_offset; | |
34dc7c2f | 156 | zio_checksum_info_t *ci = &zio_checksum_table[checksum]; |
428870ff | 157 | zio_cksum_t cksum; |
34dc7c2f | 158 | |
b128c09f | 159 | ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); |
34dc7c2f BB |
160 | ASSERT(ci->ci_func[0] != NULL); |
161 | ||
428870ff BB |
162 | if (ci->ci_eck) { |
163 | zio_eck_t *eck; | |
164 | ||
165 | if (checksum == ZIO_CHECKSUM_ZILOG2) { | |
166 | zil_chain_t *zilc = data; | |
167 | ||
168 | size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, | |
169 | uint64_t); | |
170 | eck = &zilc->zc_eck; | |
171 | } else { | |
172 | eck = (zio_eck_t *)((char *)data + size) - 1; | |
173 | } | |
b128c09f | 174 | if (checksum == ZIO_CHECKSUM_GANG_HEADER) |
428870ff | 175 | zio_checksum_gang_verifier(&eck->zec_cksum, bp); |
b128c09f | 176 | else if (checksum == ZIO_CHECKSUM_LABEL) |
428870ff | 177 | zio_checksum_label_verifier(&eck->zec_cksum, offset); |
b128c09f | 178 | else |
428870ff BB |
179 | bp->blk_cksum = eck->zec_cksum; |
180 | eck->zec_magic = ZEC_MAGIC; | |
181 | ci->ci_func[0](data, size, &cksum); | |
182 | eck->zec_cksum = cksum; | |
34dc7c2f | 183 | } else { |
b128c09f | 184 | ci->ci_func[0](data, size, &bp->blk_cksum); |
34dc7c2f BB |
185 | } |
186 | } | |
187 | ||
188 | int | |
428870ff | 189 | zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) |
34dc7c2f BB |
190 | { |
191 | blkptr_t *bp = zio->io_bp; | |
b128c09f BB |
192 | uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : |
193 | (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); | |
194 | int byteswap; | |
428870ff | 195 | int error; |
b128c09f BB |
196 | uint64_t size = (bp == NULL ? zio->io_size : |
197 | (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); | |
198 | uint64_t offset = zio->io_offset; | |
428870ff | 199 | void *data = zio->io_data; |
34dc7c2f | 200 | zio_checksum_info_t *ci = &zio_checksum_table[checksum]; |
b128c09f | 201 | zio_cksum_t actual_cksum, expected_cksum, verifier; |
34dc7c2f BB |
202 | |
203 | if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) | |
204 | return (EINVAL); | |
205 | ||
428870ff BB |
206 | if (ci->ci_eck) { |
207 | zio_eck_t *eck; | |
208 | ||
209 | if (checksum == ZIO_CHECKSUM_ZILOG2) { | |
210 | zil_chain_t *zilc = data; | |
211 | uint64_t nused; | |
212 | ||
213 | eck = &zilc->zc_eck; | |
214 | if (eck->zec_magic == ZEC_MAGIC) | |
215 | nused = zilc->zc_nused; | |
216 | else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) | |
217 | nused = BSWAP_64(zilc->zc_nused); | |
218 | else | |
219 | return (ECKSUM); | |
220 | ||
221 | if (nused > size) | |
222 | return (ECKSUM); | |
223 | ||
224 | size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); | |
225 | } else { | |
226 | eck = (zio_eck_t *)((char *)data + size) - 1; | |
227 | } | |
228 | ||
34dc7c2f | 229 | if (checksum == ZIO_CHECKSUM_GANG_HEADER) |
b128c09f BB |
230 | zio_checksum_gang_verifier(&verifier, bp); |
231 | else if (checksum == ZIO_CHECKSUM_LABEL) | |
232 | zio_checksum_label_verifier(&verifier, offset); | |
233 | else | |
234 | verifier = bp->blk_cksum; | |
235 | ||
428870ff | 236 | byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); |
34dc7c2f | 237 | |
b128c09f BB |
238 | if (byteswap) |
239 | byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); | |
240 | ||
428870ff BB |
241 | expected_cksum = eck->zec_cksum; |
242 | eck->zec_cksum = verifier; | |
b128c09f | 243 | ci->ci_func[byteswap](data, size, &actual_cksum); |
428870ff | 244 | eck->zec_cksum = expected_cksum; |
b128c09f BB |
245 | |
246 | if (byteswap) | |
34dc7c2f BB |
247 | byteswap_uint64_array(&expected_cksum, |
248 | sizeof (zio_cksum_t)); | |
34dc7c2f BB |
249 | } else { |
250 | ASSERT(!BP_IS_GANG(bp)); | |
b128c09f BB |
251 | byteswap = BP_SHOULD_BYTESWAP(bp); |
252 | expected_cksum = bp->blk_cksum; | |
34dc7c2f BB |
253 | ci->ci_func[byteswap](data, size, &actual_cksum); |
254 | } | |
255 | ||
428870ff BB |
256 | info->zbc_expected = expected_cksum; |
257 | info->zbc_actual = actual_cksum; | |
258 | info->zbc_checksum_name = ci->ci_name; | |
259 | info->zbc_byteswapped = byteswap; | |
260 | info->zbc_injected = 0; | |
261 | info->zbc_has_cksum = 1; | |
262 | ||
b128c09f | 263 | if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) |
34dc7c2f BB |
264 | return (ECKSUM); |
265 | ||
428870ff BB |
266 | if (zio_injection_enabled && !zio->io_error && |
267 | (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { | |
268 | ||
269 | info->zbc_injected = 1; | |
270 | return (error); | |
271 | } | |
34dc7c2f BB |
272 | |
273 | return (0); | |
274 | } |