]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
2e528b49 | 23 | * Copyright (c) 2013 by Delphix. All rights reserved. |
34dc7c2f BB |
24 | */ |
25 | ||
34dc7c2f BB |
26 | #include <sys/zfs_context.h> |
27 | #include <sys/spa.h> | |
28 | #include <sys/zio.h> | |
29 | #include <sys/zio_checksum.h> | |
428870ff BB |
30 | #include <sys/zil.h> |
31 | #include <zfs_fletcher.h> | |
34dc7c2f BB |
32 | |
33 | /* | |
34 | * Checksum vectors. | |
35 | * | |
36 | * In the SPA, everything is checksummed. We support checksum vectors | |
37 | * for three distinct reasons: | |
38 | * | |
39 | * 1. Different kinds of data need different levels of protection. | |
40 | * For SPA metadata, we always want a very strong checksum. | |
41 | * For user data, we let users make the trade-off between speed | |
42 | * and checksum strength. | |
43 | * | |
44 | * 2. Cryptographic hash and MAC algorithms are an area of active research. | |
45 | * It is likely that in future hash functions will be at least as strong | |
46 | * as current best-of-breed, and may be substantially faster as well. | |
47 | * We want the ability to take advantage of these new hashes as soon as | |
48 | * they become available. | |
49 | * | |
50 | * 3. If someone develops hardware that can compute a strong hash quickly, | |
51 | * we want the ability to take advantage of that hardware. | |
52 | * | |
53 | * Of course, we don't want a checksum upgrade to invalidate existing | |
428870ff BB |
54 | * data, so we store the checksum *function* in eight bits of the bp. |
55 | * This gives us room for up to 256 different checksum functions. | |
34dc7c2f BB |
56 | * |
57 | * When writing a block, we always checksum it with the latest-and-greatest | |
58 | * checksum function of the appropriate strength. When reading a block, | |
59 | * we compare the expected checksum against the actual checksum, which we | |
428870ff | 60 | * compute via the checksum function specified by BP_GET_CHECKSUM(bp). |
34dc7c2f BB |
61 | */ |
62 | ||
63 | /*ARGSUSED*/ | |
64 | static void | |
65 | zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) | |
66 | { | |
67 | ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); | |
68 | } | |
69 | ||
70 | zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { | |
428870ff BB |
71 | {{NULL, NULL}, 0, 0, 0, "inherit"}, |
72 | {{NULL, NULL}, 0, 0, 0, "on"}, | |
73 | {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, | |
74 | {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, | |
75 | {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, | |
76 | {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, | |
77 | {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, | |
78 | {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, | |
79 | {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, | |
80 | {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, | |
34dc7c2f BB |
81 | }; |
82 | ||
428870ff BB |
83 | enum zio_checksum |
84 | zio_checksum_select(enum zio_checksum child, enum zio_checksum parent) | |
34dc7c2f BB |
85 | { |
86 | ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); | |
87 | ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); | |
88 | ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); | |
89 | ||
90 | if (child == ZIO_CHECKSUM_INHERIT) | |
91 | return (parent); | |
92 | ||
93 | if (child == ZIO_CHECKSUM_ON) | |
94 | return (ZIO_CHECKSUM_ON_VALUE); | |
95 | ||
96 | return (child); | |
97 | } | |
98 | ||
428870ff BB |
99 | enum zio_checksum |
100 | zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, | |
101 | enum zio_checksum parent) | |
102 | { | |
103 | ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); | |
104 | ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); | |
105 | ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); | |
106 | ||
107 | if (child == ZIO_CHECKSUM_INHERIT) | |
108 | return (parent); | |
109 | ||
110 | if (child == ZIO_CHECKSUM_ON) | |
111 | return (spa_dedup_checksum(spa)); | |
112 | ||
113 | if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) | |
114 | return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); | |
115 | ||
116 | ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || | |
117 | (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); | |
118 | ||
119 | return (child); | |
120 | } | |
121 | ||
b128c09f BB |
122 | /* |
123 | * Set the external verifier for a gang block based on <vdev, offset, txg>, | |
124 | * a tuple which is guaranteed to be unique for the life of the pool. | |
125 | */ | |
126 | static void | |
127 | zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) | |
128 | { | |
129 | dva_t *dva = BP_IDENTITY(bp); | |
428870ff | 130 | uint64_t txg = BP_PHYSICAL_BIRTH(bp); |
b128c09f BB |
131 | |
132 | ASSERT(BP_IS_GANG(bp)); | |
133 | ||
134 | ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); | |
135 | } | |
136 | ||
137 | /* | |
138 | * Set the external verifier for a label block based on its offset. | |
139 | * The vdev is implicit, and the txg is unknowable at pool open time -- | |
140 | * hence the logic in vdev_uberblock_load() to find the most recent copy. | |
141 | */ | |
142 | static void | |
143 | zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) | |
144 | { | |
145 | ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); | |
146 | } | |
147 | ||
34dc7c2f BB |
148 | /* |
149 | * Generate the checksum. | |
150 | */ | |
151 | void | |
b128c09f BB |
152 | zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, |
153 | void *data, uint64_t size) | |
34dc7c2f | 154 | { |
b128c09f BB |
155 | blkptr_t *bp = zio->io_bp; |
156 | uint64_t offset = zio->io_offset; | |
34dc7c2f | 157 | zio_checksum_info_t *ci = &zio_checksum_table[checksum]; |
428870ff | 158 | zio_cksum_t cksum; |
34dc7c2f | 159 | |
b128c09f | 160 | ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); |
34dc7c2f BB |
161 | ASSERT(ci->ci_func[0] != NULL); |
162 | ||
428870ff BB |
163 | if (ci->ci_eck) { |
164 | zio_eck_t *eck; | |
165 | ||
166 | if (checksum == ZIO_CHECKSUM_ZILOG2) { | |
167 | zil_chain_t *zilc = data; | |
168 | ||
169 | size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, | |
170 | uint64_t); | |
171 | eck = &zilc->zc_eck; | |
172 | } else { | |
173 | eck = (zio_eck_t *)((char *)data + size) - 1; | |
174 | } | |
b128c09f | 175 | if (checksum == ZIO_CHECKSUM_GANG_HEADER) |
428870ff | 176 | zio_checksum_gang_verifier(&eck->zec_cksum, bp); |
b128c09f | 177 | else if (checksum == ZIO_CHECKSUM_LABEL) |
428870ff | 178 | zio_checksum_label_verifier(&eck->zec_cksum, offset); |
b128c09f | 179 | else |
428870ff BB |
180 | bp->blk_cksum = eck->zec_cksum; |
181 | eck->zec_magic = ZEC_MAGIC; | |
182 | ci->ci_func[0](data, size, &cksum); | |
183 | eck->zec_cksum = cksum; | |
34dc7c2f | 184 | } else { |
b128c09f | 185 | ci->ci_func[0](data, size, &bp->blk_cksum); |
34dc7c2f BB |
186 | } |
187 | } | |
188 | ||
189 | int | |
428870ff | 190 | zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) |
34dc7c2f BB |
191 | { |
192 | blkptr_t *bp = zio->io_bp; | |
b128c09f BB |
193 | uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : |
194 | (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); | |
195 | int byteswap; | |
428870ff | 196 | int error; |
b128c09f BB |
197 | uint64_t size = (bp == NULL ? zio->io_size : |
198 | (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); | |
199 | uint64_t offset = zio->io_offset; | |
428870ff | 200 | void *data = zio->io_data; |
34dc7c2f | 201 | zio_checksum_info_t *ci = &zio_checksum_table[checksum]; |
b128c09f | 202 | zio_cksum_t actual_cksum, expected_cksum, verifier; |
34dc7c2f BB |
203 | |
204 | if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) | |
2e528b49 | 205 | return (SET_ERROR(EINVAL)); |
34dc7c2f | 206 | |
428870ff BB |
207 | if (ci->ci_eck) { |
208 | zio_eck_t *eck; | |
209 | ||
210 | if (checksum == ZIO_CHECKSUM_ZILOG2) { | |
211 | zil_chain_t *zilc = data; | |
212 | uint64_t nused; | |
213 | ||
214 | eck = &zilc->zc_eck; | |
215 | if (eck->zec_magic == ZEC_MAGIC) | |
216 | nused = zilc->zc_nused; | |
217 | else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) | |
218 | nused = BSWAP_64(zilc->zc_nused); | |
219 | else | |
2e528b49 | 220 | return (SET_ERROR(ECKSUM)); |
428870ff BB |
221 | |
222 | if (nused > size) | |
2e528b49 | 223 | return (SET_ERROR(ECKSUM)); |
428870ff BB |
224 | |
225 | size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); | |
226 | } else { | |
227 | eck = (zio_eck_t *)((char *)data + size) - 1; | |
228 | } | |
229 | ||
34dc7c2f | 230 | if (checksum == ZIO_CHECKSUM_GANG_HEADER) |
b128c09f BB |
231 | zio_checksum_gang_verifier(&verifier, bp); |
232 | else if (checksum == ZIO_CHECKSUM_LABEL) | |
233 | zio_checksum_label_verifier(&verifier, offset); | |
234 | else | |
235 | verifier = bp->blk_cksum; | |
236 | ||
428870ff | 237 | byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); |
34dc7c2f | 238 | |
b128c09f BB |
239 | if (byteswap) |
240 | byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); | |
241 | ||
428870ff BB |
242 | expected_cksum = eck->zec_cksum; |
243 | eck->zec_cksum = verifier; | |
b128c09f | 244 | ci->ci_func[byteswap](data, size, &actual_cksum); |
428870ff | 245 | eck->zec_cksum = expected_cksum; |
b128c09f BB |
246 | |
247 | if (byteswap) | |
34dc7c2f BB |
248 | byteswap_uint64_array(&expected_cksum, |
249 | sizeof (zio_cksum_t)); | |
34dc7c2f BB |
250 | } else { |
251 | ASSERT(!BP_IS_GANG(bp)); | |
b128c09f BB |
252 | byteswap = BP_SHOULD_BYTESWAP(bp); |
253 | expected_cksum = bp->blk_cksum; | |
34dc7c2f BB |
254 | ci->ci_func[byteswap](data, size, &actual_cksum); |
255 | } | |
256 | ||
428870ff BB |
257 | info->zbc_expected = expected_cksum; |
258 | info->zbc_actual = actual_cksum; | |
259 | info->zbc_checksum_name = ci->ci_name; | |
260 | info->zbc_byteswapped = byteswap; | |
261 | info->zbc_injected = 0; | |
262 | info->zbc_has_cksum = 1; | |
263 | ||
b128c09f | 264 | if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) |
2e528b49 | 265 | return (SET_ERROR(ECKSUM)); |
34dc7c2f | 266 | |
428870ff BB |
267 | if (zio_injection_enabled && !zio->io_error && |
268 | (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { | |
269 | ||
270 | info->zbc_injected = 1; | |
271 | return (error); | |
272 | } | |
34dc7c2f BB |
273 | |
274 | return (0); | |
275 | } |