]>
Commit | Line | Data |
---|---|---|
428870ff BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
428870ff BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
23 | * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. | |
d9b4bf06 | 24 | * Copyright (c) 2018 by Delphix. All rights reserved. |
428870ff BB |
25 | */ |
26 | ||
27 | #include <sys/zfs_context.h> | |
28 | #include <sys/spa.h> | |
29 | #include <sys/zio.h> | |
30 | #include <sys/ddt.h> | |
8e414fcd | 31 | #include <sys/ddt_impl.h> |
428870ff BB |
32 | #include <sys/zap.h> |
33 | #include <sys/dmu_tx.h> | |
86e91c03 | 34 | #include <sys/zio_compress.h> |
428870ff | 35 | |
2b10e325 RE |
/*
 * Block shifts handed to zap_create_flags() when a new DDT ZAP object is
 * created: the first is the leaf block shift, the second the indirect
 * block shift.  Both are registered as module parameters at the end of
 * this file.
 */
static unsigned int ddt_zap_default_bs = 15;
static unsigned int ddt_zap_default_ibs = 15;
428870ff | 38 | |
86e91c03 RN |
/*
 * Layout of the one-byte header that precedes every stored DDT entry:
 * the top bit is set when ZFS_HOST_BYTEORDER was non-zero on the writer,
 * and the low seven bits hold the index into zio_compress_table of the
 * function used to encode the payload.
 */
#define	DDT_ZAP_COMPRESS_BYTEORDER_MASK	0x80
#define	DDT_ZAP_COMPRESS_FUNCTION_MASK	0x7f

/* Length of a ddt_key_t in uint64_t words, as passed to the zap_*_uint64 calls. */
#define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))
43 | ||
44 | static size_t | |
9029278d | 45 | ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len) |
86e91c03 RN |
46 | { |
47 | uchar_t *version = dst++; | |
48 | int cpfunc = ZIO_COMPRESS_ZLE; | |
49 | zio_compress_info_t *ci = &zio_compress_table[cpfunc]; | |
50 | size_t c_len; | |
51 | ||
52 | ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */ | |
53 | ||
9029278d RN |
54 | c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1, |
55 | ci->ci_level); | |
86e91c03 RN |
56 | |
57 | if (c_len == s_len) { | |
58 | cpfunc = ZIO_COMPRESS_OFF; | |
59 | memcpy(dst, src, s_len); | |
60 | } | |
61 | ||
62 | *version = cpfunc; | |
63 | if (ZFS_HOST_BYTEORDER) | |
64 | *version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK; | |
65 | ||
66 | return (c_len + 1); | |
67 | } | |
68 | ||
69 | static void | |
70 | ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) | |
71 | { | |
72 | uchar_t version = *src++; | |
73 | int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK; | |
74 | zio_compress_info_t *ci = &zio_compress_table[cpfunc]; | |
75 | ||
76 | if (ci->ci_decompress != NULL) | |
77 | (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); | |
78 | else | |
79 | memcpy(dst, src, d_len); | |
80 | ||
81 | if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) != | |
82 | (ZFS_HOST_BYTEORDER != 0)) | |
83 | byteswap_uint64_array(dst, d_len); | |
84 | } | |
85 | ||
428870ff BB |
86 | static int |
87 | ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) | |
88 | { | |
89 | zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; | |
90 | ||
91 | if (prehash) | |
92 | flags |= ZAP_FLAG_PRE_HASHED_KEY; | |
93 | ||
94 | *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, | |
2b10e325 | 95 | ddt_zap_default_bs, ddt_zap_default_ibs, |
428870ff | 96 | DMU_OT_NONE, 0, tx); |
9029278d RN |
97 | if (*objectp == 0) |
98 | return (SET_ERROR(ENOTSUP)); | |
428870ff | 99 | |
9029278d | 100 | return (0); |
428870ff BB |
101 | } |
102 | ||
103 | static int | |
104 | ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) | |
105 | { | |
106 | return (zap_destroy(os, object, tx)); | |
107 | } | |
108 | ||
109 | static int | |
9029278d RN |
110 | ddt_zap_lookup(objset_t *os, uint64_t object, |
111 | const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize) | |
428870ff | 112 | { |
5b8c7bbc | 113 | uchar_t *cbuf; |
428870ff BB |
114 | uint64_t one, csize; |
115 | int error; | |
116 | ||
9029278d | 117 | error = zap_length_uint64(os, object, (uint64_t *)ddk, |
428870ff BB |
118 | DDT_KEY_WORDS, &one, &csize); |
119 | if (error) | |
5c4cc21f | 120 | return (error); |
428870ff | 121 | |
d3bafe45 | 122 | ASSERT3U(one, ==, 1); |
9029278d | 123 | ASSERT3U(csize, <=, psize + 1); |
428870ff | 124 | |
5c4cc21f RN |
125 | cbuf = kmem_alloc(csize, KM_SLEEP); |
126 | ||
9029278d | 127 | error = zap_lookup_uint64(os, object, (uint64_t *)ddk, |
428870ff | 128 | DDT_KEY_WORDS, 1, csize, cbuf); |
5c4cc21f | 129 | if (error == 0) |
9029278d | 130 | ddt_zap_decompress(cbuf, phys, csize, psize); |
428870ff | 131 | |
5c4cc21f | 132 | kmem_free(cbuf, csize); |
428870ff | 133 | |
5b8c7bbc | 134 | return (error); |
428870ff BB |
135 | } |
136 | ||
9029278d RN |
137 | static int |
138 | ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk) | |
139 | { | |
140 | return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, | |
141 | NULL, NULL)); | |
142 | } | |
143 | ||
428870ff | 144 | static void |
9029278d | 145 | ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) |
428870ff | 146 | { |
9029278d | 147 | (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); |
428870ff BB |
148 | } |
149 | ||
150 | static int | |
9029278d RN |
151 | ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, |
152 | const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx) | |
428870ff | 153 | { |
9029278d | 154 | const size_t cbuf_size = psize + 1; |
5c4cc21f RN |
155 | |
156 | uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP); | |
157 | ||
9029278d | 158 | uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size); |
5c4cc21f | 159 | |
9029278d | 160 | int error = zap_update_uint64(os, object, (uint64_t *)ddk, |
5c4cc21f | 161 | DDT_KEY_WORDS, 1, csize, cbuf, tx); |
428870ff | 162 | |
5c4cc21f | 163 | kmem_free(cbuf, cbuf_size); |
428870ff | 164 | |
5c4cc21f | 165 | return (error); |
428870ff BB |
166 | } |
167 | ||
168 | static int | |
9029278d RN |
169 | ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk, |
170 | dmu_tx_t *tx) | |
428870ff | 171 | { |
9029278d | 172 | return (zap_remove_uint64(os, object, (uint64_t *)ddk, |
428870ff BB |
173 | DDT_KEY_WORDS, tx)); |
174 | } | |
175 | ||
176 | static int | |
9029278d RN |
177 | ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, |
178 | ddt_phys_t *phys, size_t psize) | |
428870ff BB |
179 | { |
180 | zap_cursor_t zc; | |
181 | zap_attribute_t za; | |
182 | int error; | |
183 | ||
d9b4bf06 MA |
184 | if (*walk == 0) { |
185 | /* | |
186 | * We don't want to prefetch the entire ZAP object, because | |
187 | * it can be enormous. Also the primary use of DDT iteration | |
188 | * is for scrubbing, in which case we will be issuing many | |
189 | * scrub I/Os for each ZAP block that we read in, so | |
190 | * reading the ZAP is unlikely to be the bottleneck. | |
191 | */ | |
192 | zap_cursor_init_noprefetch(&zc, os, object); | |
193 | } else { | |
194 | zap_cursor_init_serialized(&zc, os, object, *walk); | |
195 | } | |
428870ff | 196 | if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { |
428870ff | 197 | uint64_t csize = za.za_num_integers; |
5c4cc21f | 198 | |
d3bafe45 | 199 | ASSERT3U(za.za_integer_length, ==, 1); |
9029278d | 200 | ASSERT3U(csize, <=, psize + 1); |
5c4cc21f RN |
201 | |
202 | uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); | |
203 | ||
428870ff BB |
204 | error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name, |
205 | DDT_KEY_WORDS, 1, csize, cbuf); | |
d3bafe45 | 206 | ASSERT0(error); |
428870ff | 207 | if (error == 0) { |
9029278d RN |
208 | ddt_zap_decompress(cbuf, phys, csize, psize); |
209 | *ddk = *(ddt_key_t *)za.za_name; | |
428870ff | 210 | } |
5c4cc21f RN |
211 | |
212 | kmem_free(cbuf, csize); | |
213 | ||
428870ff BB |
214 | zap_cursor_advance(&zc); |
215 | *walk = zap_cursor_serialize(&zc); | |
216 | } | |
217 | zap_cursor_fini(&zc); | |
218 | return (error); | |
219 | } | |
220 | ||
e8fd45a0 BB |
221 | static int |
222 | ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) | |
428870ff | 223 | { |
d1d7e268 | 224 | return (zap_count(os, object, count)); |
428870ff BB |
225 | } |
226 | ||
/*
 * Operations vector for the ZAP-backed DDT storage.  The initializer is
 * positional, so the entries must stay in the order in which the members
 * of ddt_ops_t are declared (NOTE(review): the declaration is not in this
 * file, presumably sys/ddt_impl.h; confirm before reordering).
 */
const ddt_ops_t ddt_zap_ops = {
	"zap",
	ddt_zap_create,
	ddt_zap_destroy,
	ddt_zap_lookup,
	ddt_zap_contains,
	ddt_zap_prefetch,
	ddt_zap_update,
	ddt_zap_remove,
	ddt_zap_walk,
	ddt_zap_count,
};
2b10e325 RE |
239 | |
/*
 * Register the two block-shift variables used by ddt_zap_create() as
 * zfs_dedup module parameters.
 */
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,
	"DDT ZAP leaf blockshift");
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,
	"DDT ZAP indirect blockshift");
/* END CSTYLED */