]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
22cd4a46 | 23 | * Copyright 2011 Nexenta Systems, Inc. All rights reserved. |
4747a7d3 | 24 | * Copyright (c) 2012, 2017 by Delphix. All rights reserved. |
22cd4a46 | 25 | */ |
34dc7c2f | 26 | |
34dc7c2f BB |
27 | #include <sys/dmu.h> |
28 | #include <sys/dmu_impl.h> | |
29 | #include <sys/dbuf.h> | |
30 | #include <sys/dmu_tx.h> | |
31 | #include <sys/dmu_objset.h> | |
3ec3bc21 BB |
32 | #include <sys/dsl_dataset.h> |
33 | #include <sys/dsl_dir.h> | |
34dc7c2f | 34 | #include <sys/dsl_pool.h> |
3ec3bc21 | 35 | #include <sys/zap_impl.h> |
34dc7c2f | 36 | #include <sys/spa.h> |
428870ff BB |
37 | #include <sys/sa.h> |
38 | #include <sys/sa_impl.h> | |
34dc7c2f | 39 | #include <sys/zfs_context.h> |
49ee64e5 | 40 | #include <sys/trace_dmu.h> |
34dc7c2f BB |
41 | |
/*
 * Function-pointer type taking a transaction, a dnode and two 64-bit
 * arguments.  NOTE(review): presumably a per-hold-type callback; no user
 * of this typedef is visible in this part of the file -- confirm.
 */
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
    uint64_t arg1, uint64_t arg2);

/*
 * Named 64-bit kstat counters for the transaction code.  The initializers
 * are positional, so the order of the entries must match the member order
 * of dmu_tx_stats_t (declared elsewhere).
 */
dmu_tx_stats_t dmu_tx_stats = {
	{ "dmu_tx_assigned",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_delay",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_error",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_suspended",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_group",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_memory_reserve",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_memory_reclaim",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_throttle",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_delay",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_over_max",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_frees_delay",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_quota",		KSTAT_DATA_UINT64 },
};

/*
 * kstat handle.  NOTE(review): presumably the kstat that exports
 * dmu_tx_stats; it is not assigned in this part of the file -- confirm.
 */
static kstat_t *dmu_tx_ksp;
34dc7c2f BB |
61 | |
62 | dmu_tx_t * | |
63 | dmu_tx_create_dd(dsl_dir_t *dd) | |
64 | { | |
79c76d5b | 65 | dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); |
34dc7c2f | 66 | tx->tx_dir = dd; |
6f1ffb06 | 67 | if (dd != NULL) |
34dc7c2f BB |
68 | tx->tx_pool = dd->dd_pool; |
69 | list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), | |
70 | offsetof(dmu_tx_hold_t, txh_node)); | |
428870ff BB |
71 | list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t), |
72 | offsetof(dmu_tx_callback_t, dcb_node)); | |
e8b96c60 | 73 | tx->tx_start = gethrtime(); |
34dc7c2f BB |
74 | return (tx); |
75 | } | |
76 | ||
77 | dmu_tx_t * | |
78 | dmu_tx_create(objset_t *os) | |
79 | { | |
428870ff | 80 | dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir); |
34dc7c2f | 81 | tx->tx_objset = os; |
34dc7c2f BB |
82 | return (tx); |
83 | } | |
84 | ||
85 | dmu_tx_t * | |
86 | dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg) | |
87 | { | |
88 | dmu_tx_t *tx = dmu_tx_create_dd(NULL); | |
89 | ||
8c4fb36a | 90 | TXG_VERIFY(dp->dp_spa, txg); |
34dc7c2f BB |
91 | tx->tx_pool = dp; |
92 | tx->tx_txg = txg; | |
93 | tx->tx_anyobj = TRUE; | |
94 | ||
95 | return (tx); | |
96 | } | |
97 | ||
98 | int | |
99 | dmu_tx_is_syncing(dmu_tx_t *tx) | |
100 | { | |
101 | return (tx->tx_anyobj); | |
102 | } | |
103 | ||
104 | int | |
105 | dmu_tx_private_ok(dmu_tx_t *tx) | |
106 | { | |
107 | return (tx->tx_anyobj); | |
108 | } | |
109 | ||
110 | static dmu_tx_hold_t * | |
0eef1bde | 111 | dmu_tx_hold_dnode_impl(dmu_tx_t *tx, dnode_t *dn, enum dmu_tx_hold_type type, |
112 | uint64_t arg1, uint64_t arg2) | |
34dc7c2f BB |
113 | { |
114 | dmu_tx_hold_t *txh; | |
34dc7c2f | 115 | |
0eef1bde | 116 | if (dn != NULL) { |
c13060e4 | 117 | (void) zfs_refcount_add(&dn->dn_holds, tx); |
0eef1bde | 118 | if (tx->tx_txg != 0) { |
34dc7c2f BB |
119 | mutex_enter(&dn->dn_mtx); |
120 | /* | |
121 | * dn->dn_assigned_txg == tx->tx_txg doesn't pose a | |
122 | * problem, but there's no way for it to happen (for | |
123 | * now, at least). | |
124 | */ | |
125 | ASSERT(dn->dn_assigned_txg == 0); | |
126 | dn->dn_assigned_txg = tx->tx_txg; | |
c13060e4 | 127 | (void) zfs_refcount_add(&dn->dn_tx_holds, tx); |
34dc7c2f BB |
128 | mutex_exit(&dn->dn_mtx); |
129 | } | |
130 | } | |
131 | ||
79c76d5b | 132 | txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP); |
34dc7c2f BB |
133 | txh->txh_tx = tx; |
134 | txh->txh_dnode = dn; | |
424fd7c3 TS |
135 | zfs_refcount_create(&txh->txh_space_towrite); |
136 | zfs_refcount_create(&txh->txh_memory_tohold); | |
34dc7c2f BB |
137 | txh->txh_type = type; |
138 | txh->txh_arg1 = arg1; | |
139 | txh->txh_arg2 = arg2; | |
34dc7c2f BB |
140 | list_insert_tail(&tx->tx_holds, txh); |
141 | ||
142 | return (txh); | |
143 | } | |
144 | ||
0eef1bde | 145 | static dmu_tx_hold_t * |
146 | dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object, | |
147 | enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2) | |
148 | { | |
149 | dnode_t *dn = NULL; | |
150 | dmu_tx_hold_t *txh; | |
151 | int err; | |
152 | ||
153 | if (object != DMU_NEW_OBJECT) { | |
154 | err = dnode_hold(os, object, FTAG, &dn); | |
66eead53 | 155 | if (err != 0) { |
0eef1bde | 156 | tx->tx_err = err; |
157 | return (NULL); | |
158 | } | |
159 | } | |
160 | txh = dmu_tx_hold_dnode_impl(tx, dn, type, arg1, arg2); | |
161 | if (dn != NULL) | |
162 | dnode_rele(dn, FTAG); | |
163 | return (txh); | |
164 | } | |
165 | ||
34dc7c2f | 166 | void |
66eead53 | 167 | dmu_tx_add_new_object(dmu_tx_t *tx, dnode_t *dn) |
34dc7c2f BB |
168 | { |
169 | /* | |
170 | * If we're syncing, they can manipulate any object anyhow, and | |
171 | * the hold on the dnode_t can cause problems. | |
172 | */ | |
0eef1bde | 173 | if (!dmu_tx_is_syncing(tx)) |
174 | (void) dmu_tx_hold_dnode_impl(tx, dn, THT_NEWOBJECT, 0, 0); | |
34dc7c2f BB |
175 | } |
176 | ||
3ec3bc21 BB |
177 | /* |
178 | * This function reads specified data from disk. The specified data will | |
179 | * be needed to perform the transaction -- i.e., it will be read after |
180 | * we do dmu_tx_assign(). There are two reasons that we read the data now | |
181 | * (before dmu_tx_assign()): | |
182 | * | |
183 | * 1. Reading it now has potentially better performance. The transaction | |
184 | * has not yet been assigned, so the TXG is not held open, and also the | |
185 | * caller typically has fewer locks held when calling dmu_tx_hold_*() than |
186 | * after the transaction has been assigned. This reduces the lock (and txg) | |
187 | * hold times, thus reducing lock contention. | |
188 | * | |
189 | * 2. It is easier for callers (primarily the ZPL) to handle i/o errors | |
190 | * that are detected before they start making changes to the DMU state | |
191 | * (i.e. now). Once the transaction has been assigned, and some DMU | |
192 | * state has been changed, it can be difficult to recover from an i/o | |
193 | * error (e.g. to undo the changes already made in memory at the DMU | |
194 | * layer). Typically code to do so does not exist in the caller -- it | |
195 | * assumes that the data has already been cached and thus i/o errors are | |
196 | * not possible. | |
197 | * | |
198 | * It has been observed that the i/o initiated here can be a performance | |
199 | * problem, and it appears to be optional, because we don't look at the | |
200 | * data which is read. However, removing this read would only serve to | |
201 | * move the work elsewhere (after the dmu_tx_assign()), where it may | |
202 | * have a greater impact on performance (in addition to the impact on | |
203 | * fault tolerance noted above). | |
204 | */ | |
34dc7c2f BB |
205 | static int |
206 | dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid) | |
207 | { | |
208 | int err; | |
209 | dmu_buf_impl_t *db; | |
210 | ||
211 | rw_enter(&dn->dn_struct_rwlock, RW_READER); | |
212 | db = dbuf_hold_level(dn, level, blkid, FTAG); | |
213 | rw_exit(&dn->dn_struct_rwlock); | |
214 | if (db == NULL) | |
2e528b49 | 215 | return (SET_ERROR(EIO)); |
34dc7c2f BB |
216 | err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH); |
217 | dbuf_rele(db, FTAG); | |
218 | return (err); | |
219 | } | |
220 | ||
221 | /* ARGSUSED */ | |
222 | static void | |
223 | dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) | |
224 | { | |
225 | dnode_t *dn = txh->txh_dnode; | |
34dc7c2f BB |
226 | int err = 0; |
227 | ||
228 | if (len == 0) | |
229 | return; | |
230 | ||
424fd7c3 | 231 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG); |
34dc7c2f | 232 | |
424fd7c3 | 233 | if (zfs_refcount_count(&txh->txh_space_towrite) > 2 * DMU_MAX_ACCESS) |
3ec3bc21 | 234 | err = SET_ERROR(EFBIG); |
34dc7c2f | 235 | |
3ec3bc21 BB |
236 | if (dn == NULL) |
237 | return; | |
34dc7c2f | 238 | |
3ec3bc21 BB |
239 | /* |
240 | * For i/o error checking, read the blocks that will be needed | |
241 | * to perform the write: the first and last level-0 blocks (if | |
242 | * they are not aligned, i.e. if they are partial-block writes), | |
243 | * and all the level-1 blocks. | |
244 | */ | |
245 | if (dn->dn_maxblkid == 0) { | |
246 | if (off < dn->dn_datablksz && | |
247 | (off > 0 || len < dn->dn_datablksz)) { | |
248 | err = dmu_tx_check_ioerr(NULL, dn, 0, 0); | |
249 | if (err != 0) { | |
250 | txh->txh_tx->tx_err = err; | |
34dc7c2f | 251 | } |
9babb374 | 252 | } |
3ec3bc21 BB |
253 | } else { |
254 | zio_t *zio = zio_root(dn->dn_objset->os_spa, | |
255 | NULL, NULL, ZIO_FLAG_CANFAIL); | |
9babb374 | 256 | |
3ec3bc21 BB |
257 | /* first level-0 block */ |
258 | uint64_t start = off >> dn->dn_datablkshift; | |
259 | if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) { | |
260 | err = dmu_tx_check_ioerr(zio, dn, 0, start); | |
261 | if (err != 0) { | |
262 | txh->txh_tx->tx_err = err; | |
263 | } | |
428870ff | 264 | } |
428870ff | 265 | |
3ec3bc21 BB |
266 | /* last level-0 block */ |
267 | uint64_t end = (off + len - 1) >> dn->dn_datablkshift; | |
268 | if (end != start && end <= dn->dn_maxblkid && | |
269 | P2PHASE(off + len, dn->dn_datablksz)) { | |
270 | err = dmu_tx_check_ioerr(zio, dn, 0, end); | |
271 | if (err != 0) { | |
428870ff | 272 | txh->txh_tx->tx_err = err; |
9babb374 | 273 | } |
3ec3bc21 | 274 | } |
428870ff | 275 | |
3ec3bc21 BB |
276 | /* level-1 blocks */ |
277 | if (dn->dn_nlevels > 1) { | |
278 | int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT; | |
279 | for (uint64_t i = (start >> shft) + 1; | |
280 | i < end >> shft; i++) { | |
281 | err = dmu_tx_check_ioerr(zio, dn, 1, i); | |
282 | if (err != 0) { | |
283 | txh->txh_tx->tx_err = err; | |
284 | } | |
9babb374 | 285 | } |
9babb374 | 286 | } |
34dc7c2f | 287 | |
3ec3bc21 BB |
288 | err = zio_wait(zio); |
289 | if (err != 0) { | |
290 | txh->txh_tx->tx_err = err; | |
9babb374 | 291 | } |
34dc7c2f | 292 | } |
34dc7c2f BB |
293 | } |
294 | ||
295 | static void | |
296 | dmu_tx_count_dnode(dmu_tx_hold_t *txh) | |
297 | { | |
424fd7c3 TS |
298 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, |
299 | DNODE_MIN_SIZE, FTAG); | |
34dc7c2f BB |
300 | } |
301 | ||
302 | void | |
303 | dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len) | |
304 | { | |
305 | dmu_tx_hold_t *txh; | |
306 | ||
66eead53 MA |
307 | ASSERT0(tx->tx_txg); |
308 | ASSERT3U(len, <=, DMU_MAX_ACCESS); | |
34dc7c2f BB |
309 | ASSERT(len == 0 || UINT64_MAX - off >= len - 1); |
310 | ||
311 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
312 | object, THT_WRITE, off, len); | |
66eead53 MA |
313 | if (txh != NULL) { |
314 | dmu_tx_count_write(txh, off, len); | |
315 | dmu_tx_count_dnode(txh); | |
316 | } | |
34dc7c2f BB |
317 | } |
318 | ||
a1d477c2 MA |
319 | void |
320 | dmu_tx_hold_remap_l1indirect(dmu_tx_t *tx, uint64_t object) | |
321 | { | |
322 | dmu_tx_hold_t *txh; | |
323 | ||
324 | ASSERT(tx->tx_txg == 0); | |
325 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
326 | object, THT_WRITE, 0, 0); | |
327 | if (txh == NULL) | |
328 | return; | |
329 | ||
330 | dnode_t *dn = txh->txh_dnode; | |
424fd7c3 | 331 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, |
a1d477c2 MA |
332 | 1ULL << dn->dn_indblkshift, FTAG); |
333 | dmu_tx_count_dnode(txh); | |
334 | } | |
335 | ||
0eef1bde | 336 | void |
337 | dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len) | |
338 | { | |
339 | dmu_tx_hold_t *txh; | |
340 | ||
66eead53 MA |
341 | ASSERT0(tx->tx_txg); |
342 | ASSERT3U(len, <=, DMU_MAX_ACCESS); | |
0eef1bde | 343 | ASSERT(len == 0 || UINT64_MAX - off >= len - 1); |
344 | ||
345 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_WRITE, off, len); | |
66eead53 MA |
346 | if (txh != NULL) { |
347 | dmu_tx_count_write(txh, off, len); | |
348 | dmu_tx_count_dnode(txh); | |
349 | } | |
0eef1bde | 350 | } |
351 | ||
19d55079 MA |
352 | /* |
353 | * This function marks the transaction as being a "net free". The end | |
354 | * result is that refquotas will be disabled for this transaction, and | |
355 | * this transaction will be able to use half of the pool space overhead | |
356 | * (see dsl_pool_adjustedsize()). Therefore this function should only | |
357 | * be called for transactions that we expect will not cause a net increase | |
358 | * in the amount of space used (but it's OK if that is occasionally not true). | |
359 | */ | |
360 | void | |
361 | dmu_tx_mark_netfree(dmu_tx_t *tx) | |
362 | { | |
3ec3bc21 | 363 | tx->tx_netfree = B_TRUE; |
19d55079 MA |
364 | } |
365 | ||
0eef1bde | 366 | static void |
367 | dmu_tx_hold_free_impl(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) | |
34dc7c2f | 368 | { |
3ec3bc21 BB |
369 | dmu_tx_t *tx = txh->txh_tx; |
370 | dnode_t *dn = txh->txh_dnode; | |
ea97f8ce | 371 | int err; |
34dc7c2f BB |
372 | |
373 | ASSERT(tx->tx_txg == 0); | |
374 | ||
e8b96c60 | 375 | dmu_tx_count_dnode(txh); |
34dc7c2f | 376 | |
3ec3bc21 | 377 | if (off >= (dn->dn_maxblkid + 1) * dn->dn_datablksz) |
34dc7c2f BB |
378 | return; |
379 | if (len == DMU_OBJECT_END) | |
3ec3bc21 | 380 | len = (dn->dn_maxblkid + 1) * dn->dn_datablksz - off; |
34dc7c2f | 381 | |
ea97f8ce MA |
382 | dmu_tx_count_dnode(txh); |
383 | ||
34dc7c2f | 384 | /* |
ea97f8ce MA |
385 | * For i/o error checking, we read the first and last level-0 |
386 | * blocks if they are not aligned, and all the level-1 blocks. | |
387 | * | |
388 | * Note: dbuf_free_range() assumes that we have not instantiated | |
389 | * any level-0 dbufs that will be completely freed. Therefore we must | |
390 | * exercise care to not read or count the first and last blocks | |
391 | * if they are blocksize-aligned. | |
392 | */ | |
393 | if (dn->dn_datablkshift == 0) { | |
b663a23d | 394 | if (off != 0 || len < dn->dn_datablksz) |
92bc214c | 395 | dmu_tx_count_write(txh, 0, dn->dn_datablksz); |
ea97f8ce MA |
396 | } else { |
397 | /* first block will be modified if it is not aligned */ | |
398 | if (!IS_P2ALIGNED(off, 1 << dn->dn_datablkshift)) | |
399 | dmu_tx_count_write(txh, off, 1); | |
400 | /* last block will be modified if it is not aligned */ | |
401 | if (!IS_P2ALIGNED(off + len, 1 << dn->dn_datablkshift)) | |
3ec3bc21 | 402 | dmu_tx_count_write(txh, off + len, 1); |
ea97f8ce MA |
403 | } |
404 | ||
405 | /* | |
406 | * Check level-1 blocks. | |
34dc7c2f BB |
407 | */ |
408 | if (dn->dn_nlevels > 1) { | |
ea97f8ce | 409 | int shift = dn->dn_datablkshift + dn->dn_indblkshift - |
34dc7c2f | 410 | SPA_BLKPTRSHIFT; |
ea97f8ce MA |
411 | uint64_t start = off >> shift; |
412 | uint64_t end = (off + len) >> shift; | |
ea97f8ce | 413 | |
ea97f8ce | 414 | ASSERT(dn->dn_indblkshift != 0); |
34dc7c2f | 415 | |
2e7b7657 MA |
416 | /* |
417 | * dnode_reallocate() can result in an object with indirect | |
418 | * blocks having an odd data block size. In this case, | |
419 | * just check the single block. | |
420 | */ | |
421 | if (dn->dn_datablkshift == 0) | |
422 | start = end = 0; | |
423 | ||
3ec3bc21 | 424 | zio_t *zio = zio_root(tx->tx_pool->dp_spa, |
34dc7c2f | 425 | NULL, NULL, ZIO_FLAG_CANFAIL); |
1c27024e | 426 | for (uint64_t i = start; i <= end; i++) { |
34dc7c2f | 427 | uint64_t ibyte = i << shift; |
b128c09f | 428 | err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0); |
34dc7c2f | 429 | i = ibyte >> shift; |
4bda3bd0 | 430 | if (err == ESRCH || i > end) |
34dc7c2f | 431 | break; |
3ec3bc21 | 432 | if (err != 0) { |
34dc7c2f | 433 | tx->tx_err = err; |
3ec3bc21 | 434 | (void) zio_wait(zio); |
34dc7c2f BB |
435 | return; |
436 | } | |
437 | ||
424fd7c3 | 438 | (void) zfs_refcount_add_many(&txh->txh_memory_tohold, |
3ec3bc21 BB |
439 | 1 << dn->dn_indblkshift, FTAG); |
440 | ||
34dc7c2f | 441 | err = dmu_tx_check_ioerr(zio, dn, 1, i); |
3ec3bc21 | 442 | if (err != 0) { |
34dc7c2f | 443 | tx->tx_err = err; |
3ec3bc21 | 444 | (void) zio_wait(zio); |
34dc7c2f BB |
445 | return; |
446 | } | |
447 | } | |
448 | err = zio_wait(zio); | |
3ec3bc21 | 449 | if (err != 0) { |
34dc7c2f BB |
450 | tx->tx_err = err; |
451 | return; | |
452 | } | |
453 | } | |
34dc7c2f BB |
454 | } |
455 | ||
456 | void | |
0eef1bde | 457 | dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len) |
458 | { | |
459 | dmu_tx_hold_t *txh; | |
460 | ||
461 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
462 | object, THT_FREE, off, len); | |
66eead53 MA |
463 | if (txh != NULL) |
464 | (void) dmu_tx_hold_free_impl(txh, off, len); | |
0eef1bde | 465 | } |
466 | ||
467 | void | |
468 | dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len) | |
34dc7c2f BB |
469 | { |
470 | dmu_tx_hold_t *txh; | |
0eef1bde | 471 | |
472 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_FREE, off, len); | |
66eead53 MA |
473 | if (txh != NULL) |
474 | (void) dmu_tx_hold_free_impl(txh, off, len); | |
0eef1bde | 475 | } |
476 | ||
477 | static void | |
9522bd24 | 478 | dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name) |
0eef1bde | 479 | { |
480 | dmu_tx_t *tx = txh->txh_tx; | |
3ec3bc21 | 481 | dnode_t *dn = txh->txh_dnode; |
f85c06be | 482 | int err; |
34dc7c2f BB |
483 | |
484 | ASSERT(tx->tx_txg == 0); | |
485 | ||
34dc7c2f BB |
486 | dmu_tx_count_dnode(txh); |
487 | ||
3ec3bc21 BB |
488 | /* |
489 | * Modifying a almost-full microzap is around the worst case (128KB) | |
490 | * | |
491 | * If it is a fat zap, the worst case would be 7*16KB=112KB: | |
492 | * - 3 blocks overwritten: target leaf, ptrtbl block, header block | |
493 | * - 4 new blocks written if adding: | |
494 | * - 2 blocks for possibly split leaves, | |
495 | * - 2 grown ptrtbl blocks | |
496 | */ | |
424fd7c3 | 497 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, |
3ec3bc21 BB |
498 | MZAP_MAX_BLKSZ, FTAG); |
499 | ||
500 | if (dn == NULL) | |
34dc7c2f | 501 | return; |
34dc7c2f | 502 | |
9ae529ec | 503 | ASSERT3U(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP); |
34dc7c2f | 504 | |
3ec3bc21 | 505 | if (dn->dn_maxblkid == 0 || name == NULL) { |
34dc7c2f | 506 | /* |
3ec3bc21 BB |
507 | * This is a microzap (only one block), or we don't know |
508 | * the name. Check the first block for i/o errors. | |
34dc7c2f BB |
509 | */ |
510 | err = dmu_tx_check_ioerr(NULL, dn, 0, 0); | |
3ec3bc21 | 511 | if (err != 0) { |
34dc7c2f | 512 | tx->tx_err = err; |
f85c06be | 513 | } |
3ec3bc21 | 514 | } else { |
34dc7c2f | 515 | /* |
3ec3bc21 BB |
516 | * Access the name so that we'll check for i/o errors to |
517 | * the leaf blocks, etc. We ignore ENOENT, as this name | |
518 | * may not yet exist. | |
34dc7c2f | 519 | */ |
2bce8049 | 520 | err = zap_lookup_by_dnode(dn, name, 8, 0, NULL); |
3ec3bc21 | 521 | if (err == EIO || err == ECKSUM || err == ENXIO) { |
34dc7c2f | 522 | tx->tx_err = err; |
f85c06be GM |
523 | } |
524 | } | |
34dc7c2f BB |
525 | } |
526 | ||
0eef1bde | 527 | void |
528 | dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) | |
529 | { | |
530 | dmu_tx_hold_t *txh; | |
531 | ||
66eead53 | 532 | ASSERT0(tx->tx_txg); |
0eef1bde | 533 | |
534 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
535 | object, THT_ZAP, add, (uintptr_t)name); | |
66eead53 | 536 | if (txh != NULL) |
9522bd24 | 537 | dmu_tx_hold_zap_impl(txh, name); |
0eef1bde | 538 | } |
539 | ||
540 | void | |
541 | dmu_tx_hold_zap_by_dnode(dmu_tx_t *tx, dnode_t *dn, int add, const char *name) | |
542 | { | |
543 | dmu_tx_hold_t *txh; | |
544 | ||
66eead53 | 545 | ASSERT0(tx->tx_txg); |
0eef1bde | 546 | ASSERT(dn != NULL); |
547 | ||
548 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_ZAP, add, (uintptr_t)name); | |
66eead53 | 549 | if (txh != NULL) |
9522bd24 | 550 | dmu_tx_hold_zap_impl(txh, name); |
0eef1bde | 551 | } |
552 | ||
34dc7c2f BB |
553 | void |
554 | dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object) | |
555 | { | |
556 | dmu_tx_hold_t *txh; | |
557 | ||
558 | ASSERT(tx->tx_txg == 0); | |
559 | ||
560 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
561 | object, THT_BONUS, 0, 0); | |
562 | if (txh) | |
563 | dmu_tx_count_dnode(txh); | |
564 | } | |
565 | ||
0eef1bde | 566 | void |
567 | dmu_tx_hold_bonus_by_dnode(dmu_tx_t *tx, dnode_t *dn) | |
568 | { | |
569 | dmu_tx_hold_t *txh; | |
570 | ||
66eead53 | 571 | ASSERT0(tx->tx_txg); |
0eef1bde | 572 | |
573 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_BONUS, 0, 0); | |
574 | if (txh) | |
575 | dmu_tx_count_dnode(txh); | |
576 | } | |
577 | ||
34dc7c2f BB |
578 | void |
579 | dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space) | |
580 | { | |
581 | dmu_tx_hold_t *txh; | |
7d637211 | 582 | |
34dc7c2f BB |
583 | ASSERT(tx->tx_txg == 0); |
584 | ||
585 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
586 | DMU_NEW_OBJECT, THT_SPACE, space, 0); | |
424fd7c3 TS |
587 | if (txh) { |
588 | (void) zfs_refcount_add_many( | |
589 | &txh->txh_space_towrite, space, FTAG); | |
590 | } | |
34dc7c2f BB |
591 | } |
592 | ||
#ifdef ZFS_DEBUG
/*
 * Debug-only check that dirtying 'db' is covered by the transaction.
 *
 * The transaction's holds are scanned for one that matches the dbuf's
 * object and one that matches its block (by hold type and, for write and
 * free holds, by the block range derived from txh_arg1/txh_arg2).  If no
 * combination of holds covers the dbuf, the system panics.  Transactions
 * flagged tx_anyobj and dbufs of the meta dnode are not checked.
 */
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
	boolean_t match_object = B_FALSE;
	boolean_t match_offset = B_FALSE;

	DB_DNODE_ENTER(db);
	dnode_t *dn = DB_DNODE(db);
	ASSERT(tx->tx_txg != 0);
	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
	ASSERT3U(dn->dn_object, ==, db->db.db_object);

	if (tx->tx_anyobj) {
		DB_DNODE_EXIT(db);
		return;
	}

	/* XXX No checking on the meta dnode for now */
	if (db->db.db_object == DMU_META_DNODE_OBJECT) {
		DB_DNODE_EXIT(db);
		return;
	}

	for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
	    txh = list_next(&tx->tx_holds, txh)) {
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
		if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
			match_object = B_TRUE;
		if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
			int datablkshift = dn->dn_datablkshift ?
			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			int shift = datablkshift + epbs * db->db_level;
			/* A shift of 64 or more would be undefined; use 0. */
			uint64_t beginblk = shift >= 64 ? 0 :
			    (txh->txh_arg1 >> shift);
			uint64_t endblk = shift >= 64 ? 0 :
			    ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
			uint64_t blkid = db->db_blkid;

			/* XXX txh_arg2 better not be zero... */

			/*
			 * Cast to match the %llx conversions, as the
			 * panic() call at the end of this function does.
			 */
			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
			    txh->txh_type, (u_longlong_t)beginblk,
			    (u_longlong_t)endblk);

			switch (txh->txh_type) {
			case THT_WRITE:
				if (blkid >= beginblk && blkid <= endblk)
					match_offset = B_TRUE;
				/*
				 * We will let this hold work for the bonus
				 * or spill buffer so that we don't need to
				 * hold it when creating a new object.
				 */
				if (blkid == DMU_BONUS_BLKID ||
				    blkid == DMU_SPILL_BLKID)
					match_offset = B_TRUE;
				/*
				 * They might have to increase nlevels,
				 * thus dirtying the new TLIBs.  Or they
				 * might have to change the block size,
				 * thus dirtying the new lvl=0 blk=0.
				 */
				if (blkid == 0)
					match_offset = B_TRUE;
				break;
			case THT_FREE:
				/*
				 * We will dirty all the level 1 blocks in
				 * the free range and perhaps the first and
				 * last level 0 block.
				 */
				if (blkid >= beginblk && (blkid <= endblk ||
				    txh->txh_arg2 == DMU_OBJECT_END))
					match_offset = B_TRUE;
				break;
			case THT_SPILL:
				if (blkid == DMU_SPILL_BLKID)
					match_offset = B_TRUE;
				break;
			case THT_BONUS:
				if (blkid == DMU_BONUS_BLKID)
					match_offset = B_TRUE;
				break;
			case THT_ZAP:
				match_offset = B_TRUE;
				break;
			case THT_NEWOBJECT:
				match_object = B_TRUE;
				break;
			default:
				cmn_err(CE_PANIC, "bad txh_type %d",
				    txh->txh_type);
			}
		}
		if (match_object && match_offset) {
			DB_DNODE_EXIT(db);
			return;
		}
	}
	DB_DNODE_EXIT(db);
	panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
	    (u_longlong_t)db->db.db_object, db->db_level,
	    (u_longlong_t)db->db_blkid);
}
#endif
699 | ||
e8b96c60 MA |
/*
 * If we can't do 10 iops, something is wrong.  Let us go ahead
 * and hit zfs_dirty_data_max.
 *
 * zfs_delay_max_ns is the upper bound that dmu_tx_delay() applies to the
 * per-transaction delay it computes.
 */
hrtime_t zfs_delay_max_ns = 100 * MICROSEC; /* 100 milliseconds */
/*
 * NOTE(review): presumably the timer resolution used when sleeping for
 * the delay; it is not referenced in this part of the file -- confirm.
 */
int zfs_delay_resolution_ns = 100 * 1000; /* 100 microseconds */
706 | ||
707 | /* | |
708 | * We delay transactions when we've determined that the backend storage | |
709 | * isn't able to accommodate the rate of incoming writes. | |
710 | * | |
711 | * If there is already a transaction waiting, we delay relative to when | |
712 | * that transaction finishes waiting. This way the calculated min_time | |
713 | * is independent of the number of threads concurrently executing | |
714 | * transactions. | |
715 | * | |
716 | * If we are the only waiter, wait relative to when the transaction | |
717 | * started, rather than the current time. This credits the transaction for | |
718 | * "time already served", e.g. reading indirect blocks. | |
719 | * | |
720 | * The minimum time for a transaction to take is calculated as: | |
721 | * min_time = scale * (dirty - min) / (max - dirty) | |
722 | * min_time is then capped at zfs_delay_max_ns. | |
723 | * | |
724 | * The delay has two degrees of freedom that can be adjusted via tunables. | |
725 | * The percentage of dirty data at which we start to delay is defined by | |
726 | * zfs_delay_min_dirty_percent. This should typically be at or above | |
727 | * zfs_vdev_async_write_active_max_dirty_percent so that we only start to | |
728 | * delay after writing at full speed has failed to keep up with the incoming | |
729 | * write rate. The scale of the curve is defined by zfs_delay_scale. Roughly | |
730 | * speaking, this variable determines the amount of delay at the midpoint of | |
731 | * the curve. | |
732 | * | |
733 | * delay | |
734 | * 10ms +-------------------------------------------------------------*+ | |
735 | * | *| | |
736 | * 9ms + *+ | |
737 | * | *| | |
738 | * 8ms + *+ | |
739 | * | * | | |
740 | * 7ms + * + | |
741 | * | * | | |
742 | * 6ms + * + | |
743 | * | * | | |
744 | * 5ms + * + | |
745 | * | * | | |
746 | * 4ms + * + | |
747 | * | * | | |
748 | * 3ms + * + | |
749 | * | * | | |
750 | * 2ms + (midpoint) * + | |
751 | * | | ** | | |
752 | * 1ms + v *** + | |
753 | * | zfs_delay_scale ----------> ******** | | |
754 | * 0 +-------------------------------------*********----------------+ | |
755 | * 0% <- zfs_dirty_data_max -> 100% | |
756 | * | |
757 | * Note that since the delay is added to the outstanding time remaining on the | |
758 | * most recent transaction, the delay is effectively the inverse of IOPS. | |
759 | * Here the midpoint of 500us translates to 2000 IOPS. The shape of the curve | |
760 | * was chosen such that small changes in the amount of accumulated dirty data | |
761 | * in the first 3/4 of the curve yield relatively small differences in the | |
762 | * amount of delay. | |
763 | * | |
764 | * The effects can be easier to understand when the amount of delay is | |
765 | * represented on a log scale: | |
766 | * | |
767 | * delay | |
768 | * 100ms +-------------------------------------------------------------++ | |
769 | * + + | |
770 | * | | | |
771 | * + *+ | |
772 | * 10ms + *+ | |
773 | * + ** + | |
774 | * | (midpoint) ** | | |
775 | * + | ** + | |
776 | * 1ms + v **** + | |
777 | * + zfs_delay_scale ----------> ***** + | |
778 | * | **** | | |
779 | * + **** + | |
780 | * 100us + ** + | |
781 | * + * + | |
782 | * | * | | |
783 | * + * + | |
784 | * 10us + * + | |
785 | * + + | |
786 | * | | | |
787 | * + + | |
788 | * +--------------------------------------------------------------+ | |
789 | * 0% <- zfs_dirty_data_max -> 100% | |
790 | * | |
791 | * Note here that only as the amount of dirty data approaches its limit does | |
792 | * the delay start to increase rapidly. The goal of a properly tuned system | |
793 | * should be to keep the amount of dirty data out of that range by first | |
794 | * ensuring that the appropriate limits are set for the I/O scheduler to reach | |
795 | * optimal throughput on the backend storage, and then by changing the value | |
796 | * of zfs_delay_scale to increase the steepness of the curve. | |
797 | */ | |
798 | static void | |
799 | dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty) | |
800 | { | |
801 | dsl_pool_t *dp = tx->tx_pool; | |
802 | uint64_t delay_min_bytes = | |
803 | zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; | |
804 | hrtime_t wakeup, min_tx_time, now; | |
805 | ||
806 | if (dirty <= delay_min_bytes) | |
807 | return; | |
808 | ||
809 | /* | |
810 | * The caller has already waited until we are under the max. | |
811 | * We make them pass us the amount of dirty data so we don't | |
812 | * have to handle the case of it being >= the max, which could | |
813 | * cause a divide-by-zero if it's == the max. | |
814 | */ | |
815 | ASSERT3U(dirty, <, zfs_dirty_data_max); | |
816 | ||
817 | now = gethrtime(); | |
818 | min_tx_time = zfs_delay_scale * | |
819 | (dirty - delay_min_bytes) / (zfs_dirty_data_max - dirty); | |
820 | min_tx_time = MIN(min_tx_time, zfs_delay_max_ns); | |
821 | if (now > tx->tx_start + min_tx_time) | |
822 | return; | |
823 | ||
824 | DTRACE_PROBE3(delay__mintime, dmu_tx_t *, tx, uint64_t, dirty, | |
825 | uint64_t, min_tx_time); | |
826 | ||
827 | mutex_enter(&dp->dp_lock); | |
828 | wakeup = MAX(tx->tx_start + min_tx_time, | |
829 | dp->dp_last_wakeup + min_tx_time); | |
830 | dp->dp_last_wakeup = wakeup; | |
831 | mutex_exit(&dp->dp_lock); | |
832 | ||
833 | zfs_sleep_until(wakeup); | |
834 | } | |
835 | ||
3ec3bc21 BB |
836 | /* |
837 | * This routine attempts to assign the transaction to a transaction group. | |
838 | * To do so, we must determine if there is sufficient free space on disk. | |
839 | * | |
840 | * If this is a "netfree" transaction (i.e. we called dmu_tx_mark_netfree() | |
841 | * on it), then it is assumed that there is sufficient free space, | |
842 | * unless there's insufficient slop space in the pool (see the comment | |
843 | * above spa_slop_shift in spa_misc.c). | |
844 | * | |
845 | * If it is not a "netfree" transaction, then if the data already on disk | |
846 | * is over the allowed usage (e.g. quota), this will fail with EDQUOT or | |
847 | * ENOSPC. Otherwise, if the current rough estimate of pending changes, | |
848 | * plus the rough estimate of this transaction's changes, may exceed the | |
849 | * allowed usage, then this will fail with ERESTART, which will cause the | |
850 | * caller to wait for the pending changes to be written to disk (by waiting | |
851 | * for the next TXG to open), and then check the space usage again. | |
852 | * | |
853 | * The rough estimate of pending changes is comprised of the sum of: | |
854 | * | |
855 | * - this transaction's holds' txh_space_towrite | |
856 | * | |
857 | * - dd_tempreserved[], which is the sum of in-flight transactions' | |
858 | * holds' txh_space_towrite (i.e. those transactions that have called | |
859 | * dmu_tx_assign() but not yet called dmu_tx_commit()). | |
860 | * | |
861 | * - dd_space_towrite[], which is the amount of dirtied dbufs. | |
862 | * | |
863 | * Note that all of these values are inflated by spa_get_worst_case_asize(), | |
864 | * which means that we may get ERESTART well before we are actually in danger | |
865 | * of running out of space, but this also mitigates any small inaccuracies | |
866 | * in the rough estimate (e.g. txh_space_towrite doesn't take into account | |
867 | * indirect blocks, and dd_space_towrite[] doesn't take into account changes | |
868 | * to the MOS). | |
869 | * | |
870 | * Note that due to this algorithm, it is possible to exceed the allowed | |
871 | * usage by one transaction. Also, as we approach the allowed usage, | |
872 | * we will allow a very limited amount of changes into each TXG, thus | |
873 | * decreasing performance. | |
874 | */ | |
34dc7c2f | 875 | static int |
0735ecb3 | 876 | dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) |
34dc7c2f | 877 | { |
34dc7c2f | 878 | spa_t *spa = tx->tx_pool->dp_spa; |
34dc7c2f | 879 | |
c99c9001 | 880 | ASSERT0(tx->tx_txg); |
34dc7c2f | 881 | |
570827e1 BB |
882 | if (tx->tx_err) { |
883 | DMU_TX_STAT_BUMP(dmu_tx_error); | |
34dc7c2f | 884 | return (tx->tx_err); |
570827e1 | 885 | } |
34dc7c2f | 886 | |
b128c09f | 887 | if (spa_suspended(spa)) { |
570827e1 BB |
888 | DMU_TX_STAT_BUMP(dmu_tx_suspended); |
889 | ||
34dc7c2f BB |
890 | /* |
891 | * If the user has indicated a blocking failure mode | |
892 | * then return ERESTART which will block in dmu_tx_wait(). | |
893 | * Otherwise, return EIO so that an error can get | |
894 | * propagated back to the VOP calls. | |
895 | * | |
896 | * Note that we always honor the txg_how flag regardless | |
897 | * of the failuremode setting. | |
898 | */ | |
899 | if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE && | |
0735ecb3 | 900 | !(txg_how & TXG_WAIT)) |
2e528b49 | 901 | return (SET_ERROR(EIO)); |
34dc7c2f | 902 | |
2e528b49 | 903 | return (SET_ERROR(ERESTART)); |
34dc7c2f BB |
904 | } |
905 | ||
0735ecb3 | 906 | if (!tx->tx_dirty_delayed && |
e8b96c60 MA |
907 | dsl_pool_need_dirty_delay(tx->tx_pool)) { |
908 | tx->tx_wait_dirty = B_TRUE; | |
909 | DMU_TX_STAT_BUMP(dmu_tx_dirty_delay); | |
ecb2b7dc | 910 | return (SET_ERROR(ERESTART)); |
e8b96c60 MA |
911 | } |
912 | ||
34dc7c2f BB |
913 | tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh); |
914 | tx->tx_needassign_txh = NULL; | |
915 | ||
916 | /* | |
917 | * NB: No error returns are allowed after txg_hold_open, but | |
918 | * before processing the dnode holds, due to the | |
919 | * dmu_tx_unassign() logic. | |
920 | */ | |
921 | ||
3ec3bc21 BB |
922 | uint64_t towrite = 0; |
923 | uint64_t tohold = 0; | |
924 | for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL; | |
34dc7c2f BB |
925 | txh = list_next(&tx->tx_holds, txh)) { |
926 | dnode_t *dn = txh->txh_dnode; | |
927 | if (dn != NULL) { | |
928 | mutex_enter(&dn->dn_mtx); | |
929 | if (dn->dn_assigned_txg == tx->tx_txg - 1) { | |
930 | mutex_exit(&dn->dn_mtx); | |
931 | tx->tx_needassign_txh = txh; | |
570827e1 | 932 | DMU_TX_STAT_BUMP(dmu_tx_group); |
2e528b49 | 933 | return (SET_ERROR(ERESTART)); |
34dc7c2f BB |
934 | } |
935 | if (dn->dn_assigned_txg == 0) | |
936 | dn->dn_assigned_txg = tx->tx_txg; | |
937 | ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); | |
c13060e4 | 938 | (void) zfs_refcount_add(&dn->dn_tx_holds, tx); |
34dc7c2f BB |
939 | mutex_exit(&dn->dn_mtx); |
940 | } | |
424fd7c3 TS |
941 | towrite += zfs_refcount_count(&txh->txh_space_towrite); |
942 | tohold += zfs_refcount_count(&txh->txh_memory_tohold); | |
34dc7c2f BB |
943 | } |
944 | ||
b128c09f | 945 | /* needed allocation: worst-case estimate of write space */ |
3ec3bc21 | 946 | uint64_t asize = spa_get_worst_case_asize(tx->tx_pool->dp_spa, towrite); |
b128c09f | 947 | /* calculate memory footprint estimate */ |
3ec3bc21 | 948 | uint64_t memory = towrite + tohold; |
34dc7c2f | 949 | |
3ec3bc21 | 950 | if (tx->tx_dir != NULL && asize != 0) { |
b128c09f | 951 | int err = dsl_dir_tempreserve_space(tx->tx_dir, memory, |
3ec3bc21 BB |
952 | asize, tx->tx_netfree, &tx->tx_tempreserve_cookie, tx); |
953 | if (err != 0) | |
34dc7c2f BB |
954 | return (err); |
955 | } | |
956 | ||
570827e1 BB |
957 | DMU_TX_STAT_BUMP(dmu_tx_assigned); |
958 | ||
34dc7c2f BB |
959 | return (0); |
960 | } | |
961 | ||
962 | static void | |
963 | dmu_tx_unassign(dmu_tx_t *tx) | |
964 | { | |
34dc7c2f BB |
965 | if (tx->tx_txg == 0) |
966 | return; | |
967 | ||
968 | txg_rele_to_quiesce(&tx->tx_txgh); | |
969 | ||
e49f1e20 WA |
970 | /* |
971 | * Walk the transaction's hold list, removing the hold on the | |
972 | * associated dnode, and notifying waiters if the refcount drops to 0. | |
973 | */ | |
3ec3bc21 | 974 | for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); |
981b2126 | 975 | txh && txh != tx->tx_needassign_txh; |
34dc7c2f BB |
976 | txh = list_next(&tx->tx_holds, txh)) { |
977 | dnode_t *dn = txh->txh_dnode; | |
978 | ||
979 | if (dn == NULL) | |
980 | continue; | |
981 | mutex_enter(&dn->dn_mtx); | |
982 | ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); | |
983 | ||
424fd7c3 | 984 | if (zfs_refcount_remove(&dn->dn_tx_holds, tx) == 0) { |
34dc7c2f BB |
985 | dn->dn_assigned_txg = 0; |
986 | cv_broadcast(&dn->dn_notxholds); | |
987 | } | |
988 | mutex_exit(&dn->dn_mtx); | |
989 | } | |
990 | ||
991 | txg_rele_to_sync(&tx->tx_txgh); | |
992 | ||
993 | tx->tx_lasttried_txg = tx->tx_txg; | |
994 | tx->tx_txg = 0; | |
995 | } | |
996 | ||
997 | /* | |
0735ecb3 | 998 | * Assign tx to a transaction group; txg_how is a bitmask: |
34dc7c2f | 999 | * |
0735ecb3 PS |
1000 | * If TXG_WAIT is set and the currently open txg is full, this function |
1001 | * will wait until there's a new txg. This should be used when no locks | |
1002 | * are being held. With this bit set, this function will only fail if | |
1003 | * we're truly out of space (or over quota). | |
34dc7c2f | 1004 | * |
0735ecb3 PS |
1005 | * If TXG_WAIT is *not* set and we can't assign into the currently open |
1006 | * txg without blocking, this function will return immediately with | |
1007 | * ERESTART. This should be used whenever locks are being held. On an | |
1008 | * ERESTART error, the caller should drop all locks, call dmu_tx_wait(), | |
1009 | * and try again. | |
e8b96c60 | 1010 | * |
0735ecb3 PS |
1011 | * If TXG_NOTHROTTLE is set, this indicates that this tx should not be |
1012 | * delayed due on the ZFS Write Throttle (see comments in dsl_pool.c for | |
1013 | * details on the throttle). This is used by the VFS operations, after | |
1014 | * they have already called dmu_tx_wait() (though most likely on a | |
1015 | * different tx). | |
34dc7c2f BB |
1016 | */ |
1017 | int | |
0735ecb3 | 1018 | dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) |
34dc7c2f BB |
1019 | { |
1020 | int err; | |
1021 | ||
1022 | ASSERT(tx->tx_txg == 0); | |
0735ecb3 | 1023 | ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE)); |
34dc7c2f BB |
1024 | ASSERT(!dsl_pool_sync_context(tx->tx_pool)); |
1025 | ||
13fe0198 | 1026 | /* If we might wait, we must not hold the config lock. */ |
0735ecb3 PS |
1027 | IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool)); |
1028 | ||
1029 | if ((txg_how & TXG_NOTHROTTLE)) | |
1030 | tx->tx_dirty_delayed = B_TRUE; | |
13fe0198 | 1031 | |
34dc7c2f BB |
1032 | while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { |
1033 | dmu_tx_unassign(tx); | |
1034 | ||
0735ecb3 | 1035 | if (err != ERESTART || !(txg_how & TXG_WAIT)) |
34dc7c2f BB |
1036 | return (err); |
1037 | ||
1038 | dmu_tx_wait(tx); | |
1039 | } | |
1040 | ||
1041 | txg_rele_to_quiesce(&tx->tx_txgh); | |
1042 | ||
1043 | return (0); | |
1044 | } | |
1045 | ||
1046 | void | |
1047 | dmu_tx_wait(dmu_tx_t *tx) | |
1048 | { | |
1049 | spa_t *spa = tx->tx_pool->dp_spa; | |
e8b96c60 | 1050 | dsl_pool_t *dp = tx->tx_pool; |
a77c4c83 | 1051 | hrtime_t before; |
34dc7c2f BB |
1052 | |
1053 | ASSERT(tx->tx_txg == 0); | |
13fe0198 | 1054 | ASSERT(!dsl_pool_config_held(tx->tx_pool)); |
34dc7c2f | 1055 | |
a77c4c83 NB |
1056 | before = gethrtime(); |
1057 | ||
e8b96c60 MA |
1058 | if (tx->tx_wait_dirty) { |
1059 | uint64_t dirty; | |
1060 | ||
1061 | /* | |
1062 | * dmu_tx_try_assign() has determined that we need to wait | |
1063 | * because we've consumed much or all of the dirty buffer | |
1064 | * space. | |
1065 | */ | |
1066 | mutex_enter(&dp->dp_lock); | |
1067 | if (dp->dp_dirty_total >= zfs_dirty_data_max) | |
1068 | DMU_TX_STAT_BUMP(dmu_tx_dirty_over_max); | |
1069 | while (dp->dp_dirty_total >= zfs_dirty_data_max) | |
1070 | cv_wait(&dp->dp_spaceavail_cv, &dp->dp_lock); | |
1071 | dirty = dp->dp_dirty_total; | |
1072 | mutex_exit(&dp->dp_lock); | |
1073 | ||
1074 | dmu_tx_delay(tx, dirty); | |
1075 | ||
1076 | tx->tx_wait_dirty = B_FALSE; | |
1077 | ||
1078 | /* | |
0735ecb3 PS |
1079 | * Note: setting tx_dirty_delayed only has effect if the |
1080 | * caller used TX_WAIT. Otherwise they are going to | |
1081 | * destroy this tx and try again. The common case, | |
1082 | * zfs_write(), uses TX_WAIT. | |
e8b96c60 | 1083 | */ |
0735ecb3 | 1084 | tx->tx_dirty_delayed = B_TRUE; |
e8b96c60 MA |
1085 | } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) { |
1086 | /* | |
1087 | * If the pool is suspended we need to wait until it | |
1088 | * is resumed. Note that it's possible that the pool | |
1089 | * has become active after this thread has tried to | |
1090 | * obtain a tx. If that's the case then tx_lasttried_txg | |
1091 | * would not have been set. | |
1092 | */ | |
1093 | txg_wait_synced(dp, spa_last_synced_txg(spa) + 1); | |
34dc7c2f BB |
1094 | } else if (tx->tx_needassign_txh) { |
1095 | dnode_t *dn = tx->tx_needassign_txh->txh_dnode; | |
1096 | ||
1097 | mutex_enter(&dn->dn_mtx); | |
1098 | while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1) | |
1099 | cv_wait(&dn->dn_notxholds, &dn->dn_mtx); | |
1100 | mutex_exit(&dn->dn_mtx); | |
1101 | tx->tx_needassign_txh = NULL; | |
1102 | } else { | |
e8b96c60 | 1103 | /* |
e48afbc4 SD |
1104 | * If we have a lot of dirty data just wait until we sync |
1105 | * out a TXG at which point we'll hopefully have synced | |
1106 | * a portion of the changes. | |
e8b96c60 | 1107 | */ |
e48afbc4 | 1108 | txg_wait_synced(dp, spa_last_synced_txg(spa) + 1); |
34dc7c2f | 1109 | } |
a77c4c83 NB |
1110 | |
1111 | spa_tx_assign_add_nsecs(spa, gethrtime() - before); | |
34dc7c2f BB |
1112 | } |
1113 | ||
f85c06be GM |
1114 | static void |
1115 | dmu_tx_destroy(dmu_tx_t *tx) | |
1116 | { | |
1117 | dmu_tx_hold_t *txh; | |
1118 | ||
1119 | while ((txh = list_head(&tx->tx_holds)) != NULL) { | |
1120 | dnode_t *dn = txh->txh_dnode; | |
1121 | ||
1122 | list_remove(&tx->tx_holds, txh); | |
424fd7c3 TS |
1123 | zfs_refcount_destroy_many(&txh->txh_space_towrite, |
1124 | zfs_refcount_count(&txh->txh_space_towrite)); | |
1125 | zfs_refcount_destroy_many(&txh->txh_memory_tohold, | |
1126 | zfs_refcount_count(&txh->txh_memory_tohold)); | |
f85c06be GM |
1127 | kmem_free(txh, sizeof (dmu_tx_hold_t)); |
1128 | if (dn != NULL) | |
1129 | dnode_rele(dn, tx); | |
1130 | } | |
1131 | ||
1132 | list_destroy(&tx->tx_callbacks); | |
1133 | list_destroy(&tx->tx_holds); | |
f85c06be GM |
1134 | kmem_free(tx, sizeof (dmu_tx_t)); |
1135 | } | |
1136 | ||
34dc7c2f BB |
1137 | void |
1138 | dmu_tx_commit(dmu_tx_t *tx) | |
1139 | { | |
34dc7c2f BB |
1140 | ASSERT(tx->tx_txg != 0); |
1141 | ||
e49f1e20 WA |
1142 | /* |
1143 | * Go through the transaction's hold list and remove holds on | |
1144 | * associated dnodes, notifying waiters if no holds remain. | |
1145 | */ | |
1c27024e | 1146 | for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL; |
f85c06be | 1147 | txh = list_next(&tx->tx_holds, txh)) { |
34dc7c2f BB |
1148 | dnode_t *dn = txh->txh_dnode; |
1149 | ||
34dc7c2f BB |
1150 | if (dn == NULL) |
1151 | continue; | |
f85c06be | 1152 | |
34dc7c2f BB |
1153 | mutex_enter(&dn->dn_mtx); |
1154 | ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); | |
1155 | ||
424fd7c3 | 1156 | if (zfs_refcount_remove(&dn->dn_tx_holds, tx) == 0) { |
34dc7c2f BB |
1157 | dn->dn_assigned_txg = 0; |
1158 | cv_broadcast(&dn->dn_notxholds); | |
1159 | } | |
1160 | mutex_exit(&dn->dn_mtx); | |
34dc7c2f BB |
1161 | } |
1162 | ||
1163 | if (tx->tx_tempreserve_cookie) | |
1164 | dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx); | |
1165 | ||
428870ff BB |
1166 | if (!list_is_empty(&tx->tx_callbacks)) |
1167 | txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks); | |
1168 | ||
34dc7c2f BB |
1169 | if (tx->tx_anyobj == FALSE) |
1170 | txg_rele_to_sync(&tx->tx_txgh); | |
428870ff | 1171 | |
f85c06be | 1172 | dmu_tx_destroy(tx); |
34dc7c2f BB |
1173 | } |
1174 | ||
1175 | void | |
1176 | dmu_tx_abort(dmu_tx_t *tx) | |
1177 | { | |
34dc7c2f BB |
1178 | ASSERT(tx->tx_txg == 0); |
1179 | ||
428870ff BB |
1180 | /* |
1181 | * Call any registered callbacks with an error code. | |
1182 | */ | |
1183 | if (!list_is_empty(&tx->tx_callbacks)) | |
1184 | dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED); | |
1185 | ||
f85c06be | 1186 | dmu_tx_destroy(tx); |
34dc7c2f BB |
1187 | } |
1188 | ||
1189 | uint64_t | |
1190 | dmu_tx_get_txg(dmu_tx_t *tx) | |
1191 | { | |
1192 | ASSERT(tx->tx_txg != 0); | |
1193 | return (tx->tx_txg); | |
1194 | } | |
428870ff | 1195 | |
13fe0198 MA |
1196 | dsl_pool_t * |
1197 | dmu_tx_pool(dmu_tx_t *tx) | |
1198 | { | |
1199 | ASSERT(tx->tx_pool != NULL); | |
1200 | return (tx->tx_pool); | |
1201 | } | |
1202 | ||
428870ff BB |
1203 | void |
1204 | dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) | |
1205 | { | |
1206 | dmu_tx_callback_t *dcb; | |
1207 | ||
79c76d5b | 1208 | dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP); |
428870ff BB |
1209 | |
1210 | dcb->dcb_func = func; | |
1211 | dcb->dcb_data = data; | |
1212 | ||
1213 | list_insert_tail(&tx->tx_callbacks, dcb); | |
1214 | } | |
1215 | ||
1216 | /* | |
1217 | * Call all the commit callbacks on a list, with a given error code. | |
1218 | */ | |
1219 | void | |
1220 | dmu_tx_do_callbacks(list_t *cb_list, int error) | |
1221 | { | |
1222 | dmu_tx_callback_t *dcb; | |
1223 | ||
823d48bf | 1224 | while ((dcb = list_tail(cb_list)) != NULL) { |
428870ff BB |
1225 | list_remove(cb_list, dcb); |
1226 | dcb->dcb_func(dcb->dcb_data, error); | |
1227 | kmem_free(dcb, sizeof (dmu_tx_callback_t)); | |
1228 | } | |
1229 | } | |
1230 | ||
/*
 * Interface to hold a bunch of attributes; used for creating new files.
 * attrsize is the total size of all attributes to be added during
 * object creation.
 *
 * For updating/adding a single attribute, dmu_tx_hold_sa() should be used.
 */
1239 | ||
1240 | /* | |
1241 | * hold necessary attribute name for attribute registration. | |
1242 | * should be a very rare case where this is needed. If it does | |
1243 | * happen it would only happen on the first write to the file system. | |
1244 | */ | |
1245 | static void | |
1246 | dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx) | |
1247 | { | |
428870ff BB |
1248 | if (!sa->sa_need_attr_registration) |
1249 | return; | |
1250 | ||
3ec3bc21 | 1251 | for (int i = 0; i != sa->sa_num_attrs; i++) { |
428870ff BB |
1252 | if (!sa->sa_attr_table[i].sa_registered) { |
1253 | if (sa->sa_reg_attr_obj) | |
1254 | dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj, | |
1255 | B_TRUE, sa->sa_attr_table[i].sa_name); | |
1256 | else | |
1257 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, | |
1258 | B_TRUE, sa->sa_attr_table[i].sa_name); | |
1259 | } | |
1260 | } | |
1261 | } | |
1262 | ||
428870ff BB |
1263 | void |
1264 | dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) | |
1265 | { | |
9631681b | 1266 | dmu_tx_hold_t *txh; |
428870ff | 1267 | |
9631681b BB |
1268 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, |
1269 | THT_SPILL, 0, 0); | |
1270 | if (txh != NULL) | |
424fd7c3 | 1271 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, |
9631681b | 1272 | SPA_OLD_MAXBLOCKSIZE, FTAG); |
428870ff BB |
1273 | } |
1274 | ||
1275 | void | |
1276 | dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize) | |
1277 | { | |
1278 | sa_os_t *sa = tx->tx_objset->os_sa; | |
1279 | ||
1280 | dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); | |
1281 | ||
1282 | if (tx->tx_objset->os_sa->sa_master_obj == 0) | |
1283 | return; | |
1284 | ||
3ec3bc21 | 1285 | if (tx->tx_objset->os_sa->sa_layout_attr_obj) { |
428870ff | 1286 | dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); |
3ec3bc21 | 1287 | } else { |
428870ff BB |
1288 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); |
1289 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); | |
1290 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1291 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1292 | } | |
1293 | ||
1294 | dmu_tx_sa_registration_hold(sa, tx); | |
1295 | ||
50c957f7 | 1296 | if (attrsize <= DN_OLD_MAX_BONUSLEN && !sa->sa_force_spill) |
428870ff BB |
1297 | return; |
1298 | ||
1299 | (void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, | |
1300 | THT_SPILL, 0, 0); | |
1301 | } | |
1302 | ||
1303 | /* | |
1304 | * Hold SA attribute | |
1305 | * | |
1306 | * dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *, attribute, add, size) | |
1307 | * | |
1308 | * variable_size is the total size of all variable sized attributes | |
1309 | * passed to this function. It is not the total size of all | |
1310 | * variable size attributes that *may* exist on this object. | |
1311 | */ | |
1312 | void | |
1313 | dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) | |
1314 | { | |
1315 | uint64_t object; | |
1316 | sa_os_t *sa = tx->tx_objset->os_sa; | |
1317 | ||
1318 | ASSERT(hdl != NULL); | |
1319 | ||
1320 | object = sa_handle_object(hdl); | |
1321 | ||
1322 | dmu_tx_hold_bonus(tx, object); | |
1323 | ||
1324 | if (tx->tx_objset->os_sa->sa_master_obj == 0) | |
1325 | return; | |
1326 | ||
1327 | if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 || | |
1328 | tx->tx_objset->os_sa->sa_layout_attr_obj == 0) { | |
1329 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); | |
1330 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); | |
1331 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1332 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1333 | } | |
1334 | ||
1335 | dmu_tx_sa_registration_hold(sa, tx); | |
1336 | ||
1337 | if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj) | |
1338 | dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); | |
1339 | ||
572e2857 | 1340 | if (sa->sa_force_spill || may_grow || hdl->sa_spill) { |
428870ff BB |
1341 | ASSERT(tx->tx_txg == 0); |
1342 | dmu_tx_hold_spill(tx, object); | |
572e2857 BB |
1343 | } else { |
1344 | dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; | |
1345 | dnode_t *dn; | |
1346 | ||
1347 | DB_DNODE_ENTER(db); | |
1348 | dn = DB_DNODE(db); | |
1349 | if (dn->dn_have_spill) { | |
1350 | ASSERT(tx->tx_txg == 0); | |
1351 | dmu_tx_hold_spill(tx, object); | |
1352 | } | |
1353 | DB_DNODE_EXIT(db); | |
428870ff BB |
1354 | } |
1355 | } | |
c28b2279 | 1356 | |
570827e1 BB |
1357 | void |
1358 | dmu_tx_init(void) | |
1359 | { | |
1360 | dmu_tx_ksp = kstat_create("zfs", 0, "dmu_tx", "misc", | |
1361 | KSTAT_TYPE_NAMED, sizeof (dmu_tx_stats) / sizeof (kstat_named_t), | |
1362 | KSTAT_FLAG_VIRTUAL); | |
1363 | ||
1364 | if (dmu_tx_ksp != NULL) { | |
1365 | dmu_tx_ksp->ks_data = &dmu_tx_stats; | |
1366 | kstat_install(dmu_tx_ksp); | |
1367 | } | |
1368 | } | |
1369 | ||
1370 | void | |
1371 | dmu_tx_fini(void) | |
1372 | { | |
1373 | if (dmu_tx_ksp != NULL) { | |
1374 | kstat_delete(dmu_tx_ksp); | |
1375 | dmu_tx_ksp = NULL; | |
1376 | } | |
1377 | } | |
1378 | ||
#if defined(_KERNEL)
/* Transaction lifecycle. */
EXPORT_SYMBOL(dmu_tx_create);
EXPORT_SYMBOL(dmu_tx_assign);
EXPORT_SYMBOL(dmu_tx_wait);
EXPORT_SYMBOL(dmu_tx_commit);
EXPORT_SYMBOL(dmu_tx_abort);
EXPORT_SYMBOL(dmu_tx_get_txg);
EXPORT_SYMBOL(dmu_tx_mark_netfree);

/* Holds. */
EXPORT_SYMBOL(dmu_tx_hold_write);
EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_free);
EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_zap);
EXPORT_SYMBOL(dmu_tx_hold_zap_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_bonus);
EXPORT_SYMBOL(dmu_tx_hold_bonus_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_spill);
EXPORT_SYMBOL(dmu_tx_hold_sa_create);
EXPORT_SYMBOL(dmu_tx_hold_sa);

/* Commit callbacks. */
EXPORT_SYMBOL(dmu_tx_callback_register);
EXPORT_SYMBOL(dmu_tx_do_callbacks);
#endif