]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
22cd4a46 | 23 | * Copyright 2011 Nexenta Systems, Inc. All rights reserved. |
4747a7d3 | 24 | * Copyright (c) 2012, 2017 by Delphix. All rights reserved. |
22cd4a46 | 25 | */ |
34dc7c2f | 26 | |
34dc7c2f BB |
27 | #include <sys/dmu.h> |
28 | #include <sys/dmu_impl.h> | |
29 | #include <sys/dbuf.h> | |
30 | #include <sys/dmu_tx.h> | |
31 | #include <sys/dmu_objset.h> | |
3ec3bc21 BB |
32 | #include <sys/dsl_dataset.h> |
33 | #include <sys/dsl_dir.h> | |
34dc7c2f | 34 | #include <sys/dsl_pool.h> |
3ec3bc21 | 35 | #include <sys/zap_impl.h> |
34dc7c2f | 36 | #include <sys/spa.h> |
428870ff BB |
37 | #include <sys/sa.h> |
38 | #include <sys/sa_impl.h> | |
34dc7c2f | 39 | #include <sys/zfs_context.h> |
e5d1c27e | 40 | #include <sys/trace_zfs.h> |
34dc7c2f BB |
41 | |
/*
 * Prototype for a per-hold-type accounting callback: takes the
 * transaction, the dnode being held, and two hold-type-specific
 * arguments (e.g. offset and length for writes/frees).
 * NOTE(review): no caller is visible in this chunk -- possibly unused;
 * confirm before removing.
 */
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
    uint64_t arg1, uint64_t arg2);
/*
 * Kernel statistics for transaction assignment: counts of how often
 * dmu_tx_assign() succeeded and of each reason it waited or failed.
 * Each entry is a uint64 counter exposed through the "dmu_tx" kstat.
 */
dmu_tx_stats_t dmu_tx_stats = {
	{ "dmu_tx_assigned",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_delay",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_error",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_suspended",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_group",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_memory_reserve",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_memory_reclaim",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_throttle",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_delay",		KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_over_max",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_wrlog_over_max",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_dirty_frees_delay",	KSTAT_DATA_UINT64 },
	{ "dmu_tx_quota",		KSTAT_DATA_UINT64 },
};

/* Handle for the registered "dmu_tx" kstat (created/destroyed elsewhere). */
static kstat_t *dmu_tx_ksp;
34dc7c2f BB |
62 | |
63 | dmu_tx_t * | |
64 | dmu_tx_create_dd(dsl_dir_t *dd) | |
65 | { | |
79c76d5b | 66 | dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); |
34dc7c2f | 67 | tx->tx_dir = dd; |
6f1ffb06 | 68 | if (dd != NULL) |
34dc7c2f BB |
69 | tx->tx_pool = dd->dd_pool; |
70 | list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t), | |
71 | offsetof(dmu_tx_hold_t, txh_node)); | |
428870ff BB |
72 | list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t), |
73 | offsetof(dmu_tx_callback_t, dcb_node)); | |
e8b96c60 | 74 | tx->tx_start = gethrtime(); |
34dc7c2f BB |
75 | return (tx); |
76 | } | |
77 | ||
78 | dmu_tx_t * | |
79 | dmu_tx_create(objset_t *os) | |
80 | { | |
428870ff | 81 | dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir); |
34dc7c2f | 82 | tx->tx_objset = os; |
34dc7c2f BB |
83 | return (tx); |
84 | } | |
85 | ||
86 | dmu_tx_t * | |
87 | dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg) | |
88 | { | |
89 | dmu_tx_t *tx = dmu_tx_create_dd(NULL); | |
90 | ||
8c4fb36a | 91 | TXG_VERIFY(dp->dp_spa, txg); |
34dc7c2f BB |
92 | tx->tx_pool = dp; |
93 | tx->tx_txg = txg; | |
94 | tx->tx_anyobj = TRUE; | |
95 | ||
96 | return (tx); | |
97 | } | |
98 | ||
/*
 * Nonzero if this tx was created pre-assigned for syncing context
 * (dmu_tx_create_assigned() sets tx_anyobj).
 */
int
dmu_tx_is_syncing(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}
104 | ||
/*
 * Nonzero if this tx may manipulate private/internal objects without
 * explicit holds; currently equivalent to "is syncing context".
 */
int
dmu_tx_private_ok(dmu_tx_t *tx)
{
	return (tx->tx_anyobj);
}
110 | ||
/*
 * Allocate a hold record of the given type on dnode dn (may be NULL,
 * e.g. for DMU_NEW_OBJECT) and append it to tx->tx_holds.
 *
 * If dn is non-NULL, a reference is added to dn_holds.  If the tx is
 * already assigned to a txg (tx_txg != 0), the dnode is also marked
 * as assigned to that txg and its tx-hold count is bumped, under
 * dn_mtx.  Returns the new hold.
 */
static dmu_tx_hold_t *
dmu_tx_hold_dnode_impl(dmu_tx_t *tx, dnode_t *dn, enum dmu_tx_hold_type type,
    uint64_t arg1, uint64_t arg2)
{
	dmu_tx_hold_t *txh;

	if (dn != NULL) {
		(void) zfs_refcount_add(&dn->dn_holds, tx);
		if (tx->tx_txg != 0) {
			mutex_enter(&dn->dn_mtx);
			/*
			 * dn->dn_assigned_txg == tx->tx_txg doesn't pose a
			 * problem, but there's no way for it to happen (for
			 * now, at least).
			 */
			ASSERT(dn->dn_assigned_txg == 0);
			dn->dn_assigned_txg = tx->tx_txg;
			(void) zfs_refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
	}

	txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
	txh->txh_tx = tx;
	txh->txh_dnode = dn;
	/* Per-hold space accounting, consumed by dmu_tx_try_assign(). */
	zfs_refcount_create(&txh->txh_space_towrite);
	zfs_refcount_create(&txh->txh_memory_tohold);
	txh->txh_type = type;
	txh->txh_arg1 = arg1;
	txh->txh_arg2 = arg2;
	list_insert_tail(&tx->tx_holds, txh);

	return (txh);
}
145 | ||
0eef1bde | 146 | static dmu_tx_hold_t * |
147 | dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object, | |
148 | enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2) | |
149 | { | |
150 | dnode_t *dn = NULL; | |
151 | dmu_tx_hold_t *txh; | |
152 | int err; | |
153 | ||
154 | if (object != DMU_NEW_OBJECT) { | |
155 | err = dnode_hold(os, object, FTAG, &dn); | |
66eead53 | 156 | if (err != 0) { |
0eef1bde | 157 | tx->tx_err = err; |
158 | return (NULL); | |
159 | } | |
160 | } | |
161 | txh = dmu_tx_hold_dnode_impl(tx, dn, type, arg1, arg2); | |
162 | if (dn != NULL) | |
163 | dnode_rele(dn, FTAG); | |
164 | return (txh); | |
165 | } | |
166 | ||
34dc7c2f | 167 | void |
66eead53 | 168 | dmu_tx_add_new_object(dmu_tx_t *tx, dnode_t *dn) |
34dc7c2f BB |
169 | { |
170 | /* | |
171 | * If we're syncing, they can manipulate any object anyhow, and | |
172 | * the hold on the dnode_t can cause problems. | |
173 | */ | |
0eef1bde | 174 | if (!dmu_tx_is_syncing(tx)) |
175 | (void) dmu_tx_hold_dnode_impl(tx, dn, THT_NEWOBJECT, 0, 0); | |
34dc7c2f BB |
176 | } |
177 | ||
3ec3bc21 BB |
178 | /* |
179 | * This function reads specified data from disk. The specified data will | |
180 | * be needed to perform the transaction -- i.e, it will be read after | |
181 | * we do dmu_tx_assign(). There are two reasons that we read the data now | |
182 | * (before dmu_tx_assign()): | |
183 | * | |
184 | * 1. Reading it now has potentially better performance. The transaction | |
185 | * has not yet been assigned, so the TXG is not held open, and also the | |
186 | * caller typically has less locks held when calling dmu_tx_hold_*() than | |
187 | * after the transaction has been assigned. This reduces the lock (and txg) | |
188 | * hold times, thus reducing lock contention. | |
189 | * | |
190 | * 2. It is easier for callers (primarily the ZPL) to handle i/o errors | |
191 | * that are detected before they start making changes to the DMU state | |
192 | * (i.e. now). Once the transaction has been assigned, and some DMU | |
193 | * state has been changed, it can be difficult to recover from an i/o | |
194 | * error (e.g. to undo the changes already made in memory at the DMU | |
195 | * layer). Typically code to do so does not exist in the caller -- it | |
196 | * assumes that the data has already been cached and thus i/o errors are | |
197 | * not possible. | |
198 | * | |
199 | * It has been observed that the i/o initiated here can be a performance | |
200 | * problem, and it appears to be optional, because we don't look at the | |
201 | * data which is read. However, removing this read would only serve to | |
202 | * move the work elsewhere (after the dmu_tx_assign()), where it may | |
203 | * have a greater impact on performance (in addition to the impact on | |
204 | * fault tolerance noted above). | |
205 | */ | |
34dc7c2f BB |
206 | static int |
207 | dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid) | |
208 | { | |
209 | int err; | |
210 | dmu_buf_impl_t *db; | |
211 | ||
212 | rw_enter(&dn->dn_struct_rwlock, RW_READER); | |
213 | db = dbuf_hold_level(dn, level, blkid, FTAG); | |
214 | rw_exit(&dn->dn_struct_rwlock); | |
215 | if (db == NULL) | |
2e528b49 | 216 | return (SET_ERROR(EIO)); |
34dc7c2f BB |
217 | err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH); |
218 | dbuf_rele(db, FTAG); | |
219 | return (err); | |
220 | } | |
221 | ||
/*
 * Account `len` bytes of pending write at [off, off+len) against this
 * hold's txh_space_towrite, and (if the dnode is known) prefetch the
 * blocks the write will touch so i/o errors surface before assignment:
 * the first and last level-0 blocks when the write is not block-aligned,
 * plus all interior level-1 blocks.  Any error is latched in tx_err.
 */
/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
	dnode_t *dn = txh->txh_dnode;
	int err = 0;

	/* Zero-length writes dirty nothing. */
	if (len == 0)
		return;

	(void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);

	/* No dnode (e.g. DMU_NEW_OBJECT): nothing on disk to check. */
	if (dn == NULL)
		return;

	/*
	 * For i/o error checking, read the blocks that will be needed
	 * to perform the write: the first and last level-0 blocks (if
	 * they are not aligned, i.e. if they are partial-block writes),
	 * and all the level-1 blocks.
	 */
	if (dn->dn_maxblkid == 0) {
		/* Single-block object: read block 0 only if partially hit. */
		if (off < dn->dn_datablksz &&
		    (off > 0 || len < dn->dn_datablksz)) {
			err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
			if (err != 0) {
				txh->txh_tx->tx_err = err;
			}
		}
	} else {
		/* Batch all reads under one root zio. */
		zio_t *zio = zio_root(dn->dn_objset->os_spa,
		    NULL, NULL, ZIO_FLAG_CANFAIL);

		/* first level-0 block */
		uint64_t start = off >> dn->dn_datablkshift;
		if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
			err = dmu_tx_check_ioerr(zio, dn, 0, start);
			if (err != 0) {
				txh->txh_tx->tx_err = err;
			}
		}

		/* last level-0 block */
		uint64_t end = (off + len - 1) >> dn->dn_datablkshift;
		if (end != start && end <= dn->dn_maxblkid &&
		    P2PHASE(off + len, dn->dn_datablksz)) {
			err = dmu_tx_check_ioerr(zio, dn, 0, end);
			if (err != 0) {
				txh->txh_tx->tx_err = err;
			}
		}

		/* level-1 blocks strictly between the first and the last */
		if (dn->dn_nlevels > 1) {
			int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			for (uint64_t i = (start >> shft) + 1;
			    i < end >> shft; i++) {
				err = dmu_tx_check_ioerr(zio, dn, 1, i);
				if (err != 0) {
					txh->txh_tx->tx_err = err;
				}
			}
		}

		/* Wait for all batched reads; latch any error. */
		err = zio_wait(zio);
		if (err != 0) {
			txh->txh_tx->tx_err = err;
		}
	}
}
292 | ||
293 | static void | |
294 | dmu_tx_count_dnode(dmu_tx_hold_t *txh) | |
295 | { | |
424fd7c3 TS |
296 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, |
297 | DNODE_MIN_SIZE, FTAG); | |
34dc7c2f BB |
298 | } |
299 | ||
300 | void | |
301 | dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len) | |
302 | { | |
303 | dmu_tx_hold_t *txh; | |
304 | ||
66eead53 MA |
305 | ASSERT0(tx->tx_txg); |
306 | ASSERT3U(len, <=, DMU_MAX_ACCESS); | |
34dc7c2f BB |
307 | ASSERT(len == 0 || UINT64_MAX - off >= len - 1); |
308 | ||
309 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
310 | object, THT_WRITE, off, len); | |
66eead53 MA |
311 | if (txh != NULL) { |
312 | dmu_tx_count_write(txh, off, len); | |
313 | dmu_tx_count_dnode(txh); | |
314 | } | |
34dc7c2f BB |
315 | } |
316 | ||
0eef1bde | 317 | void |
318 | dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len) | |
319 | { | |
320 | dmu_tx_hold_t *txh; | |
321 | ||
66eead53 MA |
322 | ASSERT0(tx->tx_txg); |
323 | ASSERT3U(len, <=, DMU_MAX_ACCESS); | |
0eef1bde | 324 | ASSERT(len == 0 || UINT64_MAX - off >= len - 1); |
325 | ||
326 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_WRITE, off, len); | |
66eead53 MA |
327 | if (txh != NULL) { |
328 | dmu_tx_count_write(txh, off, len); | |
329 | dmu_tx_count_dnode(txh); | |
330 | } | |
0eef1bde | 331 | } |
332 | ||
19d55079 MA |
333 | /* |
334 | * This function marks the transaction as being a "net free". The end | |
335 | * result is that refquotas will be disabled for this transaction, and | |
336 | * this transaction will be able to use half of the pool space overhead | |
337 | * (see dsl_pool_adjustedsize()). Therefore this function should only | |
338 | * be called for transactions that we expect will not cause a net increase | |
339 | * in the amount of space used (but it's OK if that is occasionally not true). | |
340 | */ | |
341 | void | |
342 | dmu_tx_mark_netfree(dmu_tx_t *tx) | |
343 | { | |
3ec3bc21 | 344 | tx->tx_netfree = B_TRUE; |
19d55079 MA |
345 | } |
346 | ||
0eef1bde | 347 | static void |
348 | dmu_tx_hold_free_impl(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) | |
34dc7c2f | 349 | { |
3ec3bc21 BB |
350 | dmu_tx_t *tx = txh->txh_tx; |
351 | dnode_t *dn = txh->txh_dnode; | |
ea97f8ce | 352 | int err; |
34dc7c2f BB |
353 | |
354 | ASSERT(tx->tx_txg == 0); | |
355 | ||
e8b96c60 | 356 | dmu_tx_count_dnode(txh); |
34dc7c2f | 357 | |
3ec3bc21 | 358 | if (off >= (dn->dn_maxblkid + 1) * dn->dn_datablksz) |
34dc7c2f BB |
359 | return; |
360 | if (len == DMU_OBJECT_END) | |
3ec3bc21 | 361 | len = (dn->dn_maxblkid + 1) * dn->dn_datablksz - off; |
34dc7c2f | 362 | |
ea97f8ce MA |
363 | dmu_tx_count_dnode(txh); |
364 | ||
34dc7c2f | 365 | /* |
ea97f8ce MA |
366 | * For i/o error checking, we read the first and last level-0 |
367 | * blocks if they are not aligned, and all the level-1 blocks. | |
368 | * | |
369 | * Note: dbuf_free_range() assumes that we have not instantiated | |
370 | * any level-0 dbufs that will be completely freed. Therefore we must | |
371 | * exercise care to not read or count the first and last blocks | |
372 | * if they are blocksize-aligned. | |
373 | */ | |
374 | if (dn->dn_datablkshift == 0) { | |
b663a23d | 375 | if (off != 0 || len < dn->dn_datablksz) |
92bc214c | 376 | dmu_tx_count_write(txh, 0, dn->dn_datablksz); |
ea97f8ce MA |
377 | } else { |
378 | /* first block will be modified if it is not aligned */ | |
379 | if (!IS_P2ALIGNED(off, 1 << dn->dn_datablkshift)) | |
380 | dmu_tx_count_write(txh, off, 1); | |
381 | /* last block will be modified if it is not aligned */ | |
382 | if (!IS_P2ALIGNED(off + len, 1 << dn->dn_datablkshift)) | |
3ec3bc21 | 383 | dmu_tx_count_write(txh, off + len, 1); |
ea97f8ce MA |
384 | } |
385 | ||
386 | /* | |
387 | * Check level-1 blocks. | |
34dc7c2f BB |
388 | */ |
389 | if (dn->dn_nlevels > 1) { | |
ea97f8ce | 390 | int shift = dn->dn_datablkshift + dn->dn_indblkshift - |
34dc7c2f | 391 | SPA_BLKPTRSHIFT; |
ea97f8ce MA |
392 | uint64_t start = off >> shift; |
393 | uint64_t end = (off + len) >> shift; | |
ea97f8ce | 394 | |
ea97f8ce | 395 | ASSERT(dn->dn_indblkshift != 0); |
34dc7c2f | 396 | |
2e7b7657 MA |
397 | /* |
398 | * dnode_reallocate() can result in an object with indirect | |
399 | * blocks having an odd data block size. In this case, | |
400 | * just check the single block. | |
401 | */ | |
402 | if (dn->dn_datablkshift == 0) | |
403 | start = end = 0; | |
404 | ||
3ec3bc21 | 405 | zio_t *zio = zio_root(tx->tx_pool->dp_spa, |
34dc7c2f | 406 | NULL, NULL, ZIO_FLAG_CANFAIL); |
1c27024e | 407 | for (uint64_t i = start; i <= end; i++) { |
34dc7c2f | 408 | uint64_t ibyte = i << shift; |
b128c09f | 409 | err = dnode_next_offset(dn, 0, &ibyte, 2, 1, 0); |
34dc7c2f | 410 | i = ibyte >> shift; |
4bda3bd0 | 411 | if (err == ESRCH || i > end) |
34dc7c2f | 412 | break; |
3ec3bc21 | 413 | if (err != 0) { |
34dc7c2f | 414 | tx->tx_err = err; |
3ec3bc21 | 415 | (void) zio_wait(zio); |
34dc7c2f BB |
416 | return; |
417 | } | |
418 | ||
424fd7c3 | 419 | (void) zfs_refcount_add_many(&txh->txh_memory_tohold, |
3ec3bc21 BB |
420 | 1 << dn->dn_indblkshift, FTAG); |
421 | ||
34dc7c2f | 422 | err = dmu_tx_check_ioerr(zio, dn, 1, i); |
3ec3bc21 | 423 | if (err != 0) { |
34dc7c2f | 424 | tx->tx_err = err; |
3ec3bc21 | 425 | (void) zio_wait(zio); |
34dc7c2f BB |
426 | return; |
427 | } | |
428 | } | |
429 | err = zio_wait(zio); | |
3ec3bc21 | 430 | if (err != 0) { |
34dc7c2f BB |
431 | tx->tx_err = err; |
432 | return; | |
433 | } | |
434 | } | |
34dc7c2f BB |
435 | } |
436 | ||
437 | void | |
0eef1bde | 438 | dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len) |
439 | { | |
440 | dmu_tx_hold_t *txh; | |
441 | ||
442 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
443 | object, THT_FREE, off, len); | |
66eead53 MA |
444 | if (txh != NULL) |
445 | (void) dmu_tx_hold_free_impl(txh, off, len); | |
0eef1bde | 446 | } |
447 | ||
448 | void | |
449 | dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len) | |
34dc7c2f BB |
450 | { |
451 | dmu_tx_hold_t *txh; | |
0eef1bde | 452 | |
453 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_FREE, off, len); | |
66eead53 MA |
454 | if (txh != NULL) |
455 | (void) dmu_tx_hold_free_impl(txh, off, len); | |
0eef1bde | 456 | } |
457 | ||
458 | static void | |
9522bd24 | 459 | dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name) |
0eef1bde | 460 | { |
461 | dmu_tx_t *tx = txh->txh_tx; | |
3ec3bc21 | 462 | dnode_t *dn = txh->txh_dnode; |
f85c06be | 463 | int err; |
34dc7c2f BB |
464 | |
465 | ASSERT(tx->tx_txg == 0); | |
466 | ||
34dc7c2f BB |
467 | dmu_tx_count_dnode(txh); |
468 | ||
3ec3bc21 BB |
469 | /* |
470 | * Modifying a almost-full microzap is around the worst case (128KB) | |
471 | * | |
472 | * If it is a fat zap, the worst case would be 7*16KB=112KB: | |
473 | * - 3 blocks overwritten: target leaf, ptrtbl block, header block | |
474 | * - 4 new blocks written if adding: | |
475 | * - 2 blocks for possibly split leaves, | |
476 | * - 2 grown ptrtbl blocks | |
477 | */ | |
424fd7c3 | 478 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, |
3ec3bc21 BB |
479 | MZAP_MAX_BLKSZ, FTAG); |
480 | ||
481 | if (dn == NULL) | |
34dc7c2f | 482 | return; |
34dc7c2f | 483 | |
9ae529ec | 484 | ASSERT3U(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP); |
34dc7c2f | 485 | |
3ec3bc21 | 486 | if (dn->dn_maxblkid == 0 || name == NULL) { |
34dc7c2f | 487 | /* |
3ec3bc21 BB |
488 | * This is a microzap (only one block), or we don't know |
489 | * the name. Check the first block for i/o errors. | |
34dc7c2f BB |
490 | */ |
491 | err = dmu_tx_check_ioerr(NULL, dn, 0, 0); | |
3ec3bc21 | 492 | if (err != 0) { |
34dc7c2f | 493 | tx->tx_err = err; |
f85c06be | 494 | } |
3ec3bc21 | 495 | } else { |
34dc7c2f | 496 | /* |
3ec3bc21 BB |
497 | * Access the name so that we'll check for i/o errors to |
498 | * the leaf blocks, etc. We ignore ENOENT, as this name | |
499 | * may not yet exist. | |
34dc7c2f | 500 | */ |
2bce8049 | 501 | err = zap_lookup_by_dnode(dn, name, 8, 0, NULL); |
3ec3bc21 | 502 | if (err == EIO || err == ECKSUM || err == ENXIO) { |
34dc7c2f | 503 | tx->tx_err = err; |
f85c06be GM |
504 | } |
505 | } | |
34dc7c2f BB |
506 | } |
507 | ||
0eef1bde | 508 | void |
509 | dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) | |
510 | { | |
511 | dmu_tx_hold_t *txh; | |
512 | ||
66eead53 | 513 | ASSERT0(tx->tx_txg); |
0eef1bde | 514 | |
515 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
516 | object, THT_ZAP, add, (uintptr_t)name); | |
66eead53 | 517 | if (txh != NULL) |
9522bd24 | 518 | dmu_tx_hold_zap_impl(txh, name); |
0eef1bde | 519 | } |
520 | ||
521 | void | |
522 | dmu_tx_hold_zap_by_dnode(dmu_tx_t *tx, dnode_t *dn, int add, const char *name) | |
523 | { | |
524 | dmu_tx_hold_t *txh; | |
525 | ||
66eead53 | 526 | ASSERT0(tx->tx_txg); |
0eef1bde | 527 | ASSERT(dn != NULL); |
528 | ||
529 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_ZAP, add, (uintptr_t)name); | |
66eead53 | 530 | if (txh != NULL) |
9522bd24 | 531 | dmu_tx_hold_zap_impl(txh, name); |
0eef1bde | 532 | } |
533 | ||
34dc7c2f BB |
534 | void |
535 | dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object) | |
536 | { | |
537 | dmu_tx_hold_t *txh; | |
538 | ||
539 | ASSERT(tx->tx_txg == 0); | |
540 | ||
541 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
542 | object, THT_BONUS, 0, 0); | |
543 | if (txh) | |
544 | dmu_tx_count_dnode(txh); | |
545 | } | |
546 | ||
0eef1bde | 547 | void |
548 | dmu_tx_hold_bonus_by_dnode(dmu_tx_t *tx, dnode_t *dn) | |
549 | { | |
550 | dmu_tx_hold_t *txh; | |
551 | ||
66eead53 | 552 | ASSERT0(tx->tx_txg); |
0eef1bde | 553 | |
554 | txh = dmu_tx_hold_dnode_impl(tx, dn, THT_BONUS, 0, 0); | |
555 | if (txh) | |
556 | dmu_tx_count_dnode(txh); | |
557 | } | |
558 | ||
34dc7c2f BB |
559 | void |
560 | dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space) | |
561 | { | |
562 | dmu_tx_hold_t *txh; | |
7d637211 | 563 | |
34dc7c2f BB |
564 | ASSERT(tx->tx_txg == 0); |
565 | ||
566 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, | |
567 | DMU_NEW_OBJECT, THT_SPACE, space, 0); | |
424fd7c3 TS |
568 | if (txh) { |
569 | (void) zfs_refcount_add_many( | |
570 | &txh->txh_space_towrite, space, FTAG); | |
571 | } | |
34dc7c2f BB |
572 | } |
573 | ||
#ifdef ZFS_DEBUG
/*
 * Debug-only verification that the dbuf being dirtied is covered by one
 * of this transaction's holds.  Walks tx_holds looking for a hold that
 * matches both the dbuf's object and its block offset; panics if none
 * is found.  Skipped for syncing context (tx_anyobj) and for the meta
 * dnode.
 */
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
	boolean_t match_object = B_FALSE;
	boolean_t match_offset = B_FALSE;

	DB_DNODE_ENTER(db);
	dnode_t *dn = DB_DNODE(db);
	ASSERT(tx->tx_txg != 0);
	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
	ASSERT3U(dn->dn_object, ==, db->db.db_object);

	/* Syncing context may dirty anything; nothing to verify. */
	if (tx->tx_anyobj) {
		DB_DNODE_EXIT(db);
		return;
	}

	/* XXX No checking on the meta dnode for now */
	if (db->db.db_object == DMU_META_DNODE_OBJECT) {
		DB_DNODE_EXIT(db);
		return;
	}

	for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
	    txh = list_next(&tx->tx_holds, txh)) {
		ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
		if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
			match_object = TRUE;
		if (txh->txh_dnode == NULL || txh->txh_dnode == dn) {
			/*
			 * Convert the hold's byte range (arg1..arg1+arg2-1)
			 * into block numbers at this dbuf's level, so it can
			 * be compared against db_blkid.
			 */
			int datablkshift = dn->dn_datablkshift ?
			    dn->dn_datablkshift : SPA_MAXBLOCKSHIFT;
			int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
			int shift = datablkshift + epbs * db->db_level;
			uint64_t beginblk = shift >= 64 ? 0 :
			    (txh->txh_arg1 >> shift);
			uint64_t endblk = shift >= 64 ? 0 :
			    ((txh->txh_arg1 + txh->txh_arg2 - 1) >> shift);
			uint64_t blkid = db->db_blkid;

			/* XXX txh_arg2 better not be zero... */

			dprintf("found txh type %x beginblk=%llx endblk=%llx\n",
			    txh->txh_type, (u_longlong_t)beginblk,
			    (u_longlong_t)endblk);

			switch (txh->txh_type) {
			case THT_WRITE:
				if (blkid >= beginblk && blkid <= endblk)
					match_offset = TRUE;
				/*
				 * We will let this hold work for the bonus
				 * or spill buffer so that we don't need to
				 * hold it when creating a new object.
				 */
				if (blkid == DMU_BONUS_BLKID ||
				    blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				/*
				 * They might have to increase nlevels,
				 * thus dirtying the new TLIBs.  Or they
				 * might have to change the block size,
				 * thus dirtying the new lvl=0 blk=0.
				 */
				if (blkid == 0)
					match_offset = TRUE;
				break;
			case THT_FREE:
				/*
				 * We will dirty all the level 1 blocks in
				 * the free range and perhaps the first and
				 * last level 0 block.
				 */
				if (blkid >= beginblk && (blkid <= endblk ||
				    txh->txh_arg2 == DMU_OBJECT_END))
					match_offset = TRUE;
				break;
			case THT_SPILL:
				if (blkid == DMU_SPILL_BLKID)
					match_offset = TRUE;
				break;
			case THT_BONUS:
				if (blkid == DMU_BONUS_BLKID)
					match_offset = TRUE;
				break;
			case THT_ZAP:
				match_offset = TRUE;
				break;
			case THT_NEWOBJECT:
				match_object = TRUE;
				break;
			default:
				cmn_err(CE_PANIC, "bad txh_type %d",
				    txh->txh_type);
			}
		}
		if (match_object && match_offset) {
			DB_DNODE_EXIT(db);
			return;
		}
	}
	DB_DNODE_EXIT(db);
	panic("dirtying dbuf obj=%llx lvl=%u blkid=%llx but not tx_held\n",
	    (u_longlong_t)db->db.db_object, db->db_level,
	    (u_longlong_t)db->db_blkid);
}
#endif
681 | ||
e8b96c60 MA |
682 | /* |
683 | * If we can't do 10 iops, something is wrong. Let us go ahead | |
684 | * and hit zfs_dirty_data_max. | |
685 | */ | |
18168da7 | 686 | static const hrtime_t zfs_delay_max_ns = 100 * MICROSEC; /* 100 milliseconds */ |
e8b96c60 MA |
687 | |
688 | /* | |
689 | * We delay transactions when we've determined that the backend storage | |
690 | * isn't able to accommodate the rate of incoming writes. | |
691 | * | |
692 | * If there is already a transaction waiting, we delay relative to when | |
693 | * that transaction finishes waiting. This way the calculated min_time | |
694 | * is independent of the number of threads concurrently executing | |
695 | * transactions. | |
696 | * | |
697 | * If we are the only waiter, wait relative to when the transaction | |
698 | * started, rather than the current time. This credits the transaction for | |
699 | * "time already served", e.g. reading indirect blocks. | |
700 | * | |
701 | * The minimum time for a transaction to take is calculated as: | |
702 | * min_time = scale * (dirty - min) / (max - dirty) | |
703 | * min_time is then capped at zfs_delay_max_ns. | |
704 | * | |
705 | * The delay has two degrees of freedom that can be adjusted via tunables. | |
706 | * The percentage of dirty data at which we start to delay is defined by | |
707 | * zfs_delay_min_dirty_percent. This should typically be at or above | |
708 | * zfs_vdev_async_write_active_max_dirty_percent so that we only start to | |
709 | * delay after writing at full speed has failed to keep up with the incoming | |
710 | * write rate. The scale of the curve is defined by zfs_delay_scale. Roughly | |
711 | * speaking, this variable determines the amount of delay at the midpoint of | |
712 | * the curve. | |
713 | * | |
714 | * delay | |
715 | * 10ms +-------------------------------------------------------------*+ | |
716 | * | *| | |
717 | * 9ms + *+ | |
718 | * | *| | |
719 | * 8ms + *+ | |
720 | * | * | | |
721 | * 7ms + * + | |
722 | * | * | | |
723 | * 6ms + * + | |
724 | * | * | | |
725 | * 5ms + * + | |
726 | * | * | | |
727 | * 4ms + * + | |
728 | * | * | | |
729 | * 3ms + * + | |
730 | * | * | | |
731 | * 2ms + (midpoint) * + | |
732 | * | | ** | | |
733 | * 1ms + v *** + | |
734 | * | zfs_delay_scale ----------> ******** | | |
735 | * 0 +-------------------------------------*********----------------+ | |
736 | * 0% <- zfs_dirty_data_max -> 100% | |
737 | * | |
738 | * Note that since the delay is added to the outstanding time remaining on the | |
739 | * most recent transaction, the delay is effectively the inverse of IOPS. | |
740 | * Here the midpoint of 500us translates to 2000 IOPS. The shape of the curve | |
741 | * was chosen such that small changes in the amount of accumulated dirty data | |
742 | * in the first 3/4 of the curve yield relatively small differences in the | |
743 | * amount of delay. | |
744 | * | |
745 | * The effects can be easier to understand when the amount of delay is | |
746 | * represented on a log scale: | |
747 | * | |
748 | * delay | |
749 | * 100ms +-------------------------------------------------------------++ | |
750 | * + + | |
751 | * | | | |
752 | * + *+ | |
753 | * 10ms + *+ | |
754 | * + ** + | |
755 | * | (midpoint) ** | | |
756 | * + | ** + | |
757 | * 1ms + v **** + | |
758 | * + zfs_delay_scale ----------> ***** + | |
759 | * | **** | | |
760 | * + **** + | |
761 | * 100us + ** + | |
762 | * + * + | |
763 | * | * | | |
764 | * + * + | |
765 | * 10us + * + | |
766 | * + + | |
767 | * | | | |
768 | * + + | |
769 | * +--------------------------------------------------------------+ | |
770 | * 0% <- zfs_dirty_data_max -> 100% | |
771 | * | |
772 | * Note here that only as the amount of dirty data approaches its limit does | |
773 | * the delay start to increase rapidly. The goal of a properly tuned system | |
774 | * should be to keep the amount of dirty data out of that range by first | |
775 | * ensuring that the appropriate limits are set for the I/O scheduler to reach | |
776 | * optimal throughput on the backend storage, and then by changing the value | |
777 | * of zfs_delay_scale to increase the steepness of the curve. | |
778 | */ | |
/*
 * Delay this transaction in proportion to how close the pool's dirty
 * data is to zfs_dirty_data_max (see the curve description above).
 * `dirty` is the current amount of dirty data, already verified by the
 * caller to be below the max.  Sleeps until the computed wakeup time,
 * chained off the most recent waiter so the delay is independent of
 * the number of concurrent waiters.
 */
static void
dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
{
	dsl_pool_t *dp = tx->tx_pool;
	uint64_t delay_min_bytes =
	    zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
	hrtime_t wakeup, min_tx_time, now;

	/* Below the delay threshold: no throttling. */
	if (dirty <= delay_min_bytes)
		return;

	/*
	 * The caller has already waited until we are under the max.
	 * We make them pass us the amount of dirty data so we don't
	 * have to handle the case of it being >= the max, which could
	 * cause a divide-by-zero if it's == the max.
	 */
	ASSERT3U(dirty, <, zfs_dirty_data_max);

	now = gethrtime();
	/* min_time = scale * (dirty - min) / (max - dirty), capped below. */
	min_tx_time = zfs_delay_scale *
	    (dirty - delay_min_bytes) / (zfs_dirty_data_max - dirty);
	min_tx_time = MIN(min_tx_time, zfs_delay_max_ns);
	/* Credit time the tx has already spent since creation. */
	if (now > tx->tx_start + min_tx_time)
		return;

	DTRACE_PROBE3(delay__mintime, dmu_tx_t *, tx, uint64_t, dirty,
	    uint64_t, min_tx_time);

	/* Chain our wakeup off the most recent waiter (see block comment). */
	mutex_enter(&dp->dp_lock);
	wakeup = MAX(tx->tx_start + min_tx_time,
	    dp->dp_last_wakeup + min_tx_time);
	dp->dp_last_wakeup = wakeup;
	mutex_exit(&dp->dp_lock);

	zfs_sleep_until(wakeup);
}
816 | ||
3ec3bc21 BB |
/*
 * This routine attempts to assign the transaction to a transaction group.
 * To do so, we must determine if there is sufficient free space on disk.
 *
 * If this is a "netfree" transaction (i.e. we called dmu_tx_mark_netfree()
 * on it), then it is assumed that there is sufficient free space,
 * unless there's insufficient slop space in the pool (see the comment
 * above spa_slop_shift in spa_misc.c).
 *
 * If it is not a "netfree" transaction, then if the data already on disk
 * is over the allowed usage (e.g. quota), this will fail with EDQUOT or
 * ENOSPC.  Otherwise, if the current rough estimate of pending changes,
 * plus the rough estimate of this transaction's changes, may exceed the
 * allowed usage, then this will fail with ERESTART, which will cause the
 * caller to wait for the pending changes to be written to disk (by waiting
 * for the next TXG to open), and then check the space usage again.
 *
 * The rough estimate of pending changes is comprised of the sum of:
 *
 * - this transaction's holds' txh_space_towrite
 *
 * - dd_tempreserved[], which is the sum of in-flight transactions'
 *   holds' txh_space_towrite (i.e. those transactions that have called
 *   dmu_tx_assign() but not yet called dmu_tx_commit()).
 *
 * - dd_space_towrite[], which is the amount of dirtied dbufs.
 *
 * Note that all of these values are inflated by spa_get_worst_case_asize(),
 * which means that we may get ERESTART well before we are actually in danger
 * of running out of space, but this also mitigates any small inaccuracies
 * in the rough estimate (e.g. txh_space_towrite doesn't take into account
 * indirect blocks, and dd_space_towrite[] doesn't take into account changes
 * to the MOS).
 *
 * Note that due to this algorithm, it is possible to exceed the allowed
 * usage by one transaction.  Also, as we approach the allowed usage,
 * we will allow a very limited amount of changes into each TXG, thus
 * decreasing performance.
 */
static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
	spa_t *spa = tx->tx_pool->dp_spa;

	/* The tx must not already be assigned to a txg. */
	ASSERT0(tx->tx_txg);

	/* An error recorded by an earlier dmu_tx_hold_*() is fatal here. */
	if (tx->tx_err) {
		DMU_TX_STAT_BUMP(dmu_tx_error);
		return (tx->tx_err);
	}

	if (spa_suspended(spa)) {
		DMU_TX_STAT_BUMP(dmu_tx_suspended);

		/*
		 * If the user has indicated a blocking failure mode
		 * then return ERESTART which will block in dmu_tx_wait().
		 * Otherwise, return EIO so that an error can get
		 * propagated back to the VOP calls.
		 *
		 * Note that we always honor the txg_how flag regardless
		 * of the failuremode setting.
		 */
		if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
		    !(txg_how & TXG_WAIT))
			return (SET_ERROR(EIO));

		return (SET_ERROR(ERESTART));
	}

	/*
	 * Throttle checks are skipped if the tx was already delayed
	 * (tx_dirty_delayed), so a caller that has waited once is not
	 * penalized again.
	 */
	if (!tx->tx_dirty_delayed &&
	    dsl_pool_wrlog_over_max(tx->tx_pool)) {
		DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
		return (SET_ERROR(ERESTART));
	}

	if (!tx->tx_dirty_delayed &&
	    dsl_pool_need_dirty_delay(tx->tx_pool)) {
		/* Tell dmu_tx_wait() to wait on dirty-data space. */
		tx->tx_wait_dirty = B_TRUE;
		DMU_TX_STAT_BUMP(dmu_tx_dirty_delay);
		return (SET_ERROR(ERESTART));
	}

	tx->tx_txg = txg_hold_open(tx->tx_pool, &tx->tx_txgh);
	tx->tx_needassign_txh = NULL;

	/*
	 * NB: No error returns are allowed after txg_hold_open, but
	 * before processing the dnode holds, due to the
	 * dmu_tx_unassign() logic.
	 */

	uint64_t towrite = 0;
	uint64_t tohold = 0;
	for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
	    txh = list_next(&tx->tx_holds, txh)) {
		dnode_t *dn = txh->txh_dnode;
		if (dn != NULL) {
			/*
			 * This thread can't hold the dn_struct_rwlock
			 * while assigning the tx, because this can lead to
			 * deadlock. Specifically, if this dnode is already
			 * assigned to an earlier txg, this thread may need
			 * to wait for that txg to sync (the ERESTART case
			 * below).  The other thread that has assigned this
			 * dnode to an earlier txg prevents this txg from
			 * syncing until its tx can complete (calling
			 * dmu_tx_commit()), but it may need to acquire the
			 * dn_struct_rwlock to do so (e.g. via
			 * dmu_buf_hold*()).
			 *
			 * Note that this thread can't hold the lock for
			 * read either, but the rwlock doesn't record
			 * enough information to make that assertion.
			 */
			ASSERT(!RW_WRITE_HELD(&dn->dn_struct_rwlock));

			mutex_enter(&dn->dn_mtx);
			if (dn->dn_assigned_txg == tx->tx_txg - 1) {
				mutex_exit(&dn->dn_mtx);
				/*
				 * Remember the conflicting hold so that
				 * dmu_tx_wait() knows which dnode to
				 * wait on.
				 */
				tx->tx_needassign_txh = txh;
				DMU_TX_STAT_BUMP(dmu_tx_group);
				return (SET_ERROR(ERESTART));
			}
			if (dn->dn_assigned_txg == 0)
				dn->dn_assigned_txg = tx->tx_txg;
			ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
			(void) zfs_refcount_add(&dn->dn_tx_holds, tx);
			mutex_exit(&dn->dn_mtx);
		}
		/* Tally the rough write/memory estimates from each hold. */
		towrite += zfs_refcount_count(&txh->txh_space_towrite);
		tohold += zfs_refcount_count(&txh->txh_memory_tohold);
	}

	/* needed allocation: worst-case estimate of write space */
	uint64_t asize = spa_get_worst_case_asize(tx->tx_pool->dp_spa, towrite);
	/* calculate memory footprint estimate */
	uint64_t memory = towrite + tohold;

	if (tx->tx_dir != NULL && asize != 0) {
		int err = dsl_dir_tempreserve_space(tx->tx_dir, memory,
		    asize, tx->tx_netfree, &tx->tx_tempreserve_cookie, tx);
		if (err != 0)
			return (err);
	}

	DMU_TX_STAT_BUMP(dmu_tx_assigned);

	return (0);
}
967 | ||
968 | static void | |
969 | dmu_tx_unassign(dmu_tx_t *tx) | |
970 | { | |
34dc7c2f BB |
971 | if (tx->tx_txg == 0) |
972 | return; | |
973 | ||
974 | txg_rele_to_quiesce(&tx->tx_txgh); | |
975 | ||
e49f1e20 WA |
976 | /* |
977 | * Walk the transaction's hold list, removing the hold on the | |
978 | * associated dnode, and notifying waiters if the refcount drops to 0. | |
979 | */ | |
3ec3bc21 | 980 | for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); |
981b2126 | 981 | txh && txh != tx->tx_needassign_txh; |
34dc7c2f BB |
982 | txh = list_next(&tx->tx_holds, txh)) { |
983 | dnode_t *dn = txh->txh_dnode; | |
984 | ||
985 | if (dn == NULL) | |
986 | continue; | |
987 | mutex_enter(&dn->dn_mtx); | |
988 | ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); | |
989 | ||
424fd7c3 | 990 | if (zfs_refcount_remove(&dn->dn_tx_holds, tx) == 0) { |
34dc7c2f BB |
991 | dn->dn_assigned_txg = 0; |
992 | cv_broadcast(&dn->dn_notxholds); | |
993 | } | |
994 | mutex_exit(&dn->dn_mtx); | |
995 | } | |
996 | ||
997 | txg_rele_to_sync(&tx->tx_txgh); | |
998 | ||
999 | tx->tx_lasttried_txg = tx->tx_txg; | |
1000 | tx->tx_txg = 0; | |
1001 | } | |
1002 | ||
1003 | /* | |
0735ecb3 | 1004 | * Assign tx to a transaction group; txg_how is a bitmask: |
34dc7c2f | 1005 | * |
0735ecb3 PS |
1006 | * If TXG_WAIT is set and the currently open txg is full, this function |
1007 | * will wait until there's a new txg. This should be used when no locks | |
1008 | * are being held. With this bit set, this function will only fail if | |
1009 | * we're truly out of space (or over quota). | |
34dc7c2f | 1010 | * |
0735ecb3 PS |
1011 | * If TXG_WAIT is *not* set and we can't assign into the currently open |
1012 | * txg without blocking, this function will return immediately with | |
1013 | * ERESTART. This should be used whenever locks are being held. On an | |
1014 | * ERESTART error, the caller should drop all locks, call dmu_tx_wait(), | |
1015 | * and try again. | |
e8b96c60 | 1016 | * |
0735ecb3 PS |
1017 | * If TXG_NOTHROTTLE is set, this indicates that this tx should not be |
1018 | * delayed due on the ZFS Write Throttle (see comments in dsl_pool.c for | |
1019 | * details on the throttle). This is used by the VFS operations, after | |
1020 | * they have already called dmu_tx_wait() (though most likely on a | |
1021 | * different tx). | |
84268b09 CS |
1022 | * |
1023 | * It is guaranteed that subsequent successful calls to dmu_tx_assign() | |
1024 | * will assign the tx to monotonically increasing txgs. Of course this is | |
1025 | * not strong monotonicity, because the same txg can be returned multiple | |
1026 | * times in a row. This guarantee holds both for subsequent calls from | |
1027 | * one thread and for multiple threads. For example, it is impossible to | |
1028 | * observe the following sequence of events: | |
1029 | * | |
1030 | * Thread 1 Thread 2 | |
1031 | * | |
1032 | * dmu_tx_assign(T1, ...) | |
1033 | * 1 <- dmu_tx_get_txg(T1) | |
1034 | * dmu_tx_assign(T2, ...) | |
1035 | * 2 <- dmu_tx_get_txg(T2) | |
1036 | * dmu_tx_assign(T3, ...) | |
1037 | * 1 <- dmu_tx_get_txg(T3) | |
34dc7c2f BB |
1038 | */ |
1039 | int | |
0735ecb3 | 1040 | dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) |
34dc7c2f BB |
1041 | { |
1042 | int err; | |
1043 | ||
1044 | ASSERT(tx->tx_txg == 0); | |
0735ecb3 | 1045 | ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE)); |
34dc7c2f BB |
1046 | ASSERT(!dsl_pool_sync_context(tx->tx_pool)); |
1047 | ||
13fe0198 | 1048 | /* If we might wait, we must not hold the config lock. */ |
0735ecb3 PS |
1049 | IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool)); |
1050 | ||
1051 | if ((txg_how & TXG_NOTHROTTLE)) | |
1052 | tx->tx_dirty_delayed = B_TRUE; | |
13fe0198 | 1053 | |
34dc7c2f BB |
1054 | while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { |
1055 | dmu_tx_unassign(tx); | |
1056 | ||
0735ecb3 | 1057 | if (err != ERESTART || !(txg_how & TXG_WAIT)) |
34dc7c2f BB |
1058 | return (err); |
1059 | ||
1060 | dmu_tx_wait(tx); | |
1061 | } | |
1062 | ||
1063 | txg_rele_to_quiesce(&tx->tx_txgh); | |
1064 | ||
1065 | return (0); | |
1066 | } | |
1067 | ||
/*
 * Block until the condition that made dmu_tx_try_assign() fail has
 * (likely) cleared, so the caller can retry dmu_tx_assign().  The tx
 * must be unassigned, and the pool config lock must not be held.
 */
void
dmu_tx_wait(dmu_tx_t *tx)
{
	spa_t *spa = tx->tx_pool->dp_spa;
	dsl_pool_t *dp = tx->tx_pool;
	hrtime_t before;

	ASSERT(tx->tx_txg == 0);
	ASSERT(!dsl_pool_config_held(tx->tx_pool));

	before = gethrtime();

	if (tx->tx_wait_dirty) {
		uint64_t dirty;

		/*
		 * dmu_tx_try_assign() has determined that we need to wait
		 * because we've consumed much or all of the dirty buffer
		 * space.
		 */
		mutex_enter(&dp->dp_lock);
		if (dp->dp_dirty_total >= zfs_dirty_data_max)
			DMU_TX_STAT_BUMP(dmu_tx_dirty_over_max);
		while (dp->dp_dirty_total >= zfs_dirty_data_max)
			cv_wait(&dp->dp_spaceavail_cv, &dp->dp_lock);
		dirty = dp->dp_dirty_total;
		mutex_exit(&dp->dp_lock);

		/* Apply the write-throttle delay for this dirty level. */
		dmu_tx_delay(tx, dirty);

		tx->tx_wait_dirty = B_FALSE;

		/*
		 * Note: setting tx_dirty_delayed only has effect if the
		 * caller used TX_WAIT.  Otherwise they are going to
		 * destroy this tx and try again.  The common case,
		 * zfs_write(), uses TX_WAIT.
		 */
		tx->tx_dirty_delayed = B_TRUE;
	} else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
		/*
		 * If the pool is suspended we need to wait until it
		 * is resumed.  Note that it's possible that the pool
		 * has become active after this thread has tried to
		 * obtain a tx.  If that's the case then tx_lasttried_txg
		 * would not have been set.
		 */
		txg_wait_synced(dp, spa_last_synced_txg(spa) + 1);
	} else if (tx->tx_needassign_txh) {
		/*
		 * A dnode in our hold list was assigned to an earlier
		 * txg; wait until that txg's holds on it are dropped.
		 */
		dnode_t *dn = tx->tx_needassign_txh->txh_dnode;

		mutex_enter(&dn->dn_mtx);
		while (dn->dn_assigned_txg == tx->tx_lasttried_txg - 1)
			cv_wait(&dn->dn_notxholds, &dn->dn_mtx);
		mutex_exit(&dn->dn_mtx);
		tx->tx_needassign_txh = NULL;
	} else {
		/*
		 * If we have a lot of dirty data just wait until we sync
		 * out a TXG at which point we'll hopefully have synced
		 * a portion of the changes.
		 */
		txg_wait_synced(dp, spa_last_synced_txg(spa) + 1);
	}

	/* Account the time spent waiting against this spa's tx stats. */
	spa_tx_assign_add_nsecs(spa, gethrtime() - before);
}
1135 | ||
f85c06be GM |
1136 | static void |
1137 | dmu_tx_destroy(dmu_tx_t *tx) | |
1138 | { | |
1139 | dmu_tx_hold_t *txh; | |
1140 | ||
1141 | while ((txh = list_head(&tx->tx_holds)) != NULL) { | |
1142 | dnode_t *dn = txh->txh_dnode; | |
1143 | ||
1144 | list_remove(&tx->tx_holds, txh); | |
424fd7c3 TS |
1145 | zfs_refcount_destroy_many(&txh->txh_space_towrite, |
1146 | zfs_refcount_count(&txh->txh_space_towrite)); | |
1147 | zfs_refcount_destroy_many(&txh->txh_memory_tohold, | |
1148 | zfs_refcount_count(&txh->txh_memory_tohold)); | |
f85c06be GM |
1149 | kmem_free(txh, sizeof (dmu_tx_hold_t)); |
1150 | if (dn != NULL) | |
1151 | dnode_rele(dn, tx); | |
1152 | } | |
1153 | ||
1154 | list_destroy(&tx->tx_callbacks); | |
1155 | list_destroy(&tx->tx_holds); | |
f85c06be GM |
1156 | kmem_free(tx, sizeof (dmu_tx_t)); |
1157 | } | |
1158 | ||
34dc7c2f BB |
1159 | void |
1160 | dmu_tx_commit(dmu_tx_t *tx) | |
1161 | { | |
34dc7c2f BB |
1162 | ASSERT(tx->tx_txg != 0); |
1163 | ||
e49f1e20 WA |
1164 | /* |
1165 | * Go through the transaction's hold list and remove holds on | |
1166 | * associated dnodes, notifying waiters if no holds remain. | |
1167 | */ | |
1c27024e | 1168 | for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL; |
f85c06be | 1169 | txh = list_next(&tx->tx_holds, txh)) { |
34dc7c2f BB |
1170 | dnode_t *dn = txh->txh_dnode; |
1171 | ||
34dc7c2f BB |
1172 | if (dn == NULL) |
1173 | continue; | |
f85c06be | 1174 | |
34dc7c2f BB |
1175 | mutex_enter(&dn->dn_mtx); |
1176 | ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); | |
1177 | ||
424fd7c3 | 1178 | if (zfs_refcount_remove(&dn->dn_tx_holds, tx) == 0) { |
34dc7c2f BB |
1179 | dn->dn_assigned_txg = 0; |
1180 | cv_broadcast(&dn->dn_notxholds); | |
1181 | } | |
1182 | mutex_exit(&dn->dn_mtx); | |
34dc7c2f BB |
1183 | } |
1184 | ||
1185 | if (tx->tx_tempreserve_cookie) | |
1186 | dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx); | |
1187 | ||
428870ff BB |
1188 | if (!list_is_empty(&tx->tx_callbacks)) |
1189 | txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks); | |
1190 | ||
34dc7c2f BB |
1191 | if (tx->tx_anyobj == FALSE) |
1192 | txg_rele_to_sync(&tx->tx_txgh); | |
428870ff | 1193 | |
f85c06be | 1194 | dmu_tx_destroy(tx); |
34dc7c2f BB |
1195 | } |
1196 | ||
1197 | void | |
1198 | dmu_tx_abort(dmu_tx_t *tx) | |
1199 | { | |
34dc7c2f BB |
1200 | ASSERT(tx->tx_txg == 0); |
1201 | ||
428870ff BB |
1202 | /* |
1203 | * Call any registered callbacks with an error code. | |
1204 | */ | |
1205 | if (!list_is_empty(&tx->tx_callbacks)) | |
28caa74b | 1206 | dmu_tx_do_callbacks(&tx->tx_callbacks, SET_ERROR(ECANCELED)); |
428870ff | 1207 | |
f85c06be | 1208 | dmu_tx_destroy(tx); |
34dc7c2f BB |
1209 | } |
1210 | ||
/*
 * Return the txg this tx is assigned to.  The caller must have
 * successfully assigned the tx (tx_txg != 0).
 */
uint64_t
dmu_tx_get_txg(dmu_tx_t *tx)
{
	ASSERT(tx->tx_txg != 0);
	return (tx->tx_txg);
}
428870ff | 1217 | |
13fe0198 MA |
/*
 * Return the dsl_pool_t this tx operates against.
 */
dsl_pool_t *
dmu_tx_pool(dmu_tx_t *tx)
{
	ASSERT(tx->tx_pool != NULL);
	return (tx->tx_pool);
}
1224 | ||
428870ff BB |
1225 | void |
1226 | dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) | |
1227 | { | |
1228 | dmu_tx_callback_t *dcb; | |
1229 | ||
79c76d5b | 1230 | dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP); |
428870ff BB |
1231 | |
1232 | dcb->dcb_func = func; | |
1233 | dcb->dcb_data = data; | |
1234 | ||
1235 | list_insert_tail(&tx->tx_callbacks, dcb); | |
1236 | } | |
1237 | ||
1238 | /* | |
1239 | * Call all the commit callbacks on a list, with a given error code. | |
1240 | */ | |
1241 | void | |
1242 | dmu_tx_do_callbacks(list_t *cb_list, int error) | |
1243 | { | |
1244 | dmu_tx_callback_t *dcb; | |
1245 | ||
823d48bf | 1246 | while ((dcb = list_tail(cb_list)) != NULL) { |
428870ff BB |
1247 | list_remove(cb_list, dcb); |
1248 | dcb->dcb_func(dcb->dcb_data, error); | |
1249 | kmem_free(dcb, sizeof (dmu_tx_callback_t)); | |
1250 | } | |
1251 | } | |
1252 | ||
1253 | /* | |
1254 | * Interface to hold a bunch of attributes. | |
1255 | * used for creating new files. | |
1256 | * attrsize is the total size of all attributes | |
1257 | * to be added during object creation | |
1258 | * | |
1259 | * For updating/adding a single attribute dmu_tx_hold_sa() should be used. | |
1260 | */ | |
1261 | ||
1262 | /* | |
1263 | * hold necessary attribute name for attribute registration. | |
1264 | * should be a very rare case where this is needed. If it does | |
1265 | * happen it would only happen on the first write to the file system. | |
1266 | */ | |
1267 | static void | |
1268 | dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx) | |
1269 | { | |
428870ff BB |
1270 | if (!sa->sa_need_attr_registration) |
1271 | return; | |
1272 | ||
3ec3bc21 | 1273 | for (int i = 0; i != sa->sa_num_attrs; i++) { |
428870ff BB |
1274 | if (!sa->sa_attr_table[i].sa_registered) { |
1275 | if (sa->sa_reg_attr_obj) | |
1276 | dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj, | |
1277 | B_TRUE, sa->sa_attr_table[i].sa_name); | |
1278 | else | |
1279 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, | |
1280 | B_TRUE, sa->sa_attr_table[i].sa_name); | |
1281 | } | |
1282 | } | |
1283 | } | |
1284 | ||
428870ff BB |
1285 | void |
1286 | dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) | |
1287 | { | |
9631681b | 1288 | dmu_tx_hold_t *txh; |
428870ff | 1289 | |
9631681b BB |
1290 | txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object, |
1291 | THT_SPILL, 0, 0); | |
1292 | if (txh != NULL) | |
424fd7c3 | 1293 | (void) zfs_refcount_add_many(&txh->txh_space_towrite, |
9631681b | 1294 | SPA_OLD_MAXBLOCKSIZE, FTAG); |
428870ff BB |
1295 | } |
1296 | ||
1297 | void | |
1298 | dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize) | |
1299 | { | |
1300 | sa_os_t *sa = tx->tx_objset->os_sa; | |
1301 | ||
1302 | dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); | |
1303 | ||
1304 | if (tx->tx_objset->os_sa->sa_master_obj == 0) | |
1305 | return; | |
1306 | ||
3ec3bc21 | 1307 | if (tx->tx_objset->os_sa->sa_layout_attr_obj) { |
428870ff | 1308 | dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); |
3ec3bc21 | 1309 | } else { |
428870ff BB |
1310 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); |
1311 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); | |
1312 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1313 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1314 | } | |
1315 | ||
1316 | dmu_tx_sa_registration_hold(sa, tx); | |
1317 | ||
50c957f7 | 1318 | if (attrsize <= DN_OLD_MAX_BONUSLEN && !sa->sa_force_spill) |
428870ff BB |
1319 | return; |
1320 | ||
1321 | (void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT, | |
1322 | THT_SPILL, 0, 0); | |
1323 | } | |
1324 | ||
1325 | /* | |
1326 | * Hold SA attribute | |
1327 | * | |
1328 | * dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *, attribute, add, size) | |
1329 | * | |
1330 | * variable_size is the total size of all variable sized attributes | |
1331 | * passed to this function. It is not the total size of all | |
1332 | * variable size attributes that *may* exist on this object. | |
1333 | */ | |
1334 | void | |
1335 | dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow) | |
1336 | { | |
1337 | uint64_t object; | |
1338 | sa_os_t *sa = tx->tx_objset->os_sa; | |
1339 | ||
1340 | ASSERT(hdl != NULL); | |
1341 | ||
1342 | object = sa_handle_object(hdl); | |
1343 | ||
0eb8ba6a MA |
1344 | dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; |
1345 | DB_DNODE_ENTER(db); | |
1346 | dmu_tx_hold_bonus_by_dnode(tx, DB_DNODE(db)); | |
1347 | DB_DNODE_EXIT(db); | |
428870ff BB |
1348 | |
1349 | if (tx->tx_objset->os_sa->sa_master_obj == 0) | |
1350 | return; | |
1351 | ||
1352 | if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 || | |
1353 | tx->tx_objset->os_sa->sa_layout_attr_obj == 0) { | |
1354 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS); | |
1355 | dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY); | |
1356 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1357 | dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); | |
1358 | } | |
1359 | ||
1360 | dmu_tx_sa_registration_hold(sa, tx); | |
1361 | ||
1362 | if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj) | |
1363 | dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL); | |
1364 | ||
572e2857 | 1365 | if (sa->sa_force_spill || may_grow || hdl->sa_spill) { |
428870ff BB |
1366 | ASSERT(tx->tx_txg == 0); |
1367 | dmu_tx_hold_spill(tx, object); | |
572e2857 | 1368 | } else { |
572e2857 BB |
1369 | dnode_t *dn; |
1370 | ||
1371 | DB_DNODE_ENTER(db); | |
1372 | dn = DB_DNODE(db); | |
1373 | if (dn->dn_have_spill) { | |
1374 | ASSERT(tx->tx_txg == 0); | |
1375 | dmu_tx_hold_spill(tx, object); | |
1376 | } | |
1377 | DB_DNODE_EXIT(db); | |
428870ff BB |
1378 | } |
1379 | } | |
c28b2279 | 1380 | |
570827e1 BB |
1381 | void |
1382 | dmu_tx_init(void) | |
1383 | { | |
1384 | dmu_tx_ksp = kstat_create("zfs", 0, "dmu_tx", "misc", | |
1385 | KSTAT_TYPE_NAMED, sizeof (dmu_tx_stats) / sizeof (kstat_named_t), | |
1386 | KSTAT_FLAG_VIRTUAL); | |
1387 | ||
1388 | if (dmu_tx_ksp != NULL) { | |
1389 | dmu_tx_ksp->ks_data = &dmu_tx_stats; | |
1390 | kstat_install(dmu_tx_ksp); | |
1391 | } | |
1392 | } | |
1393 | ||
1394 | void | |
1395 | dmu_tx_fini(void) | |
1396 | { | |
1397 | if (dmu_tx_ksp != NULL) { | |
1398 | kstat_delete(dmu_tx_ksp); | |
1399 | dmu_tx_ksp = NULL; | |
1400 | } | |
1401 | } | |
1402 | ||
#if defined(_KERNEL)
/* Export the public DMU transaction API to other kernel modules. */
EXPORT_SYMBOL(dmu_tx_create);
EXPORT_SYMBOL(dmu_tx_hold_write);
EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_free);
EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_zap);
EXPORT_SYMBOL(dmu_tx_hold_zap_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_bonus);
EXPORT_SYMBOL(dmu_tx_hold_bonus_by_dnode);
EXPORT_SYMBOL(dmu_tx_abort);
EXPORT_SYMBOL(dmu_tx_assign);
EXPORT_SYMBOL(dmu_tx_wait);
EXPORT_SYMBOL(dmu_tx_commit);
EXPORT_SYMBOL(dmu_tx_mark_netfree);
EXPORT_SYMBOL(dmu_tx_get_txg);
EXPORT_SYMBOL(dmu_tx_callback_register);
EXPORT_SYMBOL(dmu_tx_do_callbacks);
EXPORT_SYMBOL(dmu_tx_hold_spill);
EXPORT_SYMBOL(dmu_tx_hold_sa_create);
EXPORT_SYMBOL(dmu_tx_hold_sa);
#endif