]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved. | |
23 | * Use is subject to license terms. | |
24 | */ | |
25 | ||
26 | #pragma ident "@(#)dsl_dataset.c 1.42 08/04/28 SMI" | |
27 | ||
28 | #include <sys/dmu_objset.h> | |
29 | #include <sys/dsl_dataset.h> | |
30 | #include <sys/dsl_dir.h> | |
31 | #include <sys/dsl_prop.h> | |
32 | #include <sys/dsl_synctask.h> | |
33 | #include <sys/dmu_traverse.h> | |
34 | #include <sys/dmu_tx.h> | |
35 | #include <sys/arc.h> | |
36 | #include <sys/zio.h> | |
37 | #include <sys/zap.h> | |
38 | #include <sys/unique.h> | |
39 | #include <sys/zfs_context.h> | |
40 | #include <sys/zfs_ioctl.h> | |
41 | #include <sys/spa.h> | |
42 | #include <sys/sunddi.h> | |
43 | ||
44 | static dsl_checkfunc_t dsl_dataset_destroy_begin_check; | |
45 | static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; | |
46 | static dsl_checkfunc_t dsl_dataset_rollback_check; | |
47 | static dsl_syncfunc_t dsl_dataset_rollback_sync; | |
48 | static dsl_syncfunc_t dsl_dataset_set_reservation_sync; | |
49 | ||
50 | #define DS_REF_MAX (1ULL << 62) | |
51 | ||
52 | #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE | |
53 | ||
54 | /* | |
55 | * We use weighted reference counts to express the various forms of exclusion | |
56 | * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open | |
57 | * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. | |
58 | * This makes the exclusion logic simple: the total refcnt for all opens cannot | |
59 | * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their | |
60 | * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume | |
61 | * just over half of the refcnt space, so there can't be more than one, but it | |
62 | * can peacefully coexist with any number of STANDARD opens. | |
63 | */ | |
64 | static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { | |
65 | 0, /* DS_MODE_NONE - invalid */ | |
66 | 1, /* DS_MODE_STANDARD - unlimited number */ | |
67 | (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ | |
68 | DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ | |
69 | }; | |
70 | ||
71 | /* | |
72 | * Figure out how much of this delta should be propogated to the dsl_dir | |
73 | * layer. If there's a refreservation, that space has already been | |
74 | * partially accounted for in our ancestors. | |
75 | */ | |
76 | static int64_t | |
77 | parent_delta(dsl_dataset_t *ds, int64_t delta) | |
78 | { | |
79 | uint64_t old_bytes, new_bytes; | |
80 | ||
81 | if (ds->ds_reserved == 0) | |
82 | return (delta); | |
83 | ||
84 | old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); | |
85 | new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); | |
86 | ||
87 | ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); | |
88 | return (new_bytes - old_bytes); | |
89 | } | |
90 | ||
91 | void | |
92 | dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
93 | { | |
94 | int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); | |
95 | int compressed = BP_GET_PSIZE(bp); | |
96 | int uncompressed = BP_GET_UCSIZE(bp); | |
97 | int64_t delta; | |
98 | ||
99 | dprintf_bp(bp, "born, ds=%p\n", ds); | |
100 | ||
101 | ASSERT(dmu_tx_is_syncing(tx)); | |
102 | /* It could have been compressed away to nothing */ | |
103 | if (BP_IS_HOLE(bp)) | |
104 | return; | |
105 | ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); | |
106 | ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); | |
107 | if (ds == NULL) { | |
108 | /* | |
109 | * Account for the meta-objset space in its placeholder | |
110 | * dsl_dir. | |
111 | */ | |
112 | ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ | |
113 | dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
114 | used, compressed, uncompressed, tx); | |
115 | dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
116 | return; | |
117 | } | |
118 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
119 | mutex_enter(&ds->ds_lock); | |
120 | delta = parent_delta(ds, used); | |
121 | ds->ds_phys->ds_used_bytes += used; | |
122 | ds->ds_phys->ds_compressed_bytes += compressed; | |
123 | ds->ds_phys->ds_uncompressed_bytes += uncompressed; | |
124 | ds->ds_phys->ds_unique_bytes += used; | |
125 | mutex_exit(&ds->ds_lock); | |
126 | dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx); | |
127 | } | |
128 | ||
129 | void | |
130 | dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, | |
131 | dmu_tx_t *tx) | |
132 | { | |
133 | int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); | |
134 | int compressed = BP_GET_PSIZE(bp); | |
135 | int uncompressed = BP_GET_UCSIZE(bp); | |
136 | ||
137 | ASSERT(dmu_tx_is_syncing(tx)); | |
138 | /* No block pointer => nothing to free */ | |
139 | if (BP_IS_HOLE(bp)) | |
140 | return; | |
141 | ||
142 | ASSERT(used > 0); | |
143 | if (ds == NULL) { | |
144 | int err; | |
145 | /* | |
146 | * Account for the meta-objset space in its placeholder | |
147 | * dataset. | |
148 | */ | |
149 | err = arc_free(pio, tx->tx_pool->dp_spa, | |
150 | tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); | |
151 | ASSERT(err == 0); | |
152 | ||
153 | dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, | |
154 | -used, -compressed, -uncompressed, tx); | |
155 | dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); | |
156 | return; | |
157 | } | |
158 | ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); | |
159 | ||
160 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
161 | ||
162 | if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { | |
163 | int err; | |
164 | int64_t delta; | |
165 | ||
166 | dprintf_bp(bp, "freeing: %s", ""); | |
167 | err = arc_free(pio, tx->tx_pool->dp_spa, | |
168 | tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); | |
169 | ASSERT(err == 0); | |
170 | ||
171 | mutex_enter(&ds->ds_lock); | |
172 | ASSERT(ds->ds_phys->ds_unique_bytes >= used || | |
173 | !DS_UNIQUE_IS_ACCURATE(ds)); | |
174 | delta = parent_delta(ds, -used); | |
175 | ds->ds_phys->ds_unique_bytes -= used; | |
176 | mutex_exit(&ds->ds_lock); | |
177 | dsl_dir_diduse_space(ds->ds_dir, | |
178 | delta, -compressed, -uncompressed, tx); | |
179 | } else { | |
180 | dprintf_bp(bp, "putting on dead list: %s", ""); | |
181 | VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); | |
182 | ASSERT3U(ds->ds_prev->ds_object, ==, | |
183 | ds->ds_phys->ds_prev_snap_obj); | |
184 | ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); | |
185 | /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ | |
186 | if (ds->ds_prev->ds_phys->ds_next_snap_obj == | |
187 | ds->ds_object && bp->blk_birth > | |
188 | ds->ds_prev->ds_phys->ds_prev_snap_txg) { | |
189 | dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
190 | mutex_enter(&ds->ds_prev->ds_lock); | |
191 | ds->ds_prev->ds_phys->ds_unique_bytes += used; | |
192 | mutex_exit(&ds->ds_prev->ds_lock); | |
193 | } | |
194 | } | |
195 | mutex_enter(&ds->ds_lock); | |
196 | ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); | |
197 | ds->ds_phys->ds_used_bytes -= used; | |
198 | ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); | |
199 | ds->ds_phys->ds_compressed_bytes -= compressed; | |
200 | ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); | |
201 | ds->ds_phys->ds_uncompressed_bytes -= uncompressed; | |
202 | mutex_exit(&ds->ds_lock); | |
203 | } | |
204 | ||
205 | uint64_t | |
206 | dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) | |
207 | { | |
208 | uint64_t trysnap = 0; | |
209 | ||
210 | if (ds == NULL) | |
211 | return (0); | |
212 | /* | |
213 | * The snapshot creation could fail, but that would cause an | |
214 | * incorrect FALSE return, which would only result in an | |
215 | * overestimation of the amount of space that an operation would | |
216 | * consume, which is OK. | |
217 | * | |
218 | * There's also a small window where we could miss a pending | |
219 | * snapshot, because we could set the sync task in the quiescing | |
220 | * phase. So this should only be used as a guess. | |
221 | */ | |
222 | if (ds->ds_trysnap_txg > | |
223 | spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) | |
224 | trysnap = ds->ds_trysnap_txg; | |
225 | return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); | |
226 | } | |
227 | ||
228 | int | |
229 | dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) | |
230 | { | |
231 | return (blk_birth > dsl_dataset_prev_snap_txg(ds)); | |
232 | } | |
233 | ||
234 | /* ARGSUSED */ | |
235 | static void | |
236 | dsl_dataset_evict(dmu_buf_t *db, void *dsv) | |
237 | { | |
238 | dsl_dataset_t *ds = dsv; | |
239 | ||
240 | /* open_refcount == DS_REF_MAX when deleting */ | |
241 | ASSERT(ds->ds_open_refcount == 0 || | |
242 | ds->ds_open_refcount == DS_REF_MAX); | |
243 | ||
244 | dprintf_ds(ds, "evicting %s\n", ""); | |
245 | ||
246 | unique_remove(ds->ds_fsid_guid); | |
247 | ||
248 | if (ds->ds_user_ptr != NULL) | |
249 | ds->ds_user_evict_func(ds, ds->ds_user_ptr); | |
250 | ||
251 | if (ds->ds_prev) { | |
252 | dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
253 | ds->ds_prev = NULL; | |
254 | } | |
255 | ||
256 | bplist_close(&ds->ds_deadlist); | |
257 | dsl_dir_close(ds->ds_dir, ds); | |
258 | ||
259 | ASSERT(!list_link_active(&ds->ds_synced_link)); | |
260 | ||
261 | mutex_destroy(&ds->ds_lock); | |
262 | mutex_destroy(&ds->ds_opening_lock); | |
263 | mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
264 | ||
265 | kmem_free(ds, sizeof (dsl_dataset_t)); | |
266 | } | |
267 | ||
268 | static int | |
269 | dsl_dataset_get_snapname(dsl_dataset_t *ds) | |
270 | { | |
271 | dsl_dataset_phys_t *headphys; | |
272 | int err; | |
273 | dmu_buf_t *headdbuf; | |
274 | dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
275 | objset_t *mos = dp->dp_meta_objset; | |
276 | ||
277 | if (ds->ds_snapname[0]) | |
278 | return (0); | |
279 | if (ds->ds_phys->ds_next_snap_obj == 0) | |
280 | return (0); | |
281 | ||
282 | err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, | |
283 | FTAG, &headdbuf); | |
284 | if (err) | |
285 | return (err); | |
286 | headphys = headdbuf->db_data; | |
287 | err = zap_value_search(dp->dp_meta_objset, | |
288 | headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); | |
289 | dmu_buf_rele(headdbuf, FTAG); | |
290 | return (err); | |
291 | } | |
292 | ||
293 | static int | |
294 | dsl_dataset_snap_lookup(objset_t *os, uint64_t flags, | |
295 | uint64_t snapnames_zapobj, const char *name, uint64_t *value) | |
296 | { | |
297 | matchtype_t mt; | |
298 | int err; | |
299 | ||
300 | if (flags & DS_FLAG_CI_DATASET) | |
301 | mt = MT_FIRST; | |
302 | else | |
303 | mt = MT_EXACT; | |
304 | ||
305 | err = zap_lookup_norm(os, snapnames_zapobj, name, 8, 1, | |
306 | value, mt, NULL, 0, NULL); | |
307 | if (err == ENOTSUP && mt == MT_FIRST) | |
308 | err = zap_lookup(os, snapnames_zapobj, name, 8, 1, value); | |
309 | return (err); | |
310 | } | |
311 | ||
312 | static int | |
313 | dsl_dataset_snap_remove(objset_t *os, uint64_t flags, | |
314 | uint64_t snapnames_zapobj, char *name, dmu_tx_t *tx) | |
315 | { | |
316 | matchtype_t mt; | |
317 | int err; | |
318 | ||
319 | if (flags & DS_FLAG_CI_DATASET) | |
320 | mt = MT_FIRST; | |
321 | else | |
322 | mt = MT_EXACT; | |
323 | ||
324 | err = zap_remove_norm(os, snapnames_zapobj, name, mt, tx); | |
325 | if (err == ENOTSUP && mt == MT_FIRST) | |
326 | err = zap_remove(os, snapnames_zapobj, name, tx); | |
327 | return (err); | |
328 | } | |
329 | ||
330 | int | |
331 | dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, | |
332 | int mode, void *tag, dsl_dataset_t **dsp) | |
333 | { | |
334 | uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
335 | objset_t *mos = dp->dp_meta_objset; | |
336 | dmu_buf_t *dbuf; | |
337 | dsl_dataset_t *ds; | |
338 | int err; | |
339 | ||
340 | ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || | |
341 | dsl_pool_sync_context(dp)); | |
342 | ||
343 | err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); | |
344 | if (err) | |
345 | return (err); | |
346 | ds = dmu_buf_get_user(dbuf); | |
347 | if (ds == NULL) { | |
348 | dsl_dataset_t *winner; | |
349 | ||
350 | ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); | |
351 | ds->ds_dbuf = dbuf; | |
352 | ds->ds_object = dsobj; | |
353 | ds->ds_phys = dbuf->db_data; | |
354 | ||
355 | mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); | |
356 | mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); | |
357 | mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, | |
358 | NULL); | |
359 | ||
360 | err = bplist_open(&ds->ds_deadlist, | |
361 | mos, ds->ds_phys->ds_deadlist_obj); | |
362 | if (err == 0) { | |
363 | err = dsl_dir_open_obj(dp, | |
364 | ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); | |
365 | } | |
366 | if (err) { | |
367 | /* | |
368 | * we don't really need to close the blist if we | |
369 | * just opened it. | |
370 | */ | |
371 | mutex_destroy(&ds->ds_lock); | |
372 | mutex_destroy(&ds->ds_opening_lock); | |
373 | mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
374 | kmem_free(ds, sizeof (dsl_dataset_t)); | |
375 | dmu_buf_rele(dbuf, tag); | |
376 | return (err); | |
377 | } | |
378 | ||
379 | if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { | |
380 | ds->ds_snapname[0] = '\0'; | |
381 | if (ds->ds_phys->ds_prev_snap_obj) { | |
382 | err = dsl_dataset_open_obj(dp, | |
383 | ds->ds_phys->ds_prev_snap_obj, NULL, | |
384 | DS_MODE_NONE, ds, &ds->ds_prev); | |
385 | } | |
386 | } else { | |
387 | if (snapname) { | |
388 | #ifdef ZFS_DEBUG | |
389 | dsl_dataset_phys_t *headphys; | |
390 | dmu_buf_t *headdbuf; | |
391 | err = dmu_bonus_hold(mos, | |
392 | ds->ds_dir->dd_phys->dd_head_dataset_obj, | |
393 | FTAG, &headdbuf); | |
394 | if (err == 0) { | |
395 | uint64_t foundobj; | |
396 | ||
397 | headphys = headdbuf->db_data; | |
398 | err = dsl_dataset_snap_lookup( | |
399 | dp->dp_meta_objset, | |
400 | headphys->ds_flags, | |
401 | headphys->ds_snapnames_zapobj, | |
402 | snapname, &foundobj); | |
403 | ASSERT3U(foundobj, ==, dsobj); | |
404 | dmu_buf_rele(headdbuf, FTAG); | |
405 | } | |
406 | #endif | |
407 | (void) strcat(ds->ds_snapname, snapname); | |
408 | } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { | |
409 | err = dsl_dataset_get_snapname(ds); | |
410 | } | |
411 | } | |
412 | ||
413 | if (!dsl_dataset_is_snapshot(ds)) { | |
414 | /* | |
415 | * In sync context, we're called with either no lock | |
416 | * or with the write lock. If we're not syncing, | |
417 | * we're always called with the read lock held. | |
418 | */ | |
419 | boolean_t need_lock = | |
420 | !RW_WRITE_HELD(&dp->dp_config_rwlock) && | |
421 | dsl_pool_sync_context(dp); | |
422 | ||
423 | if (need_lock) | |
424 | rw_enter(&dp->dp_config_rwlock, RW_READER); | |
425 | ||
426 | err = dsl_prop_get_ds_locked(ds->ds_dir, | |
427 | "refreservation", sizeof (uint64_t), 1, | |
428 | &ds->ds_reserved, NULL); | |
429 | if (err == 0) { | |
430 | err = dsl_prop_get_ds_locked(ds->ds_dir, | |
431 | "refquota", sizeof (uint64_t), 1, | |
432 | &ds->ds_quota, NULL); | |
433 | } | |
434 | ||
435 | if (need_lock) | |
436 | rw_exit(&dp->dp_config_rwlock); | |
437 | } else { | |
438 | ds->ds_reserved = ds->ds_quota = 0; | |
439 | } | |
440 | ||
441 | if (err == 0) { | |
442 | winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, | |
443 | dsl_dataset_evict); | |
444 | } | |
445 | if (err || winner) { | |
446 | bplist_close(&ds->ds_deadlist); | |
447 | if (ds->ds_prev) { | |
448 | dsl_dataset_close(ds->ds_prev, | |
449 | DS_MODE_NONE, ds); | |
450 | } | |
451 | dsl_dir_close(ds->ds_dir, ds); | |
452 | mutex_destroy(&ds->ds_lock); | |
453 | mutex_destroy(&ds->ds_opening_lock); | |
454 | mutex_destroy(&ds->ds_deadlist.bpl_lock); | |
455 | kmem_free(ds, sizeof (dsl_dataset_t)); | |
456 | if (err) { | |
457 | dmu_buf_rele(dbuf, tag); | |
458 | return (err); | |
459 | } | |
460 | ds = winner; | |
461 | } else { | |
462 | ds->ds_fsid_guid = | |
463 | unique_insert(ds->ds_phys->ds_fsid_guid); | |
464 | } | |
465 | } | |
466 | ASSERT3P(ds->ds_dbuf, ==, dbuf); | |
467 | ASSERT3P(ds->ds_phys, ==, dbuf->db_data); | |
468 | ||
469 | mutex_enter(&ds->ds_lock); | |
470 | if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && | |
471 | (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && | |
472 | !DS_MODE_IS_INCONSISTENT(mode)) || | |
473 | (ds->ds_open_refcount + weight > DS_REF_MAX)) { | |
474 | mutex_exit(&ds->ds_lock); | |
475 | dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
476 | return (EBUSY); | |
477 | } | |
478 | ds->ds_open_refcount += weight; | |
479 | mutex_exit(&ds->ds_lock); | |
480 | ||
481 | *dsp = ds; | |
482 | return (0); | |
483 | } | |
484 | ||
485 | int | |
486 | dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, | |
487 | void *tag, dsl_dataset_t **dsp) | |
488 | { | |
489 | dsl_dir_t *dd; | |
490 | dsl_pool_t *dp; | |
491 | const char *tail; | |
492 | uint64_t obj; | |
493 | dsl_dataset_t *ds = NULL; | |
494 | int err = 0; | |
495 | ||
496 | err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); | |
497 | if (err) | |
498 | return (err); | |
499 | ||
500 | dp = dd->dd_pool; | |
501 | obj = dd->dd_phys->dd_head_dataset_obj; | |
502 | rw_enter(&dp->dp_config_rwlock, RW_READER); | |
503 | if (obj == 0) { | |
504 | /* A dataset with no associated objset */ | |
505 | err = ENOENT; | |
506 | goto out; | |
507 | } | |
508 | ||
509 | if (tail != NULL) { | |
510 | objset_t *mos = dp->dp_meta_objset; | |
511 | uint64_t flags; | |
512 | ||
513 | err = dsl_dataset_open_obj(dp, obj, NULL, | |
514 | DS_MODE_NONE, tag, &ds); | |
515 | if (err) | |
516 | goto out; | |
517 | flags = ds->ds_phys->ds_flags; | |
518 | obj = ds->ds_phys->ds_snapnames_zapobj; | |
519 | dsl_dataset_close(ds, DS_MODE_NONE, tag); | |
520 | ds = NULL; | |
521 | ||
522 | if (tail[0] != '@') { | |
523 | err = ENOENT; | |
524 | goto out; | |
525 | } | |
526 | tail++; | |
527 | ||
528 | /* Look for a snapshot */ | |
529 | if (!DS_MODE_IS_READONLY(mode)) { | |
530 | err = EROFS; | |
531 | goto out; | |
532 | } | |
533 | dprintf("looking for snapshot '%s'\n", tail); | |
534 | err = dsl_dataset_snap_lookup(mos, flags, obj, tail, &obj); | |
535 | if (err) | |
536 | goto out; | |
537 | } | |
538 | err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); | |
539 | ||
540 | out: | |
541 | rw_exit(&dp->dp_config_rwlock); | |
542 | dsl_dir_close(dd, FTAG); | |
543 | ||
544 | ASSERT3U((err == 0), ==, (ds != NULL)); | |
545 | /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ | |
546 | ||
547 | *dsp = ds; | |
548 | return (err); | |
549 | } | |
550 | ||
551 | int | |
552 | dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) | |
553 | { | |
554 | return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); | |
555 | } | |
556 | ||
557 | void | |
558 | dsl_dataset_name(dsl_dataset_t *ds, char *name) | |
559 | { | |
560 | if (ds == NULL) { | |
561 | (void) strcpy(name, "mos"); | |
562 | } else { | |
563 | dsl_dir_name(ds->ds_dir, name); | |
564 | VERIFY(0 == dsl_dataset_get_snapname(ds)); | |
565 | if (ds->ds_snapname[0]) { | |
566 | (void) strcat(name, "@"); | |
567 | if (!MUTEX_HELD(&ds->ds_lock)) { | |
568 | /* | |
569 | * We use a "recursive" mutex so that we | |
570 | * can call dprintf_ds() with ds_lock held. | |
571 | */ | |
572 | mutex_enter(&ds->ds_lock); | |
573 | (void) strcat(name, ds->ds_snapname); | |
574 | mutex_exit(&ds->ds_lock); | |
575 | } else { | |
576 | (void) strcat(name, ds->ds_snapname); | |
577 | } | |
578 | } | |
579 | } | |
580 | } | |
581 | ||
582 | static int | |
583 | dsl_dataset_namelen(dsl_dataset_t *ds) | |
584 | { | |
585 | int result; | |
586 | ||
587 | if (ds == NULL) { | |
588 | result = 3; /* "mos" */ | |
589 | } else { | |
590 | result = dsl_dir_namelen(ds->ds_dir); | |
591 | VERIFY(0 == dsl_dataset_get_snapname(ds)); | |
592 | if (ds->ds_snapname[0]) { | |
593 | ++result; /* adding one for the @-sign */ | |
594 | if (!MUTEX_HELD(&ds->ds_lock)) { | |
595 | /* see dsl_datset_name */ | |
596 | mutex_enter(&ds->ds_lock); | |
597 | result += strlen(ds->ds_snapname); | |
598 | mutex_exit(&ds->ds_lock); | |
599 | } else { | |
600 | result += strlen(ds->ds_snapname); | |
601 | } | |
602 | } | |
603 | } | |
604 | ||
605 | return (result); | |
606 | } | |
607 | ||
608 | void | |
609 | dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) | |
610 | { | |
611 | uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; | |
612 | mutex_enter(&ds->ds_lock); | |
613 | ASSERT3U(ds->ds_open_refcount, >=, weight); | |
614 | ds->ds_open_refcount -= weight; | |
615 | mutex_exit(&ds->ds_lock); | |
616 | ||
617 | dmu_buf_rele(ds->ds_dbuf, tag); | |
618 | } | |
619 | ||
620 | void | |
621 | dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode) | |
622 | { | |
623 | uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; | |
624 | uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; | |
625 | mutex_enter(&ds->ds_lock); | |
626 | ASSERT3U(ds->ds_open_refcount, >=, oldweight); | |
627 | ASSERT3U(oldweight, >=, newweight); | |
628 | ds->ds_open_refcount -= oldweight; | |
629 | ds->ds_open_refcount += newweight; | |
630 | mutex_exit(&ds->ds_lock); | |
631 | } | |
632 | ||
633 | boolean_t | |
634 | dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode) | |
635 | { | |
636 | boolean_t rv; | |
637 | uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)]; | |
638 | uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)]; | |
639 | mutex_enter(&ds->ds_lock); | |
640 | ASSERT3U(ds->ds_open_refcount, >=, oldweight); | |
641 | ASSERT3U(newweight, >=, oldweight); | |
642 | if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) { | |
643 | rv = B_FALSE; | |
644 | } else { | |
645 | ds->ds_open_refcount -= oldweight; | |
646 | ds->ds_open_refcount += newweight; | |
647 | rv = B_TRUE; | |
648 | } | |
649 | mutex_exit(&ds->ds_lock); | |
650 | return (rv); | |
651 | } | |
652 | ||
653 | void | |
654 | dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) | |
655 | { | |
656 | objset_t *mos = dp->dp_meta_objset; | |
657 | dmu_buf_t *dbuf; | |
658 | dsl_dataset_phys_t *dsphys; | |
659 | dsl_dataset_t *ds; | |
660 | uint64_t dsobj; | |
661 | dsl_dir_t *dd; | |
662 | ||
663 | dsl_dir_create_root(mos, ddobjp, tx); | |
664 | VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); | |
665 | ||
666 | dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, | |
667 | DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); | |
668 | VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); | |
669 | dmu_buf_will_dirty(dbuf, tx); | |
670 | dsphys = dbuf->db_data; | |
671 | dsphys->ds_dir_obj = dd->dd_object; | |
672 | dsphys->ds_fsid_guid = unique_create(); | |
673 | (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
674 | sizeof (dsphys->ds_guid)); | |
675 | dsphys->ds_snapnames_zapobj = | |
676 | zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, | |
677 | DMU_OT_NONE, 0, tx); | |
678 | dsphys->ds_creation_time = gethrestime_sec(); | |
679 | dsphys->ds_creation_txg = tx->tx_txg; | |
680 | dsphys->ds_deadlist_obj = | |
681 | bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
682 | if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) | |
683 | dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; | |
684 | dmu_buf_rele(dbuf, FTAG); | |
685 | ||
686 | dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
687 | dd->dd_phys->dd_head_dataset_obj = dsobj; | |
688 | dsl_dir_close(dd, FTAG); | |
689 | ||
690 | VERIFY(0 == | |
691 | dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); | |
692 | (void) dmu_objset_create_impl(dp->dp_spa, ds, | |
693 | &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); | |
694 | dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
695 | } | |
696 | ||
697 | uint64_t | |
698 | dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, | |
699 | uint64_t flags, dmu_tx_t *tx) | |
700 | { | |
701 | dsl_pool_t *dp = dd->dd_pool; | |
702 | dmu_buf_t *dbuf; | |
703 | dsl_dataset_phys_t *dsphys; | |
704 | uint64_t dsobj; | |
705 | objset_t *mos = dp->dp_meta_objset; | |
706 | ||
707 | ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); | |
708 | ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); | |
709 | ASSERT(dmu_tx_is_syncing(tx)); | |
710 | ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); | |
711 | ||
712 | dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, | |
713 | DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); | |
714 | VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); | |
715 | dmu_buf_will_dirty(dbuf, tx); | |
716 | dsphys = dbuf->db_data; | |
717 | dsphys->ds_dir_obj = dd->dd_object; | |
718 | dsphys->ds_flags = flags; | |
719 | dsphys->ds_fsid_guid = unique_create(); | |
720 | (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
721 | sizeof (dsphys->ds_guid)); | |
722 | dsphys->ds_snapnames_zapobj = | |
723 | zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, | |
724 | DMU_OT_NONE, 0, tx); | |
725 | dsphys->ds_creation_time = gethrestime_sec(); | |
726 | dsphys->ds_creation_txg = tx->tx_txg; | |
727 | dsphys->ds_deadlist_obj = | |
728 | bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
729 | ||
730 | if (origin) { | |
731 | dsphys->ds_prev_snap_obj = origin->ds_object; | |
732 | dsphys->ds_prev_snap_txg = | |
733 | origin->ds_phys->ds_creation_txg; | |
734 | dsphys->ds_used_bytes = | |
735 | origin->ds_phys->ds_used_bytes; | |
736 | dsphys->ds_compressed_bytes = | |
737 | origin->ds_phys->ds_compressed_bytes; | |
738 | dsphys->ds_uncompressed_bytes = | |
739 | origin->ds_phys->ds_uncompressed_bytes; | |
740 | dsphys->ds_bp = origin->ds_phys->ds_bp; | |
741 | dsphys->ds_flags |= origin->ds_phys->ds_flags; | |
742 | ||
743 | dmu_buf_will_dirty(origin->ds_dbuf, tx); | |
744 | origin->ds_phys->ds_num_children++; | |
745 | ||
746 | dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
747 | dd->dd_phys->dd_origin_obj = origin->ds_object; | |
748 | } | |
749 | ||
750 | if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) | |
751 | dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; | |
752 | ||
753 | dmu_buf_rele(dbuf, FTAG); | |
754 | ||
755 | dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
756 | dd->dd_phys->dd_head_dataset_obj = dsobj; | |
757 | ||
758 | return (dsobj); | |
759 | } | |
760 | ||
761 | uint64_t | |
762 | dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, | |
763 | dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) | |
764 | { | |
765 | dsl_pool_t *dp = pdd->dd_pool; | |
766 | uint64_t dsobj, ddobj; | |
767 | dsl_dir_t *dd; | |
768 | ||
769 | ASSERT(lastname[0] != '@'); | |
770 | ||
771 | ddobj = dsl_dir_create_sync(pdd, lastname, tx); | |
772 | VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); | |
773 | ||
774 | dsobj = dsl_dataset_create_sync_impl(dd, origin, flags, tx); | |
775 | ||
776 | dsl_deleg_set_create_perms(dd, tx, cr); | |
777 | ||
778 | dsl_dir_close(dd, FTAG); | |
779 | ||
780 | return (dsobj); | |
781 | } | |
782 | ||
783 | struct destroyarg { | |
784 | dsl_sync_task_group_t *dstg; | |
785 | char *snapname; | |
786 | char *failed; | |
787 | }; | |
788 | ||
789 | static int | |
790 | dsl_snapshot_destroy_one(char *name, void *arg) | |
791 | { | |
792 | struct destroyarg *da = arg; | |
793 | dsl_dataset_t *ds; | |
794 | char *cp; | |
795 | int err; | |
796 | ||
797 | (void) strcat(name, "@"); | |
798 | (void) strcat(name, da->snapname); | |
799 | err = dsl_dataset_open(name, | |
800 | DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, | |
801 | da->dstg, &ds); | |
802 | cp = strchr(name, '@'); | |
803 | *cp = '\0'; | |
804 | if (err == ENOENT) | |
805 | return (0); | |
806 | if (err) { | |
807 | (void) strcpy(da->failed, name); | |
808 | return (err); | |
809 | } | |
810 | ||
811 | dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, | |
812 | dsl_dataset_destroy_sync, ds, da->dstg, 0); | |
813 | return (0); | |
814 | } | |
815 | ||
816 | /* | |
817 | * Destroy 'snapname' in all descendants of 'fsname'. | |
818 | */ | |
819 | #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy | |
820 | int | |
821 | dsl_snapshots_destroy(char *fsname, char *snapname) | |
822 | { | |
823 | int err; | |
824 | struct destroyarg da; | |
825 | dsl_sync_task_t *dst; | |
826 | spa_t *spa; | |
827 | ||
828 | err = spa_open(fsname, &spa, FTAG); | |
829 | if (err) | |
830 | return (err); | |
831 | da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); | |
832 | da.snapname = snapname; | |
833 | da.failed = fsname; | |
834 | ||
835 | err = dmu_objset_find(fsname, | |
836 | dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); | |
837 | ||
838 | if (err == 0) | |
839 | err = dsl_sync_task_group_wait(da.dstg); | |
840 | ||
841 | for (dst = list_head(&da.dstg->dstg_tasks); dst; | |
842 | dst = list_next(&da.dstg->dstg_tasks, dst)) { | |
843 | dsl_dataset_t *ds = dst->dst_arg1; | |
844 | if (dst->dst_err) { | |
845 | dsl_dataset_name(ds, fsname); | |
846 | *strchr(fsname, '@') = '\0'; | |
847 | } | |
848 | /* | |
849 | * If it was successful, destroy_sync would have | |
850 | * closed the ds | |
851 | */ | |
852 | if (err) | |
853 | dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg); | |
854 | } | |
855 | ||
856 | dsl_sync_task_group_destroy(da.dstg); | |
857 | spa_close(spa, FTAG); | |
858 | return (err); | |
859 | } | |
860 | ||
861 | /* | |
862 | * ds must be opened EXCLUSIVE or PRIMARY. on return (whether | |
863 | * successful or not), ds will be closed and caller can no longer | |
864 | * dereference it. | |
865 | */ | |
866 | int | |
867 | dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) | |
868 | { | |
869 | int err; | |
870 | dsl_sync_task_group_t *dstg; | |
871 | objset_t *os; | |
872 | dsl_dir_t *dd; | |
873 | uint64_t obj; | |
874 | ||
875 | if (ds->ds_open_refcount != DS_REF_MAX) { | |
876 | if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY, | |
877 | DS_MODE_EXCLUSIVE) == 0) { | |
878 | dsl_dataset_close(ds, DS_MODE_PRIMARY, tag); | |
879 | return (EBUSY); | |
880 | } | |
881 | } | |
882 | ||
883 | if (dsl_dataset_is_snapshot(ds)) { | |
884 | /* Destroying a snapshot is simpler */ | |
885 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
886 | dsl_dataset_destroy_check, dsl_dataset_destroy_sync, | |
887 | ds, tag, 0); | |
888 | goto out; | |
889 | } | |
890 | ||
891 | dd = ds->ds_dir; | |
892 | ||
893 | /* | |
894 | * Check for errors and mark this ds as inconsistent, in | |
895 | * case we crash while freeing the objects. | |
896 | */ | |
897 | err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, | |
898 | dsl_dataset_destroy_begin_sync, ds, NULL, 0); | |
899 | if (err) | |
900 | goto out; | |
901 | ||
902 | err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); | |
903 | if (err) | |
904 | goto out; | |
905 | ||
906 | /* | |
907 | * remove the objects in open context, so that we won't | |
908 | * have too much to do in syncing context. | |
909 | */ | |
910 | for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, | |
911 | ds->ds_phys->ds_prev_snap_txg)) { | |
912 | dmu_tx_t *tx = dmu_tx_create(os); | |
913 | dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); | |
914 | dmu_tx_hold_bonus(tx, obj); | |
915 | err = dmu_tx_assign(tx, TXG_WAIT); | |
916 | if (err) { | |
917 | /* | |
918 | * Perhaps there is not enough disk | |
919 | * space. Just deal with it from | |
920 | * dsl_dataset_destroy_sync(). | |
921 | */ | |
922 | dmu_tx_abort(tx); | |
923 | continue; | |
924 | } | |
925 | VERIFY(0 == dmu_object_free(os, obj, tx)); | |
926 | dmu_tx_commit(tx); | |
927 | } | |
928 | /* Make sure it's not dirty before we finish destroying it. */ | |
929 | txg_wait_synced(dd->dd_pool, 0); | |
930 | ||
931 | dmu_objset_close(os); | |
932 | if (err != ESRCH) | |
933 | goto out; | |
934 | ||
935 | if (ds->ds_user_ptr) { | |
936 | ds->ds_user_evict_func(ds, ds->ds_user_ptr); | |
937 | ds->ds_user_ptr = NULL; | |
938 | } | |
939 | ||
940 | rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); | |
941 | err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); | |
942 | rw_exit(&dd->dd_pool->dp_config_rwlock); | |
943 | ||
944 | if (err) | |
945 | goto out; | |
946 | ||
947 | /* | |
948 | * Blow away the dsl_dir + head dataset. | |
949 | */ | |
950 | dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); | |
951 | dsl_sync_task_create(dstg, dsl_dataset_destroy_check, | |
952 | dsl_dataset_destroy_sync, ds, tag, 0); | |
953 | dsl_sync_task_create(dstg, dsl_dir_destroy_check, | |
954 | dsl_dir_destroy_sync, dd, FTAG, 0); | |
955 | err = dsl_sync_task_group_wait(dstg); | |
956 | dsl_sync_task_group_destroy(dstg); | |
957 | /* if it is successful, *destroy_sync will close the ds+dd */ | |
958 | if (err) | |
959 | dsl_dir_close(dd, FTAG); | |
960 | out: | |
961 | if (err) | |
962 | dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); | |
963 | return (err); | |
964 | } | |
965 | ||
966 | int | |
967 | dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost) | |
968 | { | |
969 | ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); | |
970 | ||
971 | return (dsl_sync_task_do(ds->ds_dir->dd_pool, | |
972 | dsl_dataset_rollback_check, dsl_dataset_rollback_sync, | |
973 | ds, &ost, 0)); | |
974 | } | |
975 | ||
976 | void * | |
977 | dsl_dataset_set_user_ptr(dsl_dataset_t *ds, | |
978 | void *p, dsl_dataset_evict_func_t func) | |
979 | { | |
980 | void *old; | |
981 | ||
982 | mutex_enter(&ds->ds_lock); | |
983 | old = ds->ds_user_ptr; | |
984 | if (old == NULL) { | |
985 | ds->ds_user_ptr = p; | |
986 | ds->ds_user_evict_func = func; | |
987 | } | |
988 | mutex_exit(&ds->ds_lock); | |
989 | return (old); | |
990 | } | |
991 | ||
992 | void * | |
993 | dsl_dataset_get_user_ptr(dsl_dataset_t *ds) | |
994 | { | |
995 | return (ds->ds_user_ptr); | |
996 | } | |
997 | ||
998 | ||
999 | blkptr_t * | |
1000 | dsl_dataset_get_blkptr(dsl_dataset_t *ds) | |
1001 | { | |
1002 | return (&ds->ds_phys->ds_bp); | |
1003 | } | |
1004 | ||
1005 | void | |
1006 | dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) | |
1007 | { | |
1008 | ASSERT(dmu_tx_is_syncing(tx)); | |
1009 | /* If it's the meta-objset, set dp_meta_rootbp */ | |
1010 | if (ds == NULL) { | |
1011 | tx->tx_pool->dp_meta_rootbp = *bp; | |
1012 | } else { | |
1013 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1014 | ds->ds_phys->ds_bp = *bp; | |
1015 | } | |
1016 | } | |
1017 | ||
1018 | spa_t * | |
1019 | dsl_dataset_get_spa(dsl_dataset_t *ds) | |
1020 | { | |
1021 | return (ds->ds_dir->dd_pool->dp_spa); | |
1022 | } | |
1023 | ||
1024 | void | |
1025 | dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) | |
1026 | { | |
1027 | dsl_pool_t *dp; | |
1028 | ||
1029 | if (ds == NULL) /* this is the meta-objset */ | |
1030 | return; | |
1031 | ||
1032 | ASSERT(ds->ds_user_ptr != NULL); | |
1033 | ||
1034 | if (ds->ds_phys->ds_next_snap_obj != 0) | |
1035 | panic("dirtying snapshot!"); | |
1036 | ||
1037 | dp = ds->ds_dir->dd_pool; | |
1038 | ||
1039 | if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { | |
1040 | /* up the hold count until we can be written out */ | |
1041 | dmu_buf_add_ref(ds->ds_dbuf, ds); | |
1042 | } | |
1043 | } | |
1044 | ||
1045 | /* | |
1046 | * The unique space in the head dataset can be calculated by subtracting | |
1047 | * the space used in the most recent snapshot, that is still being used | |
1048 | * in this file system, from the space currently in use. To figure out | |
1049 | * the space in the most recent snapshot still in use, we need to take | |
1050 | * the total space used in the snapshot and subtract out the space that | |
1051 | * has been freed up since the snapshot was taken. | |
1052 | */ | |
1053 | static void | |
1054 | dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) | |
1055 | { | |
1056 | uint64_t mrs_used; | |
1057 | uint64_t dlused, dlcomp, dluncomp; | |
1058 | ||
1059 | ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj); | |
1060 | ||
1061 | if (ds->ds_phys->ds_prev_snap_obj != 0) | |
1062 | mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; | |
1063 | else | |
1064 | mrs_used = 0; | |
1065 | ||
1066 | VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, | |
1067 | &dluncomp)); | |
1068 | ||
1069 | ASSERT3U(dlused, <=, mrs_used); | |
1070 | ds->ds_phys->ds_unique_bytes = | |
1071 | ds->ds_phys->ds_used_bytes - (mrs_used - dlused); | |
1072 | ||
1073 | if (!DS_UNIQUE_IS_ACCURATE(ds) && | |
1074 | spa_version(ds->ds_dir->dd_pool->dp_spa) >= | |
1075 | SPA_VERSION_UNIQUE_ACCURATE) | |
1076 | ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; | |
1077 | } | |
1078 | ||
1079 | static uint64_t | |
1080 | dsl_dataset_unique(dsl_dataset_t *ds) | |
1081 | { | |
1082 | if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds)) | |
1083 | dsl_dataset_recalc_head_uniq(ds); | |
1084 | ||
1085 | return (ds->ds_phys->ds_unique_bytes); | |
1086 | } | |
1087 | ||
/*
 * Argument bundle handed to kill_blkptr() through the traversal
 * callback's opaque pointer.  The three counters are accumulated into
 * by the callback; the owner initializes and later reads them.
 */
struct killarg {
	int64_t *usedp;		/* += bp_get_dasize() of each block */
	int64_t *compressedp;	/* += BP_GET_PSIZE() of each block */
	int64_t *uncompressedp;	/* += BP_GET_UCSIZE() of each block */
	zio_t *zio;		/* zio passed to arc_free() */
	dmu_tx_t *tx;		/* its tx_txg is passed to arc_free() */
};
1095 | ||
1096 | static int | |
1097 | kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) | |
1098 | { | |
1099 | struct killarg *ka = arg; | |
1100 | blkptr_t *bp = &bc->bc_blkptr; | |
1101 | ||
1102 | ASSERT3U(bc->bc_errno, ==, 0); | |
1103 | ||
1104 | /* | |
1105 | * Since this callback is not called concurrently, no lock is | |
1106 | * needed on the accounting values. | |
1107 | */ | |
1108 | *ka->usedp += bp_get_dasize(spa, bp); | |
1109 | *ka->compressedp += BP_GET_PSIZE(bp); | |
1110 | *ka->uncompressedp += BP_GET_UCSIZE(bp); | |
1111 | /* XXX check for EIO? */ | |
1112 | (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, | |
1113 | ARC_NOWAIT); | |
1114 | return (0); | |
1115 | } | |
1116 | ||
1117 | /* ARGSUSED */ | |
1118 | static int | |
1119 | dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1120 | { | |
1121 | dsl_dataset_t *ds = arg1; | |
1122 | dmu_objset_type_t *ost = arg2; | |
1123 | ||
1124 | /* | |
1125 | * We can only roll back to emptyness if it is a ZPL objset. | |
1126 | */ | |
1127 | if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0) | |
1128 | return (EINVAL); | |
1129 | ||
1130 | /* | |
1131 | * This must not be a snapshot. | |
1132 | */ | |
1133 | if (ds->ds_phys->ds_next_snap_obj != 0) | |
1134 | return (EINVAL); | |
1135 | ||
1136 | /* | |
1137 | * If we made changes this txg, traverse_dsl_dataset won't find | |
1138 | * them. Try again. | |
1139 | */ | |
1140 | if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) | |
1141 | return (EAGAIN); | |
1142 | ||
1143 | return (0); | |
1144 | } | |
1145 | ||
1146 | /* ARGSUSED */ | |
1147 | static void | |
1148 | dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) | |
1149 | { | |
1150 | dsl_dataset_t *ds = arg1; | |
1151 | dmu_objset_type_t *ost = arg2; | |
1152 | objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
1153 | ||
1154 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1155 | ||
1156 | /* | |
1157 | * Before the roll back destroy the zil. | |
1158 | */ | |
1159 | if (ds->ds_user_ptr != NULL) { | |
1160 | zil_rollback_destroy( | |
1161 | ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx); | |
1162 | ||
1163 | /* | |
1164 | * We need to make sure that the objset_impl_t is reopened after | |
1165 | * we do the rollback, otherwise it will have the wrong | |
1166 | * objset_phys_t. Normally this would happen when this | |
1167 | * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the | |
1168 | * dataset to be immediately evicted. But when doing "zfs recv | |
1169 | * -F", we reopen the objset before that, so that there is no | |
1170 | * window where the dataset is closed and inconsistent. | |
1171 | */ | |
1172 | ds->ds_user_evict_func(ds, ds->ds_user_ptr); | |
1173 | ds->ds_user_ptr = NULL; | |
1174 | } | |
1175 | ||
1176 | /* Zero out the deadlist. */ | |
1177 | bplist_close(&ds->ds_deadlist); | |
1178 | bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
1179 | ds->ds_phys->ds_deadlist_obj = | |
1180 | bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1181 | VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, | |
1182 | ds->ds_phys->ds_deadlist_obj)); | |
1183 | ||
1184 | { | |
1185 | /* Free blkptrs that we gave birth to */ | |
1186 | zio_t *zio; | |
1187 | int64_t used = 0, compressed = 0, uncompressed = 0; | |
1188 | struct killarg ka; | |
1189 | int64_t delta; | |
1190 | ||
1191 | zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, | |
1192 | ZIO_FLAG_MUSTSUCCEED); | |
1193 | ka.usedp = &used; | |
1194 | ka.compressedp = &compressed; | |
1195 | ka.uncompressedp = &uncompressed; | |
1196 | ka.zio = zio; | |
1197 | ka.tx = tx; | |
1198 | (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
1199 | ADVANCE_POST, kill_blkptr, &ka); | |
1200 | (void) zio_wait(zio); | |
1201 | ||
1202 | /* only deduct space beyond any refreservation */ | |
1203 | delta = parent_delta(ds, -used); | |
1204 | dsl_dir_diduse_space(ds->ds_dir, | |
1205 | delta, -compressed, -uncompressed, tx); | |
1206 | } | |
1207 | ||
1208 | if (ds->ds_prev) { | |
1209 | /* Change our contents to that of the prev snapshot */ | |
1210 | ASSERT3U(ds->ds_prev->ds_object, ==, | |
1211 | ds->ds_phys->ds_prev_snap_obj); | |
1212 | ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; | |
1213 | ds->ds_phys->ds_used_bytes = | |
1214 | ds->ds_prev->ds_phys->ds_used_bytes; | |
1215 | ds->ds_phys->ds_compressed_bytes = | |
1216 | ds->ds_prev->ds_phys->ds_compressed_bytes; | |
1217 | ds->ds_phys->ds_uncompressed_bytes = | |
1218 | ds->ds_prev->ds_phys->ds_uncompressed_bytes; | |
1219 | ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; | |
1220 | ds->ds_phys->ds_unique_bytes = 0; | |
1221 | ||
1222 | if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { | |
1223 | dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
1224 | ds->ds_prev->ds_phys->ds_unique_bytes = 0; | |
1225 | } | |
1226 | } else { | |
1227 | /* Zero out our contents, recreate objset */ | |
1228 | bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t)); | |
1229 | ds->ds_phys->ds_used_bytes = 0; | |
1230 | ds->ds_phys->ds_compressed_bytes = 0; | |
1231 | ds->ds_phys->ds_uncompressed_bytes = 0; | |
1232 | ds->ds_phys->ds_flags = 0; | |
1233 | ds->ds_phys->ds_unique_bytes = 0; | |
1234 | (void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds, | |
1235 | &ds->ds_phys->ds_bp, *ost, tx); | |
1236 | } | |
1237 | ||
1238 | spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa, | |
1239 | tx, cr, "dataset = %llu", ds->ds_object); | |
1240 | } | |
1241 | ||
1242 | /* ARGSUSED */ | |
1243 | static int | |
1244 | dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1245 | { | |
1246 | dsl_dataset_t *ds = arg1; | |
1247 | objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
1248 | uint64_t count; | |
1249 | int err; | |
1250 | ||
1251 | /* | |
1252 | * Can't delete a head dataset if there are snapshots of it. | |
1253 | * (Except if the only snapshots are from the branch we cloned | |
1254 | * from.) | |
1255 | */ | |
1256 | if (ds->ds_prev != NULL && | |
1257 | ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) | |
1258 | return (EINVAL); | |
1259 | ||
1260 | /* | |
1261 | * This is really a dsl_dir thing, but check it here so that | |
1262 | * we'll be less likely to leave this dataset inconsistent & | |
1263 | * nearly destroyed. | |
1264 | */ | |
1265 | err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); | |
1266 | if (err) | |
1267 | return (err); | |
1268 | if (count != 0) | |
1269 | return (EEXIST); | |
1270 | ||
1271 | return (0); | |
1272 | } | |
1273 | ||
1274 | /* ARGSUSED */ | |
1275 | static void | |
1276 | dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) | |
1277 | { | |
1278 | dsl_dataset_t *ds = arg1; | |
1279 | dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1280 | ||
1281 | /* Mark it as inconsistent on-disk, in case we crash */ | |
1282 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1283 | ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; | |
1284 | ||
1285 | spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, | |
1286 | cr, "dataset = %llu", ds->ds_object); | |
1287 | } | |
1288 | ||
1289 | /* ARGSUSED */ | |
1290 | int | |
1291 | dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1292 | { | |
1293 | dsl_dataset_t *ds = arg1; | |
1294 | ||
1295 | /* Can't delete a branch point. */ | |
1296 | if (ds->ds_phys->ds_num_children > 1) | |
1297 | return (EEXIST); | |
1298 | ||
1299 | /* | |
1300 | * Can't delete a head dataset if there are snapshots of it. | |
1301 | * (Except if the only snapshots are from the branch we cloned | |
1302 | * from.) | |
1303 | */ | |
1304 | if (ds->ds_prev != NULL && | |
1305 | ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) | |
1306 | return (EINVAL); | |
1307 | ||
1308 | /* | |
1309 | * If we made changes this txg, traverse_dsl_dataset won't find | |
1310 | * them. Try again. | |
1311 | */ | |
1312 | if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) | |
1313 | return (EAGAIN); | |
1314 | ||
1315 | /* XXX we should do some i/o error checking... */ | |
1316 | return (0); | |
1317 | } | |
1318 | ||
1319 | void | |
1320 | dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) | |
1321 | { | |
1322 | dsl_dataset_t *ds = arg1; | |
1323 | int64_t used = 0, compressed = 0, uncompressed = 0; | |
1324 | zio_t *zio; | |
1325 | int err; | |
1326 | int after_branch_point = FALSE; | |
1327 | dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1328 | objset_t *mos = dp->dp_meta_objset; | |
1329 | dsl_dataset_t *ds_prev = NULL; | |
1330 | uint64_t obj; | |
1331 | ||
1332 | ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); | |
1333 | ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); | |
1334 | ASSERT(ds->ds_prev == NULL || | |
1335 | ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); | |
1336 | ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); | |
1337 | ||
1338 | /* Remove our reservation */ | |
1339 | if (ds->ds_reserved != 0) { | |
1340 | uint64_t val = 0; | |
1341 | dsl_dataset_set_reservation_sync(ds, &val, cr, tx); | |
1342 | ASSERT3U(ds->ds_reserved, ==, 0); | |
1343 | } | |
1344 | ||
1345 | ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); | |
1346 | ||
1347 | obj = ds->ds_object; | |
1348 | ||
1349 | if (ds->ds_phys->ds_prev_snap_obj != 0) { | |
1350 | if (ds->ds_prev) { | |
1351 | ds_prev = ds->ds_prev; | |
1352 | } else { | |
1353 | VERIFY(0 == dsl_dataset_open_obj(dp, | |
1354 | ds->ds_phys->ds_prev_snap_obj, NULL, | |
1355 | DS_MODE_NONE, FTAG, &ds_prev)); | |
1356 | } | |
1357 | after_branch_point = | |
1358 | (ds_prev->ds_phys->ds_next_snap_obj != obj); | |
1359 | ||
1360 | dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); | |
1361 | if (after_branch_point && | |
1362 | ds->ds_phys->ds_next_snap_obj == 0) { | |
1363 | /* This clone is toast. */ | |
1364 | ASSERT(ds_prev->ds_phys->ds_num_children > 1); | |
1365 | ds_prev->ds_phys->ds_num_children--; | |
1366 | } else if (!after_branch_point) { | |
1367 | ds_prev->ds_phys->ds_next_snap_obj = | |
1368 | ds->ds_phys->ds_next_snap_obj; | |
1369 | } | |
1370 | } | |
1371 | ||
1372 | zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); | |
1373 | ||
1374 | if (ds->ds_phys->ds_next_snap_obj != 0) { | |
1375 | blkptr_t bp; | |
1376 | dsl_dataset_t *ds_next; | |
1377 | uint64_t itor = 0; | |
1378 | uint64_t old_unique; | |
1379 | ||
1380 | spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
1381 | ||
1382 | VERIFY(0 == dsl_dataset_open_obj(dp, | |
1383 | ds->ds_phys->ds_next_snap_obj, NULL, | |
1384 | DS_MODE_NONE, FTAG, &ds_next)); | |
1385 | ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); | |
1386 | ||
1387 | old_unique = dsl_dataset_unique(ds_next); | |
1388 | ||
1389 | dmu_buf_will_dirty(ds_next->ds_dbuf, tx); | |
1390 | ds_next->ds_phys->ds_prev_snap_obj = | |
1391 | ds->ds_phys->ds_prev_snap_obj; | |
1392 | ds_next->ds_phys->ds_prev_snap_txg = | |
1393 | ds->ds_phys->ds_prev_snap_txg; | |
1394 | ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, | |
1395 | ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); | |
1396 | ||
1397 | /* | |
1398 | * Transfer to our deadlist (which will become next's | |
1399 | * new deadlist) any entries from next's current | |
1400 | * deadlist which were born before prev, and free the | |
1401 | * other entries. | |
1402 | * | |
1403 | * XXX we're doing this long task with the config lock held | |
1404 | */ | |
1405 | while (bplist_iterate(&ds_next->ds_deadlist, &itor, | |
1406 | &bp) == 0) { | |
1407 | if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { | |
1408 | VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, | |
1409 | &bp, tx)); | |
1410 | if (ds_prev && !after_branch_point && | |
1411 | bp.blk_birth > | |
1412 | ds_prev->ds_phys->ds_prev_snap_txg) { | |
1413 | ds_prev->ds_phys->ds_unique_bytes += | |
1414 | bp_get_dasize(dp->dp_spa, &bp); | |
1415 | } | |
1416 | } else { | |
1417 | used += bp_get_dasize(dp->dp_spa, &bp); | |
1418 | compressed += BP_GET_PSIZE(&bp); | |
1419 | uncompressed += BP_GET_UCSIZE(&bp); | |
1420 | /* XXX check return value? */ | |
1421 | (void) arc_free(zio, dp->dp_spa, tx->tx_txg, | |
1422 | &bp, NULL, NULL, ARC_NOWAIT); | |
1423 | } | |
1424 | } | |
1425 | ||
1426 | /* free next's deadlist */ | |
1427 | bplist_close(&ds_next->ds_deadlist); | |
1428 | bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); | |
1429 | ||
1430 | /* set next's deadlist to our deadlist */ | |
1431 | ds_next->ds_phys->ds_deadlist_obj = | |
1432 | ds->ds_phys->ds_deadlist_obj; | |
1433 | VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, | |
1434 | ds_next->ds_phys->ds_deadlist_obj)); | |
1435 | ds->ds_phys->ds_deadlist_obj = 0; | |
1436 | ||
1437 | if (ds_next->ds_phys->ds_next_snap_obj != 0) { | |
1438 | /* | |
1439 | * Update next's unique to include blocks which | |
1440 | * were previously shared by only this snapshot | |
1441 | * and it. Those blocks will be born after the | |
1442 | * prev snap and before this snap, and will have | |
1443 | * died after the next snap and before the one | |
1444 | * after that (ie. be on the snap after next's | |
1445 | * deadlist). | |
1446 | * | |
1447 | * XXX we're doing this long task with the | |
1448 | * config lock held | |
1449 | */ | |
1450 | dsl_dataset_t *ds_after_next; | |
1451 | ||
1452 | VERIFY(0 == dsl_dataset_open_obj(dp, | |
1453 | ds_next->ds_phys->ds_next_snap_obj, NULL, | |
1454 | DS_MODE_NONE, FTAG, &ds_after_next)); | |
1455 | itor = 0; | |
1456 | while (bplist_iterate(&ds_after_next->ds_deadlist, | |
1457 | &itor, &bp) == 0) { | |
1458 | if (bp.blk_birth > | |
1459 | ds->ds_phys->ds_prev_snap_txg && | |
1460 | bp.blk_birth <= | |
1461 | ds->ds_phys->ds_creation_txg) { | |
1462 | ds_next->ds_phys->ds_unique_bytes += | |
1463 | bp_get_dasize(dp->dp_spa, &bp); | |
1464 | } | |
1465 | } | |
1466 | ||
1467 | dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); | |
1468 | ASSERT3P(ds_next->ds_prev, ==, NULL); | |
1469 | } else { | |
1470 | ASSERT3P(ds_next->ds_prev, ==, ds); | |
1471 | dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, | |
1472 | ds_next); | |
1473 | if (ds_prev) { | |
1474 | VERIFY(0 == dsl_dataset_open_obj(dp, | |
1475 | ds->ds_phys->ds_prev_snap_obj, NULL, | |
1476 | DS_MODE_NONE, ds_next, &ds_next->ds_prev)); | |
1477 | } else { | |
1478 | ds_next->ds_prev = NULL; | |
1479 | } | |
1480 | ||
1481 | dsl_dataset_recalc_head_uniq(ds_next); | |
1482 | ||
1483 | /* | |
1484 | * Reduce the amount of our unconsmed refreservation | |
1485 | * being charged to our parent by the amount of | |
1486 | * new unique data we have gained. | |
1487 | */ | |
1488 | if (old_unique < ds_next->ds_reserved) { | |
1489 | int64_t mrsdelta; | |
1490 | uint64_t new_unique = | |
1491 | ds_next->ds_phys->ds_unique_bytes; | |
1492 | ||
1493 | ASSERT(old_unique <= new_unique); | |
1494 | mrsdelta = MIN(new_unique - old_unique, | |
1495 | ds_next->ds_reserved - old_unique); | |
1496 | dsl_dir_diduse_space(ds->ds_dir, -mrsdelta, | |
1497 | 0, 0, tx); | |
1498 | } | |
1499 | } | |
1500 | dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); | |
1501 | ||
1502 | /* | |
1503 | * NB: unique_bytes might not be accurate for the head objset. | |
1504 | * Before SPA_VERSION 9, we didn't update its value when we | |
1505 | * deleted the most recent snapshot. | |
1506 | */ | |
1507 | ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); | |
1508 | } else { | |
1509 | /* | |
1510 | * There's no next snapshot, so this is a head dataset. | |
1511 | * Destroy the deadlist. Unless it's a clone, the | |
1512 | * deadlist should be empty. (If it's a clone, it's | |
1513 | * safe to ignore the deadlist contents.) | |
1514 | */ | |
1515 | struct killarg ka; | |
1516 | ||
1517 | ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); | |
1518 | bplist_close(&ds->ds_deadlist); | |
1519 | bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); | |
1520 | ds->ds_phys->ds_deadlist_obj = 0; | |
1521 | ||
1522 | /* | |
1523 | * Free everything that we point to (that's born after | |
1524 | * the previous snapshot, if we are a clone) | |
1525 | * | |
1526 | * XXX we're doing this long task with the config lock held | |
1527 | */ | |
1528 | ka.usedp = &used; | |
1529 | ka.compressedp = &compressed; | |
1530 | ka.uncompressedp = &uncompressed; | |
1531 | ka.zio = zio; | |
1532 | ka.tx = tx; | |
1533 | err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, | |
1534 | ADVANCE_POST, kill_blkptr, &ka); | |
1535 | ASSERT3U(err, ==, 0); | |
1536 | ASSERT(spa_version(dp->dp_spa) < | |
1537 | SPA_VERSION_UNIQUE_ACCURATE || | |
1538 | used == ds->ds_phys->ds_unique_bytes); | |
1539 | } | |
1540 | ||
1541 | err = zio_wait(zio); | |
1542 | ASSERT3U(err, ==, 0); | |
1543 | ||
1544 | dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); | |
1545 | ||
1546 | if (ds->ds_phys->ds_snapnames_zapobj) { | |
1547 | err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); | |
1548 | ASSERT(err == 0); | |
1549 | } | |
1550 | ||
1551 | if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { | |
1552 | /* Erase the link in the dataset */ | |
1553 | dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); | |
1554 | ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; | |
1555 | /* | |
1556 | * dsl_dir_sync_destroy() called us, they'll destroy | |
1557 | * the dataset. | |
1558 | */ | |
1559 | } else { | |
1560 | /* remove from snapshot namespace */ | |
1561 | dsl_dataset_t *ds_head; | |
1562 | VERIFY(0 == dsl_dataset_open_obj(dp, | |
1563 | ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, | |
1564 | DS_MODE_NONE, FTAG, &ds_head)); | |
1565 | VERIFY(0 == dsl_dataset_get_snapname(ds)); | |
1566 | #ifdef ZFS_DEBUG | |
1567 | { | |
1568 | uint64_t val; | |
1569 | ||
1570 | err = dsl_dataset_snap_lookup(mos, | |
1571 | ds_head->ds_phys->ds_flags, | |
1572 | ds_head->ds_phys->ds_snapnames_zapobj, | |
1573 | ds->ds_snapname, &val); | |
1574 | ASSERT3U(err, ==, 0); | |
1575 | ASSERT3U(val, ==, obj); | |
1576 | } | |
1577 | #endif | |
1578 | err = dsl_dataset_snap_remove(mos, | |
1579 | ds_head->ds_phys->ds_flags, | |
1580 | ds_head->ds_phys->ds_snapnames_zapobj, | |
1581 | ds->ds_snapname, tx); | |
1582 | ASSERT(err == 0); | |
1583 | dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); | |
1584 | } | |
1585 | ||
1586 | if (ds_prev && ds->ds_prev != ds_prev) | |
1587 | dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); | |
1588 | ||
1589 | spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); | |
1590 | spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, | |
1591 | cr, "dataset = %llu", ds->ds_object); | |
1592 | ||
1593 | dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); | |
1594 | VERIFY(0 == dmu_object_free(mos, obj, tx)); | |
1595 | ||
1596 | } | |
1597 | ||
1598 | static int | |
1599 | dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) | |
1600 | { | |
1601 | uint64_t asize; | |
1602 | ||
1603 | if (!dmu_tx_is_syncing(tx)) | |
1604 | return (0); | |
1605 | ||
1606 | /* | |
1607 | * If there's an fs-only reservation, any blocks that might become | |
1608 | * owned by the snapshot dataset must be accommodated by space | |
1609 | * outside of the reservation. | |
1610 | */ | |
1611 | asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved); | |
1612 | if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE)) | |
1613 | return (ENOSPC); | |
1614 | ||
1615 | /* | |
1616 | * Propogate any reserved space for this snapshot to other | |
1617 | * snapshot checks in this sync group. | |
1618 | */ | |
1619 | if (asize > 0) | |
1620 | dsl_dir_willuse_space(ds->ds_dir, asize, tx); | |
1621 | ||
1622 | return (0); | |
1623 | } | |
1624 | ||
1625 | /* ARGSUSED */ | |
1626 | int | |
1627 | dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1628 | { | |
1629 | dsl_dataset_t *ds = arg1; | |
1630 | const char *snapname = arg2; | |
1631 | objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; | |
1632 | int err; | |
1633 | uint64_t value; | |
1634 | ||
1635 | /* | |
1636 | * We don't allow multiple snapshots of the same txg. If there | |
1637 | * is already one, try again. | |
1638 | */ | |
1639 | if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) | |
1640 | return (EAGAIN); | |
1641 | ||
1642 | /* | |
1643 | * Check for conflicting name snapshot name. | |
1644 | */ | |
1645 | err = dsl_dataset_snap_lookup(mos, ds->ds_phys->ds_flags, | |
1646 | ds->ds_phys->ds_snapnames_zapobj, snapname, &value); | |
1647 | if (err == 0) | |
1648 | return (EEXIST); | |
1649 | if (err != ENOENT) | |
1650 | return (err); | |
1651 | ||
1652 | /* | |
1653 | * Check that the dataset's name is not too long. Name consists | |
1654 | * of the dataset's length + 1 for the @-sign + snapshot name's length | |
1655 | */ | |
1656 | if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) | |
1657 | return (ENAMETOOLONG); | |
1658 | ||
1659 | err = dsl_dataset_snapshot_reserve_space(ds, tx); | |
1660 | if (err) | |
1661 | return (err); | |
1662 | ||
1663 | ds->ds_trysnap_txg = tx->tx_txg; | |
1664 | return (0); | |
1665 | } | |
1666 | ||
1667 | void | |
1668 | dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) | |
1669 | { | |
1670 | dsl_dataset_t *ds = arg1; | |
1671 | const char *snapname = arg2; | |
1672 | dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1673 | dmu_buf_t *dbuf; | |
1674 | dsl_dataset_phys_t *dsphys; | |
1675 | uint64_t dsobj; | |
1676 | objset_t *mos = dp->dp_meta_objset; | |
1677 | int err; | |
1678 | ||
1679 | spa_scrub_restart(dp->dp_spa, tx->tx_txg); | |
1680 | ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); | |
1681 | ||
1682 | dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, | |
1683 | DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); | |
1684 | VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); | |
1685 | dmu_buf_will_dirty(dbuf, tx); | |
1686 | dsphys = dbuf->db_data; | |
1687 | dsphys->ds_dir_obj = ds->ds_dir->dd_object; | |
1688 | dsphys->ds_fsid_guid = unique_create(); | |
1689 | (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, | |
1690 | sizeof (dsphys->ds_guid)); | |
1691 | dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; | |
1692 | dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; | |
1693 | dsphys->ds_next_snap_obj = ds->ds_object; | |
1694 | dsphys->ds_num_children = 1; | |
1695 | dsphys->ds_creation_time = gethrestime_sec(); | |
1696 | dsphys->ds_creation_txg = tx->tx_txg; | |
1697 | dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; | |
1698 | dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; | |
1699 | dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; | |
1700 | dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; | |
1701 | dsphys->ds_flags = ds->ds_phys->ds_flags; | |
1702 | dsphys->ds_bp = ds->ds_phys->ds_bp; | |
1703 | dmu_buf_rele(dbuf, FTAG); | |
1704 | ||
1705 | ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); | |
1706 | if (ds->ds_prev) { | |
1707 | ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == | |
1708 | ds->ds_object || | |
1709 | ds->ds_prev->ds_phys->ds_num_children > 1); | |
1710 | if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { | |
1711 | dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
1712 | ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, | |
1713 | ds->ds_prev->ds_phys->ds_creation_txg); | |
1714 | ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; | |
1715 | } | |
1716 | } | |
1717 | ||
1718 | /* | |
1719 | * If we have a reference-reservation on this dataset, we will | |
1720 | * need to increase the amount of refreservation being charged | |
1721 | * since our unique space is going to zero. | |
1722 | */ | |
1723 | if (ds->ds_reserved) { | |
1724 | int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); | |
1725 | dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx); | |
1726 | } | |
1727 | ||
1728 | bplist_close(&ds->ds_deadlist); | |
1729 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1730 | ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); | |
1731 | ds->ds_phys->ds_prev_snap_obj = dsobj; | |
1732 | ds->ds_phys->ds_prev_snap_txg = tx->tx_txg; | |
1733 | ds->ds_phys->ds_unique_bytes = 0; | |
1734 | if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) | |
1735 | ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; | |
1736 | ds->ds_phys->ds_deadlist_obj = | |
1737 | bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); | |
1738 | VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, | |
1739 | ds->ds_phys->ds_deadlist_obj)); | |
1740 | ||
1741 | dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); | |
1742 | err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, | |
1743 | snapname, 8, 1, &dsobj, tx); | |
1744 | ASSERT(err == 0); | |
1745 | ||
1746 | if (ds->ds_prev) | |
1747 | dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); | |
1748 | VERIFY(0 == dsl_dataset_open_obj(dp, | |
1749 | ds->ds_phys->ds_prev_snap_obj, snapname, | |
1750 | DS_MODE_NONE, ds, &ds->ds_prev)); | |
1751 | ||
1752 | spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, | |
1753 | "dataset = %llu", dsobj); | |
1754 | } | |
1755 | ||
1756 | void | |
1757 | dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) | |
1758 | { | |
1759 | ASSERT(dmu_tx_is_syncing(tx)); | |
1760 | ASSERT(ds->ds_user_ptr != NULL); | |
1761 | ASSERT(ds->ds_phys->ds_next_snap_obj == 0); | |
1762 | ||
1763 | /* | |
1764 | * in case we had to change ds_fsid_guid when we opened it, | |
1765 | * sync it out now. | |
1766 | */ | |
1767 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1768 | ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; | |
1769 | ||
1770 | dsl_dir_dirty(ds->ds_dir, tx); | |
1771 | dmu_objset_sync(ds->ds_user_ptr, zio, tx); | |
1772 | } | |
1773 | ||
1774 | void | |
1775 | dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) | |
1776 | { | |
1777 | uint64_t refd, avail, uobjs, aobjs; | |
1778 | ||
1779 | dsl_dir_stats(ds->ds_dir, nv); | |
1780 | ||
1781 | dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); | |
1782 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); | |
1783 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); | |
1784 | ||
1785 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, | |
1786 | ds->ds_phys->ds_creation_time); | |
1787 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, | |
1788 | ds->ds_phys->ds_creation_txg); | |
1789 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, | |
1790 | ds->ds_quota); | |
1791 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, | |
1792 | ds->ds_reserved); | |
1793 | ||
1794 | if (ds->ds_phys->ds_next_snap_obj) { | |
1795 | /* | |
1796 | * This is a snapshot; override the dd's space used with | |
1797 | * our unique space and compression ratio. | |
1798 | */ | |
1799 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, | |
1800 | ds->ds_phys->ds_unique_bytes); | |
1801 | dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, | |
1802 | ds->ds_phys->ds_compressed_bytes == 0 ? 100 : | |
1803 | (ds->ds_phys->ds_uncompressed_bytes * 100 / | |
1804 | ds->ds_phys->ds_compressed_bytes)); | |
1805 | } | |
1806 | } | |
1807 | ||
1808 | void | |
1809 | dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) | |
1810 | { | |
1811 | stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; | |
1812 | stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; | |
1813 | stat->dds_guid = ds->ds_phys->ds_guid; | |
1814 | if (ds->ds_phys->ds_next_snap_obj) { | |
1815 | stat->dds_is_snapshot = B_TRUE; | |
1816 | stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; | |
1817 | } | |
1818 | ||
1819 | /* clone origin is really a dsl_dir thing... */ | |
1820 | rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); | |
1821 | if (ds->ds_dir->dd_phys->dd_origin_obj) { | |
1822 | dsl_dataset_t *ods; | |
1823 | ||
1824 | VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, | |
1825 | ds->ds_dir->dd_phys->dd_origin_obj, | |
1826 | NULL, DS_MODE_NONE, FTAG, &ods)); | |
1827 | dsl_dataset_name(ods, stat->dds_origin); | |
1828 | dsl_dataset_close(ods, DS_MODE_NONE, FTAG); | |
1829 | } | |
1830 | rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); | |
1831 | } | |
1832 | ||
1833 | uint64_t | |
1834 | dsl_dataset_fsid_guid(dsl_dataset_t *ds) | |
1835 | { | |
1836 | return (ds->ds_fsid_guid); | |
1837 | } | |
1838 | ||
1839 | void | |
1840 | dsl_dataset_space(dsl_dataset_t *ds, | |
1841 | uint64_t *refdbytesp, uint64_t *availbytesp, | |
1842 | uint64_t *usedobjsp, uint64_t *availobjsp) | |
1843 | { | |
1844 | *refdbytesp = ds->ds_phys->ds_used_bytes; | |
1845 | *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); | |
1846 | if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) | |
1847 | *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; | |
1848 | if (ds->ds_quota != 0) { | |
1849 | /* | |
1850 | * Adjust available bytes according to refquota | |
1851 | */ | |
1852 | if (*refdbytesp < ds->ds_quota) | |
1853 | *availbytesp = MIN(*availbytesp, | |
1854 | ds->ds_quota - *refdbytesp); | |
1855 | else | |
1856 | *availbytesp = 0; | |
1857 | } | |
1858 | *usedobjsp = ds->ds_phys->ds_bp.blk_fill; | |
1859 | *availobjsp = DN_MAX_OBJECT - *usedobjsp; | |
1860 | } | |
1861 | ||
1862 | boolean_t | |
1863 | dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) | |
1864 | { | |
1865 | dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1866 | ||
1867 | ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || | |
1868 | dsl_pool_sync_context(dp)); | |
1869 | if (ds->ds_prev == NULL) | |
1870 | return (B_FALSE); | |
1871 | if (ds->ds_phys->ds_bp.blk_birth > | |
1872 | ds->ds_prev->ds_phys->ds_creation_txg) | |
1873 | return (B_TRUE); | |
1874 | return (B_FALSE); | |
1875 | } | |
1876 | ||
1877 | /* ARGSUSED */ | |
1878 | static int | |
1879 | dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1880 | { | |
1881 | dsl_dataset_t *ds = arg1; | |
1882 | char *newsnapname = arg2; | |
1883 | dsl_dir_t *dd = ds->ds_dir; | |
1884 | objset_t *mos = dd->dd_pool->dp_meta_objset; | |
1885 | dsl_dataset_t *hds; | |
1886 | uint64_t val; | |
1887 | int err; | |
1888 | ||
1889 | err = dsl_dataset_open_obj(dd->dd_pool, | |
1890 | dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); | |
1891 | if (err) | |
1892 | return (err); | |
1893 | ||
1894 | /* new name better not be in use */ | |
1895 | err = dsl_dataset_snap_lookup(mos, hds->ds_phys->ds_flags, | |
1896 | hds->ds_phys->ds_snapnames_zapobj, newsnapname, &val); | |
1897 | dsl_dataset_close(hds, DS_MODE_NONE, FTAG); | |
1898 | ||
1899 | if (err == 0) | |
1900 | err = EEXIST; | |
1901 | else if (err == ENOENT) | |
1902 | err = 0; | |
1903 | ||
1904 | /* dataset name + 1 for the "@" + the new snapshot name must fit */ | |
1905 | if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) | |
1906 | err = ENAMETOOLONG; | |
1907 | ||
1908 | return (err); | |
1909 | } | |
1910 | ||
1911 | static void | |
1912 | dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, | |
1913 | cred_t *cr, dmu_tx_t *tx) | |
1914 | { | |
1915 | dsl_dataset_t *ds = arg1; | |
1916 | const char *newsnapname = arg2; | |
1917 | dsl_dir_t *dd = ds->ds_dir; | |
1918 | objset_t *mos = dd->dd_pool->dp_meta_objset; | |
1919 | dsl_dataset_t *hds; | |
1920 | int err; | |
1921 | ||
1922 | ASSERT(ds->ds_phys->ds_next_snap_obj != 0); | |
1923 | ||
1924 | VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, | |
1925 | dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); | |
1926 | ||
1927 | VERIFY(0 == dsl_dataset_get_snapname(ds)); | |
1928 | err = dsl_dataset_snap_remove(mos, hds->ds_phys->ds_flags, | |
1929 | hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, tx); | |
1930 | ASSERT3U(err, ==, 0); | |
1931 | mutex_enter(&ds->ds_lock); | |
1932 | (void) strcpy(ds->ds_snapname, newsnapname); | |
1933 | mutex_exit(&ds->ds_lock); | |
1934 | err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, | |
1935 | ds->ds_snapname, 8, 1, &ds->ds_object, tx); | |
1936 | ASSERT3U(err, ==, 0); | |
1937 | ||
1938 | spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, | |
1939 | cr, "dataset = %llu", ds->ds_object); | |
1940 | dsl_dataset_close(hds, DS_MODE_NONE, FTAG); | |
1941 | } | |
1942 | ||
/*
 * State shared between dsl_recursive_rename() and its per-filesystem
 * callback dsl_snapshot_rename_one().
 */
struct renamesnaparg {
	dsl_sync_task_group_t *dstg;	/* one rename task per snapshot */
	char failed[MAXPATHLEN];	/* name of a dataset that failed */
	char *oldsnap;			/* current snapshot name, after '@' */
	char *newsnap;			/* new snapshot name, after '@' */
};
1949 | ||
1950 | static int | |
1951 | dsl_snapshot_rename_one(char *name, void *arg) | |
1952 | { | |
1953 | struct renamesnaparg *ra = arg; | |
1954 | dsl_dataset_t *ds = NULL; | |
1955 | char *cp; | |
1956 | int err; | |
1957 | ||
1958 | cp = name + strlen(name); | |
1959 | *cp = '@'; | |
1960 | (void) strcpy(cp + 1, ra->oldsnap); | |
1961 | ||
1962 | /* | |
1963 | * For recursive snapshot renames the parent won't be changing | |
1964 | * so we just pass name for both the to/from argument. | |
1965 | */ | |
1966 | if (err = zfs_secpolicy_rename_perms(name, name, CRED())) { | |
1967 | (void) strcpy(ra->failed, name); | |
1968 | return (err); | |
1969 | } | |
1970 | ||
1971 | err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD, | |
1972 | ra->dstg, &ds); | |
1973 | if (err == ENOENT) { | |
1974 | *cp = '\0'; | |
1975 | return (0); | |
1976 | } | |
1977 | if (err) { | |
1978 | (void) strcpy(ra->failed, name); | |
1979 | *cp = '\0'; | |
1980 | dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); | |
1981 | return (err); | |
1982 | } | |
1983 | ||
1984 | #ifdef _KERNEL | |
1985 | /* for all filesystems undergoing rename, we'll need to unmount it */ | |
1986 | (void) zfs_unmount_snap(name, NULL); | |
1987 | #endif | |
1988 | ||
1989 | *cp = '\0'; | |
1990 | ||
1991 | dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, | |
1992 | dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); | |
1993 | ||
1994 | return (0); | |
1995 | } | |
1996 | ||
1997 | static int | |
1998 | dsl_recursive_rename(char *oldname, const char *newname) | |
1999 | { | |
2000 | int err; | |
2001 | struct renamesnaparg *ra; | |
2002 | dsl_sync_task_t *dst; | |
2003 | spa_t *spa; | |
2004 | char *cp, *fsname = spa_strdup(oldname); | |
2005 | int len = strlen(oldname); | |
2006 | ||
2007 | /* truncate the snapshot name to get the fsname */ | |
2008 | cp = strchr(fsname, '@'); | |
2009 | *cp = '\0'; | |
2010 | ||
2011 | err = spa_open(fsname, &spa, FTAG); | |
2012 | if (err) { | |
2013 | kmem_free(fsname, len + 1); | |
2014 | return (err); | |
2015 | } | |
2016 | ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); | |
2017 | ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); | |
2018 | ||
2019 | ra->oldsnap = strchr(oldname, '@') + 1; | |
2020 | ra->newsnap = strchr(newname, '@') + 1; | |
2021 | *ra->failed = '\0'; | |
2022 | ||
2023 | err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, | |
2024 | DS_FIND_CHILDREN); | |
2025 | kmem_free(fsname, len + 1); | |
2026 | ||
2027 | if (err == 0) { | |
2028 | err = dsl_sync_task_group_wait(ra->dstg); | |
2029 | } | |
2030 | ||
2031 | for (dst = list_head(&ra->dstg->dstg_tasks); dst; | |
2032 | dst = list_next(&ra->dstg->dstg_tasks, dst)) { | |
2033 | dsl_dataset_t *ds = dst->dst_arg1; | |
2034 | if (dst->dst_err) { | |
2035 | dsl_dir_name(ds->ds_dir, ra->failed); | |
2036 | (void) strcat(ra->failed, "@"); | |
2037 | (void) strcat(ra->failed, ra->newsnap); | |
2038 | } | |
2039 | dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg); | |
2040 | } | |
2041 | ||
2042 | if (err) | |
2043 | (void) strcpy(oldname, ra->failed); | |
2044 | ||
2045 | dsl_sync_task_group_destroy(ra->dstg); | |
2046 | kmem_free(ra, sizeof (struct renamesnaparg)); | |
2047 | spa_close(spa, FTAG); | |
2048 | return (err); | |
2049 | } | |
2050 | ||
2051 | static int | |
2052 | dsl_valid_rename(char *oldname, void *arg) | |
2053 | { | |
2054 | int delta = *(int *)arg; | |
2055 | ||
2056 | if (strlen(oldname) + delta >= MAXNAMELEN) | |
2057 | return (ENAMETOOLONG); | |
2058 | ||
2059 | return (0); | |
2060 | } | |
2061 | ||
2062 | #pragma weak dmu_objset_rename = dsl_dataset_rename | |
2063 | int | |
2064 | dsl_dataset_rename(char *oldname, const char *newname, | |
2065 | boolean_t recursive) | |
2066 | { | |
2067 | dsl_dir_t *dd; | |
2068 | dsl_dataset_t *ds; | |
2069 | const char *tail; | |
2070 | int err; | |
2071 | ||
2072 | err = dsl_dir_open(oldname, FTAG, &dd, &tail); | |
2073 | if (err) | |
2074 | return (err); | |
2075 | if (tail == NULL) { | |
2076 | int delta = strlen(newname) - strlen(oldname); | |
2077 | ||
2078 | /* if we're growing, validate child size lengths */ | |
2079 | if (delta > 0) | |
2080 | err = dmu_objset_find(oldname, dsl_valid_rename, | |
2081 | &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); | |
2082 | ||
2083 | if (!err) | |
2084 | err = dsl_dir_rename(dd, newname); | |
2085 | dsl_dir_close(dd, FTAG); | |
2086 | return (err); | |
2087 | } | |
2088 | if (tail[0] != '@') { | |
2089 | /* the name ended in a nonexistant component */ | |
2090 | dsl_dir_close(dd, FTAG); | |
2091 | return (ENOENT); | |
2092 | } | |
2093 | ||
2094 | dsl_dir_close(dd, FTAG); | |
2095 | ||
2096 | /* new name must be snapshot in same filesystem */ | |
2097 | tail = strchr(newname, '@'); | |
2098 | if (tail == NULL) | |
2099 | return (EINVAL); | |
2100 | tail++; | |
2101 | if (strncmp(oldname, newname, tail - newname) != 0) | |
2102 | return (EXDEV); | |
2103 | ||
2104 | if (recursive) { | |
2105 | err = dsl_recursive_rename(oldname, newname); | |
2106 | } else { | |
2107 | err = dsl_dataset_open(oldname, | |
2108 | DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); | |
2109 | if (err) | |
2110 | return (err); | |
2111 | ||
2112 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
2113 | dsl_dataset_snapshot_rename_check, | |
2114 | dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); | |
2115 | ||
2116 | dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); | |
2117 | } | |
2118 | ||
2119 | return (err); | |
2120 | } | |
2121 | ||
/*
 * Values computed by dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	uint64_t used;		/* space to move to the promoted dir */
	uint64_t comp;		/* ... compressed */
	uint64_t uncomp;	/* ... uncompressed */
	uint64_t unique;	/* origin snapshot's new unique bytes */
	uint64_t ds_flags;	/* flags of the origin dir's head dataset */
	uint64_t newnext_obj;	/* origin snapshot's new next-snap object */
	uint64_t snapnames_obj;	/* origin head's snapnames ZAP object */
};
2126 | ||
2127 | /* ARGSUSED */ | |
2128 | static int | |
2129 | dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
2130 | { | |
2131 | dsl_dataset_t *hds = arg1; | |
2132 | struct promotearg *pa = arg2; | |
2133 | dsl_dir_t *dd = hds->ds_dir; | |
2134 | dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
2135 | dsl_dir_t *odd = NULL; | |
2136 | dsl_dataset_t *ds = NULL; | |
2137 | dsl_dataset_t *origin_ds = NULL; | |
2138 | dsl_dataset_t *newnext_ds = NULL; | |
2139 | int err; | |
2140 | char *name = NULL; | |
2141 | uint64_t itor = 0; | |
2142 | blkptr_t bp; | |
2143 | ||
2144 | bzero(pa, sizeof (*pa)); | |
2145 | ||
2146 | /* Check that it is a clone */ | |
2147 | if (dd->dd_phys->dd_origin_obj == 0) | |
2148 | return (EINVAL); | |
2149 | ||
2150 | /* Since this is so expensive, don't do the preliminary check */ | |
2151 | if (!dmu_tx_is_syncing(tx)) | |
2152 | return (0); | |
2153 | ||
2154 | if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, | |
2155 | NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)) | |
2156 | goto out; | |
2157 | odd = origin_ds->ds_dir; | |
2158 | ||
2159 | { | |
2160 | dsl_dataset_t *phds; | |
2161 | if (err = dsl_dataset_open_obj(dd->dd_pool, | |
2162 | odd->dd_phys->dd_head_dataset_obj, | |
2163 | NULL, DS_MODE_NONE, FTAG, &phds)) | |
2164 | goto out; | |
2165 | pa->ds_flags = phds->ds_phys->ds_flags; | |
2166 | pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; | |
2167 | dsl_dataset_close(phds, DS_MODE_NONE, FTAG); | |
2168 | } | |
2169 | ||
2170 | if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { | |
2171 | err = EXDEV; | |
2172 | goto out; | |
2173 | } | |
2174 | ||
2175 | /* find origin's new next ds */ | |
2176 | VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, | |
2177 | NULL, DS_MODE_NONE, FTAG, &newnext_ds)); | |
2178 | while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) { | |
2179 | dsl_dataset_t *prev; | |
2180 | ||
2181 | if (err = dsl_dataset_open_obj(dd->dd_pool, | |
2182 | newnext_ds->ds_phys->ds_prev_snap_obj, | |
2183 | NULL, DS_MODE_NONE, FTAG, &prev)) | |
2184 | goto out; | |
2185 | dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
2186 | newnext_ds = prev; | |
2187 | } | |
2188 | pa->newnext_obj = newnext_ds->ds_object; | |
2189 | ||
2190 | /* compute origin's new unique space */ | |
2191 | while ((err = bplist_iterate(&newnext_ds->ds_deadlist, | |
2192 | &itor, &bp)) == 0) { | |
2193 | if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg) | |
2194 | pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); | |
2195 | } | |
2196 | if (err != ENOENT) | |
2197 | goto out; | |
2198 | ||
2199 | /* Walk the snapshots that we are moving */ | |
2200 | name = kmem_alloc(MAXPATHLEN, KM_SLEEP); | |
2201 | ds = origin_ds; | |
2202 | /* CONSTCOND */ | |
2203 | while (TRUE) { | |
2204 | uint64_t val, dlused, dlcomp, dluncomp; | |
2205 | dsl_dataset_t *prev; | |
2206 | ||
2207 | /* Check that the snapshot name does not conflict */ | |
2208 | dsl_dataset_name(ds, name); | |
2209 | err = dsl_dataset_snap_lookup(dd->dd_pool->dp_meta_objset, | |
2210 | hds->ds_phys->ds_flags, hds->ds_phys->ds_snapnames_zapobj, | |
2211 | ds->ds_snapname, &val); | |
2212 | if (err != ENOENT) { | |
2213 | if (err == 0) | |
2214 | err = EEXIST; | |
2215 | goto out; | |
2216 | } | |
2217 | ||
2218 | /* | |
2219 | * compute space to transfer. Each snapshot gave birth to: | |
2220 | * (my used) - (prev's used) + (deadlist's used) | |
2221 | */ | |
2222 | pa->used += ds->ds_phys->ds_used_bytes; | |
2223 | pa->comp += ds->ds_phys->ds_compressed_bytes; | |
2224 | pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; | |
2225 | ||
2226 | /* If we reach the first snapshot, we're done. */ | |
2227 | if (ds->ds_phys->ds_prev_snap_obj == 0) | |
2228 | break; | |
2229 | ||
2230 | if (err = bplist_space(&ds->ds_deadlist, | |
2231 | &dlused, &dlcomp, &dluncomp)) | |
2232 | goto out; | |
2233 | if (err = dsl_dataset_open_obj(dd->dd_pool, | |
2234 | ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, | |
2235 | FTAG, &prev)) | |
2236 | goto out; | |
2237 | pa->used += dlused - prev->ds_phys->ds_used_bytes; | |
2238 | pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; | |
2239 | pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; | |
2240 | ||
2241 | /* | |
2242 | * We could be a clone of a clone. If we reach our | |
2243 | * parent's branch point, we're done. | |
2244 | */ | |
2245 | if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
2246 | dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
2247 | break; | |
2248 | } | |
2249 | if (ds != origin_ds) | |
2250 | dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
2251 | ds = prev; | |
2252 | } | |
2253 | ||
2254 | /* Check that there is enough space here */ | |
2255 | err = dsl_dir_transfer_possible(odd, dd, pa->used); | |
2256 | ||
2257 | out: | |
2258 | if (ds && ds != origin_ds) | |
2259 | dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
2260 | if (origin_ds) | |
2261 | dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); | |
2262 | if (newnext_ds) | |
2263 | dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); | |
2264 | if (name) | |
2265 | kmem_free(name, MAXPATHLEN); | |
2266 | return (err); | |
2267 | } | |
2268 | ||
2269 | static void | |
2270 | dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) | |
2271 | { | |
2272 | dsl_dataset_t *hds = arg1; | |
2273 | struct promotearg *pa = arg2; | |
2274 | dsl_dir_t *dd = hds->ds_dir; | |
2275 | dsl_pool_t *dp = hds->ds_dir->dd_pool; | |
2276 | dsl_dir_t *odd = NULL; | |
2277 | dsl_dataset_t *ds, *origin_ds; | |
2278 | char *name; | |
2279 | ||
2280 | ASSERT(dd->dd_phys->dd_origin_obj != 0); | |
2281 | ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); | |
2282 | ||
2283 | VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj, | |
2284 | NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds)); | |
2285 | /* | |
2286 | * We need to explicitly open odd, since origin_ds's dd will be | |
2287 | * changing. | |
2288 | */ | |
2289 | VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, | |
2290 | NULL, FTAG, &odd)); | |
2291 | ||
2292 | /* move snapshots to this dir */ | |
2293 | name = kmem_alloc(MAXPATHLEN, KM_SLEEP); | |
2294 | ds = origin_ds; | |
2295 | /* CONSTCOND */ | |
2296 | while (TRUE) { | |
2297 | dsl_dataset_t *prev; | |
2298 | ||
2299 | /* move snap name entry */ | |
2300 | dsl_dataset_name(ds, name); | |
2301 | VERIFY(0 == dsl_dataset_snap_remove(dp->dp_meta_objset, | |
2302 | pa->ds_flags, pa->snapnames_obj, ds->ds_snapname, tx)); | |
2303 | VERIFY(0 == zap_add(dp->dp_meta_objset, | |
2304 | hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, | |
2305 | 8, 1, &ds->ds_object, tx)); | |
2306 | ||
2307 | /* change containing dsl_dir */ | |
2308 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
2309 | ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); | |
2310 | ds->ds_phys->ds_dir_obj = dd->dd_object; | |
2311 | ASSERT3P(ds->ds_dir, ==, odd); | |
2312 | dsl_dir_close(ds->ds_dir, ds); | |
2313 | VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, | |
2314 | NULL, ds, &ds->ds_dir)); | |
2315 | ||
2316 | ASSERT3U(dsl_prop_numcb(ds), ==, 0); | |
2317 | ||
2318 | if (ds->ds_phys->ds_prev_snap_obj == 0) | |
2319 | break; | |
2320 | ||
2321 | VERIFY(0 == dsl_dataset_open_obj(dp, | |
2322 | ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, | |
2323 | FTAG, &prev)); | |
2324 | ||
2325 | if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { | |
2326 | dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); | |
2327 | break; | |
2328 | } | |
2329 | if (ds != origin_ds) | |
2330 | dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
2331 | ds = prev; | |
2332 | } | |
2333 | if (ds != origin_ds) | |
2334 | dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); | |
2335 | ||
2336 | /* change origin's next snap */ | |
2337 | dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); | |
2338 | origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; | |
2339 | ||
2340 | /* change origin */ | |
2341 | dmu_buf_will_dirty(dd->dd_dbuf, tx); | |
2342 | ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); | |
2343 | dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; | |
2344 | dmu_buf_will_dirty(odd->dd_dbuf, tx); | |
2345 | odd->dd_phys->dd_origin_obj = origin_ds->ds_object; | |
2346 | ||
2347 | /* change space accounting */ | |
2348 | dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx); | |
2349 | dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); | |
2350 | origin_ds->ds_phys->ds_unique_bytes = pa->unique; | |
2351 | ||
2352 | /* log history record */ | |
2353 | spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, | |
2354 | cr, "dataset = %llu", ds->ds_object); | |
2355 | ||
2356 | dsl_dir_close(odd, FTAG); | |
2357 | dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG); | |
2358 | kmem_free(name, MAXPATHLEN); | |
2359 | } | |
2360 | ||
2361 | int | |
2362 | dsl_dataset_promote(const char *name) | |
2363 | { | |
2364 | dsl_dataset_t *ds; | |
2365 | int err; | |
2366 | dmu_object_info_t doi; | |
2367 | struct promotearg pa; | |
2368 | ||
2369 | err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); | |
2370 | if (err) | |
2371 | return (err); | |
2372 | ||
2373 | err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, | |
2374 | ds->ds_phys->ds_snapnames_zapobj, &doi); | |
2375 | if (err) { | |
2376 | dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
2377 | return (err); | |
2378 | } | |
2379 | ||
2380 | /* | |
2381 | * Add in 128x the snapnames zapobj size, since we will be moving | |
2382 | * a bunch of snapnames to the promoted ds, and dirtying their | |
2383 | * bonus buffers. | |
2384 | */ | |
2385 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
2386 | dsl_dataset_promote_check, | |
2387 | dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); | |
2388 | dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
2389 | return (err); | |
2390 | } | |
2391 | ||
/*
 * Argument passed to dsl_dataset_clone_swap_check() and
 * dsl_dataset_clone_swap_sync().
 */
struct cloneswaparg {
	dsl_dataset_t *cds; /* clone dataset */
	dsl_dataset_t *ohds; /* origin's head dataset */
	boolean_t force; /* swap even if ohds changed since last snap */
	/* set by the check function, applied by the sync function */
	int64_t unused_refres_delta; /* change in unconsumed refreservation */
};
2398 | ||
2399 | /* ARGSUSED */ | |
2400 | static int | |
2401 | dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
2402 | { | |
2403 | struct cloneswaparg *csa = arg1; | |
2404 | ||
2405 | /* they should both be heads */ | |
2406 | if (dsl_dataset_is_snapshot(csa->cds) || | |
2407 | dsl_dataset_is_snapshot(csa->ohds)) | |
2408 | return (EINVAL); | |
2409 | ||
2410 | /* the branch point should be just before them */ | |
2411 | if (csa->cds->ds_prev != csa->ohds->ds_prev) | |
2412 | return (EINVAL); | |
2413 | ||
2414 | /* cds should be the clone */ | |
2415 | if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != | |
2416 | csa->ohds->ds_object) | |
2417 | return (EINVAL); | |
2418 | ||
2419 | /* the clone should be a child of the origin */ | |
2420 | if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) | |
2421 | return (EINVAL); | |
2422 | ||
2423 | /* ohds shouldn't be modified unless 'force' */ | |
2424 | if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) | |
2425 | return (ETXTBSY); | |
2426 | ||
2427 | /* adjust amount of any unconsumed refreservation */ | |
2428 | csa->unused_refres_delta = | |
2429 | (int64_t)MIN(csa->ohds->ds_reserved, | |
2430 | csa->ohds->ds_phys->ds_unique_bytes) - | |
2431 | (int64_t)MIN(csa->ohds->ds_reserved, | |
2432 | csa->cds->ds_phys->ds_unique_bytes); | |
2433 | ||
2434 | if (csa->unused_refres_delta > 0 && | |
2435 | csa->unused_refres_delta > | |
2436 | dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) | |
2437 | return (ENOSPC); | |
2438 | ||
2439 | return (0); | |
2440 | } | |
2441 | ||
2442 | /* ARGSUSED */ | |
2443 | static void | |
2444 | dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) | |
2445 | { | |
2446 | struct cloneswaparg *csa = arg1; | |
2447 | dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; | |
2448 | uint64_t itor = 0; | |
2449 | blkptr_t bp; | |
2450 | uint64_t unique = 0; | |
2451 | int err; | |
2452 | ||
2453 | ASSERT(csa->cds->ds_reserved == 0); | |
2454 | ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); | |
2455 | ||
2456 | dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); | |
2457 | dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); | |
2458 | dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); | |
2459 | ||
2460 | if (csa->cds->ds_user_ptr != NULL) { | |
2461 | csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); | |
2462 | csa->cds->ds_user_ptr = NULL; | |
2463 | } | |
2464 | ||
2465 | if (csa->ohds->ds_user_ptr != NULL) { | |
2466 | csa->ohds->ds_user_evict_func(csa->ohds, | |
2467 | csa->ohds->ds_user_ptr); | |
2468 | csa->ohds->ds_user_ptr = NULL; | |
2469 | } | |
2470 | ||
2471 | /* compute unique space */ | |
2472 | while ((err = bplist_iterate(&csa->cds->ds_deadlist, | |
2473 | &itor, &bp)) == 0) { | |
2474 | if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg) | |
2475 | unique += bp_get_dasize(dp->dp_spa, &bp); | |
2476 | } | |
2477 | VERIFY(err == ENOENT); | |
2478 | ||
2479 | /* reset origin's unique bytes */ | |
2480 | csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique; | |
2481 | ||
2482 | /* swap blkptrs */ | |
2483 | { | |
2484 | blkptr_t tmp; | |
2485 | tmp = csa->ohds->ds_phys->ds_bp; | |
2486 | csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; | |
2487 | csa->cds->ds_phys->ds_bp = tmp; | |
2488 | } | |
2489 | ||
2490 | /* set dd_*_bytes */ | |
2491 | { | |
2492 | int64_t dused, dcomp, duncomp; | |
2493 | uint64_t cdl_used, cdl_comp, cdl_uncomp; | |
2494 | uint64_t odl_used, odl_comp, odl_uncomp; | |
2495 | ||
2496 | VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, | |
2497 | &cdl_comp, &cdl_uncomp)); | |
2498 | VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, | |
2499 | &odl_comp, &odl_uncomp)); | |
2500 | dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - | |
2501 | (csa->ohds->ds_phys->ds_used_bytes + odl_used); | |
2502 | dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - | |
2503 | (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); | |
2504 | duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + | |
2505 | cdl_uncomp - | |
2506 | (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); | |
2507 | ||
2508 | dsl_dir_diduse_space(csa->ohds->ds_dir, | |
2509 | dused, dcomp, duncomp, tx); | |
2510 | dsl_dir_diduse_space(csa->cds->ds_dir, | |
2511 | -dused, -dcomp, -duncomp, tx); | |
2512 | } | |
2513 | ||
2514 | #define SWITCH64(x, y) \ | |
2515 | { \ | |
2516 | uint64_t __tmp = (x); \ | |
2517 | (x) = (y); \ | |
2518 | (y) = __tmp; \ | |
2519 | } | |
2520 | ||
2521 | /* swap ds_*_bytes */ | |
2522 | SWITCH64(csa->ohds->ds_phys->ds_used_bytes, | |
2523 | csa->cds->ds_phys->ds_used_bytes); | |
2524 | SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, | |
2525 | csa->cds->ds_phys->ds_compressed_bytes); | |
2526 | SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, | |
2527 | csa->cds->ds_phys->ds_uncompressed_bytes); | |
2528 | SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, | |
2529 | csa->cds->ds_phys->ds_unique_bytes); | |
2530 | ||
2531 | /* apply any parent delta for change in unconsumed refreservation */ | |
2532 | dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta, | |
2533 | 0, 0, tx); | |
2534 | ||
2535 | /* swap deadlists */ | |
2536 | bplist_close(&csa->cds->ds_deadlist); | |
2537 | bplist_close(&csa->ohds->ds_deadlist); | |
2538 | SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, | |
2539 | csa->cds->ds_phys->ds_deadlist_obj); | |
2540 | VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, | |
2541 | csa->cds->ds_phys->ds_deadlist_obj)); | |
2542 | VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, | |
2543 | csa->ohds->ds_phys->ds_deadlist_obj)); | |
2544 | } | |
2545 | ||
2546 | /* | |
2547 | * Swap 'clone' with its origin head file system. | |
2548 | */ | |
2549 | int | |
2550 | dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, | |
2551 | boolean_t force) | |
2552 | { | |
2553 | struct cloneswaparg csa; | |
2554 | ||
2555 | ASSERT(clone->ds_open_refcount == DS_REF_MAX); | |
2556 | ASSERT(origin_head->ds_open_refcount == DS_REF_MAX); | |
2557 | ||
2558 | csa.cds = clone; | |
2559 | csa.ohds = origin_head; | |
2560 | csa.force = force; | |
2561 | return (dsl_sync_task_do(clone->ds_dir->dd_pool, | |
2562 | dsl_dataset_clone_swap_check, | |
2563 | dsl_dataset_clone_swap_sync, &csa, NULL, 9)); | |
2564 | } | |
2565 | ||
2566 | /* | |
2567 | * Given a pool name and a dataset object number in that pool, | |
2568 | * return the name of that dataset. | |
2569 | */ | |
2570 | int | |
2571 | dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) | |
2572 | { | |
2573 | spa_t *spa; | |
2574 | dsl_pool_t *dp; | |
2575 | dsl_dataset_t *ds = NULL; | |
2576 | int error; | |
2577 | ||
2578 | if ((error = spa_open(pname, &spa, FTAG)) != 0) | |
2579 | return (error); | |
2580 | dp = spa_get_dsl(spa); | |
2581 | rw_enter(&dp->dp_config_rwlock, RW_READER); | |
2582 | if ((error = dsl_dataset_open_obj(dp, obj, | |
2583 | NULL, DS_MODE_NONE, FTAG, &ds)) != 0) { | |
2584 | rw_exit(&dp->dp_config_rwlock); | |
2585 | spa_close(spa, FTAG); | |
2586 | return (error); | |
2587 | } | |
2588 | dsl_dataset_name(ds, buf); | |
2589 | dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
2590 | rw_exit(&dp->dp_config_rwlock); | |
2591 | spa_close(spa, FTAG); | |
2592 | ||
2593 | return (0); | |
2594 | } | |
2595 | ||
2596 | int | |
2597 | dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, | |
2598 | uint64_t asize, uint64_t inflight, uint64_t *used, | |
2599 | uint64_t *ref_rsrv) | |
2600 | { | |
2601 | int error = 0; | |
2602 | ||
2603 | ASSERT3S(asize, >, 0); | |
2604 | ||
2605 | /* | |
2606 | * *ref_rsrv is the portion of asize that will come from any | |
2607 | * unconsumed refreservation space. | |
2608 | */ | |
2609 | *ref_rsrv = 0; | |
2610 | ||
2611 | mutex_enter(&ds->ds_lock); | |
2612 | /* | |
2613 | * Make a space adjustment for reserved bytes. | |
2614 | */ | |
2615 | if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { | |
2616 | ASSERT3U(*used, >=, | |
2617 | ds->ds_reserved - ds->ds_phys->ds_unique_bytes); | |
2618 | *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); | |
2619 | *ref_rsrv = | |
2620 | asize - MIN(asize, parent_delta(ds, asize + inflight)); | |
2621 | } | |
2622 | ||
2623 | if (!check_quota || ds->ds_quota == 0) { | |
2624 | mutex_exit(&ds->ds_lock); | |
2625 | return (0); | |
2626 | } | |
2627 | /* | |
2628 | * If they are requesting more space, and our current estimate | |
2629 | * is over quota, they get to try again unless the actual | |
2630 | * on-disk is over quota and there are no pending changes (which | |
2631 | * may free up space for us). | |
2632 | */ | |
2633 | if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { | |
2634 | if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) | |
2635 | error = ERESTART; | |
2636 | else | |
2637 | error = EDQUOT; | |
2638 | } | |
2639 | mutex_exit(&ds->ds_lock); | |
2640 | ||
2641 | return (error); | |
2642 | } | |
2643 | ||
2644 | /* ARGSUSED */ | |
2645 | static int | |
2646 | dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
2647 | { | |
2648 | dsl_dataset_t *ds = arg1; | |
2649 | uint64_t *quotap = arg2; | |
2650 | uint64_t new_quota = *quotap; | |
2651 | ||
2652 | if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) | |
2653 | return (ENOTSUP); | |
2654 | ||
2655 | if (new_quota == 0) | |
2656 | return (0); | |
2657 | ||
2658 | if (new_quota < ds->ds_phys->ds_used_bytes || | |
2659 | new_quota < ds->ds_reserved) | |
2660 | return (ENOSPC); | |
2661 | ||
2662 | return (0); | |
2663 | } | |
2664 | ||
2665 | /* ARGSUSED */ | |
2666 | void | |
2667 | dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) | |
2668 | { | |
2669 | dsl_dataset_t *ds = arg1; | |
2670 | uint64_t *quotap = arg2; | |
2671 | uint64_t new_quota = *quotap; | |
2672 | ||
2673 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
2674 | ||
2675 | mutex_enter(&ds->ds_lock); | |
2676 | ds->ds_quota = new_quota; | |
2677 | mutex_exit(&ds->ds_lock); | |
2678 | ||
2679 | dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); | |
2680 | ||
2681 | spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, | |
2682 | tx, cr, "%lld dataset = %llu ", | |
2683 | (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj); | |
2684 | } | |
2685 | ||
2686 | int | |
2687 | dsl_dataset_set_quota(const char *dsname, uint64_t quota) | |
2688 | { | |
2689 | dsl_dataset_t *ds; | |
2690 | int err; | |
2691 | ||
2692 | err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); | |
2693 | if (err) | |
2694 | return (err); | |
2695 | ||
2696 | if (quota != ds->ds_quota) { | |
2697 | /* | |
2698 | * If someone removes a file, then tries to set the quota, we | |
2699 | * want to make sure the file freeing takes effect. | |
2700 | */ | |
2701 | txg_wait_open(ds->ds_dir->dd_pool, 0); | |
2702 | ||
2703 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
2704 | dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, | |
2705 | ds, "a, 0); | |
2706 | } | |
2707 | dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); | |
2708 | return (err); | |
2709 | } | |
2710 | ||
2711 | static int | |
2712 | dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
2713 | { | |
2714 | dsl_dataset_t *ds = arg1; | |
2715 | uint64_t *reservationp = arg2; | |
2716 | uint64_t new_reservation = *reservationp; | |
2717 | int64_t delta; | |
2718 | uint64_t unique; | |
2719 | ||
2720 | if (new_reservation > INT64_MAX) | |
2721 | return (EOVERFLOW); | |
2722 | ||
2723 | if (spa_version(ds->ds_dir->dd_pool->dp_spa) < | |
2724 | SPA_VERSION_REFRESERVATION) | |
2725 | return (ENOTSUP); | |
2726 | ||
2727 | if (dsl_dataset_is_snapshot(ds)) | |
2728 | return (EINVAL); | |
2729 | ||
2730 | /* | |
2731 | * If we are doing the preliminary check in open context, the | |
2732 | * space estimates may be inaccurate. | |
2733 | */ | |
2734 | if (!dmu_tx_is_syncing(tx)) | |
2735 | return (0); | |
2736 | ||
2737 | mutex_enter(&ds->ds_lock); | |
2738 | unique = dsl_dataset_unique(ds); | |
2739 | delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved); | |
2740 | mutex_exit(&ds->ds_lock); | |
2741 | ||
2742 | if (delta > 0 && | |
2743 | delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) | |
2744 | return (ENOSPC); | |
2745 | if (delta > 0 && ds->ds_quota > 0 && | |
2746 | new_reservation > ds->ds_quota) | |
2747 | return (ENOSPC); | |
2748 | ||
2749 | return (0); | |
2750 | } | |
2751 | ||
2752 | /* ARGSUSED */ | |
2753 | static void | |
2754 | dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, | |
2755 | dmu_tx_t *tx) | |
2756 | { | |
2757 | dsl_dataset_t *ds = arg1; | |
2758 | uint64_t *reservationp = arg2; | |
2759 | uint64_t new_reservation = *reservationp; | |
2760 | uint64_t unique; | |
2761 | int64_t delta; | |
2762 | ||
2763 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
2764 | ||
2765 | mutex_enter(&ds->ds_lock); | |
2766 | unique = dsl_dataset_unique(ds); | |
2767 | delta = MAX(0, (int64_t)(new_reservation - unique)) - | |
2768 | MAX(0, (int64_t)(ds->ds_reserved - unique)); | |
2769 | ds->ds_reserved = new_reservation; | |
2770 | mutex_exit(&ds->ds_lock); | |
2771 | ||
2772 | dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", | |
2773 | new_reservation, cr, tx); | |
2774 | ||
2775 | dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx); | |
2776 | ||
2777 | spa_history_internal_log(LOG_DS_REFRESERV, | |
2778 | ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", | |
2779 | (longlong_t)new_reservation, | |
2780 | ds->ds_dir->dd_phys->dd_head_dataset_obj); | |
2781 | } | |
2782 | ||
2783 | int | |
2784 | dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) | |
2785 | { | |
2786 | dsl_dataset_t *ds; | |
2787 | int err; | |
2788 | ||
2789 | err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds); | |
2790 | if (err) | |
2791 | return (err); | |
2792 | ||
2793 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
2794 | dsl_dataset_set_reservation_check, | |
2795 | dsl_dataset_set_reservation_sync, ds, &reservation, 0); | |
2796 | dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); | |
2797 | return (err); | |
2798 | } |