]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
34dc7c2f | 23 | */ |
8d35c149 AS |
24 | /* |
25 | * Copyright 2011 Nexenta Systems, Inc. All rights reserved. | |
26 | */ | |
34dc7c2f | 27 | |
34dc7c2f BB |
28 | #include <sys/dmu.h> |
29 | #include <sys/dmu_impl.h> | |
30 | #include <sys/dmu_tx.h> | |
31 | #include <sys/dbuf.h> | |
32 | #include <sys/dnode.h> | |
33 | #include <sys/zfs_context.h> | |
34 | #include <sys/dmu_objset.h> | |
35 | #include <sys/dmu_traverse.h> | |
36 | #include <sys/dsl_dataset.h> | |
37 | #include <sys/dsl_dir.h> | |
428870ff | 38 | #include <sys/dsl_prop.h> |
34dc7c2f BB |
39 | #include <sys/dsl_pool.h> |
40 | #include <sys/dsl_synctask.h> | |
41 | #include <sys/zfs_ioctl.h> | |
42 | #include <sys/zap.h> | |
43 | #include <sys/zio_checksum.h> | |
428870ff BB |
44 | #include <sys/zfs_znode.h> |
45 | #include <zfs_fletcher.h> | |
46 | #include <sys/avl.h> | |
47 | #include <sys/ddt.h> | |
572e2857 | 48 | #include <sys/zfs_onexit.h> |
34dc7c2f BB |
49 | |
/* Hold/ownership tag used for datasets held or owned on behalf of a receive. */
static char *dmu_recv_tag = "dmu_recv_tag";
51 | ||
428870ff BB |
/*
 * Kind of replay record that is currently built in the record buffer but
 * not yet written to the stream.  Consecutive dump_free() calls, and
 * consecutive dump_freeobjects() calls, may be merged into one DRR_FREE or
 * DRR_FREEOBJECTS record, so such a record is held back ("pending") from
 * one backup_cb invocation to the next until it can no longer be extended.
 */
typedef enum {
	PENDING_NONE = 0,		/* nothing is being held back */
	PENDING_FREE = 1,		/* a DRR_FREE record is pending */
	PENDING_FREEOBJECTS = 2		/* a DRR_FREEOBJECTS record is pending */
} pendop_t;
63 | ||
34dc7c2f BB |
64 | struct backuparg { |
65 | dmu_replay_record_t *drr; | |
66 | vnode_t *vp; | |
67 | offset_t *off; | |
68 | objset_t *os; | |
69 | zio_cksum_t zc; | |
428870ff | 70 | uint64_t toguid; |
34dc7c2f | 71 | int err; |
428870ff | 72 | pendop_t pending_op; |
34dc7c2f BB |
73 | }; |
74 | ||
75 | static int | |
76 | dump_bytes(struct backuparg *ba, void *buf, int len) | |
77 | { | |
78 | ssize_t resid; /* have to get resid to get detailed errno */ | |
79 | ASSERT3U(len % 8, ==, 0); | |
80 | ||
81 | fletcher_4_incremental_native(buf, len, &ba->zc); | |
82 | ba->err = vn_rdwr(UIO_WRITE, ba->vp, | |
83 | (caddr_t)buf, len, | |
84 | 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); | |
85 | *ba->off += len; | |
86 | return (ba->err); | |
87 | } | |
88 | ||
89 | static int | |
90 | dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, | |
91 | uint64_t length) | |
92 | { | |
428870ff BB |
93 | struct drr_free *drrf = &(ba->drr->drr_u.drr_free); |
94 | ||
95 | /* | |
96 | * If there is a pending op, but it's not PENDING_FREE, push it out, | |
97 | * since free block aggregation can only be done for blocks of the | |
98 | * same type (i.e., DRR_FREE records can only be aggregated with | |
99 | * other DRR_FREE records. DRR_FREEOBJECTS records can only be | |
100 | * aggregated with other DRR_FREEOBJECTS records. | |
101 | */ | |
102 | if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { | |
103 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
104 | return (EINTR); | |
105 | ba->pending_op = PENDING_NONE; | |
106 | } | |
107 | ||
108 | if (ba->pending_op == PENDING_FREE) { | |
109 | /* | |
110 | * There should never be a PENDING_FREE if length is -1 | |
111 | * (because dump_dnode is the only place where this | |
112 | * function is called with a -1, and only after flushing | |
113 | * any pending record). | |
114 | */ | |
115 | ASSERT(length != -1ULL); | |
116 | /* | |
117 | * Check to see whether this free block can be aggregated | |
118 | * with pending one. | |
119 | */ | |
120 | if (drrf->drr_object == object && drrf->drr_offset + | |
121 | drrf->drr_length == offset) { | |
122 | drrf->drr_length += length; | |
123 | return (0); | |
124 | } else { | |
125 | /* not a continuation. Push out pending record */ | |
126 | if (dump_bytes(ba, ba->drr, | |
127 | sizeof (dmu_replay_record_t)) != 0) | |
128 | return (EINTR); | |
129 | ba->pending_op = PENDING_NONE; | |
130 | } | |
131 | } | |
132 | /* create a FREE record and make it pending */ | |
34dc7c2f BB |
133 | bzero(ba->drr, sizeof (dmu_replay_record_t)); |
134 | ba->drr->drr_type = DRR_FREE; | |
428870ff BB |
135 | drrf->drr_object = object; |
136 | drrf->drr_offset = offset; | |
137 | drrf->drr_length = length; | |
138 | drrf->drr_toguid = ba->toguid; | |
139 | if (length == -1ULL) { | |
140 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
141 | return (EINTR); | |
142 | } else { | |
143 | ba->pending_op = PENDING_FREE; | |
144 | } | |
34dc7c2f | 145 | |
34dc7c2f BB |
146 | return (0); |
147 | } | |
148 | ||
149 | static int | |
150 | dump_data(struct backuparg *ba, dmu_object_type_t type, | |
428870ff | 151 | uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) |
34dc7c2f | 152 | { |
428870ff BB |
153 | struct drr_write *drrw = &(ba->drr->drr_u.drr_write); |
154 | ||
155 | ||
156 | /* | |
157 | * If there is any kind of pending aggregation (currently either | |
158 | * a grouping of free objects or free blocks), push it out to | |
159 | * the stream, since aggregation can't be done across operations | |
160 | * of different types. | |
161 | */ | |
162 | if (ba->pending_op != PENDING_NONE) { | |
163 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
164 | return (EINTR); | |
165 | ba->pending_op = PENDING_NONE; | |
166 | } | |
34dc7c2f BB |
167 | /* write a DATA record */ |
168 | bzero(ba->drr, sizeof (dmu_replay_record_t)); | |
169 | ba->drr->drr_type = DRR_WRITE; | |
428870ff BB |
170 | drrw->drr_object = object; |
171 | drrw->drr_type = type; | |
172 | drrw->drr_offset = offset; | |
173 | drrw->drr_length = blksz; | |
174 | drrw->drr_toguid = ba->toguid; | |
175 | drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); | |
176 | if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) | |
177 | drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; | |
178 | DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); | |
179 | DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); | |
180 | DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); | |
181 | drrw->drr_key.ddk_cksum = bp->blk_cksum; | |
182 | ||
183 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
184 | return (EINTR); | |
185 | if (dump_bytes(ba, data, blksz) != 0) | |
186 | return (EINTR); | |
187 | return (0); | |
188 | } | |
189 | ||
190 | static int | |
191 | dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data) | |
192 | { | |
193 | struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill); | |
194 | ||
195 | if (ba->pending_op != PENDING_NONE) { | |
196 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
197 | return (EINTR); | |
198 | ba->pending_op = PENDING_NONE; | |
199 | } | |
200 | ||
201 | /* write a SPILL record */ | |
202 | bzero(ba->drr, sizeof (dmu_replay_record_t)); | |
203 | ba->drr->drr_type = DRR_SPILL; | |
204 | drrs->drr_object = object; | |
205 | drrs->drr_length = blksz; | |
206 | drrs->drr_toguid = ba->toguid; | |
34dc7c2f BB |
207 | |
208 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) | |
209 | return (EINTR); | |
210 | if (dump_bytes(ba, data, blksz)) | |
211 | return (EINTR); | |
212 | return (0); | |
213 | } | |
214 | ||
215 | static int | |
216 | dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) | |
217 | { | |
428870ff BB |
218 | struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); |
219 | ||
220 | /* | |
221 | * If there is a pending op, but it's not PENDING_FREEOBJECTS, | |
222 | * push it out, since free block aggregation can only be done for | |
223 | * blocks of the same type (i.e., DRR_FREE records can only be | |
224 | * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records | |
225 | * can only be aggregated with other DRR_FREEOBJECTS records. | |
226 | */ | |
227 | if (ba->pending_op != PENDING_NONE && | |
228 | ba->pending_op != PENDING_FREEOBJECTS) { | |
229 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
230 | return (EINTR); | |
231 | ba->pending_op = PENDING_NONE; | |
232 | } | |
233 | if (ba->pending_op == PENDING_FREEOBJECTS) { | |
234 | /* | |
235 | * See whether this free object array can be aggregated | |
236 | * with pending one | |
237 | */ | |
238 | if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) { | |
239 | drrfo->drr_numobjs += numobjs; | |
240 | return (0); | |
241 | } else { | |
242 | /* can't be aggregated. Push out pending record */ | |
243 | if (dump_bytes(ba, ba->drr, | |
244 | sizeof (dmu_replay_record_t)) != 0) | |
245 | return (EINTR); | |
246 | ba->pending_op = PENDING_NONE; | |
247 | } | |
248 | } | |
249 | ||
34dc7c2f BB |
250 | /* write a FREEOBJECTS record */ |
251 | bzero(ba->drr, sizeof (dmu_replay_record_t)); | |
252 | ba->drr->drr_type = DRR_FREEOBJECTS; | |
428870ff BB |
253 | drrfo->drr_firstobj = firstobj; |
254 | drrfo->drr_numobjs = numobjs; | |
255 | drrfo->drr_toguid = ba->toguid; | |
256 | ||
257 | ba->pending_op = PENDING_FREEOBJECTS; | |
34dc7c2f | 258 | |
34dc7c2f BB |
259 | return (0); |
260 | } | |
261 | ||
262 | static int | |
263 | dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) | |
264 | { | |
428870ff BB |
265 | struct drr_object *drro = &(ba->drr->drr_u.drr_object); |
266 | ||
34dc7c2f BB |
267 | if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) |
268 | return (dump_freeobjects(ba, object, 1)); | |
269 | ||
428870ff BB |
270 | if (ba->pending_op != PENDING_NONE) { |
271 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
272 | return (EINTR); | |
273 | ba->pending_op = PENDING_NONE; | |
274 | } | |
275 | ||
34dc7c2f BB |
276 | /* write an OBJECT record */ |
277 | bzero(ba->drr, sizeof (dmu_replay_record_t)); | |
278 | ba->drr->drr_type = DRR_OBJECT; | |
428870ff BB |
279 | drro->drr_object = object; |
280 | drro->drr_type = dnp->dn_type; | |
281 | drro->drr_bonustype = dnp->dn_bonustype; | |
282 | drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; | |
283 | drro->drr_bonuslen = dnp->dn_bonuslen; | |
284 | drro->drr_checksumtype = dnp->dn_checksum; | |
285 | drro->drr_compress = dnp->dn_compress; | |
286 | drro->drr_toguid = ba->toguid; | |
287 | ||
288 | if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) | |
34dc7c2f BB |
289 | return (EINTR); |
290 | ||
428870ff | 291 | if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) |
34dc7c2f BB |
292 | return (EINTR); |
293 | ||
294 | /* free anything past the end of the file */ | |
295 | if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * | |
296 | (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) | |
297 | return (EINTR); | |
298 | if (ba->err) | |
299 | return (EINTR); | |
300 | return (0); | |
301 | } | |
302 | ||
/*
 * Number of bytes of an object covered by one block pointer at the given
 * indirection level: the data block size, multiplied by the number of
 * block pointers per indirect block once for every level above 0.
 * Both arguments are parenthesized so any pointer expression may be passed.
 */
#define	BP_SPAN(dnp, level) \
	(((uint64_t)(dnp)->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
	(level) * ((dnp)->dn_indblkshift - SPA_BLKPTRSHIFT)))
306 | ||
428870ff | 307 | /* ARGSUSED */ |
34dc7c2f | 308 | static int |
428870ff BB |
309 | backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, |
310 | const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) | |
34dc7c2f BB |
311 | { |
312 | struct backuparg *ba = arg; | |
34dc7c2f | 313 | dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; |
34dc7c2f BB |
314 | int err = 0; |
315 | ||
316 | if (issig(JUSTLOOKING) && issig(FORREAL)) | |
317 | return (EINTR); | |
318 | ||
428870ff BB |
319 | if (zb->zb_object != DMU_META_DNODE_OBJECT && |
320 | DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { | |
9babb374 | 321 | return (0); |
428870ff | 322 | } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { |
b128c09f BB |
323 | uint64_t span = BP_SPAN(dnp, zb->zb_level); |
324 | uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; | |
34dc7c2f BB |
325 | err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); |
326 | } else if (bp == NULL) { | |
b128c09f BB |
327 | uint64_t span = BP_SPAN(dnp, zb->zb_level); |
328 | err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); | |
329 | } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { | |
330 | return (0); | |
331 | } else if (type == DMU_OT_DNODE) { | |
332 | dnode_phys_t *blk; | |
34dc7c2f BB |
333 | int i; |
334 | int blksz = BP_GET_LSIZE(bp); | |
b128c09f BB |
335 | uint32_t aflags = ARC_WAIT; |
336 | arc_buf_t *abuf; | |
337 | ||
428870ff | 338 | if (dsl_read(NULL, spa, bp, pbuf, |
b128c09f BB |
339 | arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, |
340 | ZIO_FLAG_CANFAIL, &aflags, zb) != 0) | |
341 | return (EIO); | |
34dc7c2f | 342 | |
b128c09f | 343 | blk = abuf->b_data; |
34dc7c2f | 344 | for (i = 0; i < blksz >> DNODE_SHIFT; i++) { |
b128c09f BB |
345 | uint64_t dnobj = (zb->zb_blkid << |
346 | (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; | |
34dc7c2f BB |
347 | err = dump_dnode(ba, dnobj, blk+i); |
348 | if (err) | |
349 | break; | |
350 | } | |
b128c09f | 351 | (void) arc_buf_remove_ref(abuf, &abuf); |
428870ff | 352 | } else if (type == DMU_OT_SA) { |
b128c09f BB |
353 | uint32_t aflags = ARC_WAIT; |
354 | arc_buf_t *abuf; | |
34dc7c2f | 355 | int blksz = BP_GET_LSIZE(bp); |
b128c09f BB |
356 | |
357 | if (arc_read_nolock(NULL, spa, bp, | |
358 | arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, | |
359 | ZIO_FLAG_CANFAIL, &aflags, zb) != 0) | |
360 | return (EIO); | |
361 | ||
428870ff BB |
362 | err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data); |
363 | (void) arc_buf_remove_ref(abuf, &abuf); | |
364 | } else { /* it's a level-0 block of a regular object */ | |
365 | uint32_t aflags = ARC_WAIT; | |
366 | arc_buf_t *abuf; | |
367 | int blksz = BP_GET_LSIZE(bp); | |
368 | ||
369 | if (dsl_read(NULL, spa, bp, pbuf, | |
370 | arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, | |
371 | ZIO_FLAG_CANFAIL, &aflags, zb) != 0) | |
372 | return (EIO); | |
373 | ||
b128c09f | 374 | err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, |
428870ff | 375 | blksz, bp, abuf->b_data); |
b128c09f | 376 | (void) arc_buf_remove_ref(abuf, &abuf); |
34dc7c2f BB |
377 | } |
378 | ||
379 | ASSERT(err == 0 || err == EINTR); | |
380 | return (err); | |
381 | } | |
382 | ||
383 | int | |
384 | dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, | |
385 | vnode_t *vp, offset_t *off) | |
386 | { | |
428870ff BB |
387 | dsl_dataset_t *ds = tosnap->os_dsl_dataset; |
388 | dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; | |
34dc7c2f BB |
389 | dmu_replay_record_t *drr; |
390 | struct backuparg ba; | |
391 | int err; | |
392 | uint64_t fromtxg = 0; | |
393 | ||
394 | /* tosnap must be a snapshot */ | |
395 | if (ds->ds_phys->ds_next_snap_obj == 0) | |
396 | return (EINVAL); | |
397 | ||
398 | /* fromsnap must be an earlier snapshot from the same fs as tosnap */ | |
399 | if (fromds && (ds->ds_dir != fromds->ds_dir || | |
400 | fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)) | |
401 | return (EXDEV); | |
402 | ||
403 | if (fromorigin) { | |
b128c09f BB |
404 | dsl_pool_t *dp = ds->ds_dir->dd_pool; |
405 | ||
34dc7c2f BB |
406 | if (fromsnap) |
407 | return (EINVAL); | |
408 | ||
b128c09f | 409 | if (dsl_dir_is_clone(ds->ds_dir)) { |
34dc7c2f | 410 | rw_enter(&dp->dp_config_rwlock, RW_READER); |
b128c09f BB |
411 | err = dsl_dataset_hold_obj(dp, |
412 | ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds); | |
34dc7c2f BB |
413 | rw_exit(&dp->dp_config_rwlock); |
414 | if (err) | |
415 | return (err); | |
416 | } else { | |
417 | fromorigin = B_FALSE; | |
418 | } | |
419 | } | |
420 | ||
421 | ||
422 | drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); | |
423 | drr->drr_type = DRR_BEGIN; | |
424 | drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; | |
428870ff BB |
425 | DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo, |
426 | DMU_SUBSTREAM); | |
427 | ||
428 | #ifdef _KERNEL | |
429 | if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { | |
430 | uint64_t version; | |
431 | if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) | |
432 | return (EINVAL); | |
433 | if (version == ZPL_VERSION_SA) { | |
434 | DMU_SET_FEATUREFLAGS( | |
435 | drr->drr_u.drr_begin.drr_versioninfo, | |
436 | DMU_BACKUP_FEATURE_SA_SPILL); | |
437 | } | |
438 | } | |
439 | #endif | |
440 | ||
34dc7c2f BB |
441 | drr->drr_u.drr_begin.drr_creation_time = |
442 | ds->ds_phys->ds_creation_time; | |
428870ff | 443 | drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; |
34dc7c2f BB |
444 | if (fromorigin) |
445 | drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; | |
446 | drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; | |
447 | if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) | |
448 | drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; | |
449 | ||
450 | if (fromds) | |
451 | drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; | |
452 | dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); | |
453 | ||
454 | if (fromds) | |
455 | fromtxg = fromds->ds_phys->ds_creation_txg; | |
456 | if (fromorigin) | |
b128c09f | 457 | dsl_dataset_rele(fromds, FTAG); |
34dc7c2f BB |
458 | |
459 | ba.drr = drr; | |
460 | ba.vp = vp; | |
461 | ba.os = tosnap; | |
462 | ba.off = off; | |
428870ff | 463 | ba.toguid = ds->ds_phys->ds_guid; |
34dc7c2f | 464 | ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); |
428870ff | 465 | ba.pending_op = PENDING_NONE; |
34dc7c2f | 466 | |
428870ff | 467 | if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { |
34dc7c2f BB |
468 | kmem_free(drr, sizeof (dmu_replay_record_t)); |
469 | return (ba.err); | |
470 | } | |
471 | ||
b128c09f | 472 | err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, |
34dc7c2f BB |
473 | backup_cb, &ba); |
474 | ||
428870ff BB |
475 | if (ba.pending_op != PENDING_NONE) |
476 | if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) | |
477 | err = EINTR; | |
478 | ||
34dc7c2f BB |
479 | if (err) { |
480 | if (err == EINTR && ba.err) | |
481 | err = ba.err; | |
482 | kmem_free(drr, sizeof (dmu_replay_record_t)); | |
483 | return (err); | |
484 | } | |
485 | ||
486 | bzero(drr, sizeof (dmu_replay_record_t)); | |
487 | drr->drr_type = DRR_END; | |
488 | drr->drr_u.drr_end.drr_checksum = ba.zc; | |
428870ff | 489 | drr->drr_u.drr_end.drr_toguid = ba.toguid; |
34dc7c2f | 490 | |
428870ff | 491 | if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { |
34dc7c2f BB |
492 | kmem_free(drr, sizeof (dmu_replay_record_t)); |
493 | return (ba.err); | |
494 | } | |
495 | ||
496 | kmem_free(drr, sizeof (dmu_replay_record_t)); | |
497 | ||
498 | return (0); | |
499 | } | |
500 | ||
501 | struct recvbeginsyncarg { | |
502 | const char *tofs; | |
503 | const char *tosnap; | |
504 | dsl_dataset_t *origin; | |
505 | uint64_t fromguid; | |
506 | dmu_objset_type_t type; | |
507 | void *tag; | |
508 | boolean_t force; | |
509 | uint64_t dsflags; | |
510 | char clonelastname[MAXNAMELEN]; | |
511 | dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */ | |
428870ff | 512 | cred_t *cr; |
34dc7c2f BB |
513 | }; |
514 | ||
34dc7c2f BB |
515 | /* ARGSUSED */ |
516 | static int | |
428870ff | 517 | recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx) |
34dc7c2f BB |
518 | { |
519 | dsl_dir_t *dd = arg1; | |
520 | struct recvbeginsyncarg *rbsa = arg2; | |
521 | objset_t *mos = dd->dd_pool->dp_meta_objset; | |
522 | uint64_t val; | |
523 | int err; | |
524 | ||
525 | err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, | |
526 | strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val); | |
527 | ||
528 | if (err != ENOENT) | |
529 | return (err ? err : EEXIST); | |
530 | ||
531 | if (rbsa->origin) { | |
532 | /* make sure it's a snap in the same pool */ | |
533 | if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool) | |
534 | return (EXDEV); | |
428870ff | 535 | if (!dsl_dataset_is_snapshot(rbsa->origin)) |
34dc7c2f BB |
536 | return (EINVAL); |
537 | if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid) | |
538 | return (ENODEV); | |
539 | } | |
540 | ||
541 | return (0); | |
542 | } | |
543 | ||
544 | static void | |
428870ff | 545 | recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx) |
34dc7c2f BB |
546 | { |
547 | dsl_dir_t *dd = arg1; | |
548 | struct recvbeginsyncarg *rbsa = arg2; | |
b128c09f | 549 | uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; |
34dc7c2f | 550 | uint64_t dsobj; |
34dc7c2f | 551 | |
428870ff | 552 | /* Create and open new dataset. */ |
34dc7c2f | 553 | dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, |
428870ff BB |
554 | rbsa->origin, flags, rbsa->cr, tx); |
555 | VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, | |
556 | B_TRUE, dmu_recv_tag, &rbsa->ds)); | |
34dc7c2f | 557 | |
428870ff BB |
558 | if (rbsa->origin == NULL) { |
559 | (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, | |
560 | rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx); | |
34dc7c2f BB |
561 | } |
562 | ||
428870ff BB |
563 | spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC, |
564 | dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj); | |
34dc7c2f BB |
565 | } |
566 | ||
567 | /* ARGSUSED */ | |
568 | static int | |
428870ff | 569 | recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx) |
34dc7c2f BB |
570 | { |
571 | dsl_dataset_t *ds = arg1; | |
572 | struct recvbeginsyncarg *rbsa = arg2; | |
573 | int err; | |
574 | uint64_t val; | |
575 | ||
576 | /* must not have any changes since most recent snapshot */ | |
577 | if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds)) | |
578 | return (ETXTBSY); | |
579 | ||
572e2857 BB |
580 | /* new snapshot name must not exist */ |
581 | err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, | |
582 | ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val); | |
583 | if (err == 0) | |
584 | return (EEXIST); | |
585 | if (err != ENOENT) | |
586 | return (err); | |
587 | ||
428870ff BB |
588 | if (rbsa->fromguid) { |
589 | /* if incremental, most recent snapshot must match fromguid */ | |
590 | if (ds->ds_prev == NULL) | |
591 | return (ENODEV); | |
34dc7c2f | 592 | |
428870ff BB |
593 | /* |
594 | * most recent snapshot must match fromguid, or there are no | |
595 | * changes since the fromguid one | |
596 | */ | |
597 | if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) { | |
598 | uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth; | |
599 | uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj; | |
600 | while (obj != 0) { | |
601 | dsl_dataset_t *snap; | |
602 | err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool, | |
603 | obj, FTAG, &snap); | |
604 | if (err) | |
605 | return (ENODEV); | |
606 | if (snap->ds_phys->ds_creation_txg < birth) { | |
607 | dsl_dataset_rele(snap, FTAG); | |
608 | return (ENODEV); | |
609 | } | |
610 | if (snap->ds_phys->ds_guid == rbsa->fromguid) { | |
611 | dsl_dataset_rele(snap, FTAG); | |
612 | break; /* it's ok */ | |
613 | } | |
614 | obj = snap->ds_phys->ds_prev_snap_obj; | |
615 | dsl_dataset_rele(snap, FTAG); | |
616 | } | |
617 | if (obj == 0) | |
618 | return (ENODEV); | |
619 | } | |
620 | } else { | |
621 | /* if full, most recent snapshot must be $ORIGIN */ | |
622 | if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) | |
623 | return (ENODEV); | |
624 | } | |
34dc7c2f BB |
625 | |
626 | /* temporary clone name must not exist */ | |
627 | err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, | |
628 | ds->ds_dir->dd_phys->dd_child_dir_zapobj, | |
629 | rbsa->clonelastname, 8, 1, &val); | |
630 | if (err == 0) | |
631 | return (EEXIST); | |
632 | if (err != ENOENT) | |
633 | return (err); | |
634 | ||
34dc7c2f BB |
635 | return (0); |
636 | } | |
637 | ||
638 | /* ARGSUSED */ | |
639 | static void | |
428870ff | 640 | recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx) |
34dc7c2f BB |
641 | { |
642 | dsl_dataset_t *ohds = arg1; | |
643 | struct recvbeginsyncarg *rbsa = arg2; | |
644 | dsl_pool_t *dp = ohds->ds_dir->dd_pool; | |
428870ff | 645 | dsl_dataset_t *cds; |
b128c09f | 646 | uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags; |
34dc7c2f | 647 | uint64_t dsobj; |
34dc7c2f | 648 | |
428870ff BB |
649 | /* create and open the temporary clone */ |
650 | dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, | |
651 | ohds->ds_prev, flags, rbsa->cr, tx); | |
652 | VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); | |
34dc7c2f | 653 | |
428870ff BB |
654 | /* |
655 | * If we actually created a non-clone, we need to create the | |
656 | * objset in our new dataset. | |
657 | */ | |
658 | if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) { | |
659 | (void) dmu_objset_create_impl(dp->dp_spa, | |
660 | cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx); | |
661 | } | |
34dc7c2f BB |
662 | |
663 | rbsa->ds = cds; | |
664 | ||
428870ff BB |
665 | spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC, |
666 | dp->dp_spa, tx, "dataset = %lld", dsobj); | |
667 | } | |
668 | ||
428870ff BB |
669 | static boolean_t |
670 | dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb) | |
671 | { | |
672 | int featureflags; | |
673 | ||
674 | featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); | |
675 | ||
676 | /* Verify pool version supports SA if SA_SPILL feature set */ | |
677 | return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) && | |
678 | (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA)); | |
34dc7c2f BB |
679 | } |
680 | ||
34dc7c2f BB |
681 | /* |
682 | * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin() | |
683 | * succeeds; otherwise we will leak the holds on the datasets. | |
684 | */ | |
685 | int | |
428870ff | 686 | dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb, |
45d1cae3 | 687 | boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc) |
34dc7c2f BB |
688 | { |
689 | int err = 0; | |
690 | boolean_t byteswap; | |
428870ff BB |
691 | struct recvbeginsyncarg rbsa = { 0 }; |
692 | uint64_t versioninfo; | |
34dc7c2f BB |
693 | int flags; |
694 | dsl_dataset_t *ds; | |
695 | ||
696 | if (drrb->drr_magic == DMU_BACKUP_MAGIC) | |
697 | byteswap = FALSE; | |
698 | else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) | |
699 | byteswap = TRUE; | |
700 | else | |
701 | return (EINVAL); | |
702 | ||
703 | rbsa.tofs = tofs; | |
704 | rbsa.tosnap = tosnap; | |
428870ff | 705 | rbsa.origin = origin ? origin->os_dsl_dataset : NULL; |
34dc7c2f BB |
706 | rbsa.fromguid = drrb->drr_fromguid; |
707 | rbsa.type = drrb->drr_type; | |
708 | rbsa.tag = FTAG; | |
709 | rbsa.dsflags = 0; | |
428870ff BB |
710 | rbsa.cr = CRED(); |
711 | versioninfo = drrb->drr_versioninfo; | |
34dc7c2f BB |
712 | flags = drrb->drr_flags; |
713 | ||
714 | if (byteswap) { | |
715 | rbsa.type = BSWAP_32(rbsa.type); | |
716 | rbsa.fromguid = BSWAP_64(rbsa.fromguid); | |
428870ff | 717 | versioninfo = BSWAP_64(versioninfo); |
34dc7c2f BB |
718 | flags = BSWAP_32(flags); |
719 | } | |
720 | ||
428870ff | 721 | if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM || |
34dc7c2f BB |
722 | rbsa.type >= DMU_OST_NUMTYPES || |
723 | ((flags & DRR_FLAG_CLONE) && origin == NULL)) | |
724 | return (EINVAL); | |
725 | ||
726 | if (flags & DRR_FLAG_CI_DATA) | |
727 | rbsa.dsflags = DS_FLAG_CI_DATASET; | |
728 | ||
729 | bzero(drc, sizeof (dmu_recv_cookie_t)); | |
730 | drc->drc_drrb = drrb; | |
731 | drc->drc_tosnap = tosnap; | |
428870ff | 732 | drc->drc_top_ds = top_ds; |
34dc7c2f BB |
733 | drc->drc_force = force; |
734 | ||
735 | /* | |
736 | * Process the begin in syncing context. | |
737 | */ | |
34dc7c2f | 738 | |
428870ff BB |
739 | /* open the dataset we are logically receiving into */ |
740 | err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds); | |
741 | if (err == 0) { | |
742 | if (dmu_recv_verify_features(ds, drrb)) { | |
743 | dsl_dataset_rele(ds, dmu_recv_tag); | |
744 | return (ENOTSUP); | |
745 | } | |
746 | /* target fs already exists; recv into temp clone */ | |
34dc7c2f | 747 | |
428870ff BB |
748 | /* Can't recv a clone into an existing fs */ |
749 | if (flags & DRR_FLAG_CLONE) { | |
750 | dsl_dataset_rele(ds, dmu_recv_tag); | |
751 | return (EINVAL); | |
752 | } | |
34dc7c2f | 753 | |
45d1cae3 BB |
754 | /* must not have an incremental recv already in progress */ |
755 | if (!mutex_tryenter(&ds->ds_recvlock)) { | |
756 | dsl_dataset_rele(ds, dmu_recv_tag); | |
757 | return (EBUSY); | |
758 | } | |
759 | ||
428870ff BB |
760 | /* tmp clone name is: tofs/%tosnap" */ |
761 | (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname), | |
762 | "%%%s", tosnap); | |
34dc7c2f BB |
763 | rbsa.force = force; |
764 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
428870ff | 765 | recv_existing_check, recv_existing_sync, ds, &rbsa, 5); |
34dc7c2f | 766 | if (err) { |
45d1cae3 | 767 | mutex_exit(&ds->ds_recvlock); |
b128c09f | 768 | dsl_dataset_rele(ds, dmu_recv_tag); |
34dc7c2f BB |
769 | return (err); |
770 | } | |
771 | drc->drc_logical_ds = ds; | |
772 | drc->drc_real_ds = rbsa.ds; | |
428870ff BB |
773 | } else if (err == ENOENT) { |
774 | /* target fs does not exist; must be a full backup or clone */ | |
775 | char *cp; | |
34dc7c2f | 776 | |
428870ff BB |
777 | /* |
778 | * If it's a non-clone incremental, we are missing the | |
779 | * target fs, so fail the recv. | |
780 | */ | |
781 | if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) | |
782 | return (ENOENT); | |
783 | ||
784 | /* Open the parent of tofs */ | |
785 | cp = strrchr(tofs, '/'); | |
786 | *cp = '\0'; | |
787 | err = dsl_dataset_hold(tofs, FTAG, &ds); | |
788 | *cp = '/'; | |
34dc7c2f BB |
789 | if (err) |
790 | return (err); | |
34dc7c2f | 791 | |
428870ff | 792 | if (dmu_recv_verify_features(ds, drrb)) { |
572e2857 | 793 | dsl_dataset_rele(ds, FTAG); |
428870ff | 794 | return (ENOTSUP); |
34dc7c2f | 795 | } |
428870ff BB |
796 | |
797 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
798 | recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5); | |
799 | dsl_dataset_rele(ds, FTAG); | |
34dc7c2f BB |
800 | if (err) |
801 | return (err); | |
802 | drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds; | |
803 | drc->drc_newfs = B_TRUE; | |
804 | } | |
805 | ||
428870ff | 806 | return (err); |
34dc7c2f BB |
807 | } |
808 | ||
809 | struct restorearg { | |
810 | int err; | |
811 | int byteswap; | |
812 | vnode_t *vp; | |
813 | char *buf; | |
814 | uint64_t voff; | |
815 | int bufsize; /* amount of memory allocated for buf */ | |
816 | zio_cksum_t cksum; | |
572e2857 | 817 | avl_tree_t *guid_to_ds_map; |
34dc7c2f BB |
818 | }; |
819 | ||
428870ff BB |
820 | typedef struct guid_map_entry { |
821 | uint64_t guid; | |
822 | dsl_dataset_t *gme_ds; | |
823 | avl_node_t avlnode; | |
824 | } guid_map_entry_t; | |
825 | ||
826 | static int | |
827 | guid_compare(const void *arg1, const void *arg2) | |
828 | { | |
829 | const guid_map_entry_t *gmep1 = arg1; | |
830 | const guid_map_entry_t *gmep2 = arg2; | |
831 | ||
832 | if (gmep1->guid < gmep2->guid) | |
833 | return (-1); | |
834 | else if (gmep1->guid > gmep2->guid) | |
835 | return (1); | |
836 | return (0); | |
837 | } | |
838 | ||
572e2857 BB |
839 | static void |
840 | free_guid_map_onexit(void *arg) | |
841 | { | |
842 | avl_tree_t *ca = arg; | |
843 | void *cookie = NULL; | |
844 | guid_map_entry_t *gmep; | |
845 | ||
846 | while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { | |
847 | dsl_dataset_rele(gmep->gme_ds, ca); | |
848 | kmem_free(gmep, sizeof (guid_map_entry_t)); | |
849 | } | |
850 | avl_destroy(ca); | |
851 | kmem_free(ca, sizeof (avl_tree_t)); | |
852 | } | |
853 | ||
34dc7c2f BB |
854 | static void * |
855 | restore_read(struct restorearg *ra, int len) | |
856 | { | |
857 | void *rv; | |
858 | int done = 0; | |
859 | ||
860 | /* some things will require 8-byte alignment, so everything must */ | |
861 | ASSERT3U(len % 8, ==, 0); | |
862 | ||
863 | while (done < len) { | |
864 | ssize_t resid; | |
865 | ||
866 | ra->err = vn_rdwr(UIO_READ, ra->vp, | |
867 | (caddr_t)ra->buf + done, len - done, | |
868 | ra->voff, UIO_SYSSPACE, FAPPEND, | |
869 | RLIM64_INFINITY, CRED(), &resid); | |
870 | ||
871 | if (resid == len - done) | |
872 | ra->err = EINVAL; | |
873 | ra->voff += len - done - resid; | |
874 | done = len - resid; | |
875 | if (ra->err) | |
876 | return (NULL); | |
877 | } | |
878 | ||
879 | ASSERT3U(done, ==, len); | |
880 | rv = ra->buf; | |
881 | if (ra->byteswap) | |
882 | fletcher_4_incremental_byteswap(rv, len, &ra->cksum); | |
883 | else | |
884 | fletcher_4_incremental_native(rv, len, &ra->cksum); | |
885 | return (rv); | |
886 | } | |
887 | ||
60948de1 | 888 | noinline static void |
34dc7c2f BB |
889 | backup_byteswap(dmu_replay_record_t *drr) |
890 | { | |
891 | #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) | |
892 | #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) | |
893 | drr->drr_type = BSWAP_32(drr->drr_type); | |
894 | drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen); | |
895 | switch (drr->drr_type) { | |
896 | case DRR_BEGIN: | |
897 | DO64(drr_begin.drr_magic); | |
428870ff | 898 | DO64(drr_begin.drr_versioninfo); |
34dc7c2f BB |
899 | DO64(drr_begin.drr_creation_time); |
900 | DO32(drr_begin.drr_type); | |
901 | DO32(drr_begin.drr_flags); | |
902 | DO64(drr_begin.drr_toguid); | |
903 | DO64(drr_begin.drr_fromguid); | |
904 | break; | |
905 | case DRR_OBJECT: | |
906 | DO64(drr_object.drr_object); | |
907 | /* DO64(drr_object.drr_allocation_txg); */ | |
908 | DO32(drr_object.drr_type); | |
909 | DO32(drr_object.drr_bonustype); | |
910 | DO32(drr_object.drr_blksz); | |
911 | DO32(drr_object.drr_bonuslen); | |
428870ff | 912 | DO64(drr_object.drr_toguid); |
34dc7c2f BB |
913 | break; |
914 | case DRR_FREEOBJECTS: | |
915 | DO64(drr_freeobjects.drr_firstobj); | |
916 | DO64(drr_freeobjects.drr_numobjs); | |
428870ff | 917 | DO64(drr_freeobjects.drr_toguid); |
34dc7c2f BB |
918 | break; |
919 | case DRR_WRITE: | |
920 | DO64(drr_write.drr_object); | |
921 | DO32(drr_write.drr_type); | |
922 | DO64(drr_write.drr_offset); | |
923 | DO64(drr_write.drr_length); | |
428870ff BB |
924 | DO64(drr_write.drr_toguid); |
925 | DO64(drr_write.drr_key.ddk_cksum.zc_word[0]); | |
926 | DO64(drr_write.drr_key.ddk_cksum.zc_word[1]); | |
927 | DO64(drr_write.drr_key.ddk_cksum.zc_word[2]); | |
928 | DO64(drr_write.drr_key.ddk_cksum.zc_word[3]); | |
929 | DO64(drr_write.drr_key.ddk_prop); | |
930 | break; | |
931 | case DRR_WRITE_BYREF: | |
932 | DO64(drr_write_byref.drr_object); | |
933 | DO64(drr_write_byref.drr_offset); | |
934 | DO64(drr_write_byref.drr_length); | |
935 | DO64(drr_write_byref.drr_toguid); | |
936 | DO64(drr_write_byref.drr_refguid); | |
937 | DO64(drr_write_byref.drr_refobject); | |
938 | DO64(drr_write_byref.drr_refoffset); | |
939 | DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]); | |
940 | DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]); | |
941 | DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]); | |
942 | DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]); | |
943 | DO64(drr_write_byref.drr_key.ddk_prop); | |
34dc7c2f BB |
944 | break; |
945 | case DRR_FREE: | |
946 | DO64(drr_free.drr_object); | |
947 | DO64(drr_free.drr_offset); | |
948 | DO64(drr_free.drr_length); | |
428870ff BB |
949 | DO64(drr_free.drr_toguid); |
950 | break; | |
951 | case DRR_SPILL: | |
952 | DO64(drr_spill.drr_object); | |
953 | DO64(drr_spill.drr_length); | |
954 | DO64(drr_spill.drr_toguid); | |
34dc7c2f BB |
955 | break; |
956 | case DRR_END: | |
957 | DO64(drr_end.drr_checksum.zc_word[0]); | |
958 | DO64(drr_end.drr_checksum.zc_word[1]); | |
959 | DO64(drr_end.drr_checksum.zc_word[2]); | |
960 | DO64(drr_end.drr_checksum.zc_word[3]); | |
428870ff | 961 | DO64(drr_end.drr_toguid); |
34dc7c2f | 962 | break; |
e75c13c3 BB |
963 | default: |
964 | break; | |
34dc7c2f BB |
965 | } |
966 | #undef DO64 | |
967 | #undef DO32 | |
968 | } | |
969 | ||
60948de1 | 970 | noinline static int |
34dc7c2f BB |
971 | restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) |
972 | { | |
973 | int err; | |
974 | dmu_tx_t *tx; | |
b128c09f | 975 | void *data = NULL; |
34dc7c2f | 976 | |
34dc7c2f BB |
977 | if (drro->drr_type == DMU_OT_NONE || |
978 | drro->drr_type >= DMU_OT_NUMTYPES || | |
979 | drro->drr_bonustype >= DMU_OT_NUMTYPES || | |
428870ff | 980 | drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS || |
34dc7c2f BB |
981 | drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || |
982 | P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || | |
983 | drro->drr_blksz < SPA_MINBLOCKSIZE || | |
984 | drro->drr_blksz > SPA_MAXBLOCKSIZE || | |
985 | drro->drr_bonuslen > DN_MAX_BONUSLEN) { | |
986 | return (EINVAL); | |
987 | } | |
988 | ||
9babb374 BB |
989 | err = dmu_object_info(os, drro->drr_object, NULL); |
990 | ||
991 | if (err != 0 && err != ENOENT) | |
992 | return (EINVAL); | |
993 | ||
b128c09f BB |
994 | if (drro->drr_bonuslen) { |
995 | data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); | |
996 | if (ra->err) | |
997 | return (ra->err); | |
998 | } | |
999 | ||
34dc7c2f BB |
1000 | if (err == ENOENT) { |
1001 | /* currently free, want to be allocated */ | |
9babb374 | 1002 | tx = dmu_tx_create(os); |
34dc7c2f | 1003 | dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); |
34dc7c2f BB |
1004 | err = dmu_tx_assign(tx, TXG_WAIT); |
1005 | if (err) { | |
1006 | dmu_tx_abort(tx); | |
1007 | return (err); | |
1008 | } | |
1009 | err = dmu_object_claim(os, drro->drr_object, | |
1010 | drro->drr_type, drro->drr_blksz, | |
1011 | drro->drr_bonustype, drro->drr_bonuslen, tx); | |
9babb374 | 1012 | dmu_tx_commit(tx); |
34dc7c2f BB |
1013 | } else { |
1014 | /* currently allocated, want to be allocated */ | |
34dc7c2f BB |
1015 | err = dmu_object_reclaim(os, drro->drr_object, |
1016 | drro->drr_type, drro->drr_blksz, | |
9babb374 | 1017 | drro->drr_bonustype, drro->drr_bonuslen); |
34dc7c2f | 1018 | } |
428870ff | 1019 | if (err) { |
34dc7c2f | 1020 | return (EINVAL); |
428870ff | 1021 | } |
9babb374 BB |
1022 | |
1023 | tx = dmu_tx_create(os); | |
1024 | dmu_tx_hold_bonus(tx, drro->drr_object); | |
1025 | err = dmu_tx_assign(tx, TXG_WAIT); | |
1026 | if (err) { | |
1027 | dmu_tx_abort(tx); | |
1028 | return (err); | |
34dc7c2f BB |
1029 | } |
1030 | ||
428870ff BB |
1031 | dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype, |
1032 | tx); | |
34dc7c2f BB |
1033 | dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); |
1034 | ||
b128c09f | 1035 | if (data != NULL) { |
34dc7c2f | 1036 | dmu_buf_t *db; |
b128c09f | 1037 | |
34dc7c2f BB |
1038 | VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); |
1039 | dmu_buf_will_dirty(db, tx); | |
1040 | ||
1041 | ASSERT3U(db->db_size, >=, drro->drr_bonuslen); | |
34dc7c2f BB |
1042 | bcopy(data, db->db_data, drro->drr_bonuslen); |
1043 | if (ra->byteswap) { | |
1044 | dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, | |
1045 | drro->drr_bonuslen); | |
1046 | } | |
1047 | dmu_buf_rele(db, FTAG); | |
1048 | } | |
1049 | dmu_tx_commit(tx); | |
1050 | return (0); | |
1051 | } | |
1052 | ||
1053 | /* ARGSUSED */ | |
60948de1 | 1054 | noinline static int |
34dc7c2f BB |
1055 | restore_freeobjects(struct restorearg *ra, objset_t *os, |
1056 | struct drr_freeobjects *drrfo) | |
1057 | { | |
1058 | uint64_t obj; | |
1059 | ||
1060 | if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) | |
1061 | return (EINVAL); | |
1062 | ||
1063 | for (obj = drrfo->drr_firstobj; | |
1064 | obj < drrfo->drr_firstobj + drrfo->drr_numobjs; | |
1065 | (void) dmu_object_next(os, &obj, FALSE, 0)) { | |
34dc7c2f BB |
1066 | int err; |
1067 | ||
1068 | if (dmu_object_info(os, obj, NULL) != 0) | |
1069 | continue; | |
1070 | ||
b128c09f BB |
1071 | err = dmu_free_object(os, obj); |
1072 | if (err) | |
34dc7c2f | 1073 | return (err); |
34dc7c2f BB |
1074 | } |
1075 | return (0); | |
1076 | } | |
1077 | ||
60948de1 | 1078 | noinline static int |
34dc7c2f BB |
1079 | restore_write(struct restorearg *ra, objset_t *os, |
1080 | struct drr_write *drrw) | |
1081 | { | |
1082 | dmu_tx_t *tx; | |
1083 | void *data; | |
1084 | int err; | |
1085 | ||
1086 | if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || | |
1087 | drrw->drr_type >= DMU_OT_NUMTYPES) | |
1088 | return (EINVAL); | |
1089 | ||
1090 | data = restore_read(ra, drrw->drr_length); | |
1091 | if (data == NULL) | |
1092 | return (ra->err); | |
1093 | ||
1094 | if (dmu_object_info(os, drrw->drr_object, NULL) != 0) | |
1095 | return (EINVAL); | |
1096 | ||
1097 | tx = dmu_tx_create(os); | |
1098 | ||
1099 | dmu_tx_hold_write(tx, drrw->drr_object, | |
1100 | drrw->drr_offset, drrw->drr_length); | |
1101 | err = dmu_tx_assign(tx, TXG_WAIT); | |
1102 | if (err) { | |
1103 | dmu_tx_abort(tx); | |
1104 | return (err); | |
1105 | } | |
1106 | if (ra->byteswap) | |
1107 | dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); | |
1108 | dmu_write(os, drrw->drr_object, | |
1109 | drrw->drr_offset, drrw->drr_length, data, tx); | |
1110 | dmu_tx_commit(tx); | |
1111 | return (0); | |
1112 | } | |
1113 | ||
428870ff BB |
1114 | /* |
1115 | * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed | |
1116 | * streams to refer to a copy of the data that is already on the | |
1117 | * system because it came in earlier in the stream. This function | |
1118 | * finds the earlier copy of the data, and uses that copy instead of | |
1119 | * data from the stream to fulfill this write. | |
1120 | */ | |
1121 | static int | |
1122 | restore_write_byref(struct restorearg *ra, objset_t *os, | |
1123 | struct drr_write_byref *drrwbr) | |
1124 | { | |
1125 | dmu_tx_t *tx; | |
1126 | int err; | |
1127 | guid_map_entry_t gmesrch; | |
1128 | guid_map_entry_t *gmep; | |
1129 | avl_index_t where; | |
1130 | objset_t *ref_os = NULL; | |
1131 | dmu_buf_t *dbp; | |
1132 | ||
1133 | if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) | |
1134 | return (EINVAL); | |
1135 | ||
1136 | /* | |
1137 | * If the GUID of the referenced dataset is different from the | |
1138 | * GUID of the target dataset, find the referenced dataset. | |
1139 | */ | |
1140 | if (drrwbr->drr_toguid != drrwbr->drr_refguid) { | |
1141 | gmesrch.guid = drrwbr->drr_refguid; | |
572e2857 | 1142 | if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch, |
428870ff BB |
1143 | &where)) == NULL) { |
1144 | return (EINVAL); | |
1145 | } | |
1146 | if (dmu_objset_from_ds(gmep->gme_ds, &ref_os)) | |
1147 | return (EINVAL); | |
1148 | } else { | |
1149 | ref_os = os; | |
1150 | } | |
1151 | ||
c65aa5b2 BB |
1152 | err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, |
1153 | drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); | |
1154 | if (err) | |
428870ff BB |
1155 | return (err); |
1156 | ||
1157 | tx = dmu_tx_create(os); | |
1158 | ||
1159 | dmu_tx_hold_write(tx, drrwbr->drr_object, | |
1160 | drrwbr->drr_offset, drrwbr->drr_length); | |
1161 | err = dmu_tx_assign(tx, TXG_WAIT); | |
1162 | if (err) { | |
1163 | dmu_tx_abort(tx); | |
1164 | return (err); | |
1165 | } | |
1166 | dmu_write(os, drrwbr->drr_object, | |
1167 | drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); | |
1168 | dmu_buf_rele(dbp, FTAG); | |
1169 | dmu_tx_commit(tx); | |
1170 | return (0); | |
1171 | } | |
1172 | ||
1173 | static int | |
1174 | restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) | |
1175 | { | |
1176 | dmu_tx_t *tx; | |
1177 | void *data; | |
1178 | dmu_buf_t *db, *db_spill; | |
1179 | int err; | |
1180 | ||
1181 | if (drrs->drr_length < SPA_MINBLOCKSIZE || | |
1182 | drrs->drr_length > SPA_MAXBLOCKSIZE) | |
1183 | return (EINVAL); | |
1184 | ||
1185 | data = restore_read(ra, drrs->drr_length); | |
1186 | if (data == NULL) | |
1187 | return (ra->err); | |
1188 | ||
1189 | if (dmu_object_info(os, drrs->drr_object, NULL) != 0) | |
1190 | return (EINVAL); | |
1191 | ||
1192 | VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db)); | |
1193 | if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { | |
1194 | dmu_buf_rele(db, FTAG); | |
1195 | return (err); | |
1196 | } | |
1197 | ||
1198 | tx = dmu_tx_create(os); | |
1199 | ||
1200 | dmu_tx_hold_spill(tx, db->db_object); | |
1201 | ||
1202 | err = dmu_tx_assign(tx, TXG_WAIT); | |
1203 | if (err) { | |
1204 | dmu_buf_rele(db, FTAG); | |
1205 | dmu_buf_rele(db_spill, FTAG); | |
1206 | dmu_tx_abort(tx); | |
1207 | return (err); | |
1208 | } | |
1209 | dmu_buf_will_dirty(db_spill, tx); | |
1210 | ||
1211 | if (db_spill->db_size < drrs->drr_length) | |
1212 | VERIFY(0 == dbuf_spill_set_blksz(db_spill, | |
1213 | drrs->drr_length, tx)); | |
1214 | bcopy(data, db_spill->db_data, drrs->drr_length); | |
1215 | ||
1216 | dmu_buf_rele(db, FTAG); | |
1217 | dmu_buf_rele(db_spill, FTAG); | |
1218 | ||
1219 | dmu_tx_commit(tx); | |
1220 | return (0); | |
1221 | } | |
1222 | ||
34dc7c2f | 1223 | /* ARGSUSED */ |
60948de1 | 1224 | noinline static int |
34dc7c2f BB |
1225 | restore_free(struct restorearg *ra, objset_t *os, |
1226 | struct drr_free *drrf) | |
1227 | { | |
34dc7c2f BB |
1228 | int err; |
1229 | ||
1230 | if (drrf->drr_length != -1ULL && | |
1231 | drrf->drr_offset + drrf->drr_length < drrf->drr_offset) | |
1232 | return (EINVAL); | |
1233 | ||
1234 | if (dmu_object_info(os, drrf->drr_object, NULL) != 0) | |
1235 | return (EINVAL); | |
1236 | ||
b128c09f | 1237 | err = dmu_free_long_range(os, drrf->drr_object, |
34dc7c2f | 1238 | drrf->drr_offset, drrf->drr_length); |
34dc7c2f BB |
1239 | return (err); |
1240 | } | |
1241 | ||
34dc7c2f BB |
1242 | /* |
1243 | * NB: callers *must* call dmu_recv_end() if this succeeds. | |
1244 | */ | |
1245 | int | |
572e2857 BB |
1246 | dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, |
1247 | int cleanup_fd, uint64_t *action_handlep) | |
34dc7c2f BB |
1248 | { |
1249 | struct restorearg ra = { 0 }; | |
1250 | dmu_replay_record_t *drr; | |
1251 | objset_t *os; | |
1252 | zio_cksum_t pcksum; | |
428870ff | 1253 | int featureflags; |
34dc7c2f BB |
1254 | |
1255 | if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) | |
1256 | ra.byteswap = TRUE; | |
1257 | ||
1258 | { | |
1259 | /* compute checksum of drr_begin record */ | |
1260 | dmu_replay_record_t *drr; | |
1261 | drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); | |
1262 | ||
1263 | drr->drr_type = DRR_BEGIN; | |
1264 | drr->drr_u.drr_begin = *drc->drc_drrb; | |
1265 | if (ra.byteswap) { | |
1266 | fletcher_4_incremental_byteswap(drr, | |
1267 | sizeof (dmu_replay_record_t), &ra.cksum); | |
1268 | } else { | |
1269 | fletcher_4_incremental_native(drr, | |
1270 | sizeof (dmu_replay_record_t), &ra.cksum); | |
1271 | } | |
1272 | kmem_free(drr, sizeof (dmu_replay_record_t)); | |
1273 | } | |
1274 | ||
1275 | if (ra.byteswap) { | |
1276 | struct drr_begin *drrb = drc->drc_drrb; | |
1277 | drrb->drr_magic = BSWAP_64(drrb->drr_magic); | |
428870ff | 1278 | drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); |
34dc7c2f BB |
1279 | drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); |
1280 | drrb->drr_type = BSWAP_32(drrb->drr_type); | |
1281 | drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); | |
1282 | drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); | |
1283 | } | |
1284 | ||
1285 | ra.vp = vp; | |
1286 | ra.voff = *voffp; | |
1287 | ra.bufsize = 1<<20; | |
00b46022 | 1288 | ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP); |
34dc7c2f BB |
1289 | |
1290 | /* these were verified in dmu_recv_begin */ | |
428870ff BB |
1291 | ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) == |
1292 | DMU_SUBSTREAM); | |
34dc7c2f BB |
1293 | ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES); |
1294 | ||
1295 | /* | |
1296 | * Open the objset we are modifying. | |
1297 | */ | |
428870ff | 1298 | VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); |
34dc7c2f BB |
1299 | |
1300 | ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); | |
1301 | ||
428870ff BB |
1302 | featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); |
1303 | ||
1304 | /* if this stream is dedup'ed, set up the avl tree for guid mapping */ | |
1305 | if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { | |
572e2857 BB |
1306 | minor_t minor; |
1307 | ||
1308 | if (cleanup_fd == -1) { | |
1309 | ra.err = EBADF; | |
1310 | goto out; | |
1311 | } | |
1312 | ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); | |
1313 | if (ra.err) { | |
1314 | cleanup_fd = -1; | |
1315 | goto out; | |
1316 | } | |
1317 | ||
1318 | if (*action_handlep == 0) { | |
1319 | ra.guid_to_ds_map = | |
1320 | kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); | |
1321 | avl_create(ra.guid_to_ds_map, guid_compare, | |
1322 | sizeof (guid_map_entry_t), | |
1323 | offsetof(guid_map_entry_t, avlnode)); | |
572e2857 BB |
1324 | ra.err = zfs_onexit_add_cb(minor, |
1325 | free_guid_map_onexit, ra.guid_to_ds_map, | |
1326 | action_handlep); | |
1327 | if (ra.err) | |
1328 | goto out; | |
1329 | } else { | |
1330 | ra.err = zfs_onexit_cb_data(minor, *action_handlep, | |
1331 | (void **)&ra.guid_to_ds_map); | |
1332 | if (ra.err) | |
1333 | goto out; | |
1334 | } | |
8d35c149 AS |
1335 | |
1336 | drc->drc_guid_to_ds_map = ra.guid_to_ds_map; | |
428870ff BB |
1337 | } |
1338 | ||
34dc7c2f BB |
1339 | /* |
1340 | * Read records and process them. | |
1341 | */ | |
1342 | pcksum = ra.cksum; | |
1343 | while (ra.err == 0 && | |
1344 | NULL != (drr = restore_read(&ra, sizeof (*drr)))) { | |
1345 | if (issig(JUSTLOOKING) && issig(FORREAL)) { | |
1346 | ra.err = EINTR; | |
1347 | goto out; | |
1348 | } | |
1349 | ||
1350 | if (ra.byteswap) | |
1351 | backup_byteswap(drr); | |
1352 | ||
1353 | switch (drr->drr_type) { | |
1354 | case DRR_OBJECT: | |
1355 | { | |
1356 | /* | |
1357 | * We need to make a copy of the record header, | |
1358 | * because restore_{object,write} may need to | |
1359 | * restore_read(), which will invalidate drr. | |
1360 | */ | |
1361 | struct drr_object drro = drr->drr_u.drr_object; | |
1362 | ra.err = restore_object(&ra, os, &drro); | |
1363 | break; | |
1364 | } | |
1365 | case DRR_FREEOBJECTS: | |
1366 | { | |
1367 | struct drr_freeobjects drrfo = | |
1368 | drr->drr_u.drr_freeobjects; | |
1369 | ra.err = restore_freeobjects(&ra, os, &drrfo); | |
1370 | break; | |
1371 | } | |
1372 | case DRR_WRITE: | |
1373 | { | |
1374 | struct drr_write drrw = drr->drr_u.drr_write; | |
1375 | ra.err = restore_write(&ra, os, &drrw); | |
1376 | break; | |
1377 | } | |
428870ff BB |
1378 | case DRR_WRITE_BYREF: |
1379 | { | |
1380 | struct drr_write_byref drrwbr = | |
1381 | drr->drr_u.drr_write_byref; | |
1382 | ra.err = restore_write_byref(&ra, os, &drrwbr); | |
1383 | break; | |
1384 | } | |
34dc7c2f BB |
1385 | case DRR_FREE: |
1386 | { | |
1387 | struct drr_free drrf = drr->drr_u.drr_free; | |
1388 | ra.err = restore_free(&ra, os, &drrf); | |
1389 | break; | |
1390 | } | |
1391 | case DRR_END: | |
1392 | { | |
1393 | struct drr_end drre = drr->drr_u.drr_end; | |
1394 | /* | |
1395 | * We compare against the *previous* checksum | |
1396 | * value, because the stored checksum is of | |
1397 | * everything before the DRR_END record. | |
1398 | */ | |
1399 | if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) | |
1400 | ra.err = ECKSUM; | |
1401 | goto out; | |
1402 | } | |
428870ff BB |
1403 | case DRR_SPILL: |
1404 | { | |
1405 | struct drr_spill drrs = drr->drr_u.drr_spill; | |
1406 | ra.err = restore_spill(&ra, os, &drrs); | |
1407 | break; | |
1408 | } | |
34dc7c2f BB |
1409 | default: |
1410 | ra.err = EINVAL; | |
1411 | goto out; | |
1412 | } | |
1413 | pcksum = ra.cksum; | |
1414 | } | |
1415 | ASSERT(ra.err != 0); | |
1416 | ||
1417 | out: | |
572e2857 BB |
1418 | if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) |
1419 | zfs_onexit_fd_rele(cleanup_fd); | |
1420 | ||
34dc7c2f BB |
1421 | if (ra.err != 0) { |
1422 | /* | |
45d1cae3 BB |
1423 | * destroy what we created, so we don't leave it in the |
1424 | * inconsistent restoring state. | |
34dc7c2f BB |
1425 | */ |
1426 | txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0); | |
45d1cae3 BB |
1427 | |
1428 | (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, | |
1429 | B_FALSE); | |
1430 | if (drc->drc_real_ds != drc->drc_logical_ds) { | |
1431 | mutex_exit(&drc->drc_logical_ds->ds_recvlock); | |
1432 | dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag); | |
1433 | } | |
34dc7c2f BB |
1434 | } |
1435 | ||
00b46022 | 1436 | vmem_free(ra.buf, ra.bufsize); |
34dc7c2f BB |
1437 | *voffp = ra.voff; |
1438 | return (ra.err); | |
1439 | } | |
1440 | ||
1441 | struct recvendsyncarg { | |
1442 | char *tosnap; | |
1443 | uint64_t creation_time; | |
1444 | uint64_t toguid; | |
1445 | }; | |
1446 | ||
1447 | static int | |
1448 | recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx) | |
1449 | { | |
1450 | dsl_dataset_t *ds = arg1; | |
1451 | struct recvendsyncarg *resa = arg2; | |
1452 | ||
1453 | return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx)); | |
1454 | } | |
1455 | ||
1456 | static void | |
428870ff | 1457 | recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) |
34dc7c2f BB |
1458 | { |
1459 | dsl_dataset_t *ds = arg1; | |
1460 | struct recvendsyncarg *resa = arg2; | |
1461 | ||
428870ff | 1462 | dsl_dataset_snapshot_sync(ds, resa->tosnap, tx); |
34dc7c2f BB |
1463 | |
1464 | /* set snapshot's creation time and guid */ | |
1465 | dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); | |
1466 | ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time; | |
1467 | ds->ds_prev->ds_phys->ds_guid = resa->toguid; | |
1468 | ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; | |
1469 | ||
1470 | dmu_buf_will_dirty(ds->ds_dbuf, tx); | |
1471 | ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; | |
1472 | } | |
1473 | ||
8d35c149 AS |
1474 | static int |
1475 | add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds) | |
1476 | { | |
1477 | dsl_pool_t *dp = ds->ds_dir->dd_pool; | |
1478 | uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj; | |
1479 | dsl_dataset_t *snapds; | |
1480 | guid_map_entry_t *gmep; | |
1481 | int err; | |
1482 | ||
1483 | ASSERT(guid_map != NULL); | |
1484 | ||
1485 | rw_enter(&dp->dp_config_rwlock, RW_READER); | |
1486 | err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds); | |
1487 | if (err == 0) { | |
1488 | gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP); | |
1489 | gmep->guid = snapds->ds_phys->ds_guid; | |
1490 | gmep->gme_ds = snapds; | |
1491 | avl_add(guid_map, gmep); | |
1492 | } | |
1493 | ||
1494 | rw_exit(&dp->dp_config_rwlock); | |
1495 | return (err); | |
1496 | } | |
1497 | ||
428870ff BB |
1498 | static int |
1499 | dmu_recv_existing_end(dmu_recv_cookie_t *drc) | |
34dc7c2f | 1500 | { |
b128c09f BB |
1501 | struct recvendsyncarg resa; |
1502 | dsl_dataset_t *ds = drc->drc_logical_ds; | |
1503 | int err; | |
34dc7c2f BB |
1504 | |
1505 | /* | |
428870ff BB |
1506 | * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() |
1507 | * expects it to have a ds_user_ptr (and zil), but clone_swap() | |
1508 | * can close it. | |
34dc7c2f | 1509 | */ |
b128c09f | 1510 | txg_wait_synced(ds->ds_dir->dd_pool, 0); |
34dc7c2f | 1511 | |
428870ff BB |
1512 | if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { |
1513 | err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, | |
1514 | drc->drc_force); | |
1515 | if (err) | |
1516 | goto out; | |
1517 | } else { | |
1518 | mutex_exit(&ds->ds_recvlock); | |
1519 | dsl_dataset_rele(ds, dmu_recv_tag); | |
45d1cae3 BB |
1520 | (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, |
1521 | B_FALSE); | |
428870ff | 1522 | return (EBUSY); |
34dc7c2f BB |
1523 | } |
1524 | ||
b128c09f BB |
1525 | resa.creation_time = drc->drc_drrb->drr_creation_time; |
1526 | resa.toguid = drc->drc_drrb->drr_toguid; | |
1527 | resa.tosnap = drc->drc_tosnap; | |
34dc7c2f | 1528 | |
b128c09f BB |
1529 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, |
1530 | recv_end_check, recv_end_sync, ds, &resa, 3); | |
1531 | if (err) { | |
428870ff BB |
1532 | /* swap back */ |
1533 | (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE); | |
34dc7c2f BB |
1534 | } |
1535 | ||
428870ff BB |
1536 | out: |
1537 | mutex_exit(&ds->ds_recvlock); | |
8d35c149 AS |
1538 | if (err == 0 && drc->drc_guid_to_ds_map != NULL) |
1539 | (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); | |
b128c09f | 1540 | dsl_dataset_disown(ds, dmu_recv_tag); |
428870ff | 1541 | (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE); |
34dc7c2f BB |
1542 | return (err); |
1543 | } | |
428870ff BB |
1544 | |
1545 | static int | |
1546 | dmu_recv_new_end(dmu_recv_cookie_t *drc) | |
1547 | { | |
1548 | struct recvendsyncarg resa; | |
1549 | dsl_dataset_t *ds = drc->drc_logical_ds; | |
1550 | int err; | |
1551 | ||
1552 | /* | |
1553 | * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() | |
1554 | * expects it to have a ds_user_ptr (and zil), but clone_swap() | |
1555 | * can close it. | |
1556 | */ | |
1557 | txg_wait_synced(ds->ds_dir->dd_pool, 0); | |
1558 | ||
1559 | resa.creation_time = drc->drc_drrb->drr_creation_time; | |
1560 | resa.toguid = drc->drc_drrb->drr_toguid; | |
1561 | resa.tosnap = drc->drc_tosnap; | |
1562 | ||
1563 | err = dsl_sync_task_do(ds->ds_dir->dd_pool, | |
1564 | recv_end_check, recv_end_sync, ds, &resa, 3); | |
1565 | if (err) { | |
1566 | /* clean up the fs we just recv'd into */ | |
1567 | (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE); | |
1568 | } else { | |
8d35c149 AS |
1569 | if (drc->drc_guid_to_ds_map != NULL) |
1570 | (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds); | |
428870ff BB |
1571 | /* release the hold from dmu_recv_begin */ |
1572 | dsl_dataset_disown(ds, dmu_recv_tag); | |
1573 | } | |
1574 | return (err); | |
1575 | } | |
1576 | ||
1577 | int | |
1578 | dmu_recv_end(dmu_recv_cookie_t *drc) | |
1579 | { | |
1580 | if (drc->drc_logical_ds != drc->drc_real_ds) | |
1581 | return (dmu_recv_existing_end(drc)); | |
1582 | else | |
1583 | return (dmu_recv_new_end(drc)); | |
1584 | } |