]> git.proxmox.com Git - mirror_zfs.git/blob - lib/libzfs/libzfs_sendrecv.c
Implement Redacted Send/Receive
[mirror_zfs.git] / lib / libzfs / libzfs_sendrecv.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32 * Copyright (c) 2019 Datto Inc.
33 */
34
35 #include <assert.h>
36 #include <ctype.h>
37 #include <errno.h>
38 #include <libintl.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <strings.h>
42 #include <unistd.h>
43 #include <stddef.h>
44 #include <fcntl.h>
45 #include <sys/mount.h>
46 #include <sys/mntent.h>
47 #include <sys/mnttab.h>
48 #include <sys/avl.h>
49 #include <sys/debug.h>
50 #include <sys/stat.h>
51 #include <stddef.h>
52 #include <pthread.h>
53 #include <umem.h>
54 #include <time.h>
55
56 #include <libzfs.h>
57 #include <libzfs_core.h>
58 #include <libzutil.h>
59
60 #include "zfs_namecheck.h"
61 #include "zfs_prop.h"
62 #include "zfs_fletcher.h"
63 #include "libzfs_impl.h"
64 #include <zlib.h>
65 #include <sys/zio_checksum.h>
66 #include <sys/dsl_crypt.h>
67 #include <sys/ddt.h>
68 #include <sys/socket.h>
69 #include <sys/sha2.h>
70
71 /* in libzfs_dataset.c */
72 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
73
74 static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
75 recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
76 uint64_t *, const char *, nvlist_t *);
77 static int guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
78 uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
79 uint64_t num_redact_snaps, char *name);
80 static int guid_to_name(libzfs_handle_t *, const char *,
81 uint64_t, boolean_t, char *);
82
83 static const zio_cksum_t zero_cksum = { { 0 } };
84
/*
 * Argument bundle consumed by the cksummer() thread.
 */
typedef struct dedup_arg {
	int inputfd;			/* fd cksummer() wraps with fdopen(.., "r") and reads records from */
	int outputfd;			/* fd cksummer() hands to dump_record() for output */
	libzfs_handle_t *dedup_hdl;	/* handle used for zfs_alloc()/zfs_realloc() and ddt_update() */
} dedup_arg_t;
90
/*
 * NOTE(review): the code that consumes this structure is not in this part
 * of the file; the field notes below are inferred from names only and
 * should be confirmed against the users.
 */
typedef struct progress_arg {
	zfs_handle_t *pa_zhp;		/* presumably the dataset being reported on */
	int pa_fd;			/* presumably the fd whose progress is polled */
	boolean_t pa_parsable;
	boolean_t pa_estimate;
	int pa_verbosity;
} progress_arg_t;
98
/*
 * Location of a block already emitted in the stream.  cksummer() fills it
 * from a DRR_WRITE record and copies it into the drr_ref* fields of a
 * DRR_WRITE_BYREF record when a duplicate is found.
 */
typedef struct dataref {
	uint64_t ref_guid;	/* taken from drr_toguid */
	uint64_t ref_object;	/* taken from drr_object */
	uint64_t ref_offset;	/* taken from drr_offset */
} dataref_t;
104
/*
 * One entry in a dedup table hash chain.
 */
typedef struct dedup_entry {
	struct dedup_entry *dde_next;	/* next entry in the same bucket, or NULL */
	zio_cksum_t dde_chksum;		/* checksum matched by ddt_update() */
	uint64_t dde_prop;		/* must also match for ddt_update() to report a hit */
	dataref_t dde_ref;		/* reference returned to the caller on a hit */
} dedup_entry_t;
111
112 #define MAX_DDT_PHYSMEM_PERCENT 20
113 #define SMALLEST_POSSIBLE_MAX_DDT_MB 128
114
/*
 * In-memory dedup table built by cksummer(): an array of bucket heads
 * indexed by the low numhashbits bits of the checksum's first word.
 */
typedef struct dedup_table {
	dedup_entry_t **dedup_hash_array;	/* bucket heads, calloc'ed by cksummer() */
	umem_cache_t *ddecache;			/* cache dedup_entry_t's are allocated from */
	uint64_t max_ddt_size;  /* max dedup table size in bytes */
	uint64_t cur_ddt_size;  /* current dedup table size in bytes */
	uint64_t ddt_count;			/* bumped for every entry appended */
	int numhashbits;			/* number of checksum bits used as bucket index */
	boolean_t ddt_full;			/* set once the "table full" message has been issued */
} dedup_table_t;
124
/*
 * Return the 1-based position of the most significant set bit of n,
 * or 0 when n is zero.
 */
static int
high_order_bit(uint64_t n)
{
	int bits = 0;

	while (n != 0) {
		n >>= 1;
		bits++;
	}
	return (bits);
}
134
/*
 * Read one item of len bytes from stream into buf.  Returns the fread()
 * item count: 1 when the whole item was read, 0 otherwise.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
145
146 static void
147 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
148 zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
149 {
150 dedup_entry_t *dde;
151
152 if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
153 if (ddt->ddt_full == B_FALSE) {
154 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
155 "Dedup table full. Deduplication will continue "
156 "with existing table entries"));
157 ddt->ddt_full = B_TRUE;
158 }
159 return;
160 }
161
162 if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
163 != NULL) {
164 assert(*ddepp == NULL);
165 dde->dde_next = NULL;
166 dde->dde_chksum = *cs;
167 dde->dde_prop = prop;
168 dde->dde_ref = *dr;
169 *ddepp = dde;
170 ddt->cur_ddt_size += sizeof (dedup_entry_t);
171 ddt->ddt_count++;
172 }
173 }
174
175 /*
176 * Using the specified dedup table, do a lookup for an entry with
177 * the checksum cs. If found, return the block's reference info
178 * in *dr. Otherwise, insert a new entry in the dedup table, using
179 * the reference information specified by *dr.
180 *
181 * return value: true - entry was found
182 * false - entry was not found
183 */
184 static boolean_t
185 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
186 uint64_t prop, dataref_t *dr)
187 {
188 uint32_t hashcode;
189 dedup_entry_t **ddepp;
190
191 hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
192
193 for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
194 ddepp = &((*ddepp)->dde_next)) {
195 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
196 (*ddepp)->dde_prop == prop) {
197 *dr = (*ddepp)->dde_ref;
198 return (B_TRUE);
199 }
200 }
201 ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
202 return (B_FALSE);
203 }
204
205 static int
206 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
207 zio_cksum_t *zc, int outfd)
208 {
209 ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
210 ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
211 fletcher_4_incremental_native(drr,
212 offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
213 if (drr->drr_type != DRR_BEGIN) {
214 ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
215 drr_checksum.drr_checksum));
216 drr->drr_u.drr_checksum.drr_checksum = *zc;
217 }
218 fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
219 sizeof (zio_cksum_t), zc);
220 if (write(outfd, drr, sizeof (*drr)) == -1)
221 return (errno);
222 if (payload_len != 0) {
223 fletcher_4_incremental_native(payload, payload_len, zc);
224 if (write(outfd, payload, payload_len) == -1)
225 return (errno);
226 }
227 return (0);
228 }
229
230 /*
231 * This function is started in a separate thread when the dedup option
232 * has been requested. The main send thread determines the list of
233 * snapshots to be included in the send stream and makes the ioctl calls
234 * for each one. But instead of having the ioctl send the output to the
235 * the output fd specified by the caller of zfs_send()), the
236 * ioctl is told to direct the output to a pipe, which is read by the
237 * alternate thread running THIS function. This function does the
238 * dedup'ing by:
239 * 1. building a dedup table (the DDT)
240 * 2. doing checksums on each data block and inserting a record in the DDT
241 * 3. looking for matching checksums, and
242 * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever
243 * a duplicate block is found.
244 * The output of this function then goes to the output fd requested
245 * by the caller of zfs_send().
246 */
247 static void *
248 cksummer(void *arg)
249 {
250 dedup_arg_t *dda = arg;
251 char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
252 dmu_replay_record_t thedrr = { 0 };
253 dmu_replay_record_t *drr = &thedrr;
254 FILE *ofp;
255 int outfd;
256 dedup_table_t ddt;
257 zio_cksum_t stream_cksum;
258 uint64_t numbuckets;
259
260 #ifdef _ILP32
261 ddt.max_ddt_size = SMALLEST_POSSIBLE_MAX_DDT_MB << 20;
262 #else
263 uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
264 ddt.max_ddt_size =
265 MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
266 SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
267 #endif
268
269 numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
270
271 /*
272 * numbuckets must be a power of 2. Increase number to
273 * a power of 2 if necessary.
274 */
275 if (!ISP2(numbuckets))
276 numbuckets = 1ULL << high_order_bit(numbuckets);
277
278 ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
279 ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
280 NULL, NULL, NULL, NULL, NULL, 0);
281 ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
282 ddt.numhashbits = high_order_bit(numbuckets) - 1;
283 ddt.ddt_full = B_FALSE;
284
285 outfd = dda->outputfd;
286 ofp = fdopen(dda->inputfd, "r");
287 while (ssread(drr, sizeof (*drr), ofp) != 0) {
288
289 /*
290 * kernel filled in checksum, we are going to write same
291 * record, but need to regenerate checksum.
292 */
293 if (drr->drr_type != DRR_BEGIN) {
294 bzero(&drr->drr_u.drr_checksum.drr_checksum,
295 sizeof (drr->drr_u.drr_checksum.drr_checksum));
296 }
297
298 switch (drr->drr_type) {
299 case DRR_BEGIN:
300 {
301 struct drr_begin *drrb = &drr->drr_u.drr_begin;
302 int fflags;
303 int sz = 0;
304 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
305
306 ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
307
308 /* set the DEDUP feature flag for this stream */
309 fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
310 fflags |= (DMU_BACKUP_FEATURE_DEDUP |
311 DMU_BACKUP_FEATURE_DEDUPPROPS);
312 DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
313
314 if (drr->drr_payloadlen != 0) {
315 sz = drr->drr_payloadlen;
316
317 if (sz > SPA_MAXBLOCKSIZE) {
318 buf = zfs_realloc(dda->dedup_hdl, buf,
319 SPA_MAXBLOCKSIZE, sz);
320 }
321 (void) ssread(buf, sz, ofp);
322 if (ferror(stdin))
323 perror("fread");
324 }
325 if (dump_record(drr, buf, sz, &stream_cksum,
326 outfd) != 0)
327 goto out;
328 break;
329 }
330
331 case DRR_END:
332 {
333 struct drr_end *drre = &drr->drr_u.drr_end;
334 /* use the recalculated checksum */
335 drre->drr_checksum = stream_cksum;
336 if (dump_record(drr, NULL, 0, &stream_cksum,
337 outfd) != 0)
338 goto out;
339 break;
340 }
341
342 case DRR_OBJECT:
343 {
344 struct drr_object *drro = &drr->drr_u.drr_object;
345 if (drro->drr_bonuslen > 0) {
346 (void) ssread(buf,
347 DRR_OBJECT_PAYLOAD_SIZE(drro), ofp);
348 }
349 if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro),
350 &stream_cksum, outfd) != 0)
351 goto out;
352 break;
353 }
354
355 case DRR_SPILL:
356 {
357 struct drr_spill *drrs = &drr->drr_u.drr_spill;
358 (void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp);
359 if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs),
360 &stream_cksum, outfd) != 0)
361 goto out;
362 break;
363 }
364
365 case DRR_FREEOBJECTS:
366 {
367 if (dump_record(drr, NULL, 0, &stream_cksum,
368 outfd) != 0)
369 goto out;
370 break;
371 }
372
373 case DRR_WRITE:
374 {
375 struct drr_write *drrw = &drr->drr_u.drr_write;
376 dataref_t dataref;
377 uint64_t payload_size;
378
379 payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
380 (void) ssread(buf, payload_size, ofp);
381
382 /*
383 * Use the existing checksum if it's dedup-capable,
384 * else calculate a SHA256 checksum for it.
385 */
386
387 if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
388 zero_cksum) ||
389 !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) {
390 SHA2_CTX ctx;
391 zio_cksum_t tmpsha256;
392
393 SHA2Init(SHA256, &ctx);
394 SHA2Update(&ctx, buf, payload_size);
395 SHA2Final(&tmpsha256, &ctx);
396
397 drrw->drr_key.ddk_cksum.zc_word[0] =
398 BE_64(tmpsha256.zc_word[0]);
399 drrw->drr_key.ddk_cksum.zc_word[1] =
400 BE_64(tmpsha256.zc_word[1]);
401 drrw->drr_key.ddk_cksum.zc_word[2] =
402 BE_64(tmpsha256.zc_word[2]);
403 drrw->drr_key.ddk_cksum.zc_word[3] =
404 BE_64(tmpsha256.zc_word[3]);
405 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
406 drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
407 }
408
409 dataref.ref_guid = drrw->drr_toguid;
410 dataref.ref_object = drrw->drr_object;
411 dataref.ref_offset = drrw->drr_offset;
412
413 if (ddt_update(dda->dedup_hdl, &ddt,
414 &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
415 &dataref)) {
416 dmu_replay_record_t wbr_drr = {0};
417 struct drr_write_byref *wbr_drrr =
418 &wbr_drr.drr_u.drr_write_byref;
419
420 /* block already present in stream */
421 wbr_drr.drr_type = DRR_WRITE_BYREF;
422
423 wbr_drrr->drr_object = drrw->drr_object;
424 wbr_drrr->drr_offset = drrw->drr_offset;
425 wbr_drrr->drr_length = drrw->drr_logical_size;
426 wbr_drrr->drr_toguid = drrw->drr_toguid;
427 wbr_drrr->drr_refguid = dataref.ref_guid;
428 wbr_drrr->drr_refobject =
429 dataref.ref_object;
430 wbr_drrr->drr_refoffset =
431 dataref.ref_offset;
432
433 wbr_drrr->drr_checksumtype =
434 drrw->drr_checksumtype;
435 wbr_drrr->drr_flags = drrw->drr_flags;
436 wbr_drrr->drr_key.ddk_cksum =
437 drrw->drr_key.ddk_cksum;
438 wbr_drrr->drr_key.ddk_prop =
439 drrw->drr_key.ddk_prop;
440
441 if (dump_record(&wbr_drr, NULL, 0,
442 &stream_cksum, outfd) != 0)
443 goto out;
444 } else {
445 /* block not previously seen */
446 if (dump_record(drr, buf, payload_size,
447 &stream_cksum, outfd) != 0)
448 goto out;
449 }
450 break;
451 }
452
453 case DRR_WRITE_EMBEDDED:
454 {
455 struct drr_write_embedded *drrwe =
456 &drr->drr_u.drr_write_embedded;
457 (void) ssread(buf,
458 P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
459 if (dump_record(drr, buf,
460 P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
461 &stream_cksum, outfd) != 0)
462 goto out;
463 break;
464 }
465
466 case DRR_FREE:
467 {
468 if (dump_record(drr, NULL, 0, &stream_cksum,
469 outfd) != 0)
470 goto out;
471 break;
472 }
473
474 case DRR_OBJECT_RANGE:
475 {
476 if (dump_record(drr, NULL, 0, &stream_cksum,
477 outfd) != 0)
478 goto out;
479 break;
480 }
481
482 default:
483 (void) fprintf(stderr, "INVALID record type 0x%x\n",
484 drr->drr_type);
485 /* should never happen, so assert */
486 assert(B_FALSE);
487 }
488 }
489 out:
490 umem_cache_destroy(ddt.ddecache);
491 free(ddt.dedup_hash_array);
492 free(buf);
493 (void) fclose(ofp);
494
495 return (NULL);
496 }
497
498 /*
499 * Routines for dealing with the AVL tree of fs-nvlists
500 */
/*
 * One snapshot in the guid-ordered AVL tree built by fsavl_create().
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage (offset passed to avl_create()) */
	nvlist_t *fn_nvfs;	/* nvlist of the filesystem containing the snapshot; not owned */
	char *fn_snapname;	/* name of the "snaps" nvpair this node came from; not owned */
	uint64_t fn_guid;	/* snapshot guid; the tree's sort key */
} fsavl_node_t;
507
508 static int
509 fsavl_compare(const void *arg1, const void *arg2)
510 {
511 const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
512 const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
513
514 return (AVL_CMP(fn1->fn_guid, fn2->fn_guid));
515 }
516
517 /*
518 * Given the GUID of a snapshot, find its containing filesystem and
519 * (optionally) name.
520 */
521 static nvlist_t *
522 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
523 {
524 fsavl_node_t fn_find;
525 fsavl_node_t *fn;
526
527 fn_find.fn_guid = snapguid;
528
529 fn = avl_find(avl, &fn_find, NULL);
530 if (fn) {
531 if (snapname)
532 *snapname = fn->fn_snapname;
533 return (fn->fn_nvfs);
534 }
535 return (NULL);
536 }
537
538 static void
539 fsavl_destroy(avl_tree_t *avl)
540 {
541 fsavl_node_t *fn;
542 void *cookie;
543
544 if (avl == NULL)
545 return;
546
547 cookie = NULL;
548 while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
549 free(fn);
550 avl_destroy(avl);
551 free(avl);
552 }
553
554 /*
555 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
556 */
557 static avl_tree_t *
558 fsavl_create(nvlist_t *fss)
559 {
560 avl_tree_t *fsavl;
561 nvpair_t *fselem = NULL;
562
563 if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
564 return (NULL);
565
566 avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
567 offsetof(fsavl_node_t, fn_node));
568
569 while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
570 nvlist_t *nvfs, *snaps;
571 nvpair_t *snapelem = NULL;
572
573 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
574 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
575
576 while ((snapelem =
577 nvlist_next_nvpair(snaps, snapelem)) != NULL) {
578 fsavl_node_t *fn;
579 uint64_t guid;
580
581 VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
582 if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
583 fsavl_destroy(fsavl);
584 return (NULL);
585 }
586 fn->fn_nvfs = nvfs;
587 fn->fn_snapname = nvpair_name(snapelem);
588 fn->fn_guid = guid;
589
590 /*
591 * Note: if there are multiple snaps with the
592 * same GUID, we ignore all but one.
593 */
594 if (avl_find(fsavl, fn, NULL) == NULL)
595 avl_add(fsavl, fn);
596 else
597 free(fn);
598 }
599 }
600
601 return (fsavl);
602 }
603
604 /*
605 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
606 */
/*
 * Traversal state shared by send_iterate_fs() and send_iterate_snap()
 * while gather_nvlist() builds the "fss" nvlist.
 */
typedef struct send_data {
	/*
	 * assigned inside every recursive call,
	 * restored from *_save on return:
	 *
	 * guid of fromsnap snapshot in parent dataset
	 * txg of fromsnap snapshot in current dataset
	 * txg of tosnap snapshot in current dataset
	 */

	uint64_t parent_fromsnap_guid;
	uint64_t fromsnap_txg;
	uint64_t tosnap_txg;

	/* the nvlists get accumulated during depth-first traversal */
	nvlist_t *parent_snaps;
	nvlist_t *fss;
	nvlist_t *snapprops;
	nvlist_t *snapholds;	/* user holds */

	/* send-receive configuration, does not change during traversal */
	const char *fsname;
	const char *fromsnap;
	const char *tosnap;
	boolean_t recursive;
	boolean_t raw;
	boolean_t doall;
	boolean_t replicate;
	boolean_t verbose;
	boolean_t backup;	/* passed to send_iterate_prop() as received_only */
	boolean_t seenfrom;	/* set by the traversal once fromsnap has been reached */
	boolean_t seento;	/* set by the traversal once tosnap has been reached */
	boolean_t holds;	/* were holds requested with send -h */
	boolean_t props;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *	 "snapholds" -> { name (lastname) -> { holdname -> crtime } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "is_encroot" -> boolean
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
667
668 static void
669 send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv);
670
671 static int
672 send_iterate_snap(zfs_handle_t *zhp, void *arg)
673 {
674 send_data_t *sd = arg;
675 uint64_t guid = zhp->zfs_dmustats.dds_guid;
676 uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
677 char *snapname;
678 nvlist_t *nv;
679 boolean_t isfromsnap, istosnap, istosnapwithnofrom;
680
681 snapname = strrchr(zhp->zfs_name, '@')+1;
682 isfromsnap = (sd->fromsnap != NULL &&
683 strcmp(sd->fromsnap, snapname) == 0);
684 istosnap = (sd->tosnap != NULL && (strcmp(sd->tosnap, snapname) == 0));
685 istosnapwithnofrom = (istosnap && sd->fromsnap == NULL);
686
687 if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
688 if (sd->verbose) {
689 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
690 "skipping snapshot %s because it was created "
691 "after the destination snapshot (%s)\n"),
692 zhp->zfs_name, sd->tosnap);
693 }
694 zfs_close(zhp);
695 return (0);
696 }
697
698 VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
699 /*
700 * NB: if there is no fromsnap here (it's a newly created fs in
701 * an incremental replication), we will substitute the tosnap.
702 */
703 if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap)) {
704 sd->parent_fromsnap_guid = guid;
705 }
706
707 if (!sd->recursive) {
708 if (!sd->seenfrom && isfromsnap) {
709 sd->seenfrom = B_TRUE;
710 zfs_close(zhp);
711 return (0);
712 }
713
714 if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
715 zfs_close(zhp);
716 return (0);
717 }
718
719 if (istosnap)
720 sd->seento = B_TRUE;
721 }
722
723 VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
724 send_iterate_prop(zhp, sd->backup, nv);
725 VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
726 nvlist_free(nv);
727 if (sd->holds) {
728 nvlist_t *holds = fnvlist_alloc();
729 int err = lzc_get_holds(zhp->zfs_name, &holds);
730 if (err == 0) {
731 VERIFY(0 == nvlist_add_nvlist(sd->snapholds,
732 snapname, holds));
733 }
734 fnvlist_free(holds);
735 }
736
737 zfs_close(zhp);
738 return (0);
739 }
740
741 static void
742 send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
743 {
744 nvlist_t *props = NULL;
745 nvpair_t *elem = NULL;
746
747 if (received_only)
748 props = zfs_get_recvd_props(zhp);
749 else
750 props = zhp->zfs_props;
751
752 while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
753 char *propname = nvpair_name(elem);
754 zfs_prop_t prop = zfs_name_to_prop(propname);
755 nvlist_t *propnv;
756
757 if (!zfs_prop_user(propname)) {
758 /*
759 * Realistically, this should never happen. However,
760 * we want the ability to add DSL properties without
761 * needing to make incompatible version changes. We
762 * need to ignore unknown properties to allow older
763 * software to still send datasets containing these
764 * properties, with the unknown properties elided.
765 */
766 if (prop == ZPROP_INVAL)
767 continue;
768
769 if (zfs_prop_readonly(prop))
770 continue;
771 }
772
773 verify(nvpair_value_nvlist(elem, &propnv) == 0);
774 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
775 prop == ZFS_PROP_REFQUOTA ||
776 prop == ZFS_PROP_REFRESERVATION) {
777 char *source;
778 uint64_t value;
779 verify(nvlist_lookup_uint64(propnv,
780 ZPROP_VALUE, &value) == 0);
781 if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
782 continue;
783 /*
784 * May have no source before SPA_VERSION_RECVD_PROPS,
785 * but is still modifiable.
786 */
787 if (nvlist_lookup_string(propnv,
788 ZPROP_SOURCE, &source) == 0) {
789 if ((strcmp(source, zhp->zfs_name) != 0) &&
790 (strcmp(source,
791 ZPROP_SOURCE_VAL_RECVD) != 0))
792 continue;
793 }
794 } else {
795 char *source;
796 if (nvlist_lookup_string(propnv,
797 ZPROP_SOURCE, &source) != 0)
798 continue;
799 if ((strcmp(source, zhp->zfs_name) != 0) &&
800 (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
801 continue;
802 }
803
804 if (zfs_prop_user(propname) ||
805 zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
806 char *value;
807 verify(nvlist_lookup_string(propnv,
808 ZPROP_VALUE, &value) == 0);
809 VERIFY(0 == nvlist_add_string(nv, propname, value));
810 } else {
811 uint64_t value;
812 verify(nvlist_lookup_uint64(propnv,
813 ZPROP_VALUE, &value) == 0);
814 VERIFY(0 == nvlist_add_uint64(nv, propname, value));
815 }
816 }
817 }
818
819 /*
820 * returns snapshot creation txg
821 * and returns 0 if the snapshot does not exist
822 */
823 static uint64_t
824 get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
825 {
826 char name[ZFS_MAX_DATASET_NAME_LEN];
827 uint64_t txg = 0;
828
829 if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
830 return (txg);
831
832 (void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
833 if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
834 zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
835 if (zhp != NULL) {
836 txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
837 zfs_close(zhp);
838 }
839 }
840
841 return (txg);
842 }
843
844 /*
845 * recursively generate nvlists describing datasets. See comment
846 * for the data structure send_data_t above for description of contents
847 * of the nvlist.
848 */
849 static int
850 send_iterate_fs(zfs_handle_t *zhp, void *arg)
851 {
852 send_data_t *sd = arg;
853 nvlist_t *nvfs = NULL, *nv = NULL;
854 int rv = 0;
855 uint64_t min_txg = 0, max_txg = 0;
856 uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
857 uint64_t fromsnap_txg_save = sd->fromsnap_txg;
858 uint64_t tosnap_txg_save = sd->tosnap_txg;
859 uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
860 uint64_t guid = zhp->zfs_dmustats.dds_guid;
861 uint64_t fromsnap_txg, tosnap_txg;
862 char guidstring[64];
863
864 fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
865 if (fromsnap_txg != 0)
866 sd->fromsnap_txg = fromsnap_txg;
867
868 tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
869 if (tosnap_txg != 0)
870 sd->tosnap_txg = tosnap_txg;
871
872 /*
873 * on the send side, if the current dataset does not have tosnap,
874 * perform two additional checks:
875 *
876 * - skip sending the current dataset if it was created later than
877 * the parent tosnap
878 * - return error if the current dataset was created earlier than
879 * the parent tosnap
880 */
881 if (sd->tosnap != NULL && tosnap_txg == 0) {
882 if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
883 if (sd->verbose) {
884 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
885 "skipping dataset %s: snapshot %s does "
886 "not exist\n"), zhp->zfs_name, sd->tosnap);
887 }
888 } else {
889 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
890 "cannot send %s@%s%s: snapshot %s@%s does not "
891 "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
892 dgettext(TEXT_DOMAIN, " recursively") : "",
893 zhp->zfs_name, sd->tosnap);
894 rv = EZFS_NOENT;
895 }
896 goto out;
897 }
898
899 nvfs = fnvlist_alloc();
900 fnvlist_add_string(nvfs, "name", zhp->zfs_name);
901 fnvlist_add_uint64(nvfs, "parentfromsnap",
902 sd->parent_fromsnap_guid);
903
904 if (zhp->zfs_dmustats.dds_origin[0]) {
905 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
906 zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
907 if (origin == NULL) {
908 rv = -1;
909 goto out;
910 }
911 fnvlist_add_uint64(nvfs, "origin",
912 origin->zfs_dmustats.dds_guid);
913
914 zfs_close(origin);
915 }
916
917 /* iterate over props */
918 if (sd->props || sd->backup || sd->recursive) {
919 nv = fnvlist_alloc();
920 send_iterate_prop(zhp, sd->backup, nv);
921 }
922 if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
923 boolean_t encroot;
924
925 /* determine if this dataset is an encryption root */
926 if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
927 rv = -1;
928 goto out;
929 }
930
931 if (encroot)
932 fnvlist_add_boolean(nvfs, "is_encroot");
933
934 /*
935 * Encrypted datasets can only be sent with properties if
936 * the raw flag is specified because the receive side doesn't
937 * currently have a mechanism for recursively asking the user
938 * for new encryption parameters.
939 */
940 if (!sd->raw) {
941 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
942 "cannot send %s@%s: encrypted dataset %s may not "
943 "be sent with properties without the raw flag\n"),
944 sd->fsname, sd->tosnap, zhp->zfs_name);
945 rv = -1;
946 goto out;
947 }
948
949 }
950
951 if (nv != NULL)
952 fnvlist_add_nvlist(nvfs, "props", nv);
953
954 /* iterate over snaps, and set sd->parent_fromsnap_guid */
955 sd->parent_fromsnap_guid = 0;
956 sd->parent_snaps = fnvlist_alloc();
957 sd->snapprops = fnvlist_alloc();
958 if (sd->holds)
959 VERIFY(0 == nvlist_alloc(&sd->snapholds, NV_UNIQUE_NAME, 0));
960
961
962 /*
963 * If this is a "doall" send, a replicate send or we're just trying
964 * to gather a list of previous snapshots, iterate through all the
965 * snaps in the txg range. Otherwise just look at the one we're
966 * interested in.
967 */
968 if (sd->doall || sd->replicate || sd->tosnap == NULL) {
969 if (!sd->replicate && fromsnap_txg != 0)
970 min_txg = fromsnap_txg;
971 if (!sd->replicate && tosnap_txg != 0)
972 max_txg = tosnap_txg;
973 (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd,
974 min_txg, max_txg);
975 } else {
976 char snapname[MAXPATHLEN] = { 0 };
977 zfs_handle_t *snap;
978
979 (void) snprintf(snapname, sizeof (snapname), "%s@%s",
980 zhp->zfs_name, sd->tosnap);
981 if (sd->fromsnap != NULL)
982 sd->seenfrom = B_TRUE;
983 snap = zfs_open(zhp->zfs_hdl, snapname,
984 ZFS_TYPE_SNAPSHOT);
985 if (snap != NULL)
986 (void) send_iterate_snap(snap, sd);
987 }
988
989 fnvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps);
990 fnvlist_add_nvlist(nvfs, "snapprops", sd->snapprops);
991 if (sd->holds)
992 fnvlist_add_nvlist(nvfs, "snapholds", sd->snapholds);
993 fnvlist_free(sd->parent_snaps);
994 fnvlist_free(sd->snapprops);
995 fnvlist_free(sd->snapholds);
996
997 /* add this fs to nvlist */
998 (void) snprintf(guidstring, sizeof (guidstring),
999 "0x%llx", (longlong_t)guid);
1000 fnvlist_add_nvlist(sd->fss, guidstring, nvfs);
1001
1002 /* iterate over children */
1003 if (sd->recursive)
1004 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
1005
1006 out:
1007 sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
1008 sd->fromsnap_txg = fromsnap_txg_save;
1009 sd->tosnap_txg = tosnap_txg_save;
1010 fnvlist_free(nv);
1011 fnvlist_free(nvfs);
1012
1013 zfs_close(zhp);
1014 return (rv);
1015 }
1016
1017 static int
1018 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
1019 const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t doall,
1020 boolean_t replicate, boolean_t verbose, boolean_t backup, boolean_t holds,
1021 boolean_t props, nvlist_t **nvlp, avl_tree_t **avlp)
1022 {
1023 zfs_handle_t *zhp;
1024 send_data_t sd = { 0 };
1025 int error;
1026
1027 zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
1028 if (zhp == NULL)
1029 return (EZFS_BADTYPE);
1030
1031 VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
1032 sd.fsname = fsname;
1033 sd.fromsnap = fromsnap;
1034 sd.tosnap = tosnap;
1035 sd.recursive = recursive;
1036 sd.raw = raw;
1037 sd.doall = doall;
1038 sd.replicate = replicate;
1039 sd.verbose = verbose;
1040 sd.backup = backup;
1041 sd.holds = holds;
1042 sd.props = props;
1043
1044 if ((error = send_iterate_fs(zhp, &sd)) != 0) {
1045 nvlist_free(sd.fss);
1046 if (avlp != NULL)
1047 *avlp = NULL;
1048 *nvlp = NULL;
1049 return (error);
1050 }
1051
1052 if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
1053 nvlist_free(sd.fss);
1054 *nvlp = NULL;
1055 return (EZFS_NOMEM);
1056 }
1057
1058 *nvlp = sd.fss;
1059 return (0);
1060 }
1061
1062 /*
1063 * Routines specific to "zfs send"
1064 */
/*
 * State for dumping send streams.
 * NOTE(review): the functions that use this structure are outside this part
 * of the file, so only the original field notes are kept here.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t dryrun, parsable, progress, embed_data, std_out;
	boolean_t large_block, compress, raw, holds;
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	nvlist_t *snapholds;
	avl_tree_t *fsavl;
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	nvlist_t *debugnv;
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
	int cleanup_fd;
	int verbosity;
	uint64_t size;
} send_dump_data_t;
1087
1088 static int
1089 zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
1090 enum lzc_send_flags flags, uint64_t *spacep)
1091 {
1092 libzfs_handle_t *hdl = zhp->zfs_hdl;
1093 int error;
1094
1095 assert(snapname != NULL);
1096 error = lzc_send_space(snapname, from, flags, spacep);
1097
1098 if (error != 0) {
1099 char errbuf[1024];
1100 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1101 "warning: cannot estimate space for '%s'"), snapname);
1102
1103 switch (error) {
1104 case EXDEV:
1105 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1106 "not an earlier snapshot from the same fs"));
1107 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1108
1109 case ENOENT:
1110 if (zfs_dataset_exists(hdl, snapname,
1111 ZFS_TYPE_SNAPSHOT)) {
1112 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1113 "incremental source (%s) does not exist"),
1114 snapname);
1115 }
1116 return (zfs_error(hdl, EZFS_NOENT, errbuf));
1117
1118 case EDQUOT:
1119 case EFBIG:
1120 case EIO:
1121 case ENOLINK:
1122 case ENOSPC:
1123 case ENOSTR:
1124 case ENXIO:
1125 case EPIPE:
1126 case ERANGE:
1127 case EFAULT:
1128 case EROFS:
1129 case EINVAL:
1130 zfs_error_aux(hdl, strerror(error));
1131 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1132
1133 default:
1134 return (zfs_standard_error(hdl, error, errbuf));
1135 }
1136 }
1137
1138 return (0);
1139 }
1140
1141 /*
1142 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
1143 * NULL) to the file descriptor specified by outfd.
1144 */
1145 static int
1146 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
1147 boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
1148 nvlist_t *debugnv)
1149 {
1150 zfs_cmd_t zc = {"\0"};
1151 libzfs_handle_t *hdl = zhp->zfs_hdl;
1152 nvlist_t *thisdbg;
1153
1154 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1155 assert(fromsnap_obj == 0 || !fromorigin);
1156
1157 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1158 zc.zc_cookie = outfd;
1159 zc.zc_obj = fromorigin;
1160 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1161 zc.zc_fromobj = fromsnap_obj;
1162 zc.zc_flags = flags;
1163
1164 VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
1165 if (fromsnap && fromsnap[0] != '\0') {
1166 VERIFY(0 == nvlist_add_string(thisdbg,
1167 "fromsnap", fromsnap));
1168 }
1169
1170 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1171 char errbuf[1024];
1172 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1173 "warning: cannot send '%s'"), zhp->zfs_name);
1174
1175 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
1176 if (debugnv) {
1177 VERIFY(0 == nvlist_add_nvlist(debugnv,
1178 zhp->zfs_name, thisdbg));
1179 }
1180 nvlist_free(thisdbg);
1181
1182 switch (errno) {
1183 case EXDEV:
1184 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1185 "not an earlier snapshot from the same fs"));
1186 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1187
1188 case EACCES:
1189 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1190 "source key must be loaded"));
1191 return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1192
1193 case ENOENT:
1194 if (zfs_dataset_exists(hdl, zc.zc_name,
1195 ZFS_TYPE_SNAPSHOT)) {
1196 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1197 "incremental source (@%s) does not exist"),
1198 zc.zc_value);
1199 }
1200 return (zfs_error(hdl, EZFS_NOENT, errbuf));
1201
1202 case EDQUOT:
1203 case EFBIG:
1204 case EIO:
1205 case ENOLINK:
1206 case ENOSPC:
1207 case ENOSTR:
1208 case ENXIO:
1209 case EPIPE:
1210 case ERANGE:
1211 case EFAULT:
1212 case EROFS:
1213 zfs_error_aux(hdl, strerror(errno));
1214 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1215
1216 default:
1217 return (zfs_standard_error(hdl, errno, errbuf));
1218 }
1219 }
1220
1221 if (debugnv)
1222 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
1223 nvlist_free(thisdbg);
1224
1225 return (0);
1226 }
1227
1228 static void
1229 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
1230 {
1231 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1232
1233 /*
1234 * zfs_send() only sets snapholds for sends that need them,
1235 * e.g. replication and doall.
1236 */
1237 if (sdd->snapholds == NULL)
1238 return;
1239
1240 fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1241 }
1242
1243 int
1244 zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
1245 uint64_t *blocks_visited)
1246 {
1247 zfs_cmd_t zc = { {0} };
1248 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1249 zc.zc_cookie = fd;
1250 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1251 return (errno);
1252 if (bytes_written != NULL)
1253 *bytes_written = zc.zc_cookie;
1254 if (blocks_visited != NULL)
1255 *blocks_visited = zc.zc_objset_type;
1256 return (0);
1257 }
1258
1259 static void *
1260 send_progress_thread(void *arg)
1261 {
1262 progress_arg_t *pa = arg;
1263 zfs_handle_t *zhp = pa->pa_zhp;
1264 uint64_t bytes;
1265 uint64_t blocks;
1266 char buf[16];
1267 time_t t;
1268 struct tm *tm;
1269 boolean_t firstloop = B_TRUE;
1270
1271 /*
1272 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1273 */
1274 for (;;) {
1275 int err;
1276 (void) sleep(1);
1277 if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
1278 &blocks)) != 0) {
1279 if (err == EINTR || err == ENOENT)
1280 return ((void *)0);
1281 return ((void *)(uintptr_t)err);
1282 }
1283
1284 if (firstloop && !pa->pa_parsable) {
1285 (void) fprintf(stderr,
1286 "TIME %s %sSNAPSHOT %s\n",
1287 pa->pa_estimate ? "BYTES" : " SENT",
1288 pa->pa_verbosity >= 2 ? " BLOCKS " : "",
1289 zhp->zfs_name);
1290 firstloop = B_FALSE;
1291 }
1292
1293 (void) time(&t);
1294 tm = localtime(&t);
1295
1296 if (pa->pa_verbosity >= 2 && pa->pa_parsable) {
1297 (void) fprintf(stderr,
1298 "%02d:%02d:%02d\t%llu\t%llu\t%s\n",
1299 tm->tm_hour, tm->tm_min, tm->tm_sec,
1300 (u_longlong_t)bytes, (u_longlong_t)blocks,
1301 zhp->zfs_name);
1302 } else if (pa->pa_verbosity >= 2) {
1303 zfs_nicenum(bytes, buf, sizeof (buf));
1304 (void) fprintf(stderr,
1305 "%02d:%02d:%02d %5s %8llu %s\n",
1306 tm->tm_hour, tm->tm_min, tm->tm_sec,
1307 buf, (u_longlong_t)blocks, zhp->zfs_name);
1308 } else if (pa->pa_parsable) {
1309 (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1310 tm->tm_hour, tm->tm_min, tm->tm_sec,
1311 (u_longlong_t)bytes, zhp->zfs_name);
1312 } else {
1313 zfs_nicebytes(bytes, buf, sizeof (buf));
1314 (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
1315 tm->tm_hour, tm->tm_min, tm->tm_sec,
1316 buf, zhp->zfs_name);
1317 }
1318 }
1319 }
1320
1321 static void
1322 send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1323 uint64_t size, boolean_t parsable)
1324 {
1325 if (parsable) {
1326 if (fromsnap != NULL) {
1327 (void) fprintf(fout, "incremental\t%s\t%s",
1328 fromsnap, tosnap);
1329 } else {
1330 (void) fprintf(fout, "full\t%s",
1331 tosnap);
1332 }
1333 } else {
1334 if (fromsnap != NULL) {
1335 if (strchr(fromsnap, '@') == NULL &&
1336 strchr(fromsnap, '#') == NULL) {
1337 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1338 "send from @%s to %s"),
1339 fromsnap, tosnap);
1340 } else {
1341 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1342 "send from %s to %s"),
1343 fromsnap, tosnap);
1344 }
1345 } else {
1346 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1347 "full send of %s"),
1348 tosnap);
1349 }
1350 }
1351
1352 if (parsable) {
1353 (void) fprintf(fout, "\t%llu",
1354 (longlong_t)size);
1355 } else if (size != 0) {
1356 char buf[16];
1357 zfs_nicebytes(size, buf, sizeof (buf));
1358 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1359 " estimated size is %s"), buf);
1360 }
1361 (void) fprintf(fout, "\n");
1362 }
1363
1364 static int
1365 dump_snapshot(zfs_handle_t *zhp, void *arg)
1366 {
1367 send_dump_data_t *sdd = arg;
1368 progress_arg_t pa = { 0 };
1369 pthread_t tid;
1370 char *thissnap;
1371 enum lzc_send_flags flags = 0;
1372 int err;
1373 boolean_t isfromsnap, istosnap, fromorigin;
1374 boolean_t exclude = B_FALSE;
1375 FILE *fout = sdd->std_out ? stdout : stderr;
1376
1377 err = 0;
1378 thissnap = strchr(zhp->zfs_name, '@') + 1;
1379 isfromsnap = (sdd->fromsnap != NULL &&
1380 strcmp(sdd->fromsnap, thissnap) == 0);
1381
1382 if (!sdd->seenfrom && isfromsnap) {
1383 gather_holds(zhp, sdd);
1384 sdd->seenfrom = B_TRUE;
1385 (void) strlcpy(sdd->prevsnap, thissnap,
1386 sizeof (sdd->prevsnap));
1387 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1388 zfs_close(zhp);
1389 return (0);
1390 }
1391
1392 if (sdd->seento || !sdd->seenfrom) {
1393 zfs_close(zhp);
1394 return (0);
1395 }
1396
1397 istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1398 if (istosnap)
1399 sdd->seento = B_TRUE;
1400
1401 if (sdd->large_block)
1402 flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1403 if (sdd->embed_data)
1404 flags |= LZC_SEND_FLAG_EMBED_DATA;
1405 if (sdd->compress)
1406 flags |= LZC_SEND_FLAG_COMPRESS;
1407 if (sdd->raw)
1408 flags |= LZC_SEND_FLAG_RAW;
1409
1410 if (!sdd->doall && !isfromsnap && !istosnap) {
1411 if (sdd->replicate) {
1412 char *snapname;
1413 nvlist_t *snapprops;
1414 /*
1415 * Filter out all intermediate snapshots except origin
1416 * snapshots needed to replicate clones.
1417 */
1418 nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1419 zhp->zfs_dmustats.dds_guid, &snapname);
1420
1421 VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1422 "snapprops", &snapprops));
1423 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1424 thissnap, &snapprops));
1425 exclude = !nvlist_exists(snapprops, "is_clone_origin");
1426 } else {
1427 exclude = B_TRUE;
1428 }
1429 }
1430
1431 /*
1432 * If a filter function exists, call it to determine whether
1433 * this snapshot will be sent.
1434 */
1435 if (exclude || (sdd->filter_cb != NULL &&
1436 sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1437 /*
1438 * This snapshot is filtered out. Don't send it, and don't
1439 * set prevsnap_obj, so it will be as if this snapshot didn't
1440 * exist, and the next accepted snapshot will be sent as
1441 * an incremental from the last accepted one, or as the
1442 * first (and full) snapshot in the case of a replication,
1443 * non-incremental send.
1444 */
1445 zfs_close(zhp);
1446 return (0);
1447 }
1448
1449 gather_holds(zhp, sdd);
1450 fromorigin = sdd->prevsnap[0] == '\0' &&
1451 (sdd->fromorigin || sdd->replicate);
1452
1453 if (sdd->verbosity != 0) {
1454 uint64_t size = 0;
1455 char fromds[ZFS_MAX_DATASET_NAME_LEN];
1456
1457 if (sdd->prevsnap[0] != '\0') {
1458 (void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds));
1459 *(strchr(fromds, '@') + 1) = '\0';
1460 (void) strlcat(fromds, sdd->prevsnap, sizeof (fromds));
1461 }
1462 if (zfs_send_space(zhp, zhp->zfs_name,
1463 sdd->prevsnap[0] ? fromds : NULL, flags, &size) != 0) {
1464 size = 0; /* cannot estimate send space */
1465 } else {
1466 send_print_verbose(fout, zhp->zfs_name,
1467 sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1468 size, sdd->parsable);
1469 }
1470 sdd->size += size;
1471 }
1472
1473 if (!sdd->dryrun) {
1474 /*
1475 * If progress reporting is requested, spawn a new thread to
1476 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1477 */
1478 if (sdd->progress) {
1479 pa.pa_zhp = zhp;
1480 pa.pa_fd = sdd->outfd;
1481 pa.pa_parsable = sdd->parsable;
1482 pa.pa_estimate = B_FALSE;
1483 pa.pa_verbosity = sdd->verbosity;
1484
1485 if ((err = pthread_create(&tid, NULL,
1486 send_progress_thread, &pa)) != 0) {
1487 zfs_close(zhp);
1488 return (err);
1489 }
1490 }
1491
1492 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1493 fromorigin, sdd->outfd, flags, sdd->debugnv);
1494
1495 if (sdd->progress) {
1496 void *status = NULL;
1497 (void) pthread_cancel(tid);
1498 (void) pthread_join(tid, &status);
1499 int error = (int)(uintptr_t)status;
1500 if (error != 0 && status != PTHREAD_CANCELED) {
1501 char errbuf[1024];
1502 (void) snprintf(errbuf, sizeof (errbuf),
1503 dgettext(TEXT_DOMAIN,
1504 "progress thread exited nonzero"));
1505 return (zfs_standard_error(zhp->zfs_hdl, error,
1506 errbuf));
1507 }
1508 }
1509 }
1510
1511 (void) strcpy(sdd->prevsnap, thissnap);
1512 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1513 zfs_close(zhp);
1514 return (err);
1515 }
1516
1517 static int
1518 dump_filesystem(zfs_handle_t *zhp, void *arg)
1519 {
1520 int rv = 0;
1521 send_dump_data_t *sdd = arg;
1522 boolean_t missingfrom = B_FALSE;
1523 zfs_cmd_t zc = {"\0"};
1524 uint64_t min_txg = 0, max_txg = 0;
1525
1526 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1527 zhp->zfs_name, sdd->tosnap);
1528 if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1529 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1530 "WARNING: could not send %s@%s: does not exist\n"),
1531 zhp->zfs_name, sdd->tosnap);
1532 sdd->err = B_TRUE;
1533 return (0);
1534 }
1535
1536 if (sdd->replicate && sdd->fromsnap) {
1537 /*
1538 * If this fs does not have fromsnap, and we're doing
1539 * recursive, we need to send a full stream from the
1540 * beginning (or an incremental from the origin if this
1541 * is a clone). If we're doing non-recursive, then let
1542 * them get the error.
1543 */
1544 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1545 zhp->zfs_name, sdd->fromsnap);
1546 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1547 ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1548 missingfrom = B_TRUE;
1549 }
1550 }
1551
1552 sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1553 sdd->prevsnap_obj = 0;
1554 if (sdd->fromsnap == NULL || missingfrom)
1555 sdd->seenfrom = B_TRUE;
1556
1557
1558
1559 /*
1560 * Iterate through all snapshots and process the ones we will be
1561 * sending. If we only have a "from" and "to" snapshot to deal
1562 * with, we can avoid iterating through all the other snapshots.
1563 */
1564 if (sdd->doall || sdd->replicate || sdd->tosnap == NULL) {
1565 if (!sdd->replicate && sdd->fromsnap != NULL)
1566 min_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name,
1567 sdd->fromsnap);
1568 if (!sdd->replicate && sdd->tosnap != NULL)
1569 max_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name,
1570 sdd->tosnap);
1571 rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg,
1572 min_txg, max_txg);
1573 } else {
1574 char snapname[MAXPATHLEN] = { 0 };
1575 zfs_handle_t *snap;
1576
1577 if (!sdd->seenfrom) {
1578 (void) snprintf(snapname, sizeof (snapname),
1579 "%s@%s", zhp->zfs_name, sdd->fromsnap);
1580 snap = zfs_open(zhp->zfs_hdl, snapname,
1581 ZFS_TYPE_SNAPSHOT);
1582 if (snap != NULL)
1583 rv = dump_snapshot(snap, sdd);
1584 else
1585 rv = -1;
1586 }
1587
1588 if (rv == 0) {
1589 (void) snprintf(snapname, sizeof (snapname),
1590 "%s@%s", zhp->zfs_name, sdd->tosnap);
1591 snap = zfs_open(zhp->zfs_hdl, snapname,
1592 ZFS_TYPE_SNAPSHOT);
1593 if (snap != NULL)
1594 rv = dump_snapshot(snap, sdd);
1595 else
1596 rv = -1;
1597 }
1598 }
1599
1600 if (!sdd->seenfrom) {
1601 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1602 "WARNING: could not send %s@%s:\n"
1603 "incremental source (%s@%s) does not exist\n"),
1604 zhp->zfs_name, sdd->tosnap,
1605 zhp->zfs_name, sdd->fromsnap);
1606 sdd->err = B_TRUE;
1607 } else if (!sdd->seento) {
1608 if (sdd->fromsnap) {
1609 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1610 "WARNING: could not send %s@%s:\n"
1611 "incremental source (%s@%s) "
1612 "is not earlier than it\n"),
1613 zhp->zfs_name, sdd->tosnap,
1614 zhp->zfs_name, sdd->fromsnap);
1615 } else {
1616 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1617 "WARNING: "
1618 "could not send %s@%s: does not exist\n"),
1619 zhp->zfs_name, sdd->tosnap);
1620 }
1621 sdd->err = B_TRUE;
1622 }
1623
1624 return (rv);
1625 }
1626
1627 static int
1628 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1629 {
1630 send_dump_data_t *sdd = arg;
1631 nvpair_t *fspair;
1632 boolean_t needagain, progress;
1633
1634 if (!sdd->replicate)
1635 return (dump_filesystem(rzhp, sdd));
1636
1637 /* Mark the clone origin snapshots. */
1638 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1639 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1640 nvlist_t *nvfs;
1641 uint64_t origin_guid = 0;
1642
1643 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1644 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1645 if (origin_guid != 0) {
1646 char *snapname;
1647 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1648 origin_guid, &snapname);
1649 if (origin_nv != NULL) {
1650 nvlist_t *snapprops;
1651 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1652 "snapprops", &snapprops));
1653 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1654 snapname, &snapprops));
1655 VERIFY(0 == nvlist_add_boolean(
1656 snapprops, "is_clone_origin"));
1657 }
1658 }
1659 }
1660 again:
1661 needagain = progress = B_FALSE;
1662 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1663 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1664 nvlist_t *fslist, *parent_nv;
1665 char *fsname;
1666 zfs_handle_t *zhp;
1667 int err;
1668 uint64_t origin_guid = 0;
1669 uint64_t parent_guid = 0;
1670
1671 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1672 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1673 continue;
1674
1675 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1676 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1677 (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1678 &parent_guid);
1679
1680 if (parent_guid != 0) {
1681 parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1682 if (!nvlist_exists(parent_nv, "sent")) {
1683 /* parent has not been sent; skip this one */
1684 needagain = B_TRUE;
1685 continue;
1686 }
1687 }
1688
1689 if (origin_guid != 0) {
1690 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1691 origin_guid, NULL);
1692 if (origin_nv != NULL &&
1693 !nvlist_exists(origin_nv, "sent")) {
1694 /*
1695 * origin has not been sent yet;
1696 * skip this clone.
1697 */
1698 needagain = B_TRUE;
1699 continue;
1700 }
1701 }
1702
1703 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1704 if (zhp == NULL)
1705 return (-1);
1706 err = dump_filesystem(zhp, sdd);
1707 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1708 progress = B_TRUE;
1709 zfs_close(zhp);
1710 if (err)
1711 return (err);
1712 }
1713 if (needagain) {
1714 assert(progress);
1715 goto again;
1716 }
1717
1718 /* clean out the sent flags in case we reuse this fss */
1719 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1720 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1721 nvlist_t *fslist;
1722
1723 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1724 (void) nvlist_remove_all(fslist, "sent");
1725 }
1726
1727 return (0);
1728 }
1729
1730 nvlist_t *
1731 zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1732 {
1733 unsigned int version;
1734 int nread, i;
1735 unsigned long long checksum, packed_len;
1736
1737 /*
1738 * Decode token header, which is:
1739 * <token version>-<checksum of payload>-<uncompressed payload length>
1740 * Note that the only supported token version is 1.
1741 */
1742 nread = sscanf(token, "%u-%llx-%llx-",
1743 &version, &checksum, &packed_len);
1744 if (nread != 3) {
1745 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1746 "resume token is corrupt (invalid format)"));
1747 return (NULL);
1748 }
1749
1750 if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1751 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1752 "resume token is corrupt (invalid version %u)"),
1753 version);
1754 return (NULL);
1755 }
1756
1757 /* convert hexadecimal representation to binary */
1758 token = strrchr(token, '-') + 1;
1759 int len = strlen(token) / 2;
1760 unsigned char *compressed = zfs_alloc(hdl, len);
1761 for (i = 0; i < len; i++) {
1762 nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1763 if (nread != 1) {
1764 free(compressed);
1765 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1766 "resume token is corrupt "
1767 "(payload is not hex-encoded)"));
1768 return (NULL);
1769 }
1770 }
1771
1772 /* verify checksum */
1773 zio_cksum_t cksum;
1774 fletcher_4_native_varsize(compressed, len, &cksum);
1775 if (cksum.zc_word[0] != checksum) {
1776 free(compressed);
1777 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1778 "resume token is corrupt (incorrect checksum)"));
1779 return (NULL);
1780 }
1781
1782 /* uncompress */
1783 void *packed = zfs_alloc(hdl, packed_len);
1784 uLongf packed_len_long = packed_len;
1785 if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1786 packed_len_long != packed_len) {
1787 free(packed);
1788 free(compressed);
1789 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1790 "resume token is corrupt (decompression failed)"));
1791 return (NULL);
1792 }
1793
1794 /* unpack nvlist */
1795 nvlist_t *nv;
1796 int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1797 free(packed);
1798 free(compressed);
1799 if (error != 0) {
1800 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1801 "resume token is corrupt (nvlist_unpack failed)"));
1802 return (NULL);
1803 }
1804 return (nv);
1805 }
1806 static enum lzc_send_flags
1807 lzc_flags_from_sendflags(const sendflags_t *flags)
1808 {
1809 enum lzc_send_flags lzc_flags = 0;
1810 if (flags->largeblock)
1811 lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1812 if (flags->embed_data)
1813 lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1814 if (flags->compress)
1815 lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1816 if (flags->raw)
1817 lzc_flags |= LZC_SEND_FLAG_RAW;
1818 return (lzc_flags);
1819 }
1820
1821 static int
1822 estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
1823 uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes,
1824 const char *redactbook, char *errbuf)
1825 {
1826 uint64_t size;
1827 FILE *fout = flags->dryrun ? stdout : stderr;
1828 progress_arg_t pa = { 0 };
1829 int err = 0;
1830 pthread_t ptid;
1831
1832 if (flags->progress) {
1833 pa.pa_zhp = zhp;
1834 pa.pa_fd = fd;
1835 pa.pa_parsable = flags->parsable;
1836 pa.pa_estimate = B_TRUE;
1837 pa.pa_verbosity = flags->verbosity;
1838
1839 err = pthread_create(&ptid, NULL,
1840 send_progress_thread, &pa);
1841 if (err != 0) {
1842 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1843 return (zfs_error(zhp->zfs_hdl,
1844 EZFS_THREADCREATEFAILED, errbuf));
1845 }
1846 }
1847
1848 err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
1849 lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes,
1850 redactbook, fd, &size);
1851
1852 if (flags->progress) {
1853 void *status = NULL;
1854 (void) pthread_cancel(ptid);
1855 (void) pthread_join(ptid, &status);
1856 int error = (int)(uintptr_t)status;
1857 if (error != 0 && status != PTHREAD_CANCELED) {
1858 char errbuf[1024];
1859 (void) snprintf(errbuf, sizeof (errbuf),
1860 dgettext(TEXT_DOMAIN, "progress thread exited "
1861 "nonzero"));
1862 return (zfs_standard_error(zhp->zfs_hdl, error,
1863 errbuf));
1864 }
1865 }
1866
1867 if (err != 0) {
1868 zfs_error_aux(zhp->zfs_hdl, strerror(err));
1869 return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
1870 errbuf));
1871 }
1872 send_print_verbose(fout, zhp->zfs_name, from, size,
1873 flags->parsable);
1874
1875 if (flags->parsable) {
1876 (void) fprintf(fout, "size\t%llu\n", (longlong_t)size);
1877 } else {
1878 char buf[16];
1879 zfs_nicenum(size, buf, sizeof (buf));
1880 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1881 "total estimated size is %s\n"), buf);
1882 }
1883 return (0);
1884 }
1885
1886 static boolean_t
1887 redact_snaps_contains(const uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
1888 {
1889 for (int i = 0; i < num_snaps; i++) {
1890 if (snaps[i] == guid)
1891 return (B_TRUE);
1892 }
1893 return (B_FALSE);
1894 }
1895
1896 static boolean_t
1897 redact_snaps_equal(const uint64_t *snaps1, uint64_t num_snaps1,
1898 const uint64_t *snaps2, uint64_t num_snaps2)
1899 {
1900 if (num_snaps1 != num_snaps2)
1901 return (B_FALSE);
1902 for (int i = 0; i < num_snaps1; i++) {
1903 if (!redact_snaps_contains(snaps2, num_snaps2, snaps1[i]))
1904 return (B_FALSE);
1905 }
1906 return (B_TRUE);
1907 }
1908
1909 /*
1910 * Check that the list of redaction snapshots in the bookmark matches the send
1911 * we're resuming, and return whether or not it's complete.
1912 *
1913 * Note that the caller needs to free the contents of *bookname with free() if
1914 * this function returns successfully.
1915 */
1916 static int
1917 find_redact_book(libzfs_handle_t *hdl, const char *path,
1918 const uint64_t *redact_snap_guids, int num_redact_snaps,
1919 char **bookname)
1920 {
1921 char errbuf[1024];
1922 int error = 0;
1923 nvlist_t *props = fnvlist_alloc();
1924 nvlist_t *bmarks;
1925
1926 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1927 "cannot resume send"));
1928
1929 fnvlist_add_boolean(props, "redact_complete");
1930 fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1931 error = lzc_get_bookmarks(path, props, &bmarks);
1932 nvlist_free(props);
1933 if (error != 0) {
1934 if (error == ESRCH) {
1935 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1936 "nonexistent redaction bookmark provided"));
1937 } else if (error == ENOENT) {
1938 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1939 "dataset to be sent no longer exists"));
1940 } else {
1941 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1942 "unknown error: %s"), strerror(error));
1943 }
1944 return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1945 }
1946 nvpair_t *pair;
1947 for (pair = nvlist_next_nvpair(bmarks, NULL); pair;
1948 pair = nvlist_next_nvpair(bmarks, pair)) {
1949
1950 nvlist_t *bmark = fnvpair_value_nvlist(pair);
1951 nvlist_t *vallist = fnvlist_lookup_nvlist(bmark,
1952 zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1953 uint_t len = 0;
1954 uint64_t *bmarksnaps = fnvlist_lookup_uint64_array(vallist,
1955 ZPROP_VALUE, &len);
1956 if (redact_snaps_equal(redact_snap_guids,
1957 num_redact_snaps, bmarksnaps, len)) {
1958 break;
1959 }
1960 }
1961 if (pair == NULL) {
1962 fnvlist_free(bmarks);
1963 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1964 "no appropriate redaction bookmark exists"));
1965 return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1966 }
1967 char *name = nvpair_name(pair);
1968 nvlist_t *bmark = fnvpair_value_nvlist(pair);
1969 nvlist_t *vallist = fnvlist_lookup_nvlist(bmark, "redact_complete");
1970 boolean_t complete = fnvlist_lookup_boolean_value(vallist,
1971 ZPROP_VALUE);
1972 if (!complete) {
1973 fnvlist_free(bmarks);
1974 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1975 "incomplete redaction bookmark provided"));
1976 return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1977 }
1978 *bookname = strndup(name, ZFS_MAX_DATASET_NAME_LEN);
1979 ASSERT3P(*bookname, !=, NULL);
1980 fnvlist_free(bmarks);
1981 return (0);
1982 }
1983
1984 int
1985 zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1986 const char *resume_token)
1987 {
1988 char errbuf[1024];
1989 char *toname;
1990 char *fromname = NULL;
1991 uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1992 zfs_handle_t *zhp;
1993 int error = 0;
1994 char name[ZFS_MAX_DATASET_NAME_LEN];
1995 enum lzc_send_flags lzc_flags = 0;
1996 FILE *fout = (flags->verbosity > 0 && flags->dryrun) ? stdout : stderr;
1997 uint64_t *redact_snap_guids = NULL;
1998 int num_redact_snaps = 0;
1999 char *redact_book = NULL;
2000
2001 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2002 "cannot resume send"));
2003
2004 nvlist_t *resume_nvl =
2005 zfs_send_resume_token_to_nvlist(hdl, resume_token);
2006 if (resume_nvl == NULL) {
2007 /*
2008 * zfs_error_aux has already been set by
2009 * zfs_send_resume_token_to_nvlist
2010 */
2011 return (zfs_error(hdl, EZFS_FAULT, errbuf));
2012 }
2013 if (flags->verbosity != 0) {
2014 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
2015 "resume token contents:\n"));
2016 nvlist_print(fout, resume_nvl);
2017 }
2018
2019 if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
2020 nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
2021 nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
2022 nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
2023 nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
2024 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2025 "resume token is corrupt"));
2026 return (zfs_error(hdl, EZFS_FAULT, errbuf));
2027 }
2028 fromguid = 0;
2029 (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
2030
2031 if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
2032 lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
2033 if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
2034 lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
2035 if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
2036 lzc_flags |= LZC_SEND_FLAG_COMPRESS;
2037 if (flags->raw || nvlist_exists(resume_nvl, "rawok"))
2038 lzc_flags |= LZC_SEND_FLAG_RAW;
2039
2040 if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
2041 if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
2042 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2043 "'%s' is no longer the same snapshot used in "
2044 "the initial send"), toname);
2045 } else {
2046 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2047 "'%s' used in the initial send no longer exists"),
2048 toname);
2049 }
2050 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
2051 }
2052 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2053 if (zhp == NULL) {
2054 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2055 "unable to access '%s'"), name);
2056 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
2057 }
2058
2059 if (nvlist_lookup_uint64_array(resume_nvl, "book_redact_snaps",
2060 &redact_snap_guids, (uint_t *)&num_redact_snaps) != 0) {
2061 num_redact_snaps = -1;
2062 }
2063
2064 if (fromguid != 0) {
2065 if (guid_to_name_redact_snaps(hdl, toname, fromguid, B_TRUE,
2066 redact_snap_guids, num_redact_snaps, name) != 0) {
2067 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2068 "incremental source %#llx no longer exists"),
2069 (longlong_t)fromguid);
2070 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
2071 }
2072 fromname = name;
2073 }
2074
2075 redact_snap_guids = NULL;
2076
2077 if (nvlist_lookup_uint64_array(resume_nvl,
2078 zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &redact_snap_guids,
2079 (uint_t *)&num_redact_snaps) == 0) {
2080 char path[ZFS_MAX_DATASET_NAME_LEN];
2081
2082 (void) strlcpy(path, toname, sizeof (path));
2083 char *at = strchr(path, '@');
2084 ASSERT3P(at, !=, NULL);
2085
2086 *at = '\0';
2087
2088 if ((error = find_redact_book(hdl, path, redact_snap_guids,
2089 num_redact_snaps, &redact_book)) != 0) {
2090 return (error);
2091 }
2092 }
2093
2094 if (flags->verbosity != 0) {
2095 /*
2096 * Some of these may have come from the resume token, set them
2097 * here for size estimate purposes.
2098 */
2099 sendflags_t tmpflags = *flags;
2100 if (lzc_flags & LZC_SEND_FLAG_LARGE_BLOCK)
2101 tmpflags.largeblock = B_TRUE;
2102 if (lzc_flags & LZC_SEND_FLAG_COMPRESS)
2103 tmpflags.compress = B_TRUE;
2104 if (lzc_flags & LZC_SEND_FLAG_EMBED_DATA)
2105 tmpflags.embed_data = B_TRUE;
2106 error = estimate_size(zhp, fromname, outfd, &tmpflags,
2107 resumeobj, resumeoff, bytes, redact_book, errbuf);
2108 }
2109
2110 if (!flags->dryrun) {
2111 progress_arg_t pa = { 0 };
2112 pthread_t tid;
2113 /*
2114 * If progress reporting is requested, spawn a new thread to
2115 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
2116 */
2117 if (flags->progress) {
2118 pa.pa_zhp = zhp;
2119 pa.pa_fd = outfd;
2120 pa.pa_parsable = flags->parsable;
2121 pa.pa_estimate = B_FALSE;
2122 pa.pa_verbosity = flags->verbosity;
2123
2124 error = pthread_create(&tid, NULL,
2125 send_progress_thread, &pa);
2126 if (error != 0) {
2127 if (redact_book != NULL)
2128 free(redact_book);
2129 zfs_close(zhp);
2130 return (error);
2131 }
2132 }
2133
2134 error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
2135 lzc_flags, resumeobj, resumeoff, redact_book);
2136 if (redact_book != NULL)
2137 free(redact_book);
2138
2139 if (flags->progress) {
2140 void *status = NULL;
2141 (void) pthread_cancel(tid);
2142 (void) pthread_join(tid, &status);
2143 int error = (int)(uintptr_t)status;
2144 if (error != 0 && status != PTHREAD_CANCELED) {
2145 char errbuf[1024];
2146 (void) snprintf(errbuf, sizeof (errbuf),
2147 dgettext(TEXT_DOMAIN,
2148 "progress thread exited nonzero"));
2149 return (zfs_standard_error(hdl, error, errbuf));
2150 }
2151 }
2152
2153 char errbuf[1024];
2154 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2155 "warning: cannot send '%s'"), zhp->zfs_name);
2156
2157 zfs_close(zhp);
2158
2159 switch (error) {
2160 case 0:
2161 return (0);
2162 case EACCES:
2163 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2164 "source key must be loaded"));
2165 return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2166 case ESRCH:
2167 if (lzc_exists(zhp->zfs_name)) {
2168 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2169 "incremental source could not be found"));
2170 }
2171 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2172
2173 case EXDEV:
2174 case ENOENT:
2175 case EDQUOT:
2176 case EFBIG:
2177 case EIO:
2178 case ENOLINK:
2179 case ENOSPC:
2180 case ENOSTR:
2181 case ENXIO:
2182 case EPIPE:
2183 case ERANGE:
2184 case EFAULT:
2185 case EROFS:
2186 zfs_error_aux(hdl, strerror(errno));
2187 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2188
2189 default:
2190 return (zfs_standard_error(hdl, errno, errbuf));
2191 }
2192 } else {
2193 if (redact_book != NULL)
2194 free(redact_book);
2195 }
2196
2197 zfs_close(zhp);
2198
2199 return (error);
2200 }
2201
2202 /*
2203 * This function informs the target system that the recursive send is complete.
2204 * The record is also expected in the case of a send -p.
2205 */
2206 static int
2207 send_conclusion_record(int fd, zio_cksum_t *zc)
2208 {
2209 dmu_replay_record_t drr = { 0 };
2210 drr.drr_type = DRR_END;
2211 if (zc != NULL)
2212 drr.drr_u.drr_end.drr_checksum = *zc;
2213 if (write(fd, &drr, sizeof (drr)) == -1) {
2214 return (errno);
2215 }
2216 return (0);
2217 }
2218
2219 /*
2220 * This function is responsible for sending the records that contain the
2221 * necessary information for the target system's libzfs to be able to set the
2222 * properties of the filesystem being received, or to be able to prepare for
2223 * a recursive receive.
2224 *
2225 * The "zhp" argument is the handle of the snapshot we are sending
2226 * (the "tosnap"). The "from" argument is the short snapshot name (the part
2227 * after the @) of the incremental source.
2228 */
2229 static int
2230 send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
2231 boolean_t gather_props, boolean_t recursive, boolean_t verbose,
2232 boolean_t dryrun, boolean_t raw, boolean_t replicate, boolean_t backup,
2233 boolean_t holds, boolean_t props, boolean_t doall,
2234 nvlist_t **fssp, avl_tree_t **fsavlp)
2235 {
2236 int err = 0;
2237 char *packbuf = NULL;
2238 size_t buflen = 0;
2239 zio_cksum_t zc = { {0} };
2240 int featureflags = 0;
2241 /* name of filesystem/volume that contains snapshot we are sending */
2242 char tofs[ZFS_MAX_DATASET_NAME_LEN];
2243 /* short name of snap we are sending */
2244 char *tosnap = "";
2245
2246 char errbuf[1024];
2247 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2248 "warning: cannot send '%s'"), zhp->zfs_name);
2249 if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
2250 ZFS_PROP_VERSION) >= ZPL_VERSION_SA) {
2251 featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
2252 }
2253
2254 if (holds)
2255 featureflags |= DMU_BACKUP_FEATURE_HOLDS;
2256
2257 (void) strlcpy(tofs, zhp->zfs_name, ZFS_MAX_DATASET_NAME_LEN);
2258 char *at = strchr(tofs, '@');
2259 if (at != NULL) {
2260 *at = '\0';
2261 tosnap = at + 1;
2262 }
2263
2264 if (gather_props) {
2265 nvlist_t *hdrnv = fnvlist_alloc();
2266 nvlist_t *fss = NULL;
2267
2268 if (from != NULL)
2269 fnvlist_add_string(hdrnv, "fromsnap", from);
2270 fnvlist_add_string(hdrnv, "tosnap", tosnap);
2271 if (!recursive)
2272 fnvlist_add_boolean(hdrnv, "not_recursive");
2273
2274 if (raw) {
2275 VERIFY0(nvlist_add_boolean(hdrnv, "raw"));
2276 }
2277
2278 if ((err = gather_nvlist(zhp->zfs_hdl, tofs,
2279 from, tosnap, recursive, raw, doall, replicate, verbose,
2280 backup, holds, props, &fss, fsavlp)) != 0) {
2281 return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2282 errbuf));
2283 }
2284 fnvlist_add_nvlist(hdrnv, "fss", fss);
2285 VERIFY0(nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR,
2286 0));
2287 if (fssp != NULL) {
2288 *fssp = fss;
2289 } else {
2290 nvlist_free(fss);
2291 }
2292 nvlist_free(hdrnv);
2293 }
2294
2295 if (!dryrun) {
2296 dmu_replay_record_t drr = { 0 };
2297 /* write first begin record */
2298 drr.drr_type = DRR_BEGIN;
2299 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
2300 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
2301 drr_versioninfo, DMU_COMPOUNDSTREAM);
2302 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
2303 drr_versioninfo, featureflags);
2304 if (snprintf(drr.drr_u.drr_begin.drr_toname,
2305 sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", tofs,
2306 tosnap) >= sizeof (drr.drr_u.drr_begin.drr_toname)) {
2307 return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2308 errbuf));
2309 }
2310 drr.drr_payloadlen = buflen;
2311
2312 err = dump_record(&drr, packbuf, buflen, &zc, fd);
2313 free(packbuf);
2314 if (err != 0) {
2315 zfs_error_aux(zhp->zfs_hdl, strerror(err));
2316 return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2317 errbuf));
2318 }
2319 err = send_conclusion_record(fd, &zc);
2320 if (err != 0) {
2321 zfs_error_aux(zhp->zfs_hdl, strerror(err));
2322 return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2323 errbuf));
2324 }
2325 }
2326 return (0);
2327 }
2328
2329 /*
2330 * Generate a send stream. The "zhp" argument is the filesystem/volume
2331 * that contains the snapshot to send. The "fromsnap" argument is the
2332 * short name (the part after the '@') of the snapshot that is the
2333 * incremental source to send from (if non-NULL). The "tosnap" argument
2334 * is the short name of the snapshot to send.
2335 *
2336 * The content of the send stream is the snapshot identified by
2337 * 'tosnap'. Incremental streams are requested in two ways:
2338 * - from the snapshot identified by "fromsnap" (if non-null) or
2339 * - from the origin of the dataset identified by zhp, which must
2340 * be a clone. In this case, "fromsnap" is null and "fromorigin"
2341 * is TRUE.
2342 *
2343 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
2344 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
2345 * if "replicate" is set. If "doall" is set, dump all the intermediate
2346 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
2347 * case too. If "props" is set, send properties.
2348 */
2349 int
2350 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2351 sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2352 void *cb_arg, nvlist_t **debugnvp)
2353 {
2354 char errbuf[1024];
2355 send_dump_data_t sdd = { 0 };
2356 int err = 0;
2357 nvlist_t *fss = NULL;
2358 avl_tree_t *fsavl = NULL;
2359 static uint64_t holdseq;
2360 int spa_version;
2361 pthread_t tid = 0;
2362 int pipefd[2];
2363 dedup_arg_t dda = { 0 };
2364 int featureflags = 0;
2365 FILE *fout;
2366
2367 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2368 "cannot send '%s'"), zhp->zfs_name);
2369
2370 if (fromsnap && fromsnap[0] == '\0') {
2371 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2372 "zero-length incremental source"));
2373 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
2374 }
2375
2376 if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
2377 uint64_t version;
2378 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
2379 if (version >= ZPL_VERSION_SA) {
2380 featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
2381 }
2382 }
2383
2384 if (flags->holds)
2385 featureflags |= DMU_BACKUP_FEATURE_HOLDS;
2386
2387 /*
2388 * Start the dedup thread if this is a dedup stream. We do not bother
2389 * doing this if this a raw send of an encrypted dataset with dedup off
2390 * because normal encrypted blocks won't dedup.
2391 */
2392 if (flags->dedup && !flags->dryrun && !(flags->raw &&
2393 zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF &&
2394 zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) {
2395 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
2396 DMU_BACKUP_FEATURE_DEDUPPROPS);
2397 if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) {
2398 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
2399 return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
2400 errbuf));
2401 }
2402 dda.outputfd = outfd;
2403 dda.inputfd = pipefd[1];
2404 dda.dedup_hdl = zhp->zfs_hdl;
2405 if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
2406 (void) close(pipefd[0]);
2407 (void) close(pipefd[1]);
2408 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
2409 return (zfs_error(zhp->zfs_hdl,
2410 EZFS_THREADCREATEFAILED, errbuf));
2411 }
2412 }
2413
2414 if (flags->replicate || flags->doall || flags->props ||
2415 flags->holds || flags->backup) {
2416 char full_tosnap_name[ZFS_MAX_DATASET_NAME_LEN];
2417 if (snprintf(full_tosnap_name, sizeof (full_tosnap_name),
2418 "%s@%s", zhp->zfs_name, tosnap) >=
2419 sizeof (full_tosnap_name)) {
2420 err = EINVAL;
2421 goto stderr_out;
2422 }
2423 zfs_handle_t *tosnap = zfs_open(zhp->zfs_hdl,
2424 full_tosnap_name, ZFS_TYPE_SNAPSHOT);
2425 err = send_prelim_records(tosnap, fromsnap, outfd,
2426 flags->replicate || flags->props || flags->holds,
2427 flags->replicate, flags->verbosity > 0, flags->dryrun,
2428 flags->raw, flags->replicate, flags->backup, flags->holds,
2429 flags->props, flags->doall, &fss, &fsavl);
2430 zfs_close(tosnap);
2431 if (err != 0)
2432 goto err_out;
2433 }
2434
2435 /* dump each stream */
2436 sdd.fromsnap = fromsnap;
2437 sdd.tosnap = tosnap;
2438 if (tid != 0)
2439 sdd.outfd = pipefd[0];
2440 else
2441 sdd.outfd = outfd;
2442 sdd.replicate = flags->replicate;
2443 sdd.doall = flags->doall;
2444 sdd.fromorigin = flags->fromorigin;
2445 sdd.fss = fss;
2446 sdd.fsavl = fsavl;
2447 sdd.verbosity = flags->verbosity;
2448 sdd.parsable = flags->parsable;
2449 sdd.progress = flags->progress;
2450 sdd.dryrun = flags->dryrun;
2451 sdd.large_block = flags->largeblock;
2452 sdd.embed_data = flags->embed_data;
2453 sdd.compress = flags->compress;
2454 sdd.raw = flags->raw;
2455 sdd.holds = flags->holds;
2456 sdd.filter_cb = filter_func;
2457 sdd.filter_cb_arg = cb_arg;
2458 if (debugnvp)
2459 sdd.debugnv = *debugnvp;
2460 if (sdd.verbosity != 0 && sdd.dryrun)
2461 sdd.std_out = B_TRUE;
2462 fout = sdd.std_out ? stdout : stderr;
2463
2464 /*
2465 * Some flags require that we place user holds on the datasets that are
2466 * being sent so they don't get destroyed during the send. We can skip
2467 * this step if the pool is imported read-only since the datasets cannot
2468 * be destroyed.
2469 */
2470 if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
2471 ZPOOL_PROP_READONLY, NULL) &&
2472 zfs_spa_version(zhp, &spa_version) == 0 &&
2473 spa_version >= SPA_VERSION_USERREFS &&
2474 (flags->doall || flags->replicate)) {
2475 ++holdseq;
2476 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
2477 ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
2478 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
2479 if (sdd.cleanup_fd < 0) {
2480 err = errno;
2481 goto stderr_out;
2482 }
2483 sdd.snapholds = fnvlist_alloc();
2484 } else {
2485 sdd.cleanup_fd = -1;
2486 sdd.snapholds = NULL;
2487 }
2488
2489 if (flags->verbosity != 0 || sdd.snapholds != NULL) {
2490 /*
2491 * Do a verbose no-op dry run to get all the verbose output
2492 * or to gather snapshot hold's before generating any data,
2493 * then do a non-verbose real run to generate the streams.
2494 */
2495 sdd.dryrun = B_TRUE;
2496 err = dump_filesystems(zhp, &sdd);
2497
2498 if (err != 0)
2499 goto stderr_out;
2500
2501 if (flags->verbosity != 0) {
2502 if (flags->parsable) {
2503 (void) fprintf(fout, "size\t%llu\n",
2504 (longlong_t)sdd.size);
2505 } else {
2506 char buf[16];
2507 zfs_nicebytes(sdd.size, buf, sizeof (buf));
2508 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
2509 "total estimated size is %s\n"), buf);
2510 }
2511 }
2512
2513 /* Ensure no snaps found is treated as an error. */
2514 if (!sdd.seento) {
2515 err = ENOENT;
2516 goto err_out;
2517 }
2518
2519 /* Skip the second run if dryrun was requested. */
2520 if (flags->dryrun)
2521 goto err_out;
2522
2523 if (sdd.snapholds != NULL) {
2524 err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
2525 if (err != 0)
2526 goto stderr_out;
2527
2528 fnvlist_free(sdd.snapholds);
2529 sdd.snapholds = NULL;
2530 }
2531
2532 sdd.dryrun = B_FALSE;
2533 sdd.verbosity = 0;
2534 }
2535
2536 err = dump_filesystems(zhp, &sdd);
2537 fsavl_destroy(fsavl);
2538 nvlist_free(fss);
2539
2540 /* Ensure no snaps found is treated as an error. */
2541 if (err == 0 && !sdd.seento)
2542 err = ENOENT;
2543
2544 if (tid != 0) {
2545 if (err != 0)
2546 (void) pthread_cancel(tid);
2547 (void) close(pipefd[0]);
2548 (void) pthread_join(tid, NULL);
2549 }
2550
2551 if (sdd.cleanup_fd != -1) {
2552 VERIFY(0 == close(sdd.cleanup_fd));
2553 sdd.cleanup_fd = -1;
2554 }
2555
2556 if (!flags->dryrun && (flags->replicate || flags->doall ||
2557 flags->props || flags->backup || flags->holds)) {
2558 /*
2559 * write final end record. NB: want to do this even if
2560 * there was some error, because it might not be totally
2561 * failed.
2562 */
2563 err = send_conclusion_record(outfd, NULL);
2564 if (err != 0)
2565 return (zfs_standard_error(zhp->zfs_hdl, err, errbuf));
2566 }
2567
2568 return (err || sdd.err);
2569
2570 stderr_out:
2571 err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2572 err_out:
2573 fsavl_destroy(fsavl);
2574 nvlist_free(fss);
2575 fnvlist_free(sdd.snapholds);
2576
2577 if (sdd.cleanup_fd != -1)
2578 VERIFY(0 == close(sdd.cleanup_fd));
2579 if (tid != 0) {
2580 (void) pthread_cancel(tid);
2581 (void) close(pipefd[0]);
2582 (void) pthread_join(tid, NULL);
2583 }
2584 return (err);
2585 }
2586
2587 static int
2588 get_dedup_fd(zfs_handle_t *zhp, dedup_arg_t *dda, int fd, pthread_t *tid,
2589 int *outfd)
2590 {
2591 int pipefd[2];
2592 char errbuf[1024];
2593 int err;
2594 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2595 "warning: cannot send '%s'"), zhp->zfs_name);
2596 if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) {
2597 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
2598 return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
2599 errbuf));
2600 }
2601 dda->outputfd = fd;
2602 dda->inputfd = pipefd[1];
2603 dda->dedup_hdl = zhp->zfs_hdl;
2604 if ((err = pthread_create(tid, NULL, cksummer, dda)) != 0) {
2605 (void) close(pipefd[0]);
2606 (void) close(pipefd[1]);
2607 zfs_error_aux(zhp->zfs_hdl, strerror(err));
2608 return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED,
2609 errbuf));
2610 }
2611 *outfd = pipefd[0];
2612 return (0);
2613 }
2614
2615 zfs_handle_t *
2616 name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
2617 {
2618 char dirname[ZFS_MAX_DATASET_NAME_LEN];
2619 (void) strlcpy(dirname, snapname, ZFS_MAX_DATASET_NAME_LEN);
2620 char *c = strchr(dirname, '@');
2621 if (c != NULL)
2622 *c = '\0';
2623 return (zfs_open(hdl, dirname, ZFS_TYPE_DATASET));
2624 }
2625
2626 /*
2627 * Returns B_TRUE if earlier is an earlier snapshot in later's timeline; either
2628 * an earlier snapshot in the same filesystem, or a snapshot before later's
2629 * origin, or it's origin's origin, etc.
2630 */
2631 static boolean_t
2632 snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
2633 {
2634 boolean_t ret;
2635 uint64_t later_txg =
2636 (later->zfs_type == ZFS_TYPE_FILESYSTEM ||
2637 later->zfs_type == ZFS_TYPE_VOLUME ?
2638 UINT64_MAX : zfs_prop_get_int(later, ZFS_PROP_CREATETXG));
2639 uint64_t earlier_txg = zfs_prop_get_int(earlier, ZFS_PROP_CREATETXG);
2640
2641 if (earlier_txg >= later_txg)
2642 return (B_FALSE);
2643
2644 zfs_handle_t *earlier_dir = name_to_dir_handle(earlier->zfs_hdl,
2645 earlier->zfs_name);
2646 zfs_handle_t *later_dir = name_to_dir_handle(later->zfs_hdl,
2647 later->zfs_name);
2648
2649 if (strcmp(earlier_dir->zfs_name, later_dir->zfs_name) == 0) {
2650 zfs_close(earlier_dir);
2651 zfs_close(later_dir);
2652 return (B_TRUE);
2653 }
2654
2655 char clonename[ZFS_MAX_DATASET_NAME_LEN];
2656 if (zfs_prop_get(later_dir, ZFS_PROP_ORIGIN, clonename,
2657 ZFS_MAX_DATASET_NAME_LEN, NULL, NULL, 0, B_TRUE) != 0) {
2658 zfs_close(earlier_dir);
2659 zfs_close(later_dir);
2660 return (B_FALSE);
2661 }
2662
2663 zfs_handle_t *origin = zfs_open(earlier->zfs_hdl, clonename,
2664 ZFS_TYPE_DATASET);
2665 uint64_t origin_txg = zfs_prop_get_int(origin, ZFS_PROP_CREATETXG);
2666
2667 /*
2668 * If "earlier" is exactly the origin, then
2669 * snapshot_is_before(earlier, origin) will return false (because
2670 * they're the same).
2671 */
2672 if (origin_txg == earlier_txg &&
2673 strcmp(origin->zfs_name, earlier->zfs_name) == 0) {
2674 zfs_close(earlier_dir);
2675 zfs_close(later_dir);
2676 zfs_close(origin);
2677 return (B_TRUE);
2678 }
2679 zfs_close(earlier_dir);
2680 zfs_close(later_dir);
2681
2682 ret = snapshot_is_before(earlier, origin);
2683 zfs_close(origin);
2684 return (ret);
2685 }
2686
2687 /*
2688 * The "zhp" argument is the handle of the dataset to send (typically a
2689 * snapshot). The "from" argument is the full name of the snapshot or
2690 * bookmark that is the incremental source.
2691 */
2692 int
2693 zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
2694 const char *redactbook)
2695 {
2696 int err;
2697 libzfs_handle_t *hdl = zhp->zfs_hdl;
2698 int orig_fd = fd;
2699 pthread_t ddtid, ptid;
2700 progress_arg_t pa = { 0 };
2701 dedup_arg_t dda = { 0 };
2702
2703 char errbuf[1024];
2704 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2705 "warning: cannot send '%s'"), zhp->zfs_name);
2706
2707 if (from != NULL && strchr(from, '@')) {
2708 zfs_handle_t *from_zhp = zfs_open(hdl, from,
2709 ZFS_TYPE_DATASET);
2710 if (!snapshot_is_before(from_zhp, zhp)) {
2711 zfs_close(from_zhp);
2712 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2713 "not an earlier snapshot from the same fs"));
2714 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2715 }
2716 zfs_close(from_zhp);
2717 }
2718
2719 /*
2720 * Send fs properties
2721 */
2722 if (flags->props || flags->holds || flags->backup) {
2723 /*
2724 * Note: the header generated by send_prelim_records()
2725 * assumes that the incremental source is in the same
2726 * filesystem/volume as the target (which is a requirement
2727 * when doing "zfs send -R"). But that isn't always the
2728 * case here (e.g. send from snap in origin, or send from
2729 * bookmark). We pass from=NULL, which will omit this
2730 * information from the prelim records; it isn't used
2731 * when receiving this type of stream.
2732 */
2733 err = send_prelim_records(zhp, NULL, fd, B_TRUE, B_FALSE,
2734 flags->verbosity > 0, flags->dryrun, flags->raw,
2735 flags->replicate, flags->backup, flags->holds,
2736 flags->props, flags->doall, NULL, NULL);
2737 if (err != 0)
2738 return (err);
2739 }
2740
2741 /*
2742 * Perform size estimate if verbose was specified.
2743 */
2744 if (flags->verbosity != 0) {
2745 err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook,
2746 errbuf);
2747 if (err != 0)
2748 return (err);
2749 }
2750
2751 if (flags->dryrun)
2752 return (0);
2753
2754 /*
2755 * If deduplication is requested, spawn a thread that will deduplicate
2756 * the data coming out of the kernel.
2757 */
2758 if (flags->dedup) {
2759 err = get_dedup_fd(zhp, &dda, fd, &ddtid, &fd);
2760 if (err != 0)
2761 return (err);
2762 }
2763
2764 /*
2765 * If progress reporting is requested, spawn a new thread to poll
2766 * ZFS_IOC_SEND_PROGRESS at a regular interval.
2767 */
2768 if (flags->progress) {
2769 pa.pa_zhp = zhp;
2770 pa.pa_fd = fd;
2771 pa.pa_parsable = flags->parsable;
2772 pa.pa_estimate = B_FALSE;
2773 pa.pa_verbosity = flags->verbosity;
2774
2775 err = pthread_create(&ptid, NULL,
2776 send_progress_thread, &pa);
2777 if (err != 0) {
2778 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
2779 if (flags->dedup) {
2780 (void) pthread_cancel(ddtid);
2781 (void) close(fd);
2782 (void) pthread_join(ddtid, NULL);
2783 }
2784 return (zfs_error(zhp->zfs_hdl,
2785 EZFS_THREADCREATEFAILED, errbuf));
2786 }
2787 }
2788
2789 err = lzc_send_redacted(zhp->zfs_name, from, fd,
2790 lzc_flags_from_sendflags(flags), redactbook);
2791
2792 if (flags->progress) {
2793 void *status = NULL;
2794 if (err != 0)
2795 (void) pthread_cancel(ptid);
2796 (void) pthread_join(ptid, &status);
2797 int error = (int)(uintptr_t)status;
2798 if (error != 0 && status != PTHREAD_CANCELED) {
2799 char errbuf[1024];
2800 (void) snprintf(errbuf, sizeof (errbuf),
2801 dgettext(TEXT_DOMAIN, "progress thread exited "
2802 "nonzero"));
2803 return (zfs_standard_error(hdl, error, errbuf));
2804 }
2805 }
2806 if (flags->dedup) {
2807 if (err != 0)
2808 (void) pthread_cancel(ddtid);
2809 (void) close(fd);
2810 (void) pthread_join(ddtid, NULL);
2811 }
2812
2813 if (flags->props || flags->holds || flags->backup) {
2814 /* Write the final end record. */
2815 err = send_conclusion_record(orig_fd, NULL);
2816 if (err != 0)
2817 return (zfs_standard_error(hdl, err, errbuf));
2818 }
2819 if (err != 0) {
2820 switch (errno) {
2821 case EXDEV:
2822 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2823 "not an earlier snapshot from the same fs"));
2824 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2825
2826 case ENOENT:
2827 case ESRCH:
2828 if (lzc_exists(zhp->zfs_name)) {
2829 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2830 "incremental source (%s) does not exist"),
2831 from);
2832 }
2833 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2834
2835 case EACCES:
2836 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2837 "dataset key must be loaded"));
2838 return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2839
2840 case EBUSY:
2841 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2842 "target is busy; if a filesystem, "
2843 "it must not be mounted"));
2844 return (zfs_error(hdl, EZFS_BUSY, errbuf));
2845
2846 case EDQUOT:
2847 case EFBIG:
2848 case EIO:
2849 case ENOLINK:
2850 case ENOSPC:
2851 case ENOSTR:
2852 case ENXIO:
2853 case EPIPE:
2854 case ERANGE:
2855 case EFAULT:
2856 case EROFS:
2857 zfs_error_aux(hdl, strerror(errno));
2858 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2859
2860 default:
2861 return (zfs_standard_error(hdl, errno, errbuf));
2862 }
2863 }
2864 return (err != 0);
2865 }
2866
2867 /*
2868 * Routines specific to "zfs recv"
2869 */
2870
2871 static int
2872 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2873 boolean_t byteswap, zio_cksum_t *zc)
2874 {
2875 char *cp = buf;
2876 int rv;
2877 int len = ilen;
2878
2879 assert(ilen <= SPA_MAXBLOCKSIZE);
2880
2881 do {
2882 rv = read(fd, cp, len);
2883 cp += rv;
2884 len -= rv;
2885 } while (rv > 0);
2886
2887 if (rv < 0 || len != 0) {
2888 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2889 "failed to read from stream"));
2890 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2891 "cannot receive")));
2892 }
2893
2894 if (zc) {
2895 if (byteswap)
2896 fletcher_4_incremental_byteswap(buf, ilen, zc);
2897 else
2898 fletcher_4_incremental_native(buf, ilen, zc);
2899 }
2900 return (0);
2901 }
2902
2903 static int
2904 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2905 boolean_t byteswap, zio_cksum_t *zc)
2906 {
2907 char *buf;
2908 int err;
2909
2910 buf = zfs_alloc(hdl, len);
2911 if (buf == NULL)
2912 return (ENOMEM);
2913
2914 err = recv_read(hdl, fd, buf, len, byteswap, zc);
2915 if (err != 0) {
2916 free(buf);
2917 return (err);
2918 }
2919
2920 err = nvlist_unpack(buf, len, nvp, 0);
2921 free(buf);
2922 if (err != 0) {
2923 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2924 "stream (malformed nvlist)"));
2925 return (EINVAL);
2926 }
2927 return (0);
2928 }
2929
2930 /*
2931 * Returns the grand origin (origin of origin of origin...) of a given handle.
2932 * If this dataset is not a clone, it simply returns a copy of the original
2933 * handle.
2934 */
2935 static zfs_handle_t *
2936 recv_open_grand_origin(zfs_handle_t *zhp)
2937 {
2938 char origin[ZFS_MAX_DATASET_NAME_LEN];
2939 zprop_source_t src;
2940 zfs_handle_t *ozhp = zfs_handle_dup(zhp);
2941
2942 while (ozhp != NULL) {
2943 if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
2944 sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
2945 break;
2946
2947 (void) zfs_close(ozhp);
2948 ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
2949 }
2950
2951 return (ozhp);
2952 }
2953
2954 static int
2955 recv_rename_impl(zfs_handle_t *zhp, const char *name, const char *newname)
2956 {
2957 int err;
2958 zfs_handle_t *ozhp = NULL;
2959
2960 /*
2961 * Attempt to rename the dataset. If it fails with EACCES we have
2962 * attempted to rename the dataset outside of its encryption root.
2963 * Force the dataset to become an encryption root and try again.
2964 */
2965 err = lzc_rename(name, newname);
2966 if (err == EACCES) {
2967 ozhp = recv_open_grand_origin(zhp);
2968 if (ozhp == NULL) {
2969 err = ENOENT;
2970 goto out;
2971 }
2972
2973 err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2974 NULL, NULL, 0);
2975 if (err != 0)
2976 goto out;
2977
2978 err = lzc_rename(name, newname);
2979 }
2980
2981 out:
2982 if (ozhp != NULL)
2983 zfs_close(ozhp);
2984 return (err);
2985 }
2986
2987 static int
2988 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2989 int baselen, char *newname, recvflags_t *flags)
2990 {
2991 static int seq;
2992 int err;
2993 prop_changelist_t *clp = NULL;
2994 zfs_handle_t *zhp = NULL;
2995
2996 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2997 if (zhp == NULL) {
2998 err = -1;
2999 goto out;
3000 }
3001 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3002 flags->force ? MS_FORCE : 0);
3003 if (clp == NULL) {
3004 err = -1;
3005 goto out;
3006 }
3007 err = changelist_prefix(clp);
3008 if (err)
3009 goto out;
3010
3011 if (tryname) {
3012 (void) strcpy(newname, tryname);
3013 if (flags->verbose) {
3014 (void) printf("attempting rename %s to %s\n",
3015 name, newname);
3016 }
3017 err = recv_rename_impl(zhp, name, newname);
3018 if (err == 0)
3019 changelist_rename(clp, name, tryname);
3020 } else {
3021 err = ENOENT;
3022 }
3023
3024 if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
3025 seq++;
3026
3027 (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
3028 "%.*srecv-%u-%u", baselen, name, getpid(), seq);
3029
3030 if (flags->verbose) {
3031 (void) printf("failed - trying rename %s to %s\n",
3032 name, newname);
3033 }
3034 err = recv_rename_impl(zhp, name, newname);
3035 if (err == 0)
3036 changelist_rename(clp, name, newname);
3037 if (err && flags->verbose) {
3038 (void) printf("failed (%u) - "
3039 "will try again on next pass\n", errno);
3040 }
3041 err = EAGAIN;
3042 } else if (flags->verbose) {
3043 if (err == 0)
3044 (void) printf("success\n");
3045 else
3046 (void) printf("failed (%u)\n", errno);
3047 }
3048
3049 (void) changelist_postfix(clp);
3050
3051 out:
3052 if (clp != NULL)
3053 changelist_free(clp);
3054 if (zhp != NULL)
3055 zfs_close(zhp);
3056
3057 return (err);
3058 }
3059
3060 static int
3061 recv_promote(libzfs_handle_t *hdl, const char *fsname,
3062 const char *origin_fsname, recvflags_t *flags)
3063 {
3064 int err;
3065 zfs_cmd_t zc = {"\0"};
3066 zfs_handle_t *zhp = NULL, *ozhp = NULL;
3067
3068 if (flags->verbose)
3069 (void) printf("promoting %s\n", fsname);
3070
3071 (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
3072 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
3073
3074 /*
3075 * Attempt to promote the dataset. If it fails with EACCES the
3076 * promotion would cause this dataset to leave its encryption root.
3077 * Force the origin to become an encryption root and try again.
3078 */
3079 err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3080 if (err == EACCES) {
3081 zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3082 if (zhp == NULL) {
3083 err = -1;
3084 goto out;
3085 }
3086
3087 ozhp = recv_open_grand_origin(zhp);
3088 if (ozhp == NULL) {
3089 err = -1;
3090 goto out;
3091 }
3092
3093 err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
3094 NULL, NULL, 0);
3095 if (err != 0)
3096 goto out;
3097
3098 err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3099 }
3100
3101 out:
3102 if (zhp != NULL)
3103 zfs_close(zhp);
3104 if (ozhp != NULL)
3105 zfs_close(ozhp);
3106
3107 return (err);
3108 }
3109
3110 static int
3111 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
3112 char *newname, recvflags_t *flags)
3113 {
3114 int err = 0;
3115 prop_changelist_t *clp;
3116 zfs_handle_t *zhp;
3117 boolean_t defer = B_FALSE;
3118 int spa_version;
3119
3120 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
3121 if (zhp == NULL)
3122 return (-1);
3123 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3124 flags->force ? MS_FORCE : 0);
3125 if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
3126 zfs_spa_version(zhp, &spa_version) == 0 &&
3127 spa_version >= SPA_VERSION_USERREFS)
3128 defer = B_TRUE;
3129 zfs_close(zhp);
3130 if (clp == NULL)
3131 return (-1);
3132 err = changelist_prefix(clp);
3133 if (err)
3134 return (err);
3135
3136 if (flags->verbose)
3137 (void) printf("attempting destroy %s\n", name);
3138 if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
3139 nvlist_t *nv = fnvlist_alloc();
3140 fnvlist_add_boolean(nv, name);
3141 err = lzc_destroy_snaps(nv, defer, NULL);
3142 fnvlist_free(nv);
3143 } else {
3144 err = lzc_destroy(name);
3145 }
3146 if (err == 0) {
3147 if (flags->verbose)
3148 (void) printf("success\n");
3149 changelist_remove(clp, name);
3150 }
3151
3152 (void) changelist_postfix(clp);
3153 changelist_free(clp);
3154
3155 /*
3156 * Deferred destroy might destroy the snapshot or only mark it to be
3157 * destroyed later, and it returns success in either case.
3158 */
3159 if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
3160 ZFS_TYPE_SNAPSHOT))) {
3161 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
3162 }
3163
3164 return (err);
3165 }
3166
3167 typedef struct guid_to_name_data {
3168 uint64_t guid;
3169 boolean_t bookmark_ok;
3170 char *name;
3171 char *skip;
3172 uint64_t *redact_snap_guids;
3173 uint64_t num_redact_snaps;
3174 } guid_to_name_data_t;
3175
3176 boolean_t
3177 redact_snaps_match(zfs_handle_t *zhp, guid_to_name_data_t *gtnd)
3178 {
3179 uint64_t *bmark_snaps;
3180 uint_t bmark_num_snaps;
3181 nvlist_t *nvl;
3182 if (zhp->zfs_type != ZFS_TYPE_BOOKMARK)
3183 return (B_FALSE);
3184
3185 nvl = fnvlist_lookup_nvlist(zhp->zfs_props,
3186 zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
3187 bmark_snaps = fnvlist_lookup_uint64_array(nvl, ZPROP_VALUE,
3188 &bmark_num_snaps);
3189 if (bmark_num_snaps != gtnd->num_redact_snaps)
3190 return (B_FALSE);
3191 int i = 0;
3192 for (; i < bmark_num_snaps; i++) {
3193 int j = 0;
3194 for (; j < bmark_num_snaps; j++) {
3195 if (bmark_snaps[i] == gtnd->redact_snap_guids[j])
3196 break;
3197 }
3198 if (j == bmark_num_snaps)
3199 break;
3200 }
3201 return (i == bmark_num_snaps);
3202 }
3203
3204 static int
3205 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
3206 {
3207 guid_to_name_data_t *gtnd = arg;
3208 const char *slash;
3209 int err;
3210
3211 if (gtnd->skip != NULL &&
3212 (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
3213 strcmp(slash + 1, gtnd->skip) == 0) {
3214 zfs_close(zhp);
3215 return (0);
3216 }
3217
3218 if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid &&
3219 (gtnd->num_redact_snaps == -1 || redact_snaps_match(zhp, gtnd))) {
3220 (void) strcpy(gtnd->name, zhp->zfs_name);
3221 zfs_close(zhp);
3222 return (EEXIST);
3223 }
3224
3225 err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
3226 if (err != EEXIST && gtnd->bookmark_ok)
3227 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
3228 zfs_close(zhp);
3229 return (err);
3230 }
3231
3232 /*
3233 * Attempt to find the local dataset associated with this guid. In the case of
3234 * multiple matches, we attempt to find the "best" match by searching
3235 * progressively larger portions of the hierarchy. This allows one to send a
3236 * tree of datasets individually and guarantee that we will find the source
3237 * guid within that hierarchy, even if there are multiple matches elsewhere.
3238 *
3239 * If num_redact_snaps is not -1, we attempt to find a redaction bookmark with
3240 * the specified number of redaction snapshots. If num_redact_snaps isn't 0 or
3241 * -1, then redact_snap_guids will be an array of the guids of the snapshots the
3242 * redaction bookmark was created with. If num_redact_snaps is -1, then we will
3243 * attempt to find a snapshot or bookmark (if bookmark_ok is passed) with the
3244 * given guid. Note that a redaction bookmark can be returned if
3245 * num_redact_snaps == -1.
3246 */
3247 static int
3248 guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
3249 uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
3250 uint64_t num_redact_snaps, char *name)
3251 {
3252 char pname[ZFS_MAX_DATASET_NAME_LEN];
3253 guid_to_name_data_t gtnd;
3254
3255 gtnd.guid = guid;
3256 gtnd.bookmark_ok = bookmark_ok;
3257 gtnd.name = name;
3258 gtnd.skip = NULL;
3259 gtnd.redact_snap_guids = redact_snap_guids;
3260 gtnd.num_redact_snaps = num_redact_snaps;
3261
3262 /*
3263 * Search progressively larger portions of the hierarchy, starting
3264 * with the filesystem specified by 'parent'. This will
3265 * select the "most local" version of the origin snapshot in the case
3266 * that there are multiple matching snapshots in the system.
3267 */
3268 (void) strlcpy(pname, parent, sizeof (pname));
3269 char *cp = strrchr(pname, '@');
3270 if (cp == NULL)
3271 cp = strchr(pname, '\0');
3272 for (; cp != NULL; cp = strrchr(pname, '/')) {
3273 /* Chop off the last component and open the parent */
3274 *cp = '\0';
3275 zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
3276
3277 if (zhp == NULL)
3278 continue;
3279 int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
3280 if (err != EEXIST)
3281 err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
3282 if (err != EEXIST && bookmark_ok)
3283 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
3284 zfs_close(zhp);
3285 if (err == EEXIST)
3286 return (0);
3287
3288 /*
3289 * Remember the last portion of the dataset so we skip it next
3290 * time through (as we've already searched that portion of the
3291 * hierarchy).
3292 */
3293 gtnd.skip = strrchr(pname, '/') + 1;
3294 }
3295
3296 return (ENOENT);
3297 }
3298
3299 static int
3300 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
3301 boolean_t bookmark_ok, char *name)
3302 {
3303 return (guid_to_name_redact_snaps(hdl, parent, guid, bookmark_ok, NULL,
3304 -1, name));
3305 }
3306
3307 /*
3308 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
3309 * guid1 is after guid2.
3310 */
3311 static int
3312 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
3313 uint64_t guid1, uint64_t guid2)
3314 {
3315 nvlist_t *nvfs;
3316 char *fsname = NULL, *snapname = NULL;
3317 char buf[ZFS_MAX_DATASET_NAME_LEN];
3318 int rv;
3319 zfs_handle_t *guid1hdl, *guid2hdl;
3320 uint64_t create1, create2;
3321
3322 if (guid2 == 0)
3323 return (0);
3324 if (guid1 == 0)
3325 return (1);
3326
3327 nvfs = fsavl_find(avl, guid1, &snapname);
3328 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
3329 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3330 guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3331 if (guid1hdl == NULL)
3332 return (-1);
3333
3334 nvfs = fsavl_find(avl, guid2, &snapname);
3335 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
3336 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3337 guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3338 if (guid2hdl == NULL) {
3339 zfs_close(guid1hdl);
3340 return (-1);
3341 }
3342
3343 create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
3344 create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
3345
3346 if (create1 < create2)
3347 rv = -1;
3348 else if (create1 > create2)
3349 rv = +1;
3350 else
3351 rv = 0;
3352
3353 zfs_close(guid1hdl);
3354 zfs_close(guid2hdl);
3355
3356 return (rv);
3357 }
3358
3359 /*
3360 * This function reestablishes the hierarchy of encryption roots after a
3361 * recursive incremental receive has completed. This must be done after the
3362 * second call to recv_incremental_replication() has renamed and promoted all
3363 * sent datasets to their final locations in the dataset hierarchy.
3364 */
3365 static int
3366 recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *destname,
3367 nvlist_t *stream_nv, avl_tree_t *stream_avl)
3368 {
3369 int err;
3370 nvpair_t *fselem = NULL;
3371 nvlist_t *stream_fss;
3372 char *cp;
3373 char top_zfs[ZFS_MAX_DATASET_NAME_LEN];
3374
3375 (void) strcpy(top_zfs, destname);
3376 cp = strrchr(top_zfs, '@');
3377 if (cp != NULL)
3378 *cp = '\0';
3379
3380 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss));
3381
3382 while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
3383 zfs_handle_t *zhp = NULL;
3384 uint64_t crypt;
3385 nvlist_t *snaps, *props, *stream_nvfs = NULL;
3386 nvpair_t *snapel = NULL;
3387 boolean_t is_encroot, is_clone, stream_encroot;
3388 char *cp;
3389 char *stream_keylocation = NULL;
3390 char keylocation[MAXNAMELEN];
3391 char fsname[ZFS_MAX_DATASET_NAME_LEN];
3392
3393 keylocation[0] = '\0';
3394 VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs));
3395 VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps));
3396 VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props));
3397 stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
3398
3399 /* find a snapshot from the stream that exists locally */
3400 err = ENOENT;
3401 while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
3402 uint64_t guid;
3403
3404 VERIFY(0 == nvpair_value_uint64(snapel, &guid));
3405 err = guid_to_name(hdl, destname, guid, B_FALSE,
3406 fsname);
3407 if (err == 0)
3408 break;
3409 }
3410
3411 if (err != 0)
3412 continue;
3413
3414 cp = strchr(fsname, '@');
3415 if (cp != NULL)
3416 *cp = '\0';
3417
3418 zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3419 if (zhp == NULL) {
3420 err = ENOENT;
3421 goto error;
3422 }
3423
3424 crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
3425 is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
3426 (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
3427
3428 /* we don't need to do anything for unencrypted filesystems */
3429 if (crypt == ZIO_CRYPT_OFF) {
3430 zfs_close(zhp);
3431 continue;
3432 }
3433
3434 /*
3435 * If the dataset is flagged as an encryption root, was not
3436 * received as a clone and is not currently an encryption root,
3437 * force it to become one. Fixup the keylocation if necessary.
3438 */
3439 if (stream_encroot) {
3440 if (!is_clone && !is_encroot) {
3441 err = lzc_change_key(fsname,
3442 DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
3443 if (err != 0) {
3444 zfs_close(zhp);
3445 goto error;
3446 }
3447 }
3448
3449 VERIFY(0 == nvlist_lookup_string(props,
3450 zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
3451 &stream_keylocation));
3452
3453 /*
3454 * Refresh the properties in case the call to
3455 * lzc_change_key() changed the value.
3456 */
3457 zfs_refresh_properties(zhp);
3458 err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
3459 keylocation, sizeof (keylocation), NULL, NULL,
3460 0, B_TRUE);
3461 if (err != 0) {
3462 zfs_close(zhp);
3463 goto error;
3464 }
3465
3466 if (strcmp(keylocation, stream_keylocation) != 0) {
3467 err = zfs_prop_set(zhp,
3468 zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
3469 stream_keylocation);
3470 if (err != 0) {
3471 zfs_close(zhp);
3472 goto error;
3473 }
3474 }
3475 }
3476
3477 /*
3478 * If the dataset is not flagged as an encryption root and is
3479 * currently an encryption root, force it to inherit from its
3480 * parent. The root of a raw send should never be
3481 * force-inherited.
3482 */
3483 if (!stream_encroot && is_encroot &&
3484 strcmp(top_zfs, fsname) != 0) {
3485 err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
3486 NULL, NULL, 0);
3487 if (err != 0) {
3488 zfs_close(zhp);
3489 goto error;
3490 }
3491 }
3492
3493 zfs_close(zhp);
3494 }
3495
3496 return (0);
3497
3498 error:
3499 return (err);
3500 }
3501
3502 static int
3503 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
3504 recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3505 nvlist_t *renamed)
3506 {
3507 nvlist_t *local_nv, *deleted = NULL;
3508 avl_tree_t *local_avl;
3509 nvpair_t *fselem, *nextfselem;
3510 char *fromsnap;
3511 char newname[ZFS_MAX_DATASET_NAME_LEN];
3512 char guidname[32];
3513 int error;
3514 boolean_t needagain, progress, recursive;
3515 char *s1, *s2;
3516
3517 VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
3518
3519 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3520 ENOENT);
3521
3522 if (flags->dryrun)
3523 return (0);
3524
3525 again:
3526 needagain = progress = B_FALSE;
3527
3528 VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
3529
3530 if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
3531 recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE,
3532 B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0)
3533 return (error);
3534
3535 /*
3536 * Process deletes and renames
3537 */
3538 for (fselem = nvlist_next_nvpair(local_nv, NULL);
3539 fselem; fselem = nextfselem) {
3540 nvlist_t *nvfs, *snaps;
3541 nvlist_t *stream_nvfs = NULL;
3542 nvpair_t *snapelem, *nextsnapelem;
3543 uint64_t fromguid = 0;
3544 uint64_t originguid = 0;
3545 uint64_t stream_originguid = 0;
3546 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
3547 char *fsname, *stream_fsname;
3548
3549 nextfselem = nvlist_next_nvpair(local_nv, fselem);
3550
3551 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
3552 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
3553 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
3554 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
3555 &parent_fromsnap_guid));
3556 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
3557
3558 /*
3559 * First find the stream's fs, so we can check for
3560 * a different origin (due to "zfs promote")
3561 */
3562 for (snapelem = nvlist_next_nvpair(snaps, NULL);
3563 snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
3564 uint64_t thisguid;
3565
3566 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
3567 stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
3568
3569 if (stream_nvfs != NULL)
3570 break;
3571 }
3572
3573 /* check for promote */
3574 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
3575 &stream_originguid);
3576 if (stream_nvfs && originguid != stream_originguid) {
3577 switch (created_before(hdl, local_avl,
3578 stream_originguid, originguid)) {
3579 case 1: {
3580 /* promote it! */
3581 nvlist_t *origin_nvfs;
3582 char *origin_fsname;
3583
3584 origin_nvfs = fsavl_find(local_avl, originguid,
3585 NULL);
3586 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
3587 "name", &origin_fsname));
3588 error = recv_promote(hdl, fsname, origin_fsname,
3589 flags);
3590 if (error == 0)
3591 progress = B_TRUE;
3592 break;
3593 }
3594 default:
3595 break;
3596 case -1:
3597 fsavl_destroy(local_avl);
3598 nvlist_free(local_nv);
3599 return (-1);
3600 }
3601 /*
3602 * We had/have the wrong origin, therefore our
3603 * list of snapshots is wrong. Need to handle
3604 * them on the next pass.
3605 */
3606 needagain = B_TRUE;
3607 continue;
3608 }
3609
3610 for (snapelem = nvlist_next_nvpair(snaps, NULL);
3611 snapelem; snapelem = nextsnapelem) {
3612 uint64_t thisguid;
3613 char *stream_snapname;
3614 nvlist_t *found, *props;
3615
3616 nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
3617
3618 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
3619 found = fsavl_find(stream_avl, thisguid,
3620 &stream_snapname);
3621
3622 /* check for delete */
3623 if (found == NULL) {
3624 char name[ZFS_MAX_DATASET_NAME_LEN];
3625
3626 if (!flags->force)
3627 continue;
3628
3629 (void) snprintf(name, sizeof (name), "%s@%s",
3630 fsname, nvpair_name(snapelem));
3631
3632 error = recv_destroy(hdl, name,
3633 strlen(fsname)+1, newname, flags);
3634 if (error)
3635 needagain = B_TRUE;
3636 else
3637 progress = B_TRUE;
3638 sprintf(guidname, "%llu",
3639 (u_longlong_t)thisguid);
3640 nvlist_add_boolean(deleted, guidname);
3641 continue;
3642 }
3643
3644 stream_nvfs = found;
3645
3646 if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
3647 &props) && 0 == nvlist_lookup_nvlist(props,
3648 stream_snapname, &props)) {
3649 zfs_cmd_t zc = {"\0"};
3650
3651 zc.zc_cookie = B_TRUE; /* received */
3652 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
3653 "%s@%s", fsname, nvpair_name(snapelem));
3654 if (zcmd_write_src_nvlist(hdl, &zc,
3655 props) == 0) {
3656 (void) zfs_ioctl(hdl,
3657 ZFS_IOC_SET_PROP, &zc);
3658 zcmd_free_nvlists(&zc);
3659 }
3660 }
3661
3662 /* check for different snapname */
3663 if (strcmp(nvpair_name(snapelem),
3664 stream_snapname) != 0) {
3665 char name[ZFS_MAX_DATASET_NAME_LEN];
3666 char tryname[ZFS_MAX_DATASET_NAME_LEN];
3667
3668 (void) snprintf(name, sizeof (name), "%s@%s",
3669 fsname, nvpair_name(snapelem));
3670 (void) snprintf(tryname, sizeof (name), "%s@%s",
3671 fsname, stream_snapname);
3672
3673 error = recv_rename(hdl, name, tryname,
3674 strlen(fsname)+1, newname, flags);
3675 if (error)
3676 needagain = B_TRUE;
3677 else
3678 progress = B_TRUE;
3679 }
3680
3681 if (strcmp(stream_snapname, fromsnap) == 0)
3682 fromguid = thisguid;
3683 }
3684
3685 /* check for delete */
3686 if (stream_nvfs == NULL) {
3687 if (!flags->force)
3688 continue;
3689
3690 error = recv_destroy(hdl, fsname, strlen(tofs)+1,
3691 newname, flags);
3692 if (error)
3693 needagain = B_TRUE;
3694 else
3695 progress = B_TRUE;
3696 sprintf(guidname, "%llu",
3697 (u_longlong_t)parent_fromsnap_guid);
3698 nvlist_add_boolean(deleted, guidname);
3699 continue;
3700 }
3701
3702 if (fromguid == 0) {
3703 if (flags->verbose) {
3704 (void) printf("local fs %s does not have "
3705 "fromsnap (%s in stream); must have "
3706 "been deleted locally; ignoring\n",
3707 fsname, fromsnap);
3708 }
3709 continue;
3710 }
3711
3712 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
3713 "name", &stream_fsname));
3714 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
3715 "parentfromsnap", &stream_parent_fromsnap_guid));
3716
3717 s1 = strrchr(fsname, '/');
3718 s2 = strrchr(stream_fsname, '/');
3719
3720 /*
3721 * Check if we're going to rename based on parent guid change
3722 * and the current parent guid was also deleted. If it was then
3723 * rename will fail and is likely unneeded, so avoid this and
3724 * force an early retry to determine the new
3725 * parent_fromsnap_guid.
3726 */
3727 if (stream_parent_fromsnap_guid != 0 &&
3728 parent_fromsnap_guid != 0 &&
3729 stream_parent_fromsnap_guid != parent_fromsnap_guid) {
3730 sprintf(guidname, "%llu",
3731 (u_longlong_t)parent_fromsnap_guid);
3732 if (nvlist_exists(deleted, guidname)) {
3733 progress = B_TRUE;
3734 needagain = B_TRUE;
3735 goto doagain;
3736 }
3737 }
3738
3739 /*
3740 * Check for rename. If the exact receive path is specified, it
3741 * does not count as a rename, but we still need to check the
3742 * datasets beneath it.
3743 */
3744 if ((stream_parent_fromsnap_guid != 0 &&
3745 parent_fromsnap_guid != 0 &&
3746 stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
3747 ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
3748 (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
3749 nvlist_t *parent;
3750 char tryname[ZFS_MAX_DATASET_NAME_LEN];
3751
3752 parent = fsavl_find(local_avl,
3753 stream_parent_fromsnap_guid, NULL);
3754 /*
3755 * NB: parent might not be found if we used the
3756 * tosnap for stream_parent_fromsnap_guid,
3757 * because the parent is a newly-created fs;
3758 * we'll be able to rename it after we recv the
3759 * new fs.
3760 */
3761 if (parent != NULL) {
3762 char *pname;
3763
3764 VERIFY(0 == nvlist_lookup_string(parent, "name",
3765 &pname));
3766 (void) snprintf(tryname, sizeof (tryname),
3767 "%s%s", pname, strrchr(stream_fsname, '/'));
3768 } else {
3769 tryname[0] = '\0';
3770 if (flags->verbose) {
3771 (void) printf("local fs %s new parent "
3772 "not found\n", fsname);
3773 }
3774 }
3775
3776 newname[0] = '\0';
3777
3778 error = recv_rename(hdl, fsname, tryname,
3779 strlen(tofs)+1, newname, flags);
3780
3781 if (renamed != NULL && newname[0] != '\0') {
3782 VERIFY(0 == nvlist_add_boolean(renamed,
3783 newname));
3784 }
3785
3786 if (error)
3787 needagain = B_TRUE;
3788 else
3789 progress = B_TRUE;
3790 }
3791 }
3792
3793 doagain:
3794 fsavl_destroy(local_avl);
3795 nvlist_free(local_nv);
3796 nvlist_free(deleted);
3797
3798 if (needagain && progress) {
3799 /* do another pass to fix up temporary names */
3800 if (flags->verbose)
3801 (void) printf("another pass:\n");
3802 goto again;
3803 }
3804
3805 return (needagain || error != 0);
3806 }
3807
3808 static int
3809 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
3810 recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
3811 char **top_zfs, int cleanup_fd, uint64_t *action_handlep,
3812 nvlist_t *cmdprops)
3813 {
3814 nvlist_t *stream_nv = NULL;
3815 avl_tree_t *stream_avl = NULL;
3816 char *fromsnap = NULL;
3817 char *sendsnap = NULL;
3818 char *cp;
3819 char tofs[ZFS_MAX_DATASET_NAME_LEN];
3820 char sendfs[ZFS_MAX_DATASET_NAME_LEN];
3821 char errbuf[1024];
3822 dmu_replay_record_t drre;
3823 int error;
3824 boolean_t anyerr = B_FALSE;
3825 boolean_t softerr = B_FALSE;
3826 boolean_t recursive, raw;
3827
3828 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3829 "cannot receive"));
3830
3831 assert(drr->drr_type == DRR_BEGIN);
3832 assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
3833 assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
3834 DMU_COMPOUNDSTREAM);
3835
3836 /*
3837 * Read in the nvlist from the stream.
3838 */
3839 if (drr->drr_payloadlen != 0) {
3840 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
3841 &stream_nv, flags->byteswap, zc);
3842 if (error) {
3843 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3844 goto out;
3845 }
3846 }
3847
3848 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3849 ENOENT);
3850 raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
3851
3852 if (recursive && strchr(destname, '@')) {
3853 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3854 "cannot specify snapshot name for multi-snapshot stream"));
3855 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3856 goto out;
3857 }
3858
3859 /*
3860 * Read in the end record and verify checksum.
3861 */
3862 if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
3863 flags->byteswap, NULL)))
3864 goto out;
3865 if (flags->byteswap) {
3866 drre.drr_type = BSWAP_32(drre.drr_type);
3867 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
3868 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
3869 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
3870 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
3871 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
3872 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
3873 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
3874 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
3875 }
3876 if (drre.drr_type != DRR_END) {
3877 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3878 goto out;
3879 }
3880 if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
3881 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3882 "incorrect header checksum"));
3883 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3884 goto out;
3885 }
3886
3887 (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
3888
3889 if (drr->drr_payloadlen != 0) {
3890 nvlist_t *stream_fss;
3891
3892 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
3893 &stream_fss));
3894 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
3895 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3896 "couldn't allocate avl tree"));
3897 error = zfs_error(hdl, EZFS_NOMEM, errbuf);
3898 goto out;
3899 }
3900
3901 if (fromsnap != NULL && recursive) {
3902 nvlist_t *renamed = NULL;
3903 nvpair_t *pair = NULL;
3904
3905 (void) strlcpy(tofs, destname, sizeof (tofs));
3906 if (flags->isprefix) {
3907 struct drr_begin *drrb = &drr->drr_u.drr_begin;
3908 int i;
3909
3910 if (flags->istail) {
3911 cp = strrchr(drrb->drr_toname, '/');
3912 if (cp == NULL) {
3913 (void) strlcat(tofs, "/",
3914 sizeof (tofs));
3915 i = 0;
3916 } else {
3917 i = (cp - drrb->drr_toname);
3918 }
3919 } else {
3920 i = strcspn(drrb->drr_toname, "/@");
3921 }
3922 /* zfs_receive_one() will create_parents() */
3923 (void) strlcat(tofs, &drrb->drr_toname[i],
3924 sizeof (tofs));
3925 *strchr(tofs, '@') = '\0';
3926 }
3927
3928 if (!flags->dryrun && !flags->nomount) {
3929 VERIFY(0 == nvlist_alloc(&renamed,
3930 NV_UNIQUE_NAME, 0));
3931 }
3932
3933 softerr = recv_incremental_replication(hdl, tofs, flags,
3934 stream_nv, stream_avl, renamed);
3935
3936 /* Unmount renamed filesystems before receiving. */
3937 while ((pair = nvlist_next_nvpair(renamed,
3938 pair)) != NULL) {
3939 zfs_handle_t *zhp;
3940 prop_changelist_t *clp = NULL;
3941
3942 zhp = zfs_open(hdl, nvpair_name(pair),
3943 ZFS_TYPE_FILESYSTEM);
3944 if (zhp != NULL) {
3945 clp = changelist_gather(zhp,
3946 ZFS_PROP_MOUNTPOINT, 0, 0);
3947 zfs_close(zhp);
3948 if (clp != NULL) {
3949 softerr |=
3950 changelist_prefix(clp);
3951 changelist_free(clp);
3952 }
3953 }
3954 }
3955
3956 nvlist_free(renamed);
3957 }
3958 }
3959
3960 /*
3961 * Get the fs specified by the first path in the stream (the top level
3962 * specified by 'zfs send') and pass it to each invocation of
3963 * zfs_receive_one().
3964 */
3965 (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
3966 sizeof (sendfs));
3967 if ((cp = strchr(sendfs, '@')) != NULL) {
3968 *cp = '\0';
3969 /*
3970 * Find the "sendsnap", the final snapshot in a replication
3971 * stream. zfs_receive_one() handles certain errors
3972 * differently, depending on if the contained stream is the
3973 * last one or not.
3974 */
3975 sendsnap = (cp + 1);
3976 }
3977
3978 /* Finally, receive each contained stream */
3979 do {
3980 /*
3981 * we should figure out if it has a recoverable
3982 * error, in which case do a recv_skip() and drive on.
3983 * Note, if we fail due to already having this guid,
3984 * zfs_receive_one() will take care of it (ie,
3985 * recv_skip() and return 0).
3986 */
3987 error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
3988 sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
3989 action_handlep, sendsnap, cmdprops);
3990 if (error == ENODATA) {
3991 error = 0;
3992 break;
3993 }
3994 anyerr |= error;
3995 } while (error == 0);
3996
3997 if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
3998 /*
3999 * Now that we have the fs's they sent us, try the
4000 * renames again.
4001 */
4002 softerr = recv_incremental_replication(hdl, tofs, flags,
4003 stream_nv, stream_avl, NULL);
4004 }
4005
4006 if (raw && softerr == 0) {
4007 softerr = recv_fix_encryption_hierarchy(hdl, destname,
4008 stream_nv, stream_avl);
4009 }
4010
4011 out:
4012 fsavl_destroy(stream_avl);
4013 nvlist_free(stream_nv);
4014 if (softerr)
4015 error = -2;
4016 if (anyerr)
4017 error = -1;
4018 return (error);
4019 }
4020
4021 static void
4022 trunc_prop_errs(int truncated)
4023 {
4024 ASSERT(truncated != 0);
4025
4026 if (truncated == 1)
4027 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4028 "1 more property could not be set\n"));
4029 else
4030 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4031 "%d more properties could not be set\n"), truncated);
4032 }
4033
4034 static int
4035 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
4036 {
4037 dmu_replay_record_t *drr;
4038 void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
4039 char errbuf[1024];
4040
4041 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4042 "cannot receive:"));
4043
4044 /* XXX would be great to use lseek if possible... */
4045 drr = buf;
4046
4047 while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
4048 byteswap, NULL) == 0) {
4049 if (byteswap)
4050 drr->drr_type = BSWAP_32(drr->drr_type);
4051
4052 switch (drr->drr_type) {
4053 case DRR_BEGIN:
4054 if (drr->drr_payloadlen != 0) {
4055 (void) recv_read(hdl, fd, buf,
4056 drr->drr_payloadlen, B_FALSE, NULL);
4057 }
4058 break;
4059
4060 case DRR_END:
4061 free(buf);
4062 return (0);
4063
4064 case DRR_OBJECT:
4065 if (byteswap) {
4066 drr->drr_u.drr_object.drr_bonuslen =
4067 BSWAP_32(drr->drr_u.drr_object.
4068 drr_bonuslen);
4069 }
4070 (void) recv_read(hdl, fd, buf,
4071 P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
4072 B_FALSE, NULL);
4073 break;
4074
4075 case DRR_WRITE:
4076 if (byteswap) {
4077 drr->drr_u.drr_write.drr_logical_size =
4078 BSWAP_64(
4079 drr->drr_u.drr_write.drr_logical_size);
4080 drr->drr_u.drr_write.drr_compressed_size =
4081 BSWAP_64(
4082 drr->drr_u.drr_write.drr_compressed_size);
4083 }
4084 uint64_t payload_size =
4085 DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
4086 (void) recv_read(hdl, fd, buf,
4087 payload_size, B_FALSE, NULL);
4088 break;
4089 case DRR_SPILL:
4090 if (byteswap) {
4091 drr->drr_u.drr_spill.drr_length =
4092 BSWAP_64(drr->drr_u.drr_spill.drr_length);
4093 }
4094 (void) recv_read(hdl, fd, buf,
4095 drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
4096 break;
4097 case DRR_WRITE_EMBEDDED:
4098 if (byteswap) {
4099 drr->drr_u.drr_write_embedded.drr_psize =
4100 BSWAP_32(drr->drr_u.drr_write_embedded.
4101 drr_psize);
4102 }
4103 (void) recv_read(hdl, fd, buf,
4104 P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
4105 8), B_FALSE, NULL);
4106 break;
4107 case DRR_OBJECT_RANGE:
4108 case DRR_WRITE_BYREF:
4109 case DRR_FREEOBJECTS:
4110 case DRR_FREE:
4111 break;
4112
4113 default:
4114 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4115 "invalid record type"));
4116 free(buf);
4117 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4118 }
4119 }
4120
4121 free(buf);
4122 return (-1);
4123 }
4124
4125 static void
4126 recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
4127 boolean_t resumable)
4128 {
4129 char target_fs[ZFS_MAX_DATASET_NAME_LEN];
4130
4131 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4132 "checksum mismatch or incomplete stream"));
4133
4134 if (!resumable)
4135 return;
4136 (void) strlcpy(target_fs, target_snap, sizeof (target_fs));
4137 *strchr(target_fs, '@') = '\0';
4138 zfs_handle_t *zhp = zfs_open(hdl, target_fs,
4139 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
4140 if (zhp == NULL)
4141 return;
4142
4143 char token_buf[ZFS_MAXPROPLEN];
4144 int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
4145 token_buf, sizeof (token_buf),
4146 NULL, NULL, 0, B_TRUE);
4147 if (error == 0) {
4148 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4149 "checksum mismatch or incomplete stream.\n"
4150 "Partially received snapshot is saved.\n"
4151 "A resuming stream can be generated on the sending "
4152 "system by running:\n"
4153 " zfs send -t %s"),
4154 token_buf);
4155 }
4156 zfs_close(zhp);
4157 }
4158
4159 /*
4160 * Prepare a new nvlist of properties that are to override (-o) or be excluded
4161 * (-x) from the received dataset
4162 * recvprops: received properties from the send stream
4163 * cmdprops: raw input properties from command line
4164 * origprops: properties, both locally-set and received, currently set on the
4165 * target dataset if it exists, NULL otherwise.
4166 * oxprops: valid output override (-o) and excluded (-x) properties
4167 */
4168 static int
4169 zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type,
4170 char *fsname, boolean_t zoned, boolean_t recursive, boolean_t newfs,
4171 boolean_t raw, boolean_t toplevel, nvlist_t *recvprops, nvlist_t *cmdprops,
4172 nvlist_t *origprops, nvlist_t **oxprops, uint8_t **wkeydata_out,
4173 uint_t *wkeylen_out, const char *errbuf)
4174 {
4175 nvpair_t *nvp;
4176 nvlist_t *oprops, *voprops;
4177 zfs_handle_t *zhp = NULL;
4178 zpool_handle_t *zpool_hdl = NULL;
4179 char *cp;
4180 int ret = 0;
4181 char namebuf[ZFS_MAX_DATASET_NAME_LEN];
4182
4183 if (nvlist_empty(cmdprops))
4184 return (0); /* No properties to override or exclude */
4185
4186 *oxprops = fnvlist_alloc();
4187 oprops = fnvlist_alloc();
4188
4189 strlcpy(namebuf, fsname, ZFS_MAX_DATASET_NAME_LEN);
4190
4191 /*
4192 * Get our dataset handle. The target dataset may not exist yet.
4193 */
4194 if (zfs_dataset_exists(hdl, namebuf, ZFS_TYPE_DATASET)) {
4195 zhp = zfs_open(hdl, namebuf, ZFS_TYPE_DATASET);
4196 if (zhp == NULL) {
4197 ret = -1;
4198 goto error;
4199 }
4200 }
4201
4202 /* open the zpool handle */
4203 cp = strchr(namebuf, '/');
4204 if (cp != NULL)
4205 *cp = '\0';
4206 zpool_hdl = zpool_open(hdl, namebuf);
4207 if (zpool_hdl == NULL) {
4208 ret = -1;
4209 goto error;
4210 }
4211
4212 /* restore namebuf to match fsname for later use */
4213 if (cp != NULL)
4214 *cp = '/';
4215
4216 /*
4217 * first iteration: process excluded (-x) properties now and gather
4218 * added (-o) properties to be later processed by zfs_valid_proplist()
4219 */
4220 nvp = NULL;
4221 while ((nvp = nvlist_next_nvpair(cmdprops, nvp)) != NULL) {
4222 const char *name = nvpair_name(nvp);
4223 zfs_prop_t prop = zfs_name_to_prop(name);
4224
4225 /* "origin" is processed separately, don't handle it here */
4226 if (prop == ZFS_PROP_ORIGIN)
4227 continue;
4228
4229 /*
4230 * we're trying to override or exclude a property that does not
4231 * make sense for this type of dataset, but we don't want to
4232 * fail if the receive is recursive: this comes in handy when
4233 * the send stream contains, for instance, a child ZVOL and
4234 * we're trying to receive it with "-o atime=on"
4235 */
4236 if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4237 !zfs_prop_user(name)) {
4238 if (recursive)
4239 continue;
4240 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4241 "property '%s' does not apply to datasets of this "
4242 "type"), name);
4243 ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4244 goto error;
4245 }
4246
4247 /* raw streams can't override encryption properties */
4248 if ((zfs_prop_encryption_key_param(prop) ||
4249 prop == ZFS_PROP_ENCRYPTION) && (raw || !newfs)) {
4250 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4251 "encryption property '%s' cannot "
4252 "be set or excluded for raw or incremental "
4253 "streams."), name);
4254 ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4255 goto error;
4256 }
4257
4258 switch (nvpair_type(nvp)) {
4259 case DATA_TYPE_BOOLEAN: /* -x property */
4260 /*
4261 * DATA_TYPE_BOOLEAN is the way we're asked to "exclude"
4262 * a property: this is done by forcing an explicit
4263 * inherit on the destination so the effective value is
4264 * not the one we received from the send stream.
4265 * We do this only if the property is not already
4266 * locally-set, in which case its value will take
4267 * priority over the received anyway.
4268 */
4269 if (nvlist_exists(origprops, name)) {
4270 nvlist_t *attrs;
4271
4272 attrs = fnvlist_lookup_nvlist(origprops, name);
4273 if (strcmp(fnvlist_lookup_string(attrs,
4274 ZPROP_SOURCE), ZPROP_SOURCE_VAL_RECVD) != 0)
4275 continue;
4276 }
4277 /*
4278 * We can't force an explicit inherit on non-inheritable
4279 * properties: if we're asked to exclude this kind of
4280 * values we remove them from "recvprops" input nvlist.
4281 */
4282 if (!zfs_prop_inheritable(prop) &&
4283 !zfs_prop_user(name) && /* can be inherited too */
4284 nvlist_exists(recvprops, name))
4285 fnvlist_remove(recvprops, name);
4286 else
4287 fnvlist_add_nvpair(*oxprops, nvp);
4288 break;
4289 case DATA_TYPE_STRING: /* -o property=value */
4290 fnvlist_add_nvpair(oprops, nvp);
4291 break;
4292 default:
4293 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4294 "property '%s' must be a string or boolean"), name);
4295 ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4296 goto error;
4297 }
4298 }
4299
4300 if (toplevel) {
4301 /* convert override strings properties to native */
4302 if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
4303 oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
4304 ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4305 goto error;
4306 }
4307
4308 /*
4309 * zfs_crypto_create() requires the parent name. Get it
4310 * by truncating the fsname copy stored in namebuf.
4311 */
4312 cp = strrchr(namebuf, '/');
4313 if (cp != NULL)
4314 *cp = '\0';
4315
4316 if (!raw && zfs_crypto_create(hdl, namebuf, voprops, NULL,
4317 B_FALSE, wkeydata_out, wkeylen_out) != 0) {
4318 fnvlist_free(voprops);
4319 ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4320 goto error;
4321 }
4322
4323 /* second pass: process "-o" properties */
4324 fnvlist_merge(*oxprops, voprops);
4325 fnvlist_free(voprops);
4326 } else {
4327 /* override props on child dataset are inherited */
4328 nvp = NULL;
4329 while ((nvp = nvlist_next_nvpair(oprops, nvp)) != NULL) {
4330 const char *name = nvpair_name(nvp);
4331 fnvlist_add_boolean(*oxprops, name);
4332 }
4333 }
4334
4335 error:
4336 if (zhp != NULL)
4337 zfs_close(zhp);
4338 if (zpool_hdl != NULL)
4339 zpool_close(zpool_hdl);
4340 fnvlist_free(oprops);
4341 return (ret);
4342 }
4343
4344 /*
4345 * Restores a backup of tosnap from the file descriptor specified by infd.
4346 */
4347 static int
4348 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
4349 const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
4350 dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
4351 avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
4352 uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
4353 {
4354 time_t begin_time;
4355 int ioctl_err, ioctl_errno, err;
4356 char *cp;
4357 struct drr_begin *drrb = &drr->drr_u.drr_begin;
4358 char errbuf[1024];
4359 const char *chopprefix;
4360 boolean_t newfs = B_FALSE;
4361 boolean_t stream_wantsnewfs;
4362 boolean_t newprops = B_FALSE;
4363 uint64_t read_bytes = 0;
4364 uint64_t errflags = 0;
4365 uint64_t parent_snapguid = 0;
4366 prop_changelist_t *clp = NULL;
4367 nvlist_t *snapprops_nvlist = NULL;
4368 nvlist_t *snapholds_nvlist = NULL;
4369 zprop_errflags_t prop_errflags;
4370 nvlist_t *prop_errors = NULL;
4371 boolean_t recursive;
4372 char *snapname = NULL;
4373 char destsnap[MAXPATHLEN * 2];
4374 char origin[MAXNAMELEN];
4375 char name[MAXPATHLEN];
4376 char tmp_keylocation[MAXNAMELEN];
4377 nvlist_t *rcvprops = NULL; /* props received from the send stream */
4378 nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
4379 nvlist_t *origprops = NULL; /* original props (if destination exists) */
4380 zfs_type_t type;
4381 boolean_t toplevel = B_FALSE;
4382 boolean_t zoned = B_FALSE;
4383 boolean_t hastoken = B_FALSE;
4384 boolean_t redacted;
4385 uint8_t *wkeydata = NULL;
4386 uint_t wkeylen = 0;
4387
4388 begin_time = time(NULL);
4389 bzero(origin, MAXNAMELEN);
4390 bzero(tmp_keylocation, MAXNAMELEN);
4391
4392 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4393 "cannot receive"));
4394
4395 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
4396 ENOENT);
4397
4398 /* Did the user request holds be skipped via zfs recv -k? */
4399 boolean_t holds = flags->holds && !flags->skipholds;
4400
4401 if (stream_avl != NULL) {
4402 char *keylocation = NULL;
4403 nvlist_t *lookup = NULL;
4404 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
4405 &snapname);
4406
4407 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
4408 &parent_snapguid);
4409 err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
4410 if (err) {
4411 VERIFY(0 == nvlist_alloc(&rcvprops, NV_UNIQUE_NAME, 0));
4412 newprops = B_TRUE;
4413 }
4414
4415 /*
4416 * The keylocation property may only be set on encryption roots,
4417 * but this dataset might not become an encryption root until
4418 * recv_fix_encryption_hierarchy() is called. That function
4419 * will fixup the keylocation anyway, so we temporarily unset
4420 * the keylocation for now to avoid any errors from the receive
4421 * ioctl.
4422 */
4423 err = nvlist_lookup_string(rcvprops,
4424 zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
4425 if (err == 0) {
4426 strcpy(tmp_keylocation, keylocation);
4427 (void) nvlist_remove_all(rcvprops,
4428 zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
4429 }
4430
4431 if (flags->canmountoff) {
4432 VERIFY(0 == nvlist_add_uint64(rcvprops,
4433 zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
4434 } else if (newprops) { /* nothing in rcvprops, eliminate it */
4435 nvlist_free(rcvprops);
4436 rcvprops = NULL;
4437 newprops = B_FALSE;
4438 }
4439 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
4440 VERIFY(0 == nvlist_lookup_nvlist(lookup,
4441 snapname, &snapprops_nvlist));
4442 }
4443 if (holds) {
4444 if (0 == nvlist_lookup_nvlist(fs, "snapholds",
4445 &lookup)) {
4446 VERIFY(0 == nvlist_lookup_nvlist(lookup,
4447 snapname, &snapholds_nvlist));
4448 }
4449 }
4450 }
4451
4452 cp = NULL;
4453
4454 /*
4455 * Determine how much of the snapshot name stored in the stream
4456 * we are going to tack on to the name they specified on the
4457 * command line, and how much we are going to chop off.
4458 *
4459 * If they specified a snapshot, chop the entire name stored in
4460 * the stream.
4461 */
4462 if (flags->istail) {
4463 /*
4464 * A filesystem was specified with -e. We want to tack on only
4465 * the tail of the sent snapshot path.
4466 */
4467 if (strchr(tosnap, '@')) {
4468 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4469 "argument - snapshot not allowed with -e"));
4470 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4471 goto out;
4472 }
4473
4474 chopprefix = strrchr(sendfs, '/');
4475
4476 if (chopprefix == NULL) {
4477 /*
4478 * The tail is the poolname, so we need to
4479 * prepend a path separator.
4480 */
4481 int len = strlen(drrb->drr_toname);
4482 cp = malloc(len + 2);
4483 cp[0] = '/';
4484 (void) strcpy(&cp[1], drrb->drr_toname);
4485 chopprefix = cp;
4486 } else {
4487 chopprefix = drrb->drr_toname + (chopprefix - sendfs);
4488 }
4489 } else if (flags->isprefix) {
4490 /*
4491 * A filesystem was specified with -d. We want to tack on
4492 * everything but the first element of the sent snapshot path
4493 * (all but the pool name).
4494 */
4495 if (strchr(tosnap, '@')) {
4496 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4497 "argument - snapshot not allowed with -d"));
4498 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4499 goto out;
4500 }
4501
4502 chopprefix = strchr(drrb->drr_toname, '/');
4503 if (chopprefix == NULL)
4504 chopprefix = strchr(drrb->drr_toname, '@');
4505 } else if (strchr(tosnap, '@') == NULL) {
4506 /*
4507 * If a filesystem was specified without -d or -e, we want to
4508 * tack on everything after the fs specified by 'zfs send'.
4509 */
4510 chopprefix = drrb->drr_toname + strlen(sendfs);
4511 } else {
4512 /* A snapshot was specified as an exact path (no -d or -e). */
4513 if (recursive) {
4514 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4515 "cannot specify snapshot name for multi-snapshot "
4516 "stream"));
4517 err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4518 goto out;
4519 }
4520 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
4521 }
4522
4523 ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
4524 ASSERT(chopprefix > drrb->drr_toname || strchr(sendfs, '/') == NULL);
4525 ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname) ||
4526 strchr(sendfs, '/') == NULL);
4527 ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
4528 chopprefix[0] == '\0');
4529
4530 /*
4531 * Determine name of destination snapshot.
4532 */
4533 (void) strlcpy(destsnap, tosnap, sizeof (destsnap));
4534 (void) strlcat(destsnap, chopprefix, sizeof (destsnap));
4535 free(cp);
4536 if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
4537 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4538 goto out;
4539 }
4540
4541 /*
4542 * Determine the name of the origin snapshot.
4543 */
4544 if (originsnap) {
4545 (void) strlcpy(origin, originsnap, sizeof (origin));
4546 if (flags->verbose)
4547 (void) printf("using provided clone origin %s\n",
4548 origin);
4549 } else if (drrb->drr_flags & DRR_FLAG_CLONE) {
4550 if (guid_to_name(hdl, destsnap,
4551 drrb->drr_fromguid, B_FALSE, origin) != 0) {
4552 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4553 "local origin for clone %s does not exist"),
4554 destsnap);
4555 err = zfs_error(hdl, EZFS_NOENT, errbuf);
4556 goto out;
4557 }
4558 if (flags->verbose)
4559 (void) printf("found clone origin %s\n", origin);
4560 }
4561
4562 boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4563 DMU_BACKUP_FEATURE_RESUMING;
4564 boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4565 DMU_BACKUP_FEATURE_RAW;
4566 boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4567 DMU_BACKUP_FEATURE_EMBED_DATA;
4568 stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
4569 (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
4570
4571 if (stream_wantsnewfs) {
4572 /*
4573 * if the parent fs does not exist, look for it based on
4574 * the parent snap GUID
4575 */
4576 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4577 "cannot receive new filesystem stream"));
4578
4579 (void) strcpy(name, destsnap);
4580 cp = strrchr(name, '/');
4581 if (cp)
4582 *cp = '\0';
4583 if (cp &&
4584 !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4585 char suffix[ZFS_MAX_DATASET_NAME_LEN];
4586 (void) strcpy(suffix, strrchr(destsnap, '/'));
4587 if (guid_to_name(hdl, name, parent_snapguid,
4588 B_FALSE, destsnap) == 0) {
4589 *strchr(destsnap, '@') = '\0';
4590 (void) strcat(destsnap, suffix);
4591 }
4592 }
4593 } else {
4594 /*
4595 * if the fs does not exist, look for it based on the
4596 * fromsnap GUID
4597 */
4598 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4599 "cannot receive incremental stream"));
4600
4601 (void) strcpy(name, destsnap);
4602 *strchr(name, '@') = '\0';
4603
4604 /*
4605 * If the exact receive path was specified and this is the
4606 * topmost path in the stream, then if the fs does not exist we
4607 * should look no further.
4608 */
4609 if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
4610 strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
4611 !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4612 char snap[ZFS_MAX_DATASET_NAME_LEN];
4613 (void) strcpy(snap, strchr(destsnap, '@'));
4614 if (guid_to_name(hdl, name, drrb->drr_fromguid,
4615 B_FALSE, destsnap) == 0) {
4616 *strchr(destsnap, '@') = '\0';
4617 (void) strcat(destsnap, snap);
4618 }
4619 }
4620 }
4621
4622 (void) strcpy(name, destsnap);
4623 *strchr(name, '@') = '\0';
4624
4625 redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4626 DMU_BACKUP_FEATURE_REDACTED;
4627
4628 if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4629 zfs_cmd_t zc = {"\0"};
4630 zfs_handle_t *zhp;
4631 boolean_t encrypted;
4632
4633 (void) strcpy(zc.zc_name, name);
4634
4635 /*
4636 * Destination fs exists. It must be one of these cases:
4637 * - an incremental send stream
4638 * - the stream specifies a new fs (full stream or clone)
4639 * and they want us to blow away the existing fs (and
4640 * have therefore specified -F and removed any snapshots)
4641 * - we are resuming a failed receive.
4642 */
4643 if (stream_wantsnewfs) {
4644 boolean_t is_volume = drrb->drr_type == DMU_OST_ZVOL;
4645 if (!flags->force) {
4646 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4647 "destination '%s' exists\n"
4648 "must specify -F to overwrite it"), name);
4649 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4650 goto out;
4651 }
4652 if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
4653 &zc) == 0) {
4654 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4655 "destination has snapshots (eg. %s)\n"
4656 "must destroy them to overwrite it"),
4657 zc.zc_name);
4658 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4659 goto out;
4660 }
4661 if (is_volume && strrchr(name, '/') == NULL) {
4662 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4663 "destination %s is the root dataset\n"
4664 "cannot overwrite with a ZVOL"),
4665 name);
4666 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4667 goto out;
4668 }
4669 if (is_volume &&
4670 ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT,
4671 &zc) == 0) {
4672 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4673 "destination has children (eg. %s)\n"
4674 "cannot overwrite with a ZVOL"),
4675 zc.zc_name);
4676 err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4677 goto out;
4678 }
4679 }
4680
4681 if ((zhp = zfs_open(hdl, name,
4682 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
4683 err = -1;
4684 goto out;
4685 }
4686
4687 if (stream_wantsnewfs &&
4688 zhp->zfs_dmustats.dds_origin[0]) {
4689 zfs_close(zhp);
4690 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4691 "destination '%s' is a clone\n"
4692 "must destroy it to overwrite it"), name);
4693 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4694 goto out;
4695 }
4696
4697 /*
4698 * Raw sends can not be performed as an incremental on top
4699 * of existing unencryppted datasets. zfs recv -F cant be
4700 * used to blow away an existing encrypted filesystem. This
4701 * is because it would require the dsl dir to point to the
4702 * new key (or lack of a key) and the old key at the same
4703 * time. The -F flag may still be used for deleting
4704 * intermediate snapshots that would otherwise prevent the
4705 * receive from working.
4706 */
4707 encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
4708 ZIO_CRYPT_OFF;
4709 if (!stream_wantsnewfs && !encrypted && raw) {
4710 zfs_close(zhp);
4711 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4712 "cannot perform raw receive on top of "
4713 "existing unencrypted dataset"));
4714 err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4715 goto out;
4716 }
4717
4718 if (stream_wantsnewfs && flags->force &&
4719 ((raw && !encrypted) || encrypted)) {
4720 zfs_close(zhp);
4721 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4722 "zfs receive -F cannot be used to destroy an "
4723 "encrypted filesystem or overwrite an "
4724 "unencrypted one with an encrypted one"));
4725 err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4726 goto out;
4727 }
4728
4729 if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
4730 stream_wantsnewfs) {
4731 /* We can't do online recv in this case */
4732 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
4733 if (clp == NULL) {
4734 zfs_close(zhp);
4735 err = -1;
4736 goto out;
4737 }
4738 if (changelist_prefix(clp) != 0) {
4739 changelist_free(clp);
4740 zfs_close(zhp);
4741 err = -1;
4742 goto out;
4743 }
4744 }
4745
4746 /*
4747 * If we are resuming a newfs, set newfs here so that we will
4748 * mount it if the recv succeeds this time. We can tell
4749 * that it was a newfs on the first recv because the fs
4750 * itself will be inconsistent (if the fs existed when we
4751 * did the first recv, we would have received it into
4752 * .../%recv).
4753 */
4754 if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
4755 newfs = B_TRUE;
4756
4757 /* we want to know if we're zoned when validating -o|-x props */
4758 zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
4759
4760 /* may need this info later, get it now we have zhp around */
4761 if (zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0,
4762 NULL, NULL, 0, B_TRUE) == 0)
4763 hastoken = B_TRUE;
4764
4765 /* gather existing properties on destination */
4766 origprops = fnvlist_alloc();
4767 fnvlist_merge(origprops, zhp->zfs_props);
4768 fnvlist_merge(origprops, zhp->zfs_user_props);
4769
4770 zfs_close(zhp);
4771 } else {
4772 zfs_handle_t *zhp;
4773
4774 /*
4775 * Destination filesystem does not exist. Therefore we better
4776 * be creating a new filesystem (either from a full backup, or
4777 * a clone). It would therefore be invalid if the user
4778 * specified only the pool name (i.e. if the destination name
4779 * contained no slash character).
4780 */
4781 cp = strrchr(name, '/');
4782
4783 if (!stream_wantsnewfs || cp == NULL) {
4784 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4785 "destination '%s' does not exist"), name);
4786 err = zfs_error(hdl, EZFS_NOENT, errbuf);
4787 goto out;
4788 }
4789
4790 /*
4791 * Trim off the final dataset component so we perform the
4792 * recvbackup ioctl to the filesystems's parent.
4793 */
4794 *cp = '\0';
4795
4796 if (flags->isprefix && !flags->istail && !flags->dryrun &&
4797 create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
4798 err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4799 goto out;
4800 }
4801
4802 /* validate parent */
4803 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
4804 if (zhp == NULL) {
4805 err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4806 goto out;
4807 }
4808 if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
4809 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4810 "parent '%s' is not a filesystem"), name);
4811 err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4812 zfs_close(zhp);
4813 goto out;
4814 }
4815
4816 /*
4817 * It is invalid to receive a properties stream that was
4818 * unencrypted on the send side as a child of an encrypted
4819 * parent. Technically there is nothing preventing this, but
4820 * it would mean that the encryption=off property which is
4821 * locally set on the send side would not be received correctly.
4822 * We can infer encryption=off if the stream is not raw and
4823 * properties were included since the send side will only ever
4824 * send the encryption property in a raw nvlist header. This
4825 * check will be avoided if the user specifically overrides
4826 * the encryption property on the command line.
4827 */
4828 if (!raw && rcvprops != NULL &&
4829 !nvlist_exists(cmdprops,
4830 zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) {
4831 uint64_t crypt;
4832
4833 crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
4834
4835 if (crypt != ZIO_CRYPT_OFF) {
4836 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4837 "parent '%s' must not be encrypted to "
4838 "receive unenecrypted property"), name);
4839 err = zfs_error(hdl, EZFS_BADPROP, errbuf);
4840 zfs_close(zhp);
4841 goto out;
4842 }
4843 }
4844 zfs_close(zhp);
4845
4846 newfs = B_TRUE;
4847 *cp = '/';
4848 }
4849
4850 if (flags->verbose) {
4851 (void) printf("%s %s stream of %s into %s\n",
4852 flags->dryrun ? "would receive" : "receiving",
4853 drrb->drr_fromguid ? "incremental" : "full",
4854 drrb->drr_toname, destsnap);
4855 (void) fflush(stdout);
4856 }
4857
4858 if (flags->dryrun) {
4859 err = recv_skip(hdl, infd, flags->byteswap);
4860 goto out;
4861 }
4862
4863 if (top_zfs && (*top_zfs == NULL || strcmp(*top_zfs, name) == 0))
4864 toplevel = B_TRUE;
4865 if (drrb->drr_type == DMU_OST_ZVOL) {
4866 type = ZFS_TYPE_VOLUME;
4867 } else if (drrb->drr_type == DMU_OST_ZFS) {
4868 type = ZFS_TYPE_FILESYSTEM;
4869 } else {
4870 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4871 "invalid record type: 0x%d"), drrb->drr_type);
4872 err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4873 goto out;
4874 }
4875 if ((err = zfs_setup_cmdline_props(hdl, type, name, zoned, recursive,
4876 stream_wantsnewfs, raw, toplevel, rcvprops, cmdprops, origprops,
4877 &oxprops, &wkeydata, &wkeylen, errbuf)) != 0)
4878 goto out;
4879
4880 err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
4881 oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable,
4882 raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags,
4883 action_handlep, &prop_errors);
4884 ioctl_errno = ioctl_err;
4885 prop_errflags = errflags;
4886
4887 if (err == 0) {
4888 nvpair_t *prop_err = NULL;
4889
4890 while ((prop_err = nvlist_next_nvpair(prop_errors,
4891 prop_err)) != NULL) {
4892 char tbuf[1024];
4893 zfs_prop_t prop;
4894 int intval;
4895
4896 prop = zfs_name_to_prop(nvpair_name(prop_err));
4897 (void) nvpair_value_int32(prop_err, &intval);
4898 if (strcmp(nvpair_name(prop_err),
4899 ZPROP_N_MORE_ERRORS) == 0) {
4900 trunc_prop_errs(intval);
4901 break;
4902 } else if (snapname == NULL || finalsnap == NULL ||
4903 strcmp(finalsnap, snapname) == 0 ||
4904 strcmp(nvpair_name(prop_err),
4905 zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
4906 /*
4907 * Skip the special case of, for example,
4908 * "refquota", errors on intermediate
4909 * snapshots leading up to a final one.
4910 * That's why we have all of the checks above.
4911 *
4912 * See zfs_ioctl.c's extract_delay_props() for
4913 * a list of props which can fail on
4914 * intermediate snapshots, but shouldn't
4915 * affect the overall receive.
4916 */
4917 (void) snprintf(tbuf, sizeof (tbuf),
4918 dgettext(TEXT_DOMAIN,
4919 "cannot receive %s property on %s"),
4920 nvpair_name(prop_err), name);
4921 zfs_setprop_error(hdl, prop, intval, tbuf);
4922 }
4923 }
4924 }
4925
4926 if (err == 0 && snapprops_nvlist) {
4927 zfs_cmd_t zc = {"\0"};
4928
4929 (void) strcpy(zc.zc_name, destsnap);
4930 zc.zc_cookie = B_TRUE; /* received */
4931 if (zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist) == 0) {
4932 (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
4933 zcmd_free_nvlists(&zc);
4934 }
4935 }
4936 if (err == 0 && snapholds_nvlist) {
4937 nvpair_t *pair;
4938 nvlist_t *holds, *errors = NULL;
4939 int cleanup_fd = -1;
4940
4941 VERIFY(0 == nvlist_alloc(&holds, 0, KM_SLEEP));
4942 for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL);
4943 pair != NULL;
4944 pair = nvlist_next_nvpair(snapholds_nvlist, pair)) {
4945 VERIFY(0 == nvlist_add_string(holds, destsnap,
4946 nvpair_name(pair)));
4947 }
4948 (void) lzc_hold(holds, cleanup_fd, &errors);
4949 nvlist_free(snapholds_nvlist);
4950 nvlist_free(holds);
4951 }
4952
4953 if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
4954 /*
4955 * It may be that this snapshot already exists,
4956 * in which case we want to consume & ignore it
4957 * rather than failing.
4958 */
4959 avl_tree_t *local_avl;
4960 nvlist_t *local_nv, *fs;
4961 cp = strchr(destsnap, '@');
4962
4963 /*
4964 * XXX Do this faster by just iterating over snaps in
4965 * this fs. Also if zc_value does not exist, we will
4966 * get a strange "does not exist" error message.
4967 */
4968 *cp = '\0';
4969 if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
4970 B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_TRUE,
4971 &local_nv, &local_avl) == 0) {
4972 *cp = '@';
4973 fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
4974 fsavl_destroy(local_avl);
4975 nvlist_free(local_nv);
4976
4977 if (fs != NULL) {
4978 if (flags->verbose) {
4979 (void) printf("snap %s already exists; "
4980 "ignoring\n", destsnap);
4981 }
4982 err = ioctl_err = recv_skip(hdl, infd,
4983 flags->byteswap);
4984 }
4985 }
4986 *cp = '@';
4987 }
4988
4989 if (ioctl_err != 0) {
4990 switch (ioctl_errno) {
4991 case ENODEV:
4992 cp = strchr(destsnap, '@');
4993 *cp = '\0';
4994 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4995 "most recent snapshot of %s does not\n"
4996 "match incremental source"), destsnap);
4997 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4998 *cp = '@';
4999 break;
5000 case ETXTBSY:
5001 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5002 "destination %s has been modified\n"
5003 "since most recent snapshot"), name);
5004 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
5005 break;
5006 case EACCES:
5007 if (raw && stream_wantsnewfs) {
5008 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5009 "failed to create encryption key"));
5010 } else if (raw && !stream_wantsnewfs) {
5011 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5012 "encryption key does not match "
5013 "existing key"));
5014 } else {
5015 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5016 "inherited key must be loaded"));
5017 }
5018 (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
5019 break;
5020 case EEXIST:
5021 cp = strchr(destsnap, '@');
5022 if (newfs) {
5023 /* it's the containing fs that exists */
5024 *cp = '\0';
5025 }
5026 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5027 "destination already exists"));
5028 (void) zfs_error_fmt(hdl, EZFS_EXISTS,
5029 dgettext(TEXT_DOMAIN, "cannot restore to %s"),
5030 destsnap);
5031 *cp = '@';
5032 break;
5033 case EINVAL:
5034 if (flags->resumable)
5035 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5036 "kernel modules must be upgraded to "
5037 "receive this stream."));
5038 if (embedded && !raw)
5039 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5040 "incompatible embedded data stream "
5041 "feature with encrypted receive."));
5042 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5043 break;
5044 case ECKSUM:
5045 recv_ecksum_set_aux(hdl, destsnap, flags->resumable);
5046 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5047 break;
5048 case ENOTSUP:
5049 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5050 "pool must be upgraded to receive this stream."));
5051 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
5052 break;
5053 case EDQUOT:
5054 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5055 "destination %s space quota exceeded."), name);
5056 (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
5057 break;
5058 case ZFS_ERR_FROM_IVSET_GUID_MISSING:
5059 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5060 "IV set guid missing. See errata %u at "
5061 "http://zfsonlinux.org/msg/ZFS-8000-ER."),
5062 ZPOOL_ERRATA_ZOL_8308_ENCRYPTION);
5063 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5064 break;
5065 case ZFS_ERR_FROM_IVSET_GUID_MISMATCH:
5066 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5067 "IV set guid mismatch. See the 'zfs receive' "
5068 "man page section\n discussing the limitations "
5069 "of raw encrypted send streams."));
5070 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5071 break;
5072 case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
5073 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5074 "Spill block flag missing for raw send.\n"
5075 "The zfs software on the sending system must "
5076 "be updated."));
5077 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5078 break;
5079 case EBUSY:
5080 if (hastoken) {
5081 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5082 "destination %s contains "
5083 "partially-complete state from "
5084 "\"zfs receive -s\"."), name);
5085 (void) zfs_error(hdl, EZFS_BUSY, errbuf);
5086 break;
5087 }
5088 /* fallthru */
5089 default:
5090 (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
5091 }
5092 }
5093
5094 /*
5095 * Mount the target filesystem (if created). Also mount any
5096 * children of the target filesystem if we did a replication
5097 * receive (indicated by stream_avl being non-NULL).
5098 */
5099 cp = strchr(destsnap, '@');
5100 if (cp && (ioctl_err == 0 || !newfs) && !redacted) {
5101 zfs_handle_t *h;
5102
5103 *cp = '\0';
5104 h = zfs_open(hdl, destsnap,
5105 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
5106 if (h != NULL) {
5107 if (h->zfs_type == ZFS_TYPE_VOLUME) {
5108 *cp = '@';
5109 } else if (newfs || stream_avl) {
5110 /*
5111 * Track the first/top of hierarchy fs,
5112 * for mounting and sharing later.
5113 */
5114 if (top_zfs && *top_zfs == NULL)
5115 *top_zfs = zfs_strdup(hdl, destsnap);
5116 }
5117 zfs_close(h);
5118 }
5119 *cp = '@';
5120 }
5121
5122 if (clp) {
5123 if (!flags->nomount)
5124 err |= changelist_postfix(clp);
5125 changelist_free(clp);
5126 }
5127
5128 if (prop_errflags & ZPROP_ERR_NOCLEAR) {
5129 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5130 "failed to clear unreceived properties on %s"), name);
5131 (void) fprintf(stderr, "\n");
5132 }
5133 if (prop_errflags & ZPROP_ERR_NORESTORE) {
5134 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5135 "failed to restore original properties on %s"), name);
5136 (void) fprintf(stderr, "\n");
5137 }
5138
5139 if (err || ioctl_err) {
5140 err = -1;
5141 goto out;
5142 }
5143
5144 if (flags->verbose) {
5145 char buf1[64];
5146 char buf2[64];
5147 uint64_t bytes = read_bytes;
5148 time_t delta = time(NULL) - begin_time;
5149 if (delta == 0)
5150 delta = 1;
5151 zfs_nicebytes(bytes, buf1, sizeof (buf1));
5152 zfs_nicebytes(bytes/delta, buf2, sizeof (buf1));
5153
5154 (void) printf("received %s stream in %lu seconds (%s/sec)\n",
5155 buf1, delta, buf2);
5156 }
5157
5158 err = 0;
5159 out:
5160 if (prop_errors != NULL)
5161 nvlist_free(prop_errors);
5162
5163 if (tmp_keylocation[0] != '\0') {
5164 VERIFY(0 == nvlist_add_string(rcvprops,
5165 zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation));
5166 }
5167
5168 if (newprops)
5169 nvlist_free(rcvprops);
5170
5171 nvlist_free(oxprops);
5172 nvlist_free(origprops);
5173
5174 return (err);
5175 }
5176
5177 /*
5178 * Check properties we were asked to override (both -o|-x)
5179 */
5180 static boolean_t
5181 zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
5182 const char *errbuf)
5183 {
5184 nvpair_t *nvp;
5185 zfs_prop_t prop;
5186 const char *name;
5187
5188 nvp = NULL;
5189 while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
5190 name = nvpair_name(nvp);
5191 prop = zfs_name_to_prop(name);
5192
5193 if (prop == ZPROP_INVAL) {
5194 if (!zfs_prop_user(name)) {
5195 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5196 "invalid property '%s'"), name);
5197 return (B_FALSE);
5198 }
5199 continue;
5200 }
5201 /*
5202 * "origin" is readonly but is used to receive datasets as
5203 * clones so we don't raise an error here
5204 */
5205 if (prop == ZFS_PROP_ORIGIN)
5206 continue;
5207
5208 /* encryption params have their own verification later */
5209 if (prop == ZFS_PROP_ENCRYPTION ||
5210 zfs_prop_encryption_key_param(prop))
5211 continue;
5212
5213 /*
5214 * cannot override readonly, set-once and other specific
5215 * settable properties
5216 */
5217 if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION ||
5218 prop == ZFS_PROP_VOLSIZE) {
5219 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5220 "invalid property '%s'"), name);
5221 return (B_FALSE);
5222 }
5223 }
5224
5225 return (B_TRUE);
5226 }
5227
5228 static int
5229 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
5230 const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
5231 nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
5232 uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
5233 {
5234 int err;
5235 dmu_replay_record_t drr, drr_noswap;
5236 struct drr_begin *drrb = &drr.drr_u.drr_begin;
5237 char errbuf[1024];
5238 zio_cksum_t zcksum = { { 0 } };
5239 uint64_t featureflags;
5240 int hdrtype;
5241
5242 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
5243 "cannot receive"));
5244
5245 /* check cmdline props, raise an error if they cannot be received */
5246 if (!zfs_receive_checkprops(hdl, cmdprops, errbuf)) {
5247 return (zfs_error(hdl, EZFS_BADPROP, errbuf));
5248 }
5249
5250 if (flags->isprefix &&
5251 !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
5252 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
5253 "(%s) does not exist"), tosnap);
5254 return (zfs_error(hdl, EZFS_NOENT, errbuf));
5255 }
5256 if (originsnap &&
5257 !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
5258 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
5259 "(%s) does not exist"), originsnap);
5260 return (zfs_error(hdl, EZFS_NOENT, errbuf));
5261 }
5262
5263 /* read in the BEGIN record */
5264 if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
5265 &zcksum)))
5266 return (err);
5267
5268 if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
5269 /* It's the double end record at the end of a package */
5270 return (ENODATA);
5271 }
5272
5273 /* the kernel needs the non-byteswapped begin record */
5274 drr_noswap = drr;
5275
5276 flags->byteswap = B_FALSE;
5277 if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
5278 /*
5279 * We computed the checksum in the wrong byteorder in
5280 * recv_read() above; do it again correctly.
5281 */
5282 bzero(&zcksum, sizeof (zio_cksum_t));
5283 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
5284 flags->byteswap = B_TRUE;
5285
5286 drr.drr_type = BSWAP_32(drr.drr_type);
5287 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
5288 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
5289 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
5290 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
5291 drrb->drr_type = BSWAP_32(drrb->drr_type);
5292 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
5293 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
5294 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
5295 }
5296
5297 if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
5298 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5299 "stream (bad magic number)"));
5300 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5301 }
5302
5303 featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
5304 hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
5305
5306 if (!DMU_STREAM_SUPPORTED(featureflags) ||
5307 (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
5308 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5309 "stream has unsupported feature, feature flags = %lx"),
5310 featureflags);
5311 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5312 }
5313
5314 /* Holds feature is set once in the compound stream header. */
5315 boolean_t holds = (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
5316 DMU_BACKUP_FEATURE_HOLDS);
5317 if (holds)
5318 flags->holds = B_TRUE;
5319
5320 if (strchr(drrb->drr_toname, '@') == NULL) {
5321 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5322 "stream (bad snapshot name)"));
5323 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5324 }
5325
5326 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
5327 char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
5328 if (sendfs == NULL) {
5329 /*
5330 * We were not called from zfs_receive_package(). Get
5331 * the fs specified by 'zfs send'.
5332 */
5333 char *cp;
5334 (void) strlcpy(nonpackage_sendfs,
5335 drr.drr_u.drr_begin.drr_toname,
5336 sizeof (nonpackage_sendfs));
5337 if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
5338 *cp = '\0';
5339 sendfs = nonpackage_sendfs;
5340 VERIFY(finalsnap == NULL);
5341 }
5342 return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
5343 &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
5344 cleanup_fd, action_handlep, finalsnap, cmdprops));
5345 } else {
5346 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
5347 DMU_COMPOUNDSTREAM);
5348 return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
5349 &zcksum, top_zfs, cleanup_fd, action_handlep, cmdprops));
5350 }
5351 }
5352
5353 /*
5354 * Restores a backup of tosnap from the file descriptor specified by infd.
5355 * Return 0 on total success, -2 if some things couldn't be
5356 * destroyed/renamed/promoted, -1 if some things couldn't be received.
5357 * (-1 will override -2, if -1 and the resumable flag was specified the
5358 * transfer can be resumed if the sending side supports it).
5359 */
5360 int
5361 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
5362 recvflags_t *flags, int infd, avl_tree_t *stream_avl)
5363 {
5364 char *top_zfs = NULL;
5365 int err;
5366 int cleanup_fd;
5367 uint64_t action_handle = 0;
5368 struct stat sb;
5369 char *originsnap = NULL;
5370
5371 /*
5372 * The only way fstat can fail is if we do not have a valid file
5373 * descriptor.
5374 */
5375 if (fstat(infd, &sb) == -1) {
5376 perror("fstat");
5377 return (-2);
5378 }
5379
5380 #ifdef __linux__
5381 #ifndef F_SETPIPE_SZ
5382 #define F_SETPIPE_SZ (F_SETLEASE + 7)
5383 #endif /* F_SETPIPE_SZ */
5384
5385 #ifndef F_GETPIPE_SZ
5386 #define F_GETPIPE_SZ (F_GETLEASE + 7)
5387 #endif /* F_GETPIPE_SZ */
5388
5389 /*
5390 * It is not uncommon for gigabytes to be processed in zfs receive.
5391 * Speculatively increase the buffer size via Linux-specific fcntl()
5392 * call.
5393 */
5394 if (S_ISFIFO(sb.st_mode)) {
5395 FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "r");
5396
5397 if (procf != NULL) {
5398 unsigned long max_psize;
5399 long cur_psize;
5400 if (fscanf(procf, "%lu", &max_psize) > 0) {
5401 cur_psize = fcntl(infd, F_GETPIPE_SZ);
5402 if (cur_psize > 0 &&
5403 max_psize > (unsigned long) cur_psize)
5404 (void) fcntl(infd, F_SETPIPE_SZ,
5405 max_psize);
5406 }
5407 fclose(procf);
5408 }
5409 }
5410 #endif /* __linux__ */
5411
5412 if (props) {
5413 err = nvlist_lookup_string(props, "origin", &originsnap);
5414 if (err && err != ENOENT)
5415 return (err);
5416 }
5417
5418 cleanup_fd = open(ZFS_DEV, O_RDWR);
5419 VERIFY(cleanup_fd >= 0);
5420
5421 err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
5422 stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL, props);
5423
5424 VERIFY(0 == close(cleanup_fd));
5425
5426 if (err == 0 && !flags->nomount && top_zfs) {
5427 zfs_handle_t *zhp = NULL;
5428 prop_changelist_t *clp = NULL;
5429
5430 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
5431 if (zhp != NULL) {
5432 clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
5433 CL_GATHER_MOUNT_ALWAYS, 0);
5434 zfs_close(zhp);
5435 if (clp != NULL) {
5436 /* mount and share received datasets */
5437 err = changelist_postfix(clp);
5438 changelist_free(clp);
5439 }
5440 }
5441 if (zhp == NULL || clp == NULL || err)
5442 err = -1;
5443 }
5444 if (top_zfs)
5445 free(top_zfs);
5446
5447 return (err);
5448 }