]> git.proxmox.com Git - mirror_zfs.git/blob - lib/libzfs/libzfs_sendrecv.c
Performance optimization of AVL tree comparator functions
[mirror_zfs.git] / lib / libzfs / libzfs_sendrecv.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 */
31
32 #include <assert.h>
33 #include <ctype.h>
34 #include <errno.h>
35 #include <libintl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <strings.h>
39 #include <unistd.h>
40 #include <stddef.h>
41 #include <fcntl.h>
42 #include <sys/mount.h>
43 #include <sys/mntent.h>
44 #include <sys/mnttab.h>
45 #include <sys/avl.h>
46 #include <sys/debug.h>
47 #include <sys/stat.h>
48 #include <stddef.h>
49 #include <pthread.h>
50 #include <umem.h>
51 #include <time.h>
52
53 #include <libzfs.h>
54 #include <libzfs_core.h>
55
56 #include "zfs_namecheck.h"
57 #include "zfs_prop.h"
58 #include "zfs_fletcher.h"
59 #include "libzfs_impl.h"
60 #include <zlib.h>
61 #include <sys/zio_checksum.h>
62 #include <sys/ddt.h>
63 #include <sys/socket.h>
64
/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);

/*
 * Forward declarations of file-local (static) functions that are
 * referenced before their definitions.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
    uint64_t *, const char *);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);

/*
 * All-zero checksum.  cksummer() compares the key of each write record
 * against it to decide whether a checksum still has to be computed.
 */
static const zio_cksum_t zero_cksum = { { 0 } };
75
/*
 * Argument block consumed by the cksummer() thread.
 */
typedef struct dedup_arg {
	int inputfd;		/* fd cksummer() reads records from */
	int outputfd;		/* fd cksummer() writes records to */
	libzfs_handle_t *dedup_hdl;	/* handle for allocation and errors */
} dedup_arg_t;
81
/*
 * Argument block consumed by send_progress_thread().
 */
typedef struct progress_arg {
	zfs_handle_t *pa_zhp;	/* dataset whose name is printed */
	int pa_fd;		/* passed as zc_cookie to SEND_PROGRESS */
	boolean_t pa_parsable;	/* tab-separated output when set */
} progress_arg_t;
87
/*
 * Identifies where a block was written.  cksummer() fills it from a write
 * record's drr_toguid, drr_object and drr_offset.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
93
/*
 * One entry of the dedup table.  Entries that hash to the same bucket are
 * chained through dde_next; ddt_update() matches on both dde_chksum and
 * dde_prop and hands back dde_ref on a hit.
 */
typedef struct dedup_entry {
	struct dedup_entry *dde_next;
	zio_cksum_t dde_chksum;
	uint64_t dde_prop;
	dataref_t dde_ref;
} dedup_entry_t;
100
/*
 * Sizing limits for the dedup table built by cksummer(): the table may use
 * up to MAX_DDT_PHYSMEM_PERCENT percent of physical memory, but the limit
 * is never set lower than SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define MAX_DDT_PHYSMEM_PERCENT 20
#define SMALLEST_POSSIBLE_MAX_DDT_MB 128
103
/*
 * The dedup table: a hash array of bucket chains plus bookkeeping used to
 * stop adding entries once the size limit is reached.
 */
typedef struct dedup_table {
	dedup_entry_t **dedup_hash_array;	/* bucket heads */
	umem_cache_t *ddecache;		/* allocator for dedup_entry_t */
	uint64_t max_ddt_size; /* max dedup table size in bytes */
	uint64_t cur_ddt_size; /* current dedup table size in bytes */
	uint64_t ddt_count;		/* number of entries added */
	int numhashbits;	/* low checksum bits used as bucket index */
	boolean_t ddt_full;	/* set once the "table full" error is noted */
} dedup_table_t;
113
/*
 * Return the 1-based position of the most significant set bit of n,
 * or 0 when n is zero.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		n >>= 1;
		nbits++;
	}
	return (nbits);
}
123
/*
 * Read a single item of len bytes from stream into buf.
 * Returns 1 when the whole item was read and 0 otherwise (fread() is
 * asked for exactly one item, so no other value is possible).
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
134
135 static void
136 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
137 zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
138 {
139 dedup_entry_t *dde;
140
141 if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
142 if (ddt->ddt_full == B_FALSE) {
143 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
144 "Dedup table full. Deduplication will continue "
145 "with existing table entries"));
146 ddt->ddt_full = B_TRUE;
147 }
148 return;
149 }
150
151 if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
152 != NULL) {
153 assert(*ddepp == NULL);
154 dde->dde_next = NULL;
155 dde->dde_chksum = *cs;
156 dde->dde_prop = prop;
157 dde->dde_ref = *dr;
158 *ddepp = dde;
159 ddt->cur_ddt_size += sizeof (dedup_entry_t);
160 ddt->ddt_count++;
161 }
162 }
163
164 /*
165 * Using the specified dedup table, do a lookup for an entry with
166 * the checksum cs. If found, return the block's reference info
167 * in *dr. Otherwise, insert a new entry in the dedup table, using
168 * the reference information specified by *dr.
169 *
170 * return value: true - entry was found
171 * false - entry was not found
172 */
173 static boolean_t
174 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
175 uint64_t prop, dataref_t *dr)
176 {
177 uint32_t hashcode;
178 dedup_entry_t **ddepp;
179
180 hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
181
182 for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
183 ddepp = &((*ddepp)->dde_next)) {
184 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
185 (*ddepp)->dde_prop == prop) {
186 *dr = (*ddepp)->dde_ref;
187 return (B_TRUE);
188 }
189 }
190 ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
191 return (B_FALSE);
192 }
193
/*
 * Write one replay record, followed by its optional payload, to outfd,
 * folding every byte written into the running checksum *zc.
 *
 * The record header is checksummed in two steps: first everything that
 * precedes the embedded checksum field, then the checksum field itself.
 * For every record type other than DRR_BEGIN the embedded field is set to
 * the running checksum value between those two steps.
 *
 * Returns 0 on success, or errno if a write() call reports failure.
 */
static int
dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
    zio_cksum_t *zc, int outfd)
{
	/* the embedded checksum must be the last field of the record */
	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
	fletcher_4_incremental_native(drr,
	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
	if (drr->drr_type != DRR_BEGIN) {
		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
		    drr_checksum.drr_checksum));
		drr->drr_u.drr_checksum.drr_checksum = *zc;
	}
	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
	    sizeof (zio_cksum_t), zc);
	/* NOTE(review): only -1 is checked; a short write goes unnoticed */
	if (write(outfd, drr, sizeof (*drr)) == -1)
		return (errno);
	if (payload_len != 0) {
		fletcher_4_incremental_native(payload, payload_len, zc);
		if (write(outfd, payload, payload_len) == -1)
			return (errno);
	}
	return (0);
}
218
219 /*
220 * This function is started in a separate thread when the dedup option
221 * has been requested. The main send thread determines the list of
222 * snapshots to be included in the send stream and makes the ioctl calls
223 * for each one. But instead of having the ioctl send the output to the
224 * the output fd specified by the caller of zfs_send()), the
225 * ioctl is told to direct the output to a pipe, which is read by the
226 * alternate thread running THIS function. This function does the
227 * dedup'ing by:
228 * 1. building a dedup table (the DDT)
229 * 2. doing checksums on each data block and inserting a record in the DDT
230 * 3. looking for matching checksums, and
231 * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever
232 * a duplicate block is found.
233 * The output of this function then goes to the output fd requested
234 * by the caller of zfs_send().
235 */
236 static void *
237 cksummer(void *arg)
238 {
239 dedup_arg_t *dda = arg;
240 char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
241 dmu_replay_record_t thedrr = { 0 };
242 dmu_replay_record_t *drr = &thedrr;
243 FILE *ofp;
244 int outfd;
245 dedup_table_t ddt;
246 zio_cksum_t stream_cksum;
247 uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
248 uint64_t numbuckets;
249
250 ddt.max_ddt_size =
251 MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
252 SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
253
254 numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
255
256 /*
257 * numbuckets must be a power of 2. Increase number to
258 * a power of 2 if necessary.
259 */
260 if (!ISP2(numbuckets))
261 numbuckets = 1 << high_order_bit(numbuckets);
262
263 ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
264 ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
265 NULL, NULL, NULL, NULL, NULL, 0);
266 ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
267 ddt.numhashbits = high_order_bit(numbuckets) - 1;
268 ddt.ddt_full = B_FALSE;
269
270 outfd = dda->outputfd;
271 ofp = fdopen(dda->inputfd, "r");
272 while (ssread(drr, sizeof (*drr), ofp) != 0) {
273
274 switch (drr->drr_type) {
275 case DRR_BEGIN:
276 {
277 struct drr_begin *drrb = &drr->drr_u.drr_begin;
278 int fflags;
279 int sz = 0;
280 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
281
282 ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
283
284 /* set the DEDUP feature flag for this stream */
285 fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
286 fflags |= (DMU_BACKUP_FEATURE_DEDUP |
287 DMU_BACKUP_FEATURE_DEDUPPROPS);
288 DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
289
290 if (drr->drr_payloadlen != 0) {
291 sz = drr->drr_payloadlen;
292
293 if (sz > SPA_MAXBLOCKSIZE) {
294 buf = zfs_realloc(dda->dedup_hdl, buf,
295 SPA_MAXBLOCKSIZE, sz);
296 }
297 (void) ssread(buf, sz, ofp);
298 if (ferror(stdin))
299 perror("fread");
300 }
301 if (dump_record(drr, buf, sz, &stream_cksum,
302 outfd) != 0)
303 goto out;
304 break;
305 }
306
307 case DRR_END:
308 {
309 struct drr_end *drre = &drr->drr_u.drr_end;
310 /* use the recalculated checksum */
311 drre->drr_checksum = stream_cksum;
312 if (dump_record(drr, NULL, 0, &stream_cksum,
313 outfd) != 0)
314 goto out;
315 break;
316 }
317
318 case DRR_OBJECT:
319 {
320 struct drr_object *drro = &drr->drr_u.drr_object;
321 if (drro->drr_bonuslen > 0) {
322 (void) ssread(buf,
323 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
324 ofp);
325 }
326 if (dump_record(drr, buf,
327 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
328 &stream_cksum, outfd) != 0)
329 goto out;
330 break;
331 }
332
333 case DRR_SPILL:
334 {
335 struct drr_spill *drrs = &drr->drr_u.drr_spill;
336 (void) ssread(buf, drrs->drr_length, ofp);
337 if (dump_record(drr, buf, drrs->drr_length,
338 &stream_cksum, outfd) != 0)
339 goto out;
340 break;
341 }
342
343 case DRR_FREEOBJECTS:
344 {
345 if (dump_record(drr, NULL, 0, &stream_cksum,
346 outfd) != 0)
347 goto out;
348 break;
349 }
350
351 case DRR_WRITE:
352 {
353 struct drr_write *drrw = &drr->drr_u.drr_write;
354 dataref_t dataref;
355
356 (void) ssread(buf, drrw->drr_length, ofp);
357
358 /*
359 * Use the existing checksum if it's dedup-capable,
360 * else calculate a SHA256 checksum for it.
361 */
362
363 if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
364 zero_cksum) ||
365 !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
366 zio_cksum_t tmpsha256;
367
368 zio_checksum_SHA256(buf,
369 drrw->drr_length, &tmpsha256);
370
371 drrw->drr_key.ddk_cksum.zc_word[0] =
372 BE_64(tmpsha256.zc_word[0]);
373 drrw->drr_key.ddk_cksum.zc_word[1] =
374 BE_64(tmpsha256.zc_word[1]);
375 drrw->drr_key.ddk_cksum.zc_word[2] =
376 BE_64(tmpsha256.zc_word[2]);
377 drrw->drr_key.ddk_cksum.zc_word[3] =
378 BE_64(tmpsha256.zc_word[3]);
379 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
380 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
381 }
382
383 dataref.ref_guid = drrw->drr_toguid;
384 dataref.ref_object = drrw->drr_object;
385 dataref.ref_offset = drrw->drr_offset;
386
387 if (ddt_update(dda->dedup_hdl, &ddt,
388 &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
389 &dataref)) {
390 dmu_replay_record_t wbr_drr = {0};
391 struct drr_write_byref *wbr_drrr =
392 &wbr_drr.drr_u.drr_write_byref;
393
394 /* block already present in stream */
395 wbr_drr.drr_type = DRR_WRITE_BYREF;
396
397 wbr_drrr->drr_object = drrw->drr_object;
398 wbr_drrr->drr_offset = drrw->drr_offset;
399 wbr_drrr->drr_length = drrw->drr_length;
400 wbr_drrr->drr_toguid = drrw->drr_toguid;
401 wbr_drrr->drr_refguid = dataref.ref_guid;
402 wbr_drrr->drr_refobject =
403 dataref.ref_object;
404 wbr_drrr->drr_refoffset =
405 dataref.ref_offset;
406
407 wbr_drrr->drr_checksumtype =
408 drrw->drr_checksumtype;
409 wbr_drrr->drr_checksumflags =
410 drrw->drr_checksumtype;
411 wbr_drrr->drr_key.ddk_cksum =
412 drrw->drr_key.ddk_cksum;
413 wbr_drrr->drr_key.ddk_prop =
414 drrw->drr_key.ddk_prop;
415
416 if (dump_record(&wbr_drr, NULL, 0,
417 &stream_cksum, outfd) != 0)
418 goto out;
419 } else {
420 /* block not previously seen */
421 if (dump_record(drr, buf, drrw->drr_length,
422 &stream_cksum, outfd) != 0)
423 goto out;
424 }
425 break;
426 }
427
428 case DRR_WRITE_EMBEDDED:
429 {
430 struct drr_write_embedded *drrwe =
431 &drr->drr_u.drr_write_embedded;
432 (void) ssread(buf,
433 P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
434 if (dump_record(drr, buf,
435 P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
436 &stream_cksum, outfd) != 0)
437 goto out;
438 break;
439 }
440
441 case DRR_FREE:
442 {
443 if (dump_record(drr, NULL, 0, &stream_cksum,
444 outfd) != 0)
445 goto out;
446 break;
447 }
448
449 default:
450 (void) fprintf(stderr, "INVALID record type 0x%x\n",
451 drr->drr_type);
452 /* should never happen, so assert */
453 assert(B_FALSE);
454 }
455 }
456 out:
457 umem_cache_destroy(ddt.ddecache);
458 free(ddt.dedup_hash_array);
459 free(buf);
460 (void) fclose(ofp);
461
462 return (NULL);
463 }
464
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 */

/*
 * One tree node per snapshot; the tree is ordered by fn_guid
 * (see fsavl_compare()).
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage */
	nvlist_t *fn_nvfs;	/* nvlist of the fs holding the snapshot */
	char *fn_snapname;	/* name of the snapshot's nvpair */
	uint64_t fn_guid;	/* snapshot guid (sort key) */
} fsavl_node_t;
474
475 static int
476 fsavl_compare(const void *arg1, const void *arg2)
477 {
478 const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
479 const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
480
481 return (AVL_CMP(fn1->fn_guid, fn2->fn_guid));
482 }
483
484 /*
485 * Given the GUID of a snapshot, find its containing filesystem and
486 * (optionally) name.
487 */
488 static nvlist_t *
489 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
490 {
491 fsavl_node_t fn_find;
492 fsavl_node_t *fn;
493
494 fn_find.fn_guid = snapguid;
495
496 fn = avl_find(avl, &fn_find, NULL);
497 if (fn) {
498 if (snapname)
499 *snapname = fn->fn_snapname;
500 return (fn->fn_nvfs);
501 }
502 return (NULL);
503 }
504
505 static void
506 fsavl_destroy(avl_tree_t *avl)
507 {
508 fsavl_node_t *fn;
509 void *cookie;
510
511 if (avl == NULL)
512 return;
513
514 cookie = NULL;
515 while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
516 free(fn);
517 avl_destroy(avl);
518 free(avl);
519 }
520
521 /*
522 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
523 */
524 static avl_tree_t *
525 fsavl_create(nvlist_t *fss)
526 {
527 avl_tree_t *fsavl;
528 nvpair_t *fselem = NULL;
529
530 if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
531 return (NULL);
532
533 avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
534 offsetof(fsavl_node_t, fn_node));
535
536 while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
537 nvlist_t *nvfs, *snaps;
538 nvpair_t *snapelem = NULL;
539
540 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
541 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
542
543 while ((snapelem =
544 nvlist_next_nvpair(snaps, snapelem)) != NULL) {
545 fsavl_node_t *fn;
546 uint64_t guid;
547
548 VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
549 if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
550 fsavl_destroy(fsavl);
551 return (NULL);
552 }
553 fn->fn_nvfs = nvfs;
554 fn->fn_snapname = nvpair_name(snapelem);
555 fn->fn_guid = guid;
556
557 /*
558 * Note: if there are multiple snaps with the
559 * same GUID, we ignore all but one.
560 */
561 if (avl_find(fsavl, fn, NULL) == NULL)
562 avl_add(fsavl, fn);
563 else
564 free(fn);
565 }
566 }
567
568 return (fsavl);
569 }
570
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 */

/*
 * State shared by send_iterate_fs() and send_iterate_snap() while the
 * header nvlist is gathered.
 */
typedef struct send_data {
	/*
	 * guid chosen by send_iterate_snap() for the filesystem last
	 * scanned; send_iterate_fs() records the value it sees on entry
	 * as "parentfromsnap" and restores it before returning.
	 */
	uint64_t parent_fromsnap_guid;
	nvlist_t *parent_snaps;	/* snapshot name -> guid, current fs */
	nvlist_t *fss;		/* result: guid string -> fs nvlist */
	nvlist_t *snapprops;	/* snapshot name -> props, current fs */
	const char *fromsnap;	/* compared with the part after '@' */
	const char *tosnap;	/* compared with the part after '@' */
	boolean_t recursive;	/* also iterate child filesystems */
	boolean_t seenfrom;	/* only consulted when !recursive */
	boolean_t seento;	/* only consulted when !recursive */

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;

/* defined after send_iterate_snap(), which calls it */
static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
610
611 static int
612 send_iterate_snap(zfs_handle_t *zhp, void *arg)
613 {
614 send_data_t *sd = arg;
615 uint64_t guid = zhp->zfs_dmustats.dds_guid;
616 char *snapname;
617 nvlist_t *nv;
618 boolean_t isfromsnap, istosnap, istosnapwithnofrom;
619
620 snapname = strrchr(zhp->zfs_name, '@')+1;
621 isfromsnap = (sd->fromsnap != NULL &&
622 strcmp(sd->fromsnap, snapname) == 0);
623 istosnap = (sd->tosnap != NULL && (strcmp(sd->tosnap, snapname) == 0));
624 istosnapwithnofrom = (istosnap && sd->fromsnap == NULL);
625
626 VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
627 /*
628 * NB: if there is no fromsnap here (it's a newly created fs in
629 * an incremental replication), we will substitute the tosnap.
630 */
631 if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap)) {
632 sd->parent_fromsnap_guid = guid;
633 }
634
635 if (!sd->recursive) {
636 if (!sd->seenfrom && isfromsnap) {
637 sd->seenfrom = B_TRUE;
638 zfs_close(zhp);
639 return (0);
640 }
641
642 if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
643 zfs_close(zhp);
644 return (0);
645 }
646
647 if (istosnap)
648 sd->seento = B_TRUE;
649 }
650
651 VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
652 send_iterate_prop(zhp, nv);
653 VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
654 nvlist_free(nv);
655
656 zfs_close(zhp);
657 return (0);
658 }
659
660 static void
661 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
662 {
663 nvpair_t *elem = NULL;
664
665 while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
666 char *propname = nvpair_name(elem);
667 zfs_prop_t prop = zfs_name_to_prop(propname);
668 nvlist_t *propnv;
669
670 if (!zfs_prop_user(propname)) {
671 /*
672 * Realistically, this should never happen. However,
673 * we want the ability to add DSL properties without
674 * needing to make incompatible version changes. We
675 * need to ignore unknown properties to allow older
676 * software to still send datasets containing these
677 * properties, with the unknown properties elided.
678 */
679 if (prop == ZPROP_INVAL)
680 continue;
681
682 if (zfs_prop_readonly(prop))
683 continue;
684 }
685
686 verify(nvpair_value_nvlist(elem, &propnv) == 0);
687 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
688 prop == ZFS_PROP_REFQUOTA ||
689 prop == ZFS_PROP_REFRESERVATION) {
690 char *source;
691 uint64_t value;
692 verify(nvlist_lookup_uint64(propnv,
693 ZPROP_VALUE, &value) == 0);
694 if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
695 continue;
696 /*
697 * May have no source before SPA_VERSION_RECVD_PROPS,
698 * but is still modifiable.
699 */
700 if (nvlist_lookup_string(propnv,
701 ZPROP_SOURCE, &source) == 0) {
702 if ((strcmp(source, zhp->zfs_name) != 0) &&
703 (strcmp(source,
704 ZPROP_SOURCE_VAL_RECVD) != 0))
705 continue;
706 }
707 } else {
708 char *source;
709 if (nvlist_lookup_string(propnv,
710 ZPROP_SOURCE, &source) != 0)
711 continue;
712 if ((strcmp(source, zhp->zfs_name) != 0) &&
713 (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
714 continue;
715 }
716
717 if (zfs_prop_user(propname) ||
718 zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
719 char *value;
720 verify(nvlist_lookup_string(propnv,
721 ZPROP_VALUE, &value) == 0);
722 VERIFY(0 == nvlist_add_string(nv, propname, value));
723 } else {
724 uint64_t value;
725 verify(nvlist_lookup_uint64(propnv,
726 ZPROP_VALUE, &value) == 0);
727 VERIFY(0 == nvlist_add_uint64(nv, propname, value));
728 }
729 }
730 }
731
732 /*
733 * recursively generate nvlists describing datasets. See comment
734 * for the data structure send_data_t above for description of contents
735 * of the nvlist.
736 */
737 static int
738 send_iterate_fs(zfs_handle_t *zhp, void *arg)
739 {
740 send_data_t *sd = arg;
741 nvlist_t *nvfs, *nv;
742 int rv = 0;
743 uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
744 uint64_t guid = zhp->zfs_dmustats.dds_guid;
745 char guidstring[64];
746
747 VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
748 VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
749 VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
750 sd->parent_fromsnap_guid));
751
752 if (zhp->zfs_dmustats.dds_origin[0]) {
753 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
754 zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
755 if (origin == NULL)
756 return (-1);
757 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
758 origin->zfs_dmustats.dds_guid));
759 }
760
761 /* iterate over props */
762 VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
763 send_iterate_prop(zhp, nv);
764 VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
765 nvlist_free(nv);
766
767 /* iterate over snaps, and set sd->parent_fromsnap_guid */
768 sd->parent_fromsnap_guid = 0;
769 VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
770 VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
771 (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
772 VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
773 VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
774 nvlist_free(sd->parent_snaps);
775 nvlist_free(sd->snapprops);
776
777 /* add this fs to nvlist */
778 (void) snprintf(guidstring, sizeof (guidstring),
779 "0x%llx", (longlong_t)guid);
780 VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
781 nvlist_free(nvfs);
782
783 /* iterate over children */
784 if (sd->recursive)
785 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
786
787 sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
788
789 zfs_close(zhp);
790 return (rv);
791 }
792
793 static int
794 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
795 const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
796 {
797 zfs_handle_t *zhp;
798 send_data_t sd = { 0 };
799 int error;
800
801 zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
802 if (zhp == NULL)
803 return (EZFS_BADTYPE);
804
805 VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
806 sd.fromsnap = fromsnap;
807 sd.tosnap = tosnap;
808 sd.recursive = recursive;
809
810 if ((error = send_iterate_fs(zhp, &sd)) != 0) {
811 nvlist_free(sd.fss);
812 if (avlp != NULL)
813 *avlp = NULL;
814 *nvlp = NULL;
815 return (error);
816 }
817
818 if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
819 nvlist_free(sd.fss);
820 *nvlp = NULL;
821 return (EZFS_NOMEM);
822 }
823
824 *nvlp = sd.fss;
825 return (0);
826 }
827
/*
 * Routines specific to "zfs send"
 */

/*
 * State carried through the per-snapshot send callbacks
 * (see dump_snapshot() and gather_holds()).
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t prevsnap_obj;	/* OBJSETID property of prevsnap */
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
	boolean_t large_block;
	int outfd;		/* handed to the progress thread as pa_fd */
	boolean_t err;
	nvlist_t *fss;
	nvlist_t *snapholds;	/* NULL when no holds are to be gathered */
	avl_tree_t *fsavl;	/* guid -> fs lookup used when replicating */
	snapfilter_cb_t *filter_cb;	/* optional per-snapshot filter */
	void *filter_cb_arg;
	nvlist_t *debugnv;
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];	/* value stored in snapholds */
	int cleanup_fd;
	uint64_t size;		/* running total of estimated sizes */
} send_dump_data_t;
852
853 static int
854 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
855 boolean_t fromorigin, uint64_t *sizep)
856 {
857 zfs_cmd_t zc = {"\0"};
858 libzfs_handle_t *hdl = zhp->zfs_hdl;
859
860 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
861 assert(fromsnap_obj == 0 || !fromorigin);
862
863 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
864 zc.zc_obj = fromorigin;
865 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
866 zc.zc_fromobj = fromsnap_obj;
867 zc.zc_guid = 1; /* estimate flag */
868
869 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
870 char errbuf[1024];
871 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
872 "warning: cannot estimate space for '%s'"), zhp->zfs_name);
873
874 switch (errno) {
875 case EXDEV:
876 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
877 "not an earlier snapshot from the same fs"));
878 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
879
880 case ENOENT:
881 if (zfs_dataset_exists(hdl, zc.zc_name,
882 ZFS_TYPE_SNAPSHOT)) {
883 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
884 "incremental source (@%s) does not exist"),
885 zc.zc_value);
886 }
887 return (zfs_error(hdl, EZFS_NOENT, errbuf));
888
889 case EDQUOT:
890 case EFBIG:
891 case EIO:
892 case ENOLINK:
893 case ENOSPC:
894 case ENOSTR:
895 case ENXIO:
896 case EPIPE:
897 case ERANGE:
898 case EFAULT:
899 case EROFS:
900 zfs_error_aux(hdl, strerror(errno));
901 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
902
903 default:
904 return (zfs_standard_error(hdl, errno, errbuf));
905 }
906 }
907
908 *sizep = zc.zc_objset_type;
909
910 return (0);
911 }
912
913 /*
914 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
915 * NULL) to the file descriptor specified by outfd.
916 */
917 static int
918 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
919 boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
920 nvlist_t *debugnv)
921 {
922 zfs_cmd_t zc = {"\0"};
923 libzfs_handle_t *hdl = zhp->zfs_hdl;
924 nvlist_t *thisdbg;
925
926 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
927 assert(fromsnap_obj == 0 || !fromorigin);
928
929 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
930 zc.zc_cookie = outfd;
931 zc.zc_obj = fromorigin;
932 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
933 zc.zc_fromobj = fromsnap_obj;
934 zc.zc_flags = flags;
935
936 VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
937 if (fromsnap && fromsnap[0] != '\0') {
938 VERIFY(0 == nvlist_add_string(thisdbg,
939 "fromsnap", fromsnap));
940 }
941
942 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
943 char errbuf[1024];
944 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
945 "warning: cannot send '%s'"), zhp->zfs_name);
946
947 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
948 if (debugnv) {
949 VERIFY(0 == nvlist_add_nvlist(debugnv,
950 zhp->zfs_name, thisdbg));
951 }
952 nvlist_free(thisdbg);
953
954 switch (errno) {
955 case EXDEV:
956 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
957 "not an earlier snapshot from the same fs"));
958 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
959
960 case ENOENT:
961 if (zfs_dataset_exists(hdl, zc.zc_name,
962 ZFS_TYPE_SNAPSHOT)) {
963 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
964 "incremental source (@%s) does not exist"),
965 zc.zc_value);
966 }
967 return (zfs_error(hdl, EZFS_NOENT, errbuf));
968
969 case EDQUOT:
970 case EFBIG:
971 case EIO:
972 case ENOLINK:
973 case ENOSPC:
974 case ENOSTR:
975 case ENXIO:
976 case EPIPE:
977 case ERANGE:
978 case EFAULT:
979 case EROFS:
980 zfs_error_aux(hdl, strerror(errno));
981 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
982
983 default:
984 return (zfs_standard_error(hdl, errno, errbuf));
985 }
986 }
987
988 if (debugnv)
989 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
990 nvlist_free(thisdbg);
991
992 return (0);
993 }
994
995 static void
996 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
997 {
998 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
999
1000 /*
1001 * zfs_send() only sets snapholds for sends that need them,
1002 * e.g. replication and doall.
1003 */
1004 if (sdd->snapholds == NULL)
1005 return;
1006
1007 fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1008 }
1009
1010 static void *
1011 send_progress_thread(void *arg)
1012 {
1013 progress_arg_t *pa = arg;
1014 zfs_cmd_t zc = {"\0"};
1015 zfs_handle_t *zhp = pa->pa_zhp;
1016 libzfs_handle_t *hdl = zhp->zfs_hdl;
1017 unsigned long long bytes;
1018 char buf[16];
1019 time_t t;
1020 struct tm *tm;
1021
1022 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1023
1024 if (!pa->pa_parsable)
1025 (void) fprintf(stderr, "TIME SENT SNAPSHOT\n");
1026
1027 /*
1028 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1029 */
1030 for (;;) {
1031 (void) sleep(1);
1032
1033 zc.zc_cookie = pa->pa_fd;
1034 if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1035 return ((void *)-1);
1036
1037 (void) time(&t);
1038 tm = localtime(&t);
1039 bytes = zc.zc_cookie;
1040
1041 if (pa->pa_parsable) {
1042 (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1043 tm->tm_hour, tm->tm_min, tm->tm_sec,
1044 bytes, zhp->zfs_name);
1045 } else {
1046 zfs_nicenum(bytes, buf, sizeof (buf));
1047 (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
1048 tm->tm_hour, tm->tm_min, tm->tm_sec,
1049 buf, zhp->zfs_name);
1050 }
1051 }
1052 }
1053
1054 static void
1055 send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1056 uint64_t size, boolean_t parsable)
1057 {
1058 if (parsable) {
1059 if (fromsnap != NULL) {
1060 (void) fprintf(fout, "incremental\t%s\t%s",
1061 fromsnap, tosnap);
1062 } else {
1063 (void) fprintf(fout, "full\t%s",
1064 tosnap);
1065 }
1066 } else {
1067 if (fromsnap != NULL) {
1068 if (strchr(fromsnap, '@') == NULL &&
1069 strchr(fromsnap, '#') == NULL) {
1070 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1071 "send from @%s to %s"),
1072 fromsnap, tosnap);
1073 } else {
1074 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1075 "send from %s to %s"),
1076 fromsnap, tosnap);
1077 }
1078 } else {
1079 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1080 "full send of %s"),
1081 tosnap);
1082 }
1083 }
1084
1085 if (size != 0) {
1086 if (parsable) {
1087 (void) fprintf(fout, "\t%llu",
1088 (longlong_t)size);
1089 } else {
1090 char buf[16];
1091 zfs_nicenum(size, buf, sizeof (buf));
1092 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1093 " estimated size is %s"), buf);
1094 }
1095 }
1096 (void) fprintf(fout, "\n");
1097 }
1098
1099 static int
1100 dump_snapshot(zfs_handle_t *zhp, void *arg)
1101 {
1102 send_dump_data_t *sdd = arg;
1103 progress_arg_t pa = { 0 };
1104 pthread_t tid;
1105 char *thissnap;
1106 int err;
1107 boolean_t isfromsnap, istosnap, fromorigin;
1108 boolean_t exclude = B_FALSE;
1109 FILE *fout = sdd->std_out ? stdout : stderr;
1110
1111 err = 0;
1112 thissnap = strchr(zhp->zfs_name, '@') + 1;
1113 isfromsnap = (sdd->fromsnap != NULL &&
1114 strcmp(sdd->fromsnap, thissnap) == 0);
1115
1116 if (!sdd->seenfrom && isfromsnap) {
1117 gather_holds(zhp, sdd);
1118 sdd->seenfrom = B_TRUE;
1119 (void) strcpy(sdd->prevsnap, thissnap);
1120 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1121 zfs_close(zhp);
1122 return (0);
1123 }
1124
1125 if (sdd->seento || !sdd->seenfrom) {
1126 zfs_close(zhp);
1127 return (0);
1128 }
1129
1130 istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1131 if (istosnap)
1132 sdd->seento = B_TRUE;
1133
1134 if (!sdd->doall && !isfromsnap && !istosnap) {
1135 if (sdd->replicate) {
1136 char *snapname;
1137 nvlist_t *snapprops;
1138 /*
1139 * Filter out all intermediate snapshots except origin
1140 * snapshots needed to replicate clones.
1141 */
1142 nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1143 zhp->zfs_dmustats.dds_guid, &snapname);
1144
1145 VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1146 "snapprops", &snapprops));
1147 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1148 thissnap, &snapprops));
1149 exclude = !nvlist_exists(snapprops, "is_clone_origin");
1150 } else {
1151 exclude = B_TRUE;
1152 }
1153 }
1154
1155 /*
1156 * If a filter function exists, call it to determine whether
1157 * this snapshot will be sent.
1158 */
1159 if (exclude || (sdd->filter_cb != NULL &&
1160 sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1161 /*
1162 * This snapshot is filtered out. Don't send it, and don't
1163 * set prevsnap_obj, so it will be as if this snapshot didn't
1164 * exist, and the next accepted snapshot will be sent as
1165 * an incremental from the last accepted one, or as the
1166 * first (and full) snapshot in the case of a replication,
1167 * non-incremental send.
1168 */
1169 zfs_close(zhp);
1170 return (0);
1171 }
1172
1173 gather_holds(zhp, sdd);
1174 fromorigin = sdd->prevsnap[0] == '\0' &&
1175 (sdd->fromorigin || sdd->replicate);
1176
1177 if (sdd->verbose) {
1178 uint64_t size = 0;
1179 (void) estimate_ioctl(zhp, sdd->prevsnap_obj,
1180 fromorigin, &size);
1181
1182 send_print_verbose(fout, zhp->zfs_name,
1183 sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1184 size, sdd->parsable);
1185 sdd->size += size;
1186 }
1187
1188 if (!sdd->dryrun) {
1189 /*
1190 * If progress reporting is requested, spawn a new thread to
1191 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1192 */
1193 if (sdd->progress) {
1194 pa.pa_zhp = zhp;
1195 pa.pa_fd = sdd->outfd;
1196 pa.pa_parsable = sdd->parsable;
1197
1198 if ((err = pthread_create(&tid, NULL,
1199 send_progress_thread, &pa))) {
1200 zfs_close(zhp);
1201 return (err);
1202 }
1203 }
1204
1205 enum lzc_send_flags flags = 0;
1206 if (sdd->large_block)
1207 flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1208 if (sdd->embed_data)
1209 flags |= LZC_SEND_FLAG_EMBED_DATA;
1210
1211 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1212 fromorigin, sdd->outfd, flags, sdd->debugnv);
1213
1214 if (sdd->progress) {
1215 (void) pthread_cancel(tid);
1216 (void) pthread_join(tid, NULL);
1217 }
1218 }
1219
1220 (void) strcpy(sdd->prevsnap, thissnap);
1221 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1222 zfs_close(zhp);
1223 return (err);
1224 }
1225
1226 static int
1227 dump_filesystem(zfs_handle_t *zhp, void *arg)
1228 {
1229 int rv = 0;
1230 send_dump_data_t *sdd = arg;
1231 boolean_t missingfrom = B_FALSE;
1232 zfs_cmd_t zc = {"\0"};
1233
1234 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1235 zhp->zfs_name, sdd->tosnap);
1236 if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1237 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1238 "WARNING: could not send %s@%s: does not exist\n"),
1239 zhp->zfs_name, sdd->tosnap);
1240 sdd->err = B_TRUE;
1241 return (0);
1242 }
1243
1244 if (sdd->replicate && sdd->fromsnap) {
1245 /*
1246 * If this fs does not have fromsnap, and we're doing
1247 * recursive, we need to send a full stream from the
1248 * beginning (or an incremental from the origin if this
1249 * is a clone). If we're doing non-recursive, then let
1250 * them get the error.
1251 */
1252 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1253 zhp->zfs_name, sdd->fromsnap);
1254 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1255 ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1256 missingfrom = B_TRUE;
1257 }
1258 }
1259
1260 sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1261 sdd->prevsnap_obj = 0;
1262 if (sdd->fromsnap == NULL || missingfrom)
1263 sdd->seenfrom = B_TRUE;
1264
1265 rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1266 if (!sdd->seenfrom) {
1267 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1268 "WARNING: could not send %s@%s:\n"
1269 "incremental source (%s@%s) does not exist\n"),
1270 zhp->zfs_name, sdd->tosnap,
1271 zhp->zfs_name, sdd->fromsnap);
1272 sdd->err = B_TRUE;
1273 } else if (!sdd->seento) {
1274 if (sdd->fromsnap) {
1275 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1276 "WARNING: could not send %s@%s:\n"
1277 "incremental source (%s@%s) "
1278 "is not earlier than it\n"),
1279 zhp->zfs_name, sdd->tosnap,
1280 zhp->zfs_name, sdd->fromsnap);
1281 } else {
1282 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1283 "WARNING: "
1284 "could not send %s@%s: does not exist\n"),
1285 zhp->zfs_name, sdd->tosnap);
1286 }
1287 sdd->err = B_TRUE;
1288 }
1289
1290 return (rv);
1291 }
1292
1293 static int
1294 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1295 {
1296 send_dump_data_t *sdd = arg;
1297 nvpair_t *fspair;
1298 boolean_t needagain, progress;
1299
1300 if (!sdd->replicate)
1301 return (dump_filesystem(rzhp, sdd));
1302
1303 /* Mark the clone origin snapshots. */
1304 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1305 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1306 nvlist_t *nvfs;
1307 uint64_t origin_guid = 0;
1308
1309 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1310 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1311 if (origin_guid != 0) {
1312 char *snapname;
1313 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1314 origin_guid, &snapname);
1315 if (origin_nv != NULL) {
1316 nvlist_t *snapprops;
1317 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1318 "snapprops", &snapprops));
1319 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1320 snapname, &snapprops));
1321 VERIFY(0 == nvlist_add_boolean(
1322 snapprops, "is_clone_origin"));
1323 }
1324 }
1325 }
1326 again:
1327 needagain = progress = B_FALSE;
1328 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1329 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1330 nvlist_t *fslist, *parent_nv;
1331 char *fsname;
1332 zfs_handle_t *zhp;
1333 int err;
1334 uint64_t origin_guid = 0;
1335 uint64_t parent_guid = 0;
1336
1337 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1338 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1339 continue;
1340
1341 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1342 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1343 (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1344 &parent_guid);
1345
1346 if (parent_guid != 0) {
1347 parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1348 if (!nvlist_exists(parent_nv, "sent")) {
1349 /* parent has not been sent; skip this one */
1350 needagain = B_TRUE;
1351 continue;
1352 }
1353 }
1354
1355 if (origin_guid != 0) {
1356 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1357 origin_guid, NULL);
1358 if (origin_nv != NULL &&
1359 !nvlist_exists(origin_nv, "sent")) {
1360 /*
1361 * origin has not been sent yet;
1362 * skip this clone.
1363 */
1364 needagain = B_TRUE;
1365 continue;
1366 }
1367 }
1368
1369 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1370 if (zhp == NULL)
1371 return (-1);
1372 err = dump_filesystem(zhp, sdd);
1373 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1374 progress = B_TRUE;
1375 zfs_close(zhp);
1376 if (err)
1377 return (err);
1378 }
1379 if (needagain) {
1380 assert(progress);
1381 goto again;
1382 }
1383
1384 /* clean out the sent flags in case we reuse this fss */
1385 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1386 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1387 nvlist_t *fslist;
1388
1389 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1390 (void) nvlist_remove_all(fslist, "sent");
1391 }
1392
1393 return (0);
1394 }
1395
1396 nvlist_t *
1397 zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1398 {
1399 unsigned int version;
1400 int nread, i;
1401 unsigned long long checksum, packed_len;
1402
1403 /*
1404 * Decode token header, which is:
1405 * <token version>-<checksum of payload>-<uncompressed payload length>
1406 * Note that the only supported token version is 1.
1407 */
1408 nread = sscanf(token, "%u-%llx-%llx-",
1409 &version, &checksum, &packed_len);
1410 if (nread != 3) {
1411 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1412 "resume token is corrupt (invalid format)"));
1413 return (NULL);
1414 }
1415
1416 if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1417 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1418 "resume token is corrupt (invalid version %u)"),
1419 version);
1420 return (NULL);
1421 }
1422
1423 /* convert hexadecimal representation to binary */
1424 token = strrchr(token, '-') + 1;
1425 int len = strlen(token) / 2;
1426 unsigned char *compressed = zfs_alloc(hdl, len);
1427 for (i = 0; i < len; i++) {
1428 nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1429 if (nread != 1) {
1430 free(compressed);
1431 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1432 "resume token is corrupt "
1433 "(payload is not hex-encoded)"));
1434 return (NULL);
1435 }
1436 }
1437
1438 /* verify checksum */
1439 zio_cksum_t cksum;
1440 fletcher_4_native_varsize(compressed, len, &cksum);
1441 if (cksum.zc_word[0] != checksum) {
1442 free(compressed);
1443 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1444 "resume token is corrupt (incorrect checksum)"));
1445 return (NULL);
1446 }
1447
1448 /* uncompress */
1449 void *packed = zfs_alloc(hdl, packed_len);
1450 uLongf packed_len_long = packed_len;
1451 if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1452 packed_len_long != packed_len) {
1453 free(packed);
1454 free(compressed);
1455 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1456 "resume token is corrupt (decompression failed)"));
1457 return (NULL);
1458 }
1459
1460 /* unpack nvlist */
1461 nvlist_t *nv;
1462 int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1463 free(packed);
1464 free(compressed);
1465 if (error != 0) {
1466 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1467 "resume token is corrupt (nvlist_unpack failed)"));
1468 return (NULL);
1469 }
1470 return (nv);
1471 }
1472
1473 int
1474 zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1475 const char *resume_token)
1476 {
1477 char errbuf[1024];
1478 char *toname;
1479 char *fromname = NULL;
1480 uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1481 zfs_handle_t *zhp;
1482 int error = 0;
1483 char name[ZFS_MAX_DATASET_NAME_LEN];
1484 enum lzc_send_flags lzc_flags = 0;
1485
1486 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1487 "cannot resume send"));
1488
1489 nvlist_t *resume_nvl =
1490 zfs_send_resume_token_to_nvlist(hdl, resume_token);
1491 if (resume_nvl == NULL) {
1492 /*
1493 * zfs_error_aux has already been set by
1494 * zfs_send_resume_token_to_nvlist
1495 */
1496 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1497 }
1498 if (flags->verbose) {
1499 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1500 "resume token contents:\n"));
1501 nvlist_print(stderr, resume_nvl);
1502 }
1503
1504 if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1505 nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1506 nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1507 nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1508 nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1509 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1510 "resume token is corrupt"));
1511 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1512 }
1513 fromguid = 0;
1514 (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1515
1516 if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1517 lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1518
1519 if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
1520 if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1521 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1522 "'%s' is no longer the same snapshot used in "
1523 "the initial send"), toname);
1524 } else {
1525 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1526 "'%s' used in the initial send no longer exists"),
1527 toname);
1528 }
1529 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1530 }
1531 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1532 if (zhp == NULL) {
1533 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1534 "unable to access '%s'"), name);
1535 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1536 }
1537
1538 if (fromguid != 0) {
1539 if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
1540 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1541 "incremental source %#llx no longer exists"),
1542 (longlong_t)fromguid);
1543 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1544 }
1545 fromname = name;
1546 }
1547
1548 if (flags->verbose) {
1549 uint64_t size = 0;
1550 error = lzc_send_space(zhp->zfs_name, fromname, &size);
1551 if (error == 0)
1552 size = MAX(0, (int64_t)(size - bytes));
1553 send_print_verbose(stderr, zhp->zfs_name, fromname,
1554 size, flags->parsable);
1555 }
1556
1557 if (!flags->dryrun) {
1558 progress_arg_t pa = { 0 };
1559 pthread_t tid;
1560 /*
1561 * If progress reporting is requested, spawn a new thread to
1562 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1563 */
1564 if (flags->progress) {
1565 pa.pa_zhp = zhp;
1566 pa.pa_fd = outfd;
1567 pa.pa_parsable = flags->parsable;
1568
1569 error = pthread_create(&tid, NULL,
1570 send_progress_thread, &pa);
1571 if (error != 0) {
1572 zfs_close(zhp);
1573 return (error);
1574 }
1575 }
1576
1577 error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
1578 lzc_flags, resumeobj, resumeoff);
1579
1580 if (flags->progress) {
1581 (void) pthread_cancel(tid);
1582 (void) pthread_join(tid, NULL);
1583 }
1584
1585 char errbuf[1024];
1586 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1587 "warning: cannot send '%s'"), zhp->zfs_name);
1588
1589 zfs_close(zhp);
1590
1591 switch (error) {
1592 case 0:
1593 return (0);
1594 case EXDEV:
1595 case ENOENT:
1596 case EDQUOT:
1597 case EFBIG:
1598 case EIO:
1599 case ENOLINK:
1600 case ENOSPC:
1601 case ENOSTR:
1602 case ENXIO:
1603 case EPIPE:
1604 case ERANGE:
1605 case EFAULT:
1606 case EROFS:
1607 zfs_error_aux(hdl, strerror(errno));
1608 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1609
1610 default:
1611 return (zfs_standard_error(hdl, errno, errbuf));
1612 }
1613 }
1614
1615
1616 zfs_close(zhp);
1617
1618 return (error);
1619 }
1620
1621 /*
1622 * Generate a send stream for the dataset identified by the argument zhp.
1623 *
1624 * The content of the send stream is the snapshot identified by
1625 * 'tosnap'. Incremental streams are requested in two ways:
1626 * - from the snapshot identified by "fromsnap" (if non-null) or
1627 * - from the origin of the dataset identified by zhp, which must
1628 * be a clone. In this case, "fromsnap" is null and "fromorigin"
1629 * is TRUE.
1630 *
1631 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1632 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1633 * if "replicate" is set. If "doall" is set, dump all the intermediate
1634 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1635 * case too. If "props" is set, send properties.
1636 */
1637 int
1638 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1639 sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1640 void *cb_arg, nvlist_t **debugnvp)
1641 {
1642 char errbuf[1024];
1643 send_dump_data_t sdd = { 0 };
1644 int err = 0;
1645 nvlist_t *fss = NULL;
1646 avl_tree_t *fsavl = NULL;
1647 static uint64_t holdseq;
1648 int spa_version;
1649 pthread_t tid = 0;
1650 int pipefd[2];
1651 dedup_arg_t dda = { 0 };
1652 int featureflags = 0;
1653 FILE *fout;
1654
1655 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1656 "cannot send '%s'"), zhp->zfs_name);
1657
1658 if (fromsnap && fromsnap[0] == '\0') {
1659 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1660 "zero-length incremental source"));
1661 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1662 }
1663
1664 if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1665 uint64_t version;
1666 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1667 if (version >= ZPL_VERSION_SA) {
1668 featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1669 }
1670 }
1671
1672 if (flags->dedup && !flags->dryrun) {
1673 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1674 DMU_BACKUP_FEATURE_DEDUPPROPS);
1675 if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd))) {
1676 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1677 return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1678 errbuf));
1679 }
1680 dda.outputfd = outfd;
1681 dda.inputfd = pipefd[1];
1682 dda.dedup_hdl = zhp->zfs_hdl;
1683 if ((err = pthread_create(&tid, NULL, cksummer, &dda))) {
1684 (void) close(pipefd[0]);
1685 (void) close(pipefd[1]);
1686 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1687 return (zfs_error(zhp->zfs_hdl,
1688 EZFS_THREADCREATEFAILED, errbuf));
1689 }
1690 }
1691
1692 if (flags->replicate || flags->doall || flags->props) {
1693 dmu_replay_record_t drr = { 0 };
1694 char *packbuf = NULL;
1695 size_t buflen = 0;
1696 zio_cksum_t zc;
1697
1698 ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
1699
1700 if (flags->replicate || flags->props) {
1701 nvlist_t *hdrnv;
1702
1703 VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1704 if (fromsnap) {
1705 VERIFY(0 == nvlist_add_string(hdrnv,
1706 "fromsnap", fromsnap));
1707 }
1708 VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1709 if (!flags->replicate) {
1710 VERIFY(0 == nvlist_add_boolean(hdrnv,
1711 "not_recursive"));
1712 }
1713
1714 err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1715 fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1716 if (err)
1717 goto err_out;
1718 VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1719 err = nvlist_pack(hdrnv, &packbuf, &buflen,
1720 NV_ENCODE_XDR, 0);
1721 if (debugnvp)
1722 *debugnvp = hdrnv;
1723 else
1724 nvlist_free(hdrnv);
1725 if (err)
1726 goto stderr_out;
1727 }
1728
1729 if (!flags->dryrun) {
1730 /* write first begin record */
1731 drr.drr_type = DRR_BEGIN;
1732 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1733 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1734 drr_versioninfo, DMU_COMPOUNDSTREAM);
1735 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1736 drr_versioninfo, featureflags);
1737 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1738 sizeof (drr.drr_u.drr_begin.drr_toname),
1739 "%s@%s", zhp->zfs_name, tosnap);
1740 drr.drr_payloadlen = buflen;
1741
1742 err = dump_record(&drr, packbuf, buflen, &zc, outfd);
1743 free(packbuf);
1744 if (err != 0)
1745 goto stderr_out;
1746
1747 /* write end record */
1748 bzero(&drr, sizeof (drr));
1749 drr.drr_type = DRR_END;
1750 drr.drr_u.drr_end.drr_checksum = zc;
1751 err = write(outfd, &drr, sizeof (drr));
1752 if (err == -1) {
1753 err = errno;
1754 goto stderr_out;
1755 }
1756
1757 err = 0;
1758 }
1759 }
1760
1761 /* dump each stream */
1762 sdd.fromsnap = fromsnap;
1763 sdd.tosnap = tosnap;
1764 if (tid != 0)
1765 sdd.outfd = pipefd[0];
1766 else
1767 sdd.outfd = outfd;
1768 sdd.replicate = flags->replicate;
1769 sdd.doall = flags->doall;
1770 sdd.fromorigin = flags->fromorigin;
1771 sdd.fss = fss;
1772 sdd.fsavl = fsavl;
1773 sdd.verbose = flags->verbose;
1774 sdd.parsable = flags->parsable;
1775 sdd.progress = flags->progress;
1776 sdd.dryrun = flags->dryrun;
1777 sdd.large_block = flags->largeblock;
1778 sdd.embed_data = flags->embed_data;
1779 sdd.filter_cb = filter_func;
1780 sdd.filter_cb_arg = cb_arg;
1781 if (debugnvp)
1782 sdd.debugnv = *debugnvp;
1783 if (sdd.verbose && sdd.dryrun)
1784 sdd.std_out = B_TRUE;
1785 fout = sdd.std_out ? stdout : stderr;
1786
1787 /*
1788 * Some flags require that we place user holds on the datasets that are
1789 * being sent so they don't get destroyed during the send. We can skip
1790 * this step if the pool is imported read-only since the datasets cannot
1791 * be destroyed.
1792 */
1793 if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1794 ZPOOL_PROP_READONLY, NULL) &&
1795 zfs_spa_version(zhp, &spa_version) == 0 &&
1796 spa_version >= SPA_VERSION_USERREFS &&
1797 (flags->doall || flags->replicate)) {
1798 ++holdseq;
1799 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1800 ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1801 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
1802 if (sdd.cleanup_fd < 0) {
1803 err = errno;
1804 goto stderr_out;
1805 }
1806 sdd.snapholds = fnvlist_alloc();
1807 } else {
1808 sdd.cleanup_fd = -1;
1809 sdd.snapholds = NULL;
1810 }
1811 if (flags->verbose || sdd.snapholds != NULL) {
1812 /*
1813 * Do a verbose no-op dry run to get all the verbose output
1814 * or to gather snapshot hold's before generating any data,
1815 * then do a non-verbose real run to generate the streams.
1816 */
1817 sdd.dryrun = B_TRUE;
1818 err = dump_filesystems(zhp, &sdd);
1819
1820 if (err != 0)
1821 goto stderr_out;
1822
1823 if (flags->verbose) {
1824 if (flags->parsable) {
1825 (void) fprintf(fout, "size\t%llu\n",
1826 (longlong_t)sdd.size);
1827 } else {
1828 char buf[16];
1829 zfs_nicenum(sdd.size, buf, sizeof (buf));
1830 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1831 "total estimated size is %s\n"), buf);
1832 }
1833 }
1834
1835 /* Ensure no snaps found is treated as an error. */
1836 if (!sdd.seento) {
1837 err = ENOENT;
1838 goto err_out;
1839 }
1840
1841 /* Skip the second run if dryrun was requested. */
1842 if (flags->dryrun)
1843 goto err_out;
1844
1845 if (sdd.snapholds != NULL) {
1846 err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1847 if (err != 0)
1848 goto stderr_out;
1849
1850 fnvlist_free(sdd.snapholds);
1851 sdd.snapholds = NULL;
1852 }
1853
1854 sdd.dryrun = B_FALSE;
1855 sdd.verbose = B_FALSE;
1856 }
1857
1858 err = dump_filesystems(zhp, &sdd);
1859 fsavl_destroy(fsavl);
1860 nvlist_free(fss);
1861
1862 /* Ensure no snaps found is treated as an error. */
1863 if (err == 0 && !sdd.seento)
1864 err = ENOENT;
1865
1866 if (tid != 0) {
1867 if (err != 0)
1868 (void) pthread_cancel(tid);
1869 (void) close(pipefd[0]);
1870 (void) pthread_join(tid, NULL);
1871 }
1872
1873 if (sdd.cleanup_fd != -1) {
1874 VERIFY(0 == close(sdd.cleanup_fd));
1875 sdd.cleanup_fd = -1;
1876 }
1877
1878 if (!flags->dryrun && (flags->replicate || flags->doall ||
1879 flags->props)) {
1880 /*
1881 * write final end record. NB: want to do this even if
1882 * there was some error, because it might not be totally
1883 * failed.
1884 */
1885 dmu_replay_record_t drr = { 0 };
1886 drr.drr_type = DRR_END;
1887 if (write(outfd, &drr, sizeof (drr)) == -1) {
1888 return (zfs_standard_error(zhp->zfs_hdl,
1889 errno, errbuf));
1890 }
1891 }
1892
1893 return (err || sdd.err);
1894
1895 stderr_out:
1896 err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1897 err_out:
1898 fsavl_destroy(fsavl);
1899 nvlist_free(fss);
1900 fnvlist_free(sdd.snapholds);
1901
1902 if (sdd.cleanup_fd != -1)
1903 VERIFY(0 == close(sdd.cleanup_fd));
1904 if (tid != 0) {
1905 (void) pthread_cancel(tid);
1906 (void) close(pipefd[0]);
1907 (void) pthread_join(tid, NULL);
1908 }
1909 return (err);
1910 }
1911
1912 int
1913 zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
1914 enum lzc_send_flags flags)
1915 {
1916 int err;
1917 libzfs_handle_t *hdl = zhp->zfs_hdl;
1918
1919 char errbuf[1024];
1920 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1921 "warning: cannot send '%s'"), zhp->zfs_name);
1922
1923 err = lzc_send(zhp->zfs_name, from, fd, flags);
1924 if (err != 0) {
1925 switch (errno) {
1926 case EXDEV:
1927 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1928 "not an earlier snapshot from the same fs"));
1929 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1930
1931 case ENOENT:
1932 case ESRCH:
1933 if (lzc_exists(zhp->zfs_name)) {
1934 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1935 "incremental source (%s) does not exist"),
1936 from);
1937 }
1938 return (zfs_error(hdl, EZFS_NOENT, errbuf));
1939
1940 case EBUSY:
1941 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1942 "target is busy; if a filesystem, "
1943 "it must not be mounted"));
1944 return (zfs_error(hdl, EZFS_BUSY, errbuf));
1945
1946 case EDQUOT:
1947 case EFBIG:
1948 case EIO:
1949 case ENOLINK:
1950 case ENOSPC:
1951 case ENOSTR:
1952 case ENXIO:
1953 case EPIPE:
1954 case ERANGE:
1955 case EFAULT:
1956 case EROFS:
1957 zfs_error_aux(hdl, strerror(errno));
1958 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1959
1960 default:
1961 return (zfs_standard_error(hdl, errno, errbuf));
1962 }
1963 }
1964 return (err != 0);
1965 }
1966
1967 /*
1968 * Routines specific to "zfs recv"
1969 */
1970
1971 static int
1972 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1973 boolean_t byteswap, zio_cksum_t *zc)
1974 {
1975 char *cp = buf;
1976 int rv;
1977 int len = ilen;
1978
1979 assert(ilen <= SPA_MAXBLOCKSIZE);
1980
1981 do {
1982 rv = read(fd, cp, len);
1983 cp += rv;
1984 len -= rv;
1985 } while (rv > 0);
1986
1987 if (rv < 0 || len != 0) {
1988 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1989 "failed to read from stream"));
1990 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1991 "cannot receive")));
1992 }
1993
1994 if (zc) {
1995 if (byteswap)
1996 fletcher_4_incremental_byteswap(buf, ilen, zc);
1997 else
1998 fletcher_4_incremental_native(buf, ilen, zc);
1999 }
2000 return (0);
2001 }
2002
2003 static int
2004 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2005 boolean_t byteswap, zio_cksum_t *zc)
2006 {
2007 char *buf;
2008 int err;
2009
2010 buf = zfs_alloc(hdl, len);
2011 if (buf == NULL)
2012 return (ENOMEM);
2013
2014 err = recv_read(hdl, fd, buf, len, byteswap, zc);
2015 if (err != 0) {
2016 free(buf);
2017 return (err);
2018 }
2019
2020 err = nvlist_unpack(buf, len, nvp, 0);
2021 free(buf);
2022 if (err != 0) {
2023 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2024 "stream (malformed nvlist)"));
2025 return (EINVAL);
2026 }
2027 return (0);
2028 }
2029
2030 static int
2031 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2032 int baselen, char *newname, recvflags_t *flags)
2033 {
2034 static int seq;
2035 zfs_cmd_t zc = {"\0"};
2036 int err;
2037 prop_changelist_t *clp;
2038 zfs_handle_t *zhp;
2039
2040 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2041 if (zhp == NULL)
2042 return (-1);
2043 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2044 flags->force ? MS_FORCE : 0);
2045 zfs_close(zhp);
2046 if (clp == NULL)
2047 return (-1);
2048 err = changelist_prefix(clp);
2049 if (err)
2050 return (err);
2051
2052 zc.zc_objset_type = DMU_OST_ZFS;
2053 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2054
2055 if (tryname) {
2056 (void) strcpy(newname, tryname);
2057
2058 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
2059
2060 if (flags->verbose) {
2061 (void) printf("attempting rename %s to %s\n",
2062 zc.zc_name, zc.zc_value);
2063 }
2064 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2065 if (err == 0)
2066 changelist_rename(clp, name, tryname);
2067 } else {
2068 err = ENOENT;
2069 }
2070
2071 if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2072 seq++;
2073
2074 (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2075 "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2076 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
2077
2078 if (flags->verbose) {
2079 (void) printf("failed - trying rename %s to %s\n",
2080 zc.zc_name, zc.zc_value);
2081 }
2082 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2083 if (err == 0)
2084 changelist_rename(clp, name, newname);
2085 if (err && flags->verbose) {
2086 (void) printf("failed (%u) - "
2087 "will try again on next pass\n", errno);
2088 }
2089 err = EAGAIN;
2090 } else if (flags->verbose) {
2091 if (err == 0)
2092 (void) printf("success\n");
2093 else
2094 (void) printf("failed (%u)\n", errno);
2095 }
2096
2097 (void) changelist_postfix(clp);
2098 changelist_free(clp);
2099
2100 return (err);
2101 }
2102
2103 static int
2104 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2105 char *newname, recvflags_t *flags)
2106 {
2107 zfs_cmd_t zc = {"\0"};
2108 int err = 0;
2109 prop_changelist_t *clp;
2110 zfs_handle_t *zhp;
2111 boolean_t defer = B_FALSE;
2112 int spa_version;
2113
2114 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2115 if (zhp == NULL)
2116 return (-1);
2117 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2118 flags->force ? MS_FORCE : 0);
2119 if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2120 zfs_spa_version(zhp, &spa_version) == 0 &&
2121 spa_version >= SPA_VERSION_USERREFS)
2122 defer = B_TRUE;
2123 zfs_close(zhp);
2124 if (clp == NULL)
2125 return (-1);
2126 err = changelist_prefix(clp);
2127 if (err)
2128 return (err);
2129
2130 zc.zc_objset_type = DMU_OST_ZFS;
2131 zc.zc_defer_destroy = defer;
2132 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2133
2134 if (flags->verbose)
2135 (void) printf("attempting destroy %s\n", zc.zc_name);
2136 err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
2137 if (err == 0) {
2138 if (flags->verbose)
2139 (void) printf("success\n");
2140 changelist_remove(clp, zc.zc_name);
2141 }
2142
2143 (void) changelist_postfix(clp);
2144 changelist_free(clp);
2145
2146 /*
2147 * Deferred destroy might destroy the snapshot or only mark it to be
2148 * destroyed later, and it returns success in either case.
2149 */
2150 if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2151 ZFS_TYPE_SNAPSHOT))) {
2152 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2153 }
2154
2155 return (err);
2156 }
2157
2158 typedef struct guid_to_name_data {
2159 uint64_t guid;
2160 boolean_t bookmark_ok;
2161 char *name;
2162 char *skip;
2163 } guid_to_name_data_t;
2164
2165 static int
2166 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
2167 {
2168 guid_to_name_data_t *gtnd = arg;
2169 const char *slash;
2170 int err;
2171
2172 if (gtnd->skip != NULL &&
2173 (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2174 strcmp(slash + 1, gtnd->skip) == 0) {
2175 zfs_close(zhp);
2176 return (0);
2177 }
2178
2179 if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
2180 (void) strcpy(gtnd->name, zhp->zfs_name);
2181 zfs_close(zhp);
2182 return (EEXIST);
2183 }
2184
2185 err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2186 if (err != EEXIST && gtnd->bookmark_ok)
2187 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2188 zfs_close(zhp);
2189 return (err);
2190 }
2191
2192 /*
2193 * Attempt to find the local dataset associated with this guid. In the case of
2194 * multiple matches, we attempt to find the "best" match by searching
2195 * progressively larger portions of the hierarchy. This allows one to send a
2196 * tree of datasets individually and guarantee that we will find the source
2197 * guid within that hierarchy, even if there are multiple matches elsewhere.
2198 */
2199 static int
2200 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
2201 boolean_t bookmark_ok, char *name)
2202 {
2203 char pname[ZFS_MAX_DATASET_NAME_LEN];
2204 guid_to_name_data_t gtnd;
2205
2206 gtnd.guid = guid;
2207 gtnd.bookmark_ok = bookmark_ok;
2208 gtnd.name = name;
2209 gtnd.skip = NULL;
2210
2211 /*
2212 * Search progressively larger portions of the hierarchy, starting
2213 * with the filesystem specified by 'parent'. This will
2214 * select the "most local" version of the origin snapshot in the case
2215 * that there are multiple matching snapshots in the system.
2216 */
2217 (void) strlcpy(pname, parent, sizeof (pname));
2218 char *cp = strrchr(pname, '@');
2219 if (cp == NULL)
2220 cp = strchr(pname, '\0');
2221 for (; cp != NULL; cp = strrchr(pname, '/')) {
2222 /* Chop off the last component and open the parent */
2223 *cp = '\0';
2224 zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
2225
2226 if (zhp == NULL)
2227 continue;
2228 int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
2229 if (err != EEXIST)
2230 err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
2231 if (err != EEXIST && bookmark_ok)
2232 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
2233 zfs_close(zhp);
2234 if (err == EEXIST)
2235 return (0);
2236
2237 /*
2238 * Remember the last portion of the dataset so we skip it next
2239 * time through (as we've already searched that portion of the
2240 * hierarchy).
2241 */
2242 gtnd.skip = strrchr(pname, '/') + 1;
2243 }
2244
2245 return (ENOENT);
2246 }
2247
2248 /*
2249 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
2250 * guid1 is after guid2.
2251 */
2252 static int
2253 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
2254 uint64_t guid1, uint64_t guid2)
2255 {
2256 nvlist_t *nvfs;
2257 char *fsname = NULL, *snapname = NULL;
2258 char buf[ZFS_MAX_DATASET_NAME_LEN];
2259 int rv;
2260 zfs_handle_t *guid1hdl, *guid2hdl;
2261 uint64_t create1, create2;
2262
2263 if (guid2 == 0)
2264 return (0);
2265 if (guid1 == 0)
2266 return (1);
2267
2268 nvfs = fsavl_find(avl, guid1, &snapname);
2269 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2270 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2271 guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2272 if (guid1hdl == NULL)
2273 return (-1);
2274
2275 nvfs = fsavl_find(avl, guid2, &snapname);
2276 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2277 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2278 guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2279 if (guid2hdl == NULL) {
2280 zfs_close(guid1hdl);
2281 return (-1);
2282 }
2283
2284 create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2285 create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2286
2287 if (create1 < create2)
2288 rv = -1;
2289 else if (create1 > create2)
2290 rv = +1;
2291 else
2292 rv = 0;
2293
2294 zfs_close(guid1hdl);
2295 zfs_close(guid2hdl);
2296
2297 return (rv);
2298 }
2299
2300 static int
2301 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2302 recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2303 nvlist_t *renamed)
2304 {
2305 nvlist_t *local_nv, *deleted = NULL;
2306 avl_tree_t *local_avl;
2307 nvpair_t *fselem, *nextfselem;
2308 char *fromsnap;
2309 char newname[ZFS_MAX_DATASET_NAME_LEN];
2310 char guidname[32];
2311 int error;
2312 boolean_t needagain, progress, recursive;
2313 char *s1, *s2;
2314
2315 VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2316
2317 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2318 ENOENT);
2319
2320 if (flags->dryrun)
2321 return (0);
2322
2323 again:
2324 needagain = progress = B_FALSE;
2325
2326 VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2327
2328 if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2329 recursive, &local_nv, &local_avl)) != 0)
2330 return (error);
2331
2332 /*
2333 * Process deletes and renames
2334 */
2335 for (fselem = nvlist_next_nvpair(local_nv, NULL);
2336 fselem; fselem = nextfselem) {
2337 nvlist_t *nvfs, *snaps;
2338 nvlist_t *stream_nvfs = NULL;
2339 nvpair_t *snapelem, *nextsnapelem;
2340 uint64_t fromguid = 0;
2341 uint64_t originguid = 0;
2342 uint64_t stream_originguid = 0;
2343 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2344 char *fsname, *stream_fsname;
2345
2346 nextfselem = nvlist_next_nvpair(local_nv, fselem);
2347
2348 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2349 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2350 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2351 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2352 &parent_fromsnap_guid));
2353 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2354
2355 /*
2356 * First find the stream's fs, so we can check for
2357 * a different origin (due to "zfs promote")
2358 */
2359 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2360 snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2361 uint64_t thisguid;
2362
2363 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2364 stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2365
2366 if (stream_nvfs != NULL)
2367 break;
2368 }
2369
2370 /* check for promote */
2371 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
2372 &stream_originguid);
2373 if (stream_nvfs && originguid != stream_originguid) {
2374 switch (created_before(hdl, local_avl,
2375 stream_originguid, originguid)) {
2376 case 1: {
2377 /* promote it! */
2378 zfs_cmd_t zc = {"\0"};
2379 nvlist_t *origin_nvfs;
2380 char *origin_fsname;
2381
2382 if (flags->verbose)
2383 (void) printf("promoting %s\n", fsname);
2384
2385 origin_nvfs = fsavl_find(local_avl, originguid,
2386 NULL);
2387 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2388 "name", &origin_fsname));
2389 (void) strlcpy(zc.zc_value, origin_fsname,
2390 sizeof (zc.zc_value));
2391 (void) strlcpy(zc.zc_name, fsname,
2392 sizeof (zc.zc_name));
2393 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2394 if (error == 0)
2395 progress = B_TRUE;
2396 break;
2397 }
2398 default:
2399 break;
2400 case -1:
2401 fsavl_destroy(local_avl);
2402 nvlist_free(local_nv);
2403 return (-1);
2404 }
2405 /*
2406 * We had/have the wrong origin, therefore our
2407 * list of snapshots is wrong. Need to handle
2408 * them on the next pass.
2409 */
2410 needagain = B_TRUE;
2411 continue;
2412 }
2413
2414 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2415 snapelem; snapelem = nextsnapelem) {
2416 uint64_t thisguid;
2417 char *stream_snapname;
2418 nvlist_t *found, *props;
2419
2420 nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2421
2422 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2423 found = fsavl_find(stream_avl, thisguid,
2424 &stream_snapname);
2425
2426 /* check for delete */
2427 if (found == NULL) {
2428 char name[ZFS_MAX_DATASET_NAME_LEN];
2429
2430 if (!flags->force)
2431 continue;
2432
2433 (void) snprintf(name, sizeof (name), "%s@%s",
2434 fsname, nvpair_name(snapelem));
2435
2436 error = recv_destroy(hdl, name,
2437 strlen(fsname)+1, newname, flags);
2438 if (error)
2439 needagain = B_TRUE;
2440 else
2441 progress = B_TRUE;
2442 sprintf(guidname, "%llu",
2443 (u_longlong_t)thisguid);
2444 nvlist_add_boolean(deleted, guidname);
2445 continue;
2446 }
2447
2448 stream_nvfs = found;
2449
2450 if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2451 &props) && 0 == nvlist_lookup_nvlist(props,
2452 stream_snapname, &props)) {
2453 zfs_cmd_t zc = {"\0"};
2454
2455 zc.zc_cookie = B_TRUE; /* received */
2456 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2457 "%s@%s", fsname, nvpair_name(snapelem));
2458 if (zcmd_write_src_nvlist(hdl, &zc,
2459 props) == 0) {
2460 (void) zfs_ioctl(hdl,
2461 ZFS_IOC_SET_PROP, &zc);
2462 zcmd_free_nvlists(&zc);
2463 }
2464 }
2465
2466 /* check for different snapname */
2467 if (strcmp(nvpair_name(snapelem),
2468 stream_snapname) != 0) {
2469 char name[ZFS_MAX_DATASET_NAME_LEN];
2470 char tryname[ZFS_MAX_DATASET_NAME_LEN];
2471
2472 (void) snprintf(name, sizeof (name), "%s@%s",
2473 fsname, nvpair_name(snapelem));
2474 (void) snprintf(tryname, sizeof (name), "%s@%s",
2475 fsname, stream_snapname);
2476
2477 error = recv_rename(hdl, name, tryname,
2478 strlen(fsname)+1, newname, flags);
2479 if (error)
2480 needagain = B_TRUE;
2481 else
2482 progress = B_TRUE;
2483 }
2484
2485 if (strcmp(stream_snapname, fromsnap) == 0)
2486 fromguid = thisguid;
2487 }
2488
2489 /* check for delete */
2490 if (stream_nvfs == NULL) {
2491 if (!flags->force)
2492 continue;
2493
2494 error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2495 newname, flags);
2496 if (error)
2497 needagain = B_TRUE;
2498 else
2499 progress = B_TRUE;
2500 sprintf(guidname, "%llu",
2501 (u_longlong_t) parent_fromsnap_guid);
2502 nvlist_add_boolean(deleted, guidname);
2503 continue;
2504 }
2505
2506 if (fromguid == 0) {
2507 if (flags->verbose) {
2508 (void) printf("local fs %s does not have "
2509 "fromsnap (%s in stream); must have "
2510 "been deleted locally; ignoring\n",
2511 fsname, fromsnap);
2512 }
2513 continue;
2514 }
2515
2516 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2517 "name", &stream_fsname));
2518 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2519 "parentfromsnap", &stream_parent_fromsnap_guid));
2520
2521 s1 = strrchr(fsname, '/');
2522 s2 = strrchr(stream_fsname, '/');
2523
2524 /*
2525 * Check if we're going to rename based on parent guid change
2526 * and the current parent guid was also deleted. If it was then
2527 * rename will fail and is likely unneeded, so avoid this and
2528 * force an early retry to determine the new
2529 * parent_fromsnap_guid.
2530 */
2531 if (stream_parent_fromsnap_guid != 0 &&
2532 parent_fromsnap_guid != 0 &&
2533 stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2534 sprintf(guidname, "%llu",
2535 (u_longlong_t) parent_fromsnap_guid);
2536 if (nvlist_exists(deleted, guidname)) {
2537 progress = B_TRUE;
2538 needagain = B_TRUE;
2539 goto doagain;
2540 }
2541 }
2542
2543 /*
2544 * Check for rename. If the exact receive path is specified, it
2545 * does not count as a rename, but we still need to check the
2546 * datasets beneath it.
2547 */
2548 if ((stream_parent_fromsnap_guid != 0 &&
2549 parent_fromsnap_guid != 0 &&
2550 stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2551 ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2552 (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2553 nvlist_t *parent;
2554 char tryname[ZFS_MAX_DATASET_NAME_LEN];
2555
2556 parent = fsavl_find(local_avl,
2557 stream_parent_fromsnap_guid, NULL);
2558 /*
2559 * NB: parent might not be found if we used the
2560 * tosnap for stream_parent_fromsnap_guid,
2561 * because the parent is a newly-created fs;
2562 * we'll be able to rename it after we recv the
2563 * new fs.
2564 */
2565 if (parent != NULL) {
2566 char *pname;
2567
2568 VERIFY(0 == nvlist_lookup_string(parent, "name",
2569 &pname));
2570 (void) snprintf(tryname, sizeof (tryname),
2571 "%s%s", pname, strrchr(stream_fsname, '/'));
2572 } else {
2573 tryname[0] = '\0';
2574 if (flags->verbose) {
2575 (void) printf("local fs %s new parent "
2576 "not found\n", fsname);
2577 }
2578 }
2579
2580 newname[0] = '\0';
2581
2582 error = recv_rename(hdl, fsname, tryname,
2583 strlen(tofs)+1, newname, flags);
2584
2585 if (renamed != NULL && newname[0] != '\0') {
2586 VERIFY(0 == nvlist_add_boolean(renamed,
2587 newname));
2588 }
2589
2590 if (error)
2591 needagain = B_TRUE;
2592 else
2593 progress = B_TRUE;
2594 }
2595 }
2596
2597 doagain:
2598 fsavl_destroy(local_avl);
2599 nvlist_free(local_nv);
2600 nvlist_free(deleted);
2601
2602 if (needagain && progress) {
2603 /* do another pass to fix up temporary names */
2604 if (flags->verbose)
2605 (void) printf("another pass:\n");
2606 goto again;
2607 }
2608
2609 return (needagain);
2610 }
2611
2612 static int
2613 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2614 recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2615 char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2616 {
2617 nvlist_t *stream_nv = NULL;
2618 avl_tree_t *stream_avl = NULL;
2619 char *fromsnap = NULL;
2620 char *sendsnap = NULL;
2621 char *cp;
2622 char tofs[ZFS_MAX_DATASET_NAME_LEN];
2623 char sendfs[ZFS_MAX_DATASET_NAME_LEN];
2624 char errbuf[1024];
2625 dmu_replay_record_t drre;
2626 int error;
2627 boolean_t anyerr = B_FALSE;
2628 boolean_t softerr = B_FALSE;
2629 boolean_t recursive;
2630
2631 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2632 "cannot receive"));
2633
2634 assert(drr->drr_type == DRR_BEGIN);
2635 assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2636 assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2637 DMU_COMPOUNDSTREAM);
2638
2639 /*
2640 * Read in the nvlist from the stream.
2641 */
2642 if (drr->drr_payloadlen != 0) {
2643 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2644 &stream_nv, flags->byteswap, zc);
2645 if (error) {
2646 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2647 goto out;
2648 }
2649 }
2650
2651 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2652 ENOENT);
2653
2654 if (recursive && strchr(destname, '@')) {
2655 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2656 "cannot specify snapshot name for multi-snapshot stream"));
2657 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2658 goto out;
2659 }
2660
2661 /*
2662 * Read in the end record and verify checksum.
2663 */
2664 if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2665 flags->byteswap, NULL)))
2666 goto out;
2667 if (flags->byteswap) {
2668 drre.drr_type = BSWAP_32(drre.drr_type);
2669 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2670 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2671 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2672 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2673 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2674 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2675 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2676 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2677 }
2678 if (drre.drr_type != DRR_END) {
2679 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2680 goto out;
2681 }
2682 if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2683 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2684 "incorrect header checksum"));
2685 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2686 goto out;
2687 }
2688
2689 (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2690
2691 if (drr->drr_payloadlen != 0) {
2692 nvlist_t *stream_fss;
2693
2694 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2695 &stream_fss));
2696 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2697 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2698 "couldn't allocate avl tree"));
2699 error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2700 goto out;
2701 }
2702
2703 if (fromsnap != NULL) {
2704 nvlist_t *renamed = NULL;
2705 nvpair_t *pair = NULL;
2706
2707 (void) strlcpy(tofs, destname, sizeof (tofs));
2708 if (flags->isprefix) {
2709 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2710 int i;
2711
2712 if (flags->istail) {
2713 cp = strrchr(drrb->drr_toname, '/');
2714 if (cp == NULL) {
2715 (void) strlcat(tofs, "/",
2716 sizeof (tofs));
2717 i = 0;
2718 } else {
2719 i = (cp - drrb->drr_toname);
2720 }
2721 } else {
2722 i = strcspn(drrb->drr_toname, "/@");
2723 }
2724 /* zfs_receive_one() will create_parents() */
2725 (void) strlcat(tofs, &drrb->drr_toname[i],
2726 sizeof (tofs));
2727 *strchr(tofs, '@') = '\0';
2728 }
2729
2730 if (recursive && !flags->dryrun && !flags->nomount) {
2731 VERIFY(0 == nvlist_alloc(&renamed,
2732 NV_UNIQUE_NAME, 0));
2733 }
2734
2735 softerr = recv_incremental_replication(hdl, tofs, flags,
2736 stream_nv, stream_avl, renamed);
2737
2738 /* Unmount renamed filesystems before receiving. */
2739 while ((pair = nvlist_next_nvpair(renamed,
2740 pair)) != NULL) {
2741 zfs_handle_t *zhp;
2742 prop_changelist_t *clp = NULL;
2743
2744 zhp = zfs_open(hdl, nvpair_name(pair),
2745 ZFS_TYPE_FILESYSTEM);
2746 if (zhp != NULL) {
2747 clp = changelist_gather(zhp,
2748 ZFS_PROP_MOUNTPOINT, 0, 0);
2749 zfs_close(zhp);
2750 if (clp != NULL) {
2751 softerr |=
2752 changelist_prefix(clp);
2753 changelist_free(clp);
2754 }
2755 }
2756 }
2757
2758 nvlist_free(renamed);
2759 }
2760 }
2761
2762 /*
2763 * Get the fs specified by the first path in the stream (the top level
2764 * specified by 'zfs send') and pass it to each invocation of
2765 * zfs_receive_one().
2766 */
2767 (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2768 sizeof (sendfs));
2769 if ((cp = strchr(sendfs, '@')) != NULL) {
2770 *cp = '\0';
2771 /*
2772 * Find the "sendsnap", the final snapshot in a replication
2773 * stream. zfs_receive_one() handles certain errors
2774 * differently, depending on if the contained stream is the
2775 * last one or not.
2776 */
2777 sendsnap = (cp + 1);
2778 }
2779
2780 /* Finally, receive each contained stream */
2781 do {
2782 /*
2783 * we should figure out if it has a recoverable
2784 * error, in which case do a recv_skip() and drive on.
2785 * Note, if we fail due to already having this guid,
2786 * zfs_receive_one() will take care of it (ie,
2787 * recv_skip() and return 0).
2788 */
2789 error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
2790 sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2791 action_handlep, sendsnap);
2792 if (error == ENODATA) {
2793 error = 0;
2794 break;
2795 }
2796 anyerr |= error;
2797 } while (error == 0);
2798
2799 if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2800 /*
2801 * Now that we have the fs's they sent us, try the
2802 * renames again.
2803 */
2804 softerr = recv_incremental_replication(hdl, tofs, flags,
2805 stream_nv, stream_avl, NULL);
2806 }
2807
2808 out:
2809 fsavl_destroy(stream_avl);
2810 nvlist_free(stream_nv);
2811 if (softerr)
2812 error = -2;
2813 if (anyerr)
2814 error = -1;
2815 return (error);
2816 }
2817
2818 static void
2819 trunc_prop_errs(int truncated)
2820 {
2821 ASSERT(truncated != 0);
2822
2823 if (truncated == 1)
2824 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2825 "1 more property could not be set\n"));
2826 else
2827 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2828 "%d more properties could not be set\n"), truncated);
2829 }
2830
2831 static int
2832 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2833 {
2834 dmu_replay_record_t *drr;
2835 void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
2836 char errbuf[1024];
2837
2838 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2839 "cannot receive:"));
2840
2841 /* XXX would be great to use lseek if possible... */
2842 drr = buf;
2843
2844 while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2845 byteswap, NULL) == 0) {
2846 if (byteswap)
2847 drr->drr_type = BSWAP_32(drr->drr_type);
2848
2849 switch (drr->drr_type) {
2850 case DRR_BEGIN:
2851 if (drr->drr_payloadlen != 0) {
2852 (void) recv_read(hdl, fd, buf,
2853 drr->drr_payloadlen, B_FALSE, NULL);
2854 }
2855 break;
2856
2857 case DRR_END:
2858 free(buf);
2859 return (0);
2860
2861 case DRR_OBJECT:
2862 if (byteswap) {
2863 drr->drr_u.drr_object.drr_bonuslen =
2864 BSWAP_32(drr->drr_u.drr_object.
2865 drr_bonuslen);
2866 }
2867 (void) recv_read(hdl, fd, buf,
2868 P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2869 B_FALSE, NULL);
2870 break;
2871
2872 case DRR_WRITE:
2873 if (byteswap) {
2874 drr->drr_u.drr_write.drr_length =
2875 BSWAP_64(drr->drr_u.drr_write.drr_length);
2876 }
2877 (void) recv_read(hdl, fd, buf,
2878 drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2879 break;
2880 case DRR_SPILL:
2881 if (byteswap) {
2882 drr->drr_u.drr_spill.drr_length =
2883 BSWAP_64(drr->drr_u.drr_spill.drr_length);
2884 }
2885 (void) recv_read(hdl, fd, buf,
2886 drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2887 break;
2888 case DRR_WRITE_EMBEDDED:
2889 if (byteswap) {
2890 drr->drr_u.drr_write_embedded.drr_psize =
2891 BSWAP_32(drr->drr_u.drr_write_embedded.
2892 drr_psize);
2893 }
2894 (void) recv_read(hdl, fd, buf,
2895 P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
2896 8), B_FALSE, NULL);
2897 break;
2898 case DRR_WRITE_BYREF:
2899 case DRR_FREEOBJECTS:
2900 case DRR_FREE:
2901 break;
2902
2903 default:
2904 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2905 "invalid record type"));
2906 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2907 }
2908 }
2909
2910 free(buf);
2911 return (-1);
2912 }
2913
2914 static void
2915 recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
2916 boolean_t resumable)
2917 {
2918 char target_fs[ZFS_MAX_DATASET_NAME_LEN];
2919
2920 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2921 "checksum mismatch or incomplete stream"));
2922
2923 if (!resumable)
2924 return;
2925 (void) strlcpy(target_fs, target_snap, sizeof (target_fs));
2926 *strchr(target_fs, '@') = '\0';
2927 zfs_handle_t *zhp = zfs_open(hdl, target_fs,
2928 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
2929 if (zhp == NULL)
2930 return;
2931
2932 char token_buf[ZFS_MAXPROPLEN];
2933 int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
2934 token_buf, sizeof (token_buf),
2935 NULL, NULL, 0, B_TRUE);
2936 if (error == 0) {
2937 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2938 "checksum mismatch or incomplete stream.\n"
2939 "Partially received snapshot is saved.\n"
2940 "A resuming stream can be generated on the sending "
2941 "system by running:\n"
2942 " zfs send -t %s"),
2943 token_buf);
2944 }
2945 zfs_close(zhp);
2946 }
2947
2948 /*
2949 * Restores a backup of tosnap from the file descriptor specified by infd.
2950 */
2951 static int
2952 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2953 const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
2954 dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
2955 avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2956 uint64_t *action_handlep, const char *finalsnap)
2957 {
2958 time_t begin_time;
2959 int ioctl_err, ioctl_errno, err;
2960 char *cp;
2961 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2962 char errbuf[1024];
2963 const char *chopprefix;
2964 boolean_t newfs = B_FALSE;
2965 boolean_t stream_wantsnewfs;
2966 boolean_t newprops = B_FALSE;
2967 uint64_t read_bytes = 0;
2968 uint64_t errflags = 0;
2969 uint64_t parent_snapguid = 0;
2970 prop_changelist_t *clp = NULL;
2971 nvlist_t *snapprops_nvlist = NULL;
2972 zprop_errflags_t prop_errflags;
2973 nvlist_t *prop_errors = NULL;
2974 boolean_t recursive;
2975 char *snapname = NULL;
2976 char destsnap[MAXPATHLEN * 2];
2977 char origin[MAXNAMELEN];
2978 char name[MAXPATHLEN];
2979 nvlist_t *props = NULL;
2980
2981 begin_time = time(NULL);
2982 bzero(origin, MAXNAMELEN);
2983
2984 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2985 "cannot receive"));
2986
2987 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2988 ENOENT);
2989
2990 if (stream_avl != NULL) {
2991 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2992 &snapname);
2993
2994 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
2995 &parent_snapguid);
2996 err = nvlist_lookup_nvlist(fs, "props", &props);
2997 if (err) {
2998 VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2999 newprops = B_TRUE;
3000 }
3001
3002 if (flags->canmountoff) {
3003 VERIFY(0 == nvlist_add_uint64(props,
3004 zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
3005 }
3006 }
3007
3008 cp = NULL;
3009
3010 /*
3011 * Determine how much of the snapshot name stored in the stream
3012 * we are going to tack on to the name they specified on the
3013 * command line, and how much we are going to chop off.
3014 *
3015 * If they specified a snapshot, chop the entire name stored in
3016 * the stream.
3017 */
3018 if (flags->istail) {
3019 /*
3020 * A filesystem was specified with -e. We want to tack on only
3021 * the tail of the sent snapshot path.
3022 */
3023 if (strchr(tosnap, '@')) {
3024 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3025 "argument - snapshot not allowed with -e"));
3026 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3027 goto out;
3028 }
3029
3030 chopprefix = strrchr(sendfs, '/');
3031
3032 if (chopprefix == NULL) {
3033 /*
3034 * The tail is the poolname, so we need to
3035 * prepend a path separator.
3036 */
3037 int len = strlen(drrb->drr_toname);
3038 cp = malloc(len + 2);
3039 cp[0] = '/';
3040 (void) strcpy(&cp[1], drrb->drr_toname);
3041 chopprefix = cp;
3042 } else {
3043 chopprefix = drrb->drr_toname + (chopprefix - sendfs);
3044 }
3045 } else if (flags->isprefix) {
3046 /*
3047 * A filesystem was specified with -d. We want to tack on
3048 * everything but the first element of the sent snapshot path
3049 * (all but the pool name).
3050 */
3051 if (strchr(tosnap, '@')) {
3052 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3053 "argument - snapshot not allowed with -d"));
3054 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3055 goto out;
3056 }
3057
3058 chopprefix = strchr(drrb->drr_toname, '/');
3059 if (chopprefix == NULL)
3060 chopprefix = strchr(drrb->drr_toname, '@');
3061 } else if (strchr(tosnap, '@') == NULL) {
3062 /*
3063 * If a filesystem was specified without -d or -e, we want to
3064 * tack on everything after the fs specified by 'zfs send'.
3065 */
3066 chopprefix = drrb->drr_toname + strlen(sendfs);
3067 } else {
3068 /* A snapshot was specified as an exact path (no -d or -e). */
3069 if (recursive) {
3070 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3071 "cannot specify snapshot name for multi-snapshot "
3072 "stream"));
3073 err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3074 goto out;
3075 }
3076 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
3077 }
3078
3079 ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
3080 ASSERT(chopprefix > drrb->drr_toname);
3081 ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
3082 ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
3083 chopprefix[0] == '\0');
3084
3085 /*
3086 * Determine name of destination snapshot.
3087 */
3088 (void) strcpy(destsnap, tosnap);
3089 (void) strlcat(destsnap, chopprefix, sizeof (destsnap));
3090 free(cp);
3091 if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
3092 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3093 goto out;
3094 }
3095
3096 /*
3097 * Determine the name of the origin snapshot.
3098 */
3099 if (drrb->drr_flags & DRR_FLAG_CLONE) {
3100 if (guid_to_name(hdl, destsnap,
3101 drrb->drr_fromguid, B_FALSE, origin) != 0) {
3102 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3103 "local origin for clone %s does not exist"),
3104 destsnap);
3105 err = zfs_error(hdl, EZFS_NOENT, errbuf);
3106 goto out;
3107 }
3108 if (flags->verbose)
3109 (void) printf("found clone origin %s\n", origin);
3110 } else if (originsnap) {
3111 (void) strncpy(origin, originsnap, sizeof (origin));
3112 if (flags->verbose)
3113 (void) printf("using provided clone origin %s\n",
3114 origin);
3115 }
3116
3117 boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3118 DMU_BACKUP_FEATURE_RESUMING;
3119 stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
3120 (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
3121
3122 if (stream_wantsnewfs) {
3123 /*
3124 * if the parent fs does not exist, look for it based on
3125 * the parent snap GUID
3126 */
3127 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3128 "cannot receive new filesystem stream"));
3129
3130 (void) strcpy(name, destsnap);
3131 cp = strrchr(name, '/');
3132 if (cp)
3133 *cp = '\0';
3134 if (cp &&
3135 !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3136 char suffix[ZFS_MAX_DATASET_NAME_LEN];
3137 (void) strcpy(suffix, strrchr(destsnap, '/'));
3138 if (guid_to_name(hdl, name, parent_snapguid,
3139 B_FALSE, destsnap) == 0) {
3140 *strchr(destsnap, '@') = '\0';
3141 (void) strcat(destsnap, suffix);
3142 }
3143 }
3144 } else {
3145 /*
3146 * if the fs does not exist, look for it based on the
3147 * fromsnap GUID
3148 */
3149 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3150 "cannot receive incremental stream"));
3151
3152 (void) strcpy(name, destsnap);
3153 *strchr(name, '@') = '\0';
3154
3155 /*
3156 * If the exact receive path was specified and this is the
3157 * topmost path in the stream, then if the fs does not exist we
3158 * should look no further.
3159 */
3160 if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
3161 strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
3162 !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3163 char snap[ZFS_MAX_DATASET_NAME_LEN];
3164 (void) strcpy(snap, strchr(destsnap, '@'));
3165 if (guid_to_name(hdl, name, drrb->drr_fromguid,
3166 B_FALSE, destsnap) == 0) {
3167 *strchr(destsnap, '@') = '\0';
3168 (void) strcat(destsnap, snap);
3169 }
3170 }
3171 }
3172
3173 (void) strcpy(name, destsnap);
3174 *strchr(name, '@') = '\0';
3175
3176 if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3177 zfs_cmd_t zc = {"\0"};
3178 zfs_handle_t *zhp;
3179
3180 (void) strcpy(zc.zc_name, name);
3181
3182 /*
3183 * Destination fs exists. It must be one of these cases:
3184 * - an incremental send stream
3185 * - the stream specifies a new fs (full stream or clone)
3186 * and they want us to blow away the existing fs (and
3187 * have therefore specified -F and removed any snapshots)
3188 * - we are resuming a failed receive.
3189 */
3190 if (stream_wantsnewfs) {
3191 if (!flags->force) {
3192 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3193 "destination '%s' exists\n"
3194 "must specify -F to overwrite it"), name);
3195 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3196 goto out;
3197 }
3198 if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
3199 &zc) == 0) {
3200 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3201 "destination has snapshots (eg. %s)\n"
3202 "must destroy them to overwrite it"),
3203 name);
3204 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3205 goto out;
3206 }
3207 }
3208
3209 if ((zhp = zfs_open(hdl, name,
3210 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
3211 err = -1;
3212 goto out;
3213 }
3214
3215 if (stream_wantsnewfs &&
3216 zhp->zfs_dmustats.dds_origin[0]) {
3217 zfs_close(zhp);
3218 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3219 "destination '%s' is a clone\n"
3220 "must destroy it to overwrite it"), name);
3221 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3222 goto out;
3223 }
3224
3225 if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
3226 stream_wantsnewfs) {
3227 /* We can't do online recv in this case */
3228 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
3229 if (clp == NULL) {
3230 zfs_close(zhp);
3231 err = -1;
3232 goto out;
3233 }
3234 if (changelist_prefix(clp) != 0) {
3235 changelist_free(clp);
3236 zfs_close(zhp);
3237 err = -1;
3238 goto out;
3239 }
3240 }
3241
3242 /*
3243 * If we are resuming a newfs, set newfs here so that we will
3244 * mount it if the recv succeeds this time. We can tell
3245 * that it was a newfs on the first recv because the fs
3246 * itself will be inconsistent (if the fs existed when we
3247 * did the first recv, we would have received it into
3248 * .../%recv).
3249 */
3250 if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
3251 newfs = B_TRUE;
3252
3253 zfs_close(zhp);
3254 } else {
3255 /*
3256 * Destination filesystem does not exist. Therefore we better
3257 * be creating a new filesystem (either from a full backup, or
3258 * a clone). It would therefore be invalid if the user
3259 * specified only the pool name (i.e. if the destination name
3260 * contained no slash character).
3261 */
3262 cp = strrchr(name, '/');
3263
3264 if (!stream_wantsnewfs || cp == NULL) {
3265 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3266 "destination '%s' does not exist"), name);
3267 err = zfs_error(hdl, EZFS_NOENT, errbuf);
3268 goto out;
3269 }
3270
3271 /*
3272 * Trim off the final dataset component so we perform the
3273 * recvbackup ioctl to the filesystems's parent.
3274 */
3275 *cp = '\0';
3276
3277 if (flags->isprefix && !flags->istail && !flags->dryrun &&
3278 create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
3279 err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3280 goto out;
3281 }
3282
3283 newfs = B_TRUE;
3284 }
3285
3286 if (flags->verbose) {
3287 (void) printf("%s %s stream of %s into %s\n",
3288 flags->dryrun ? "would receive" : "receiving",
3289 drrb->drr_fromguid ? "incremental" : "full",
3290 drrb->drr_toname, destsnap);
3291 (void) fflush(stdout);
3292 }
3293
3294 if (flags->dryrun) {
3295 err = recv_skip(hdl, infd, flags->byteswap);
3296 goto out;
3297 }
3298
3299 err = ioctl_err = lzc_receive_one(destsnap, props, origin,
3300 flags->force, flags->resumable, infd, drr_noswap, cleanup_fd,
3301 &read_bytes, &errflags, action_handlep, &prop_errors);
3302 ioctl_errno = ioctl_err;
3303 prop_errflags = errflags;
3304
3305 if (err == 0) {
3306 nvpair_t *prop_err = NULL;
3307
3308 while ((prop_err = nvlist_next_nvpair(prop_errors,
3309 prop_err)) != NULL) {
3310 char tbuf[1024];
3311 zfs_prop_t prop;
3312 int intval;
3313
3314 prop = zfs_name_to_prop(nvpair_name(prop_err));
3315 (void) nvpair_value_int32(prop_err, &intval);
3316 if (strcmp(nvpair_name(prop_err),
3317 ZPROP_N_MORE_ERRORS) == 0) {
3318 trunc_prop_errs(intval);
3319 break;
3320 } else if (snapname == NULL || finalsnap == NULL ||
3321 strcmp(finalsnap, snapname) == 0 ||
3322 strcmp(nvpair_name(prop_err),
3323 zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
3324 /*
3325 * Skip the special case of, for example,
3326 * "refquota", errors on intermediate
3327 * snapshots leading up to a final one.
3328 * That's why we have all of the checks above.
3329 *
3330 * See zfs_ioctl.c's extract_delay_props() for
3331 * a list of props which can fail on
3332 * intermediate snapshots, but shouldn't
3333 * affect the overall receive.
3334 */
3335 (void) snprintf(tbuf, sizeof (tbuf),
3336 dgettext(TEXT_DOMAIN,
3337 "cannot receive %s property on %s"),
3338 nvpair_name(prop_err), name);
3339 zfs_setprop_error(hdl, prop, intval, tbuf);
3340 }
3341 }
3342 }
3343
3344 if (err == 0 && snapprops_nvlist) {
3345 zfs_cmd_t zc = {"\0"};
3346
3347 (void) strcpy(zc.zc_name, destsnap);
3348 zc.zc_cookie = B_TRUE; /* received */
3349 if (zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist) == 0) {
3350 (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
3351 zcmd_free_nvlists(&zc);
3352 }
3353 }
3354
3355 if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3356 /*
3357 * It may be that this snapshot already exists,
3358 * in which case we want to consume & ignore it
3359 * rather than failing.
3360 */
3361 avl_tree_t *local_avl;
3362 nvlist_t *local_nv, *fs;
3363 cp = strchr(destsnap, '@');
3364
3365 /*
3366 * XXX Do this faster by just iterating over snaps in
3367 * this fs. Also if zc_value does not exist, we will
3368 * get a strange "does not exist" error message.
3369 */
3370 *cp = '\0';
3371 if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE,
3372 &local_nv, &local_avl) == 0) {
3373 *cp = '@';
3374 fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3375 fsavl_destroy(local_avl);
3376 nvlist_free(local_nv);
3377
3378 if (fs != NULL) {
3379 if (flags->verbose) {
3380 (void) printf("snap %s already exists; "
3381 "ignoring\n", destsnap);
3382 }
3383 err = ioctl_err = recv_skip(hdl, infd,
3384 flags->byteswap);
3385 }
3386 }
3387 *cp = '@';
3388 }
3389
3390 if (ioctl_err != 0) {
3391 switch (ioctl_errno) {
3392 case ENODEV:
3393 cp = strchr(destsnap, '@');
3394 *cp = '\0';
3395 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3396 "most recent snapshot of %s does not\n"
3397 "match incremental source"), destsnap);
3398 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3399 *cp = '@';
3400 break;
3401 case ETXTBSY:
3402 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3403 "destination %s has been modified\n"
3404 "since most recent snapshot"), name);
3405 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3406 break;
3407 case EEXIST:
3408 cp = strchr(destsnap, '@');
3409 if (newfs) {
3410 /* it's the containing fs that exists */
3411 *cp = '\0';
3412 }
3413 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3414 "destination already exists"));
3415 (void) zfs_error_fmt(hdl, EZFS_EXISTS,
3416 dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3417 destsnap);
3418 *cp = '@';
3419 break;
3420 case EINVAL:
3421 if (flags->resumable)
3422 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3423 "kernel modules must be upgraded to "
3424 "receive this stream."));
3425 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3426 break;
3427 case ECKSUM:
3428 recv_ecksum_set_aux(hdl, destsnap, flags->resumable);
3429 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3430 break;
3431 case ENOTSUP:
3432 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3433 "pool must be upgraded to receive this stream."));
3434 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3435 break;
3436 case EDQUOT:
3437 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3438 "destination %s space quota exceeded"), name);
3439 (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3440 break;
3441 default:
3442 (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3443 }
3444 }
3445
3446 /*
3447 * Mount the target filesystem (if created). Also mount any
3448 * children of the target filesystem if we did a replication
3449 * receive (indicated by stream_avl being non-NULL).
3450 */
3451 cp = strchr(destsnap, '@');
3452 if (cp && (ioctl_err == 0 || !newfs)) {
3453 zfs_handle_t *h;
3454
3455 *cp = '\0';
3456 h = zfs_open(hdl, destsnap,
3457 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3458 if (h != NULL) {
3459 if (h->zfs_type == ZFS_TYPE_VOLUME) {
3460 *cp = '@';
3461 } else if (newfs || stream_avl) {
3462 /*
3463 * Track the first/top of hierarchy fs,
3464 * for mounting and sharing later.
3465 */
3466 if (top_zfs && *top_zfs == NULL)
3467 *top_zfs = zfs_strdup(hdl, destsnap);
3468 }
3469 zfs_close(h);
3470 }
3471 *cp = '@';
3472 }
3473
3474 if (clp) {
3475 err |= changelist_postfix(clp);
3476 changelist_free(clp);
3477 }
3478
3479 if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3480 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3481 "failed to clear unreceived properties on %s"), name);
3482 (void) fprintf(stderr, "\n");
3483 }
3484 if (prop_errflags & ZPROP_ERR_NORESTORE) {
3485 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3486 "failed to restore original properties on %s"), name);
3487 (void) fprintf(stderr, "\n");
3488 }
3489
3490 if (err || ioctl_err) {
3491 err = -1;
3492 goto out;
3493 }
3494
3495 if (flags->verbose) {
3496 char buf1[64];
3497 char buf2[64];
3498 uint64_t bytes = read_bytes;
3499 time_t delta = time(NULL) - begin_time;
3500 if (delta == 0)
3501 delta = 1;
3502 zfs_nicenum(bytes, buf1, sizeof (buf1));
3503 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3504
3505 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3506 buf1, delta, buf2);
3507 }
3508
3509 err = 0;
3510 out:
3511 if (prop_errors != NULL)
3512 nvlist_free(prop_errors);
3513
3514 if (newprops)
3515 nvlist_free(props);
3516
3517 return (err);
3518 }
3519
3520 static int
3521 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
3522 const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
3523 nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3524 uint64_t *action_handlep, const char *finalsnap)
3525 {
3526 int err;
3527 dmu_replay_record_t drr, drr_noswap;
3528 struct drr_begin *drrb = &drr.drr_u.drr_begin;
3529 char errbuf[1024];
3530 zio_cksum_t zcksum = { { 0 } };
3531 uint64_t featureflags;
3532 int hdrtype;
3533
3534 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3535 "cannot receive"));
3536
3537 if (flags->isprefix &&
3538 !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3539 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3540 "(%s) does not exist"), tosnap);
3541 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3542 }
3543 if (originsnap &&
3544 !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
3545 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
3546 "(%s) does not exist"), originsnap);
3547 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3548 }
3549
3550 /* read in the BEGIN record */
3551 if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3552 &zcksum)))
3553 return (err);
3554
3555 if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3556 /* It's the double end record at the end of a package */
3557 return (ENODATA);
3558 }
3559
3560 /* the kernel needs the non-byteswapped begin record */
3561 drr_noswap = drr;
3562
3563 flags->byteswap = B_FALSE;
3564 if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3565 /*
3566 * We computed the checksum in the wrong byteorder in
3567 * recv_read() above; do it again correctly.
3568 */
3569 bzero(&zcksum, sizeof (zio_cksum_t));
3570 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3571 flags->byteswap = B_TRUE;
3572
3573 drr.drr_type = BSWAP_32(drr.drr_type);
3574 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3575 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3576 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3577 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3578 drrb->drr_type = BSWAP_32(drrb->drr_type);
3579 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3580 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3581 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3582 }
3583
3584 if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3585 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3586 "stream (bad magic number)"));
3587 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3588 }
3589
3590 featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3591 hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3592
3593 if (!DMU_STREAM_SUPPORTED(featureflags) ||
3594 (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3595 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3596 "stream has unsupported feature, feature flags = %lx"),
3597 featureflags);
3598 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3599 }
3600
3601 if (strchr(drrb->drr_toname, '@') == NULL) {
3602 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3603 "stream (bad snapshot name)"));
3604 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3605 }
3606
3607 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3608 char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
3609 if (sendfs == NULL) {
3610 /*
3611 * We were not called from zfs_receive_package(). Get
3612 * the fs specified by 'zfs send'.
3613 */
3614 char *cp;
3615 (void) strlcpy(nonpackage_sendfs,
3616 drr.drr_u.drr_begin.drr_toname,
3617 sizeof (nonpackage_sendfs));
3618 if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3619 *cp = '\0';
3620 sendfs = nonpackage_sendfs;
3621 VERIFY(finalsnap == NULL);
3622 }
3623 return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
3624 &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
3625 cleanup_fd, action_handlep, finalsnap));
3626 } else {
3627 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3628 DMU_COMPOUNDSTREAM);
3629 return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
3630 &zcksum, top_zfs, cleanup_fd, action_handlep));
3631 }
3632 }
3633
3634 /*
3635 * Restores a backup of tosnap from the file descriptor specified by infd.
3636 * Return 0 on total success, -2 if some things couldn't be
3637 * destroyed/renamed/promoted, -1 if some things couldn't be received.
3638 * (-1 will override -2, if -1 and the resumable flag was specified the
3639 * transfer can be resumed if the sending side supports it).
3640 */
3641 int
3642 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
3643 recvflags_t *flags, int infd, avl_tree_t *stream_avl)
3644 {
3645 char *top_zfs = NULL;
3646 int err;
3647 int cleanup_fd;
3648 uint64_t action_handle = 0;
3649 struct stat sb;
3650 char *originsnap = NULL;
3651
3652 /*
3653 * The only way fstat can fail is if we do not have a valid file
3654 * descriptor.
3655 */
3656 if (fstat(infd, &sb) == -1) {
3657 perror("fstat");
3658 return (-2);
3659 }
3660
3661 #ifdef __linux__
3662 #ifndef F_SETPIPE_SZ
3663 #define F_SETPIPE_SZ (F_SETLEASE + 7)
3664 #endif /* F_SETPIPE_SZ */
3665
3666 #ifndef F_GETPIPE_SZ
3667 #define F_GETPIPE_SZ (F_GETLEASE + 7)
3668 #endif /* F_GETPIPE_SZ */
3669
3670 /*
3671 * It is not uncommon for gigabytes to be processed in zfs receive.
3672 * Speculatively increase the buffer size via Linux-specific fcntl()
3673 * call.
3674 */
3675 if (S_ISFIFO(sb.st_mode)) {
3676 FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "r");
3677
3678 if (procf != NULL) {
3679 unsigned long max_psize;
3680 long cur_psize;
3681 if (fscanf(procf, "%lu", &max_psize) > 0) {
3682 cur_psize = fcntl(infd, F_GETPIPE_SZ);
3683 if (cur_psize > 0 &&
3684 max_psize > (unsigned long) cur_psize)
3685 (void) fcntl(infd, F_SETPIPE_SZ,
3686 max_psize);
3687 }
3688 fclose(procf);
3689 }
3690 }
3691 #endif /* __linux__ */
3692
3693 if (props) {
3694 err = nvlist_lookup_string(props, "origin", &originsnap);
3695 if (err && err != ENOENT)
3696 return (err);
3697 }
3698
3699 cleanup_fd = open(ZFS_DEV, O_RDWR);
3700 VERIFY(cleanup_fd >= 0);
3701
3702 err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
3703 stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL);
3704
3705 VERIFY(0 == close(cleanup_fd));
3706
3707 if (err == 0 && !flags->nomount && top_zfs) {
3708 zfs_handle_t *zhp = NULL;
3709 prop_changelist_t *clp = NULL;
3710
3711 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3712 if (zhp != NULL) {
3713 clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3714 CL_GATHER_MOUNT_ALWAYS, 0);
3715 zfs_close(zhp);
3716 if (clp != NULL) {
3717 /* mount and share received datasets */
3718 err = changelist_postfix(clp);
3719 changelist_free(clp);
3720 }
3721 }
3722 if (zhp == NULL || clp == NULL || err)
3723 err = -1;
3724 }
3725 if (top_zfs)
3726 free(top_zfs);
3727
3728 return (err);
3729 }