]> git.proxmox.com Git - mirror_zfs.git/blob - lib/libzfs/libzfs_sendrecv.c
OpenZFS 7247 - zfs receive of deduplicated stream fails
[mirror_zfs.git] / lib / libzfs / libzfs_sendrecv.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27 * All rights reserved
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 */
31
32 #include <assert.h>
33 #include <ctype.h>
34 #include <errno.h>
35 #include <libintl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <strings.h>
39 #include <unistd.h>
40 #include <stddef.h>
41 #include <fcntl.h>
42 #include <sys/mount.h>
43 #include <sys/mntent.h>
44 #include <sys/mnttab.h>
45 #include <sys/avl.h>
46 #include <sys/debug.h>
47 #include <sys/stat.h>
48 #include <stddef.h>
49 #include <pthread.h>
50 #include <umem.h>
51 #include <time.h>
52
53 #include <libzfs.h>
54 #include <libzfs_core.h>
55
56 #include "zfs_namecheck.h"
57 #include "zfs_prop.h"
58 #include "zfs_fletcher.h"
59 #include "libzfs_impl.h"
60 #include <zlib.h>
61 #include <sys/zio_checksum.h>
62 #include <sys/ddt.h>
63 #include <sys/socket.h>
64 #include <sys/sha2.h>
65
/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);

/*
 * Prototypes for file-local (static) routines that are referenced before
 * their definitions.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
    uint64_t *, const char *);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);

/*
 * All-zero checksum.  cksummer() compares a write record's key checksum
 * against this to decide whether it has to compute a checksum itself.
 */
static const zio_cksum_t zero_cksum = { { 0 } };
76
/*
 * Argument block consumed by cksummer().
 */
typedef struct dedup_arg {
	int inputfd;			/* fd cksummer() reads records from */
	int outputfd;			/* fd cksummer() writes records to */
	libzfs_handle_t *dedup_hdl;	/* handle used for alloc/error calls */
} dedup_arg_t;
82
/*
 * Argument block consumed by send_progress_thread().
 */
typedef struct progress_arg {
	zfs_handle_t *pa_zhp;	/* dataset whose name is queried and printed */
	int pa_fd;		/* passed as zc_cookie to ZFS_IOC_SEND_PROGRESS */
	boolean_t pa_parsable;	/* print tab-separated lines without header */
} progress_arg_t;
88
/*
 * Reference to a block already present in the stream.  cksummer() fills
 * it from a write record's drr_toguid, drr_object and drr_offset.
 */
typedef struct dataref {
	uint64_t ref_guid;
	uint64_t ref_object;
	uint64_t ref_offset;
} dataref_t;
94
/*
 * One entry of the dedup table: a (checksum, prop) key and the stream
 * location (dde_ref) of the block that was first seen with that key.
 */
typedef struct dedup_entry {
	struct dedup_entry *dde_next;	/* next entry in the same hash chain */
	zio_cksum_t dde_chksum;		/* key: block checksum */
	uint64_t dde_prop;		/* key: must match in addition to cksum */
	dataref_t dde_ref;		/* where the block lives in the stream */
} dedup_entry_t;
101
/*
 * Sizing of the dedup table in cksummer(): the table may use up to
 * MAX_DDT_PHYSMEM_PERCENT percent of physical memory, but its limit is
 * never smaller than SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define MAX_DDT_PHYSMEM_PERCENT 20
#define SMALLEST_POSSIBLE_MAX_DDT_MB 128
104
/*
 * Chained hash table of dedup_entry_t, keyed by the low numhashbits bits
 * of the first checksum word (see ddt_update()).
 */
typedef struct dedup_table {
	dedup_entry_t **dedup_hash_array;	/* bucket heads */
	umem_cache_t *ddecache;			/* allocator for entries */
	uint64_t max_ddt_size; /* max dedup table size in bytes */
	uint64_t cur_ddt_size; /* current dedup table size in bytes */
	uint64_t ddt_count;			/* number of entries added */
	int numhashbits;			/* bits of checksum used as index */
	boolean_t ddt_full;	/* set once the "table full" note was issued */
} dedup_table_t;
114
/*
 * Return the 1-based position of the most significant set bit of n,
 * i.e. the number of bits needed to represent n.  Returns 0 for n == 0.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		n >>= 1;
		nbits++;
	}
	return (nbits);
}
124
/*
 * Read one item of exactly len bytes from stream into buf.
 *
 * Returns fread()'s item count: 1 when the whole item was read, 0 when
 * it could not be read in full (EOF, error, or len == 0).
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
135
136 static void
137 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
138 zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
139 {
140 dedup_entry_t *dde;
141
142 if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
143 if (ddt->ddt_full == B_FALSE) {
144 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
145 "Dedup table full. Deduplication will continue "
146 "with existing table entries"));
147 ddt->ddt_full = B_TRUE;
148 }
149 return;
150 }
151
152 if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
153 != NULL) {
154 assert(*ddepp == NULL);
155 dde->dde_next = NULL;
156 dde->dde_chksum = *cs;
157 dde->dde_prop = prop;
158 dde->dde_ref = *dr;
159 *ddepp = dde;
160 ddt->cur_ddt_size += sizeof (dedup_entry_t);
161 ddt->ddt_count++;
162 }
163 }
164
165 /*
166 * Using the specified dedup table, do a lookup for an entry with
167 * the checksum cs. If found, return the block's reference info
168 * in *dr. Otherwise, insert a new entry in the dedup table, using
169 * the reference information specified by *dr.
170 *
171 * return value: true - entry was found
172 * false - entry was not found
173 */
174 static boolean_t
175 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
176 uint64_t prop, dataref_t *dr)
177 {
178 uint32_t hashcode;
179 dedup_entry_t **ddepp;
180
181 hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
182
183 for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
184 ddepp = &((*ddepp)->dde_next)) {
185 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
186 (*ddepp)->dde_prop == prop) {
187 *dr = (*ddepp)->dde_ref;
188 return (B_TRUE);
189 }
190 }
191 ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
192 return (B_FALSE);
193 }
194
195 static int
196 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
197 zio_cksum_t *zc, int outfd)
198 {
199 ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
200 ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
201 fletcher_4_incremental_native(drr,
202 offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
203 if (drr->drr_type != DRR_BEGIN) {
204 ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
205 drr_checksum.drr_checksum));
206 drr->drr_u.drr_checksum.drr_checksum = *zc;
207 }
208 fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
209 sizeof (zio_cksum_t), zc);
210 if (write(outfd, drr, sizeof (*drr)) == -1)
211 return (errno);
212 if (payload_len != 0) {
213 fletcher_4_incremental_native(payload, payload_len, zc);
214 if (write(outfd, payload, payload_len) == -1)
215 return (errno);
216 }
217 return (0);
218 }
219
220 /*
221 * This function is started in a separate thread when the dedup option
222 * has been requested. The main send thread determines the list of
223 * snapshots to be included in the send stream and makes the ioctl calls
224 * for each one. But instead of having the ioctl send the output to the
225 * the output fd specified by the caller of zfs_send()), the
226 * ioctl is told to direct the output to a pipe, which is read by the
227 * alternate thread running THIS function. This function does the
228 * dedup'ing by:
229 * 1. building a dedup table (the DDT)
230 * 2. doing checksums on each data block and inserting a record in the DDT
231 * 3. looking for matching checksums, and
232 * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever
233 * a duplicate block is found.
234 * The output of this function then goes to the output fd requested
235 * by the caller of zfs_send().
236 */
237 static void *
238 cksummer(void *arg)
239 {
240 dedup_arg_t *dda = arg;
241 char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
242 dmu_replay_record_t thedrr = { 0 };
243 dmu_replay_record_t *drr = &thedrr;
244 FILE *ofp;
245 int outfd;
246 dedup_table_t ddt;
247 zio_cksum_t stream_cksum;
248 uint64_t numbuckets;
249
250 #ifdef _ILP32
251 ddt.max_ddt_size = SMALLEST_POSSIBLE_MAX_DDT_MB << 20;
252 #else
253 uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
254 ddt.max_ddt_size =
255 MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
256 SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
257 #endif
258
259 numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
260
261 /*
262 * numbuckets must be a power of 2. Increase number to
263 * a power of 2 if necessary.
264 */
265 if (!ISP2(numbuckets))
266 numbuckets = 1ULL << high_order_bit(numbuckets);
267
268 ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
269 ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
270 NULL, NULL, NULL, NULL, NULL, 0);
271 ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
272 ddt.numhashbits = high_order_bit(numbuckets) - 1;
273 ddt.ddt_full = B_FALSE;
274
275 outfd = dda->outputfd;
276 ofp = fdopen(dda->inputfd, "r");
277 while (ssread(drr, sizeof (*drr), ofp) != 0) {
278
279 /*
280 * kernel filled in checksum, we are going to write same
281 * record, but need to regenerate checksum.
282 */
283 if (drr->drr_type != DRR_BEGIN) {
284 bzero(&drr->drr_u.drr_checksum.drr_checksum,
285 sizeof (drr->drr_u.drr_checksum.drr_checksum));
286 }
287
288 switch (drr->drr_type) {
289 case DRR_BEGIN:
290 {
291 struct drr_begin *drrb = &drr->drr_u.drr_begin;
292 int fflags;
293 int sz = 0;
294 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
295
296 ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
297
298 /* set the DEDUP feature flag for this stream */
299 fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
300 fflags |= (DMU_BACKUP_FEATURE_DEDUP |
301 DMU_BACKUP_FEATURE_DEDUPPROPS);
302 DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
303
304 if (drr->drr_payloadlen != 0) {
305 sz = drr->drr_payloadlen;
306
307 if (sz > SPA_MAXBLOCKSIZE) {
308 buf = zfs_realloc(dda->dedup_hdl, buf,
309 SPA_MAXBLOCKSIZE, sz);
310 }
311 (void) ssread(buf, sz, ofp);
312 if (ferror(stdin))
313 perror("fread");
314 }
315 if (dump_record(drr, buf, sz, &stream_cksum,
316 outfd) != 0)
317 goto out;
318 break;
319 }
320
321 case DRR_END:
322 {
323 struct drr_end *drre = &drr->drr_u.drr_end;
324 /* use the recalculated checksum */
325 drre->drr_checksum = stream_cksum;
326 if (dump_record(drr, NULL, 0, &stream_cksum,
327 outfd) != 0)
328 goto out;
329 break;
330 }
331
332 case DRR_OBJECT:
333 {
334 struct drr_object *drro = &drr->drr_u.drr_object;
335 if (drro->drr_bonuslen > 0) {
336 (void) ssread(buf,
337 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
338 ofp);
339 }
340 if (dump_record(drr, buf,
341 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
342 &stream_cksum, outfd) != 0)
343 goto out;
344 break;
345 }
346
347 case DRR_SPILL:
348 {
349 struct drr_spill *drrs = &drr->drr_u.drr_spill;
350 (void) ssread(buf, drrs->drr_length, ofp);
351 if (dump_record(drr, buf, drrs->drr_length,
352 &stream_cksum, outfd) != 0)
353 goto out;
354 break;
355 }
356
357 case DRR_FREEOBJECTS:
358 {
359 if (dump_record(drr, NULL, 0, &stream_cksum,
360 outfd) != 0)
361 goto out;
362 break;
363 }
364
365 case DRR_WRITE:
366 {
367 struct drr_write *drrw = &drr->drr_u.drr_write;
368 dataref_t dataref;
369 uint64_t payload_size;
370
371 payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
372 (void) ssread(buf, payload_size, ofp);
373
374 /*
375 * Use the existing checksum if it's dedup-capable,
376 * else calculate a SHA256 checksum for it.
377 */
378
379 if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
380 zero_cksum) ||
381 !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
382 SHA2_CTX ctx;
383 zio_cksum_t tmpsha256;
384
385 SHA2Init(SHA256, &ctx);
386 SHA2Update(&ctx, buf, payload_size);
387 SHA2Final(&tmpsha256, &ctx);
388
389 drrw->drr_key.ddk_cksum.zc_word[0] =
390 BE_64(tmpsha256.zc_word[0]);
391 drrw->drr_key.ddk_cksum.zc_word[1] =
392 BE_64(tmpsha256.zc_word[1]);
393 drrw->drr_key.ddk_cksum.zc_word[2] =
394 BE_64(tmpsha256.zc_word[2]);
395 drrw->drr_key.ddk_cksum.zc_word[3] =
396 BE_64(tmpsha256.zc_word[3]);
397 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
398 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
399 }
400
401 dataref.ref_guid = drrw->drr_toguid;
402 dataref.ref_object = drrw->drr_object;
403 dataref.ref_offset = drrw->drr_offset;
404
405 if (ddt_update(dda->dedup_hdl, &ddt,
406 &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
407 &dataref)) {
408 dmu_replay_record_t wbr_drr = {0};
409 struct drr_write_byref *wbr_drrr =
410 &wbr_drr.drr_u.drr_write_byref;
411
412 /* block already present in stream */
413 wbr_drr.drr_type = DRR_WRITE_BYREF;
414
415 wbr_drrr->drr_object = drrw->drr_object;
416 wbr_drrr->drr_offset = drrw->drr_offset;
417 wbr_drrr->drr_length = drrw->drr_logical_size;
418 wbr_drrr->drr_toguid = drrw->drr_toguid;
419 wbr_drrr->drr_refguid = dataref.ref_guid;
420 wbr_drrr->drr_refobject =
421 dataref.ref_object;
422 wbr_drrr->drr_refoffset =
423 dataref.ref_offset;
424
425 wbr_drrr->drr_checksumtype =
426 drrw->drr_checksumtype;
427 wbr_drrr->drr_checksumflags =
428 drrw->drr_checksumtype;
429 wbr_drrr->drr_key.ddk_cksum =
430 drrw->drr_key.ddk_cksum;
431 wbr_drrr->drr_key.ddk_prop =
432 drrw->drr_key.ddk_prop;
433
434 if (dump_record(&wbr_drr, NULL, 0,
435 &stream_cksum, outfd) != 0)
436 goto out;
437 } else {
438 /* block not previously seen */
439 if (dump_record(drr, buf, payload_size,
440 &stream_cksum, outfd) != 0)
441 goto out;
442 }
443 break;
444 }
445
446 case DRR_WRITE_EMBEDDED:
447 {
448 struct drr_write_embedded *drrwe =
449 &drr->drr_u.drr_write_embedded;
450 (void) ssread(buf,
451 P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
452 if (dump_record(drr, buf,
453 P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
454 &stream_cksum, outfd) != 0)
455 goto out;
456 break;
457 }
458
459 case DRR_FREE:
460 {
461 if (dump_record(drr, NULL, 0, &stream_cksum,
462 outfd) != 0)
463 goto out;
464 break;
465 }
466
467 default:
468 (void) fprintf(stderr, "INVALID record type 0x%x\n",
469 drr->drr_type);
470 /* should never happen, so assert */
471 assert(B_FALSE);
472 }
473 }
474 out:
475 umem_cache_destroy(ddt.ddecache);
476 free(ddt.dedup_hash_array);
477 free(buf);
478 (void) fclose(ofp);
479
480 return (NULL);
481 }
482
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 */

/*
 * One snapshot in the tree, ordered by fn_guid (see fsavl_compare()).
 * fn_nvfs and fn_snapname point into the nvlist handed to fsavl_create();
 * they are not copies.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage */
	nvlist_t *fn_nvfs;	/* nvlist of the containing filesystem */
	char *fn_snapname;	/* name of the snapshot's nvpair */
	uint64_t fn_guid;	/* snapshot guid (sort key) */
} fsavl_node_t;
492
493 static int
494 fsavl_compare(const void *arg1, const void *arg2)
495 {
496 const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
497 const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
498
499 return (AVL_CMP(fn1->fn_guid, fn2->fn_guid));
500 }
501
502 /*
503 * Given the GUID of a snapshot, find its containing filesystem and
504 * (optionally) name.
505 */
506 static nvlist_t *
507 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
508 {
509 fsavl_node_t fn_find;
510 fsavl_node_t *fn;
511
512 fn_find.fn_guid = snapguid;
513
514 fn = avl_find(avl, &fn_find, NULL);
515 if (fn) {
516 if (snapname)
517 *snapname = fn->fn_snapname;
518 return (fn->fn_nvfs);
519 }
520 return (NULL);
521 }
522
523 static void
524 fsavl_destroy(avl_tree_t *avl)
525 {
526 fsavl_node_t *fn;
527 void *cookie;
528
529 if (avl == NULL)
530 return;
531
532 cookie = NULL;
533 while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
534 free(fn);
535 avl_destroy(avl);
536 free(avl);
537 }
538
539 /*
540 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
541 */
542 static avl_tree_t *
543 fsavl_create(nvlist_t *fss)
544 {
545 avl_tree_t *fsavl;
546 nvpair_t *fselem = NULL;
547
548 if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
549 return (NULL);
550
551 avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
552 offsetof(fsavl_node_t, fn_node));
553
554 while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
555 nvlist_t *nvfs, *snaps;
556 nvpair_t *snapelem = NULL;
557
558 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
559 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
560
561 while ((snapelem =
562 nvlist_next_nvpair(snaps, snapelem)) != NULL) {
563 fsavl_node_t *fn;
564 uint64_t guid;
565
566 VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
567 if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
568 fsavl_destroy(fsavl);
569 return (NULL);
570 }
571 fn->fn_nvfs = nvfs;
572 fn->fn_snapname = nvpair_name(snapelem);
573 fn->fn_guid = guid;
574
575 /*
576 * Note: if there are multiple snaps with the
577 * same GUID, we ignore all but one.
578 */
579 if (avl_find(fsavl, fn, NULL) == NULL)
580 avl_add(fsavl, fn);
581 else
582 free(fn);
583 }
584 }
585
586 return (fsavl);
587 }
588
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 */

/*
 * State threaded through send_iterate_fs() / send_iterate_snap() while
 * the header nvlist is being built.
 */
typedef struct send_data {
	/*
	 * assigned inside every recursive call,
	 * restored from *_save on return:
	 *
	 * guid of fromsnap snapshot in parent dataset
	 * txg of fromsnap snapshot in current dataset
	 * txg of tosnap snapshot in current dataset
	 */

	uint64_t parent_fromsnap_guid;
	uint64_t fromsnap_txg;
	uint64_t tosnap_txg;

	/* the nvlists get accumulated during depth-first traversal */
	nvlist_t *parent_snaps;
	nvlist_t *fss;
	nvlist_t *snapprops;

	/*
	 * send-receive configuration, does not change during traversal
	 * (except seenfrom/seento, which send_iterate_snap() sets as it
	 * encounters the respective snapshots in non-recursive mode)
	 */
	const char *fsname;
	const char *fromsnap;
	const char *tosnap;
	boolean_t recursive;
	boolean_t verbose;
	boolean_t seenfrom;
	boolean_t seento;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
643
644 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
645
646 static int
647 send_iterate_snap(zfs_handle_t *zhp, void *arg)
648 {
649 send_data_t *sd = arg;
650 uint64_t guid = zhp->zfs_dmustats.dds_guid;
651 uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
652 char *snapname;
653 nvlist_t *nv;
654 boolean_t isfromsnap, istosnap, istosnapwithnofrom;
655
656 snapname = strrchr(zhp->zfs_name, '@')+1;
657 isfromsnap = (sd->fromsnap != NULL &&
658 strcmp(sd->fromsnap, snapname) == 0);
659 istosnap = (sd->tosnap != NULL && (strcmp(sd->tosnap, snapname) == 0));
660 istosnapwithnofrom = (istosnap && sd->fromsnap == NULL);
661
662 if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
663 if (sd->verbose) {
664 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
665 "skipping snapshot %s because it was created "
666 "after the destination snapshot (%s)\n"),
667 zhp->zfs_name, sd->tosnap);
668 }
669 zfs_close(zhp);
670 return (0);
671 }
672
673 VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
674 /*
675 * NB: if there is no fromsnap here (it's a newly created fs in
676 * an incremental replication), we will substitute the tosnap.
677 */
678 if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap)) {
679 sd->parent_fromsnap_guid = guid;
680 }
681
682 if (!sd->recursive) {
683 if (!sd->seenfrom && isfromsnap) {
684 sd->seenfrom = B_TRUE;
685 zfs_close(zhp);
686 return (0);
687 }
688
689 if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
690 zfs_close(zhp);
691 return (0);
692 }
693
694 if (istosnap)
695 sd->seento = B_TRUE;
696 }
697
698 VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
699 send_iterate_prop(zhp, nv);
700 VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
701 nvlist_free(nv);
702
703 zfs_close(zhp);
704 return (0);
705 }
706
707 static void
708 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
709 {
710 nvpair_t *elem = NULL;
711
712 while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
713 char *propname = nvpair_name(elem);
714 zfs_prop_t prop = zfs_name_to_prop(propname);
715 nvlist_t *propnv;
716
717 if (!zfs_prop_user(propname)) {
718 /*
719 * Realistically, this should never happen. However,
720 * we want the ability to add DSL properties without
721 * needing to make incompatible version changes. We
722 * need to ignore unknown properties to allow older
723 * software to still send datasets containing these
724 * properties, with the unknown properties elided.
725 */
726 if (prop == ZPROP_INVAL)
727 continue;
728
729 if (zfs_prop_readonly(prop))
730 continue;
731 }
732
733 verify(nvpair_value_nvlist(elem, &propnv) == 0);
734 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
735 prop == ZFS_PROP_REFQUOTA ||
736 prop == ZFS_PROP_REFRESERVATION) {
737 char *source;
738 uint64_t value;
739 verify(nvlist_lookup_uint64(propnv,
740 ZPROP_VALUE, &value) == 0);
741 if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
742 continue;
743 /*
744 * May have no source before SPA_VERSION_RECVD_PROPS,
745 * but is still modifiable.
746 */
747 if (nvlist_lookup_string(propnv,
748 ZPROP_SOURCE, &source) == 0) {
749 if ((strcmp(source, zhp->zfs_name) != 0) &&
750 (strcmp(source,
751 ZPROP_SOURCE_VAL_RECVD) != 0))
752 continue;
753 }
754 } else {
755 char *source;
756 if (nvlist_lookup_string(propnv,
757 ZPROP_SOURCE, &source) != 0)
758 continue;
759 if ((strcmp(source, zhp->zfs_name) != 0) &&
760 (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
761 continue;
762 }
763
764 if (zfs_prop_user(propname) ||
765 zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
766 char *value;
767 verify(nvlist_lookup_string(propnv,
768 ZPROP_VALUE, &value) == 0);
769 VERIFY(0 == nvlist_add_string(nv, propname, value));
770 } else {
771 uint64_t value;
772 verify(nvlist_lookup_uint64(propnv,
773 ZPROP_VALUE, &value) == 0);
774 VERIFY(0 == nvlist_add_uint64(nv, propname, value));
775 }
776 }
777 }
778
779 /*
780 * returns snapshot creation txg
781 * and returns 0 if the snapshot does not exist
782 */
783 static uint64_t
784 get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
785 {
786 char name[ZFS_MAX_DATASET_NAME_LEN];
787 uint64_t txg = 0;
788
789 if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
790 return (txg);
791
792 (void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
793 if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
794 zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
795 if (zhp != NULL) {
796 txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
797 zfs_close(zhp);
798 }
799 }
800
801 return (txg);
802 }
803
804 /*
805 * recursively generate nvlists describing datasets. See comment
806 * for the data structure send_data_t above for description of contents
807 * of the nvlist.
808 */
809 static int
810 send_iterate_fs(zfs_handle_t *zhp, void *arg)
811 {
812 send_data_t *sd = arg;
813 nvlist_t *nvfs, *nv;
814 int rv = 0;
815 uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
816 uint64_t fromsnap_txg_save = sd->fromsnap_txg;
817 uint64_t tosnap_txg_save = sd->tosnap_txg;
818 uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
819 uint64_t guid = zhp->zfs_dmustats.dds_guid;
820 uint64_t fromsnap_txg, tosnap_txg;
821 char guidstring[64];
822
823 fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
824 if (fromsnap_txg != 0)
825 sd->fromsnap_txg = fromsnap_txg;
826
827 tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
828 if (tosnap_txg != 0)
829 sd->tosnap_txg = tosnap_txg;
830
831 /*
832 * on the send side, if the current dataset does not have tosnap,
833 * perform two additional checks:
834 *
835 * - skip sending the current dataset if it was created later than
836 * the parent tosnap
837 * - return error if the current dataset was created earlier than
838 * the parent tosnap
839 */
840 if (sd->tosnap != NULL && tosnap_txg == 0) {
841 if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
842 if (sd->verbose) {
843 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
844 "skipping dataset %s: snapshot %s does "
845 "not exist\n"), zhp->zfs_name, sd->tosnap);
846 }
847 } else {
848 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
849 "cannot send %s@%s%s: snapshot %s@%s does not "
850 "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
851 dgettext(TEXT_DOMAIN, " recursively") : "",
852 zhp->zfs_name, sd->tosnap);
853 rv = -1;
854 }
855 goto out;
856 }
857
858 VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
859 VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
860 VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
861 sd->parent_fromsnap_guid));
862
863 if (zhp->zfs_dmustats.dds_origin[0]) {
864 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
865 zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
866 if (origin == NULL) {
867 rv = -1;
868 goto out;
869 }
870 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
871 origin->zfs_dmustats.dds_guid));
872 }
873
874 /* iterate over props */
875 VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
876 send_iterate_prop(zhp, nv);
877 VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
878 nvlist_free(nv);
879
880 /* iterate over snaps, and set sd->parent_fromsnap_guid */
881 sd->parent_fromsnap_guid = 0;
882 VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
883 VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
884 (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
885 VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
886 VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
887 nvlist_free(sd->parent_snaps);
888 nvlist_free(sd->snapprops);
889
890 /* add this fs to nvlist */
891 (void) snprintf(guidstring, sizeof (guidstring),
892 "0x%llx", (longlong_t)guid);
893 VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
894 nvlist_free(nvfs);
895
896 /* iterate over children */
897 if (sd->recursive)
898 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
899
900 out:
901 sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
902 sd->fromsnap_txg = fromsnap_txg_save;
903 sd->tosnap_txg = tosnap_txg_save;
904
905 zfs_close(zhp);
906 return (rv);
907 }
908
909 static int
910 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
911 const char *tosnap, boolean_t recursive, boolean_t verbose,
912 nvlist_t **nvlp, avl_tree_t **avlp)
913 {
914 zfs_handle_t *zhp;
915 send_data_t sd = { 0 };
916 int error;
917
918 zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
919 if (zhp == NULL)
920 return (EZFS_BADTYPE);
921
922 VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
923 sd.fsname = fsname;
924 sd.fromsnap = fromsnap;
925 sd.tosnap = tosnap;
926 sd.recursive = recursive;
927 sd.verbose = verbose;
928
929 if ((error = send_iterate_fs(zhp, &sd)) != 0) {
930 nvlist_free(sd.fss);
931 if (avlp != NULL)
932 *avlp = NULL;
933 *nvlp = NULL;
934 return (error);
935 }
936
937 if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
938 nvlist_free(sd.fss);
939 *nvlp = NULL;
940 return (EZFS_NOMEM);
941 }
942
943 *nvlp = sd.fss;
944 return (0);
945 }
946
/*
 * Routines specific to "zfs send"
 */

/*
 * State carried while dumping snapshots for a send.  Only the use made
 * by gather_holds() is visible here (snapholds, holdtag); the remaining
 * fields are consumed by code outside this part of the file.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
	boolean_t large_block, compress;
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/* snapshot name -> hold tag; NULL when no holds are wanted */
	nvlist_t *snapholds;
	avl_tree_t *fsavl;
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	nvlist_t *debugnv;
	/* tag recorded for each snapshot added to snapholds */
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
	int cleanup_fd;
	uint64_t size;
} send_dump_data_t;
971
972 static int
973 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
974 boolean_t fromorigin, enum lzc_send_flags flags, uint64_t *sizep)
975 {
976 zfs_cmd_t zc = {"\0"};
977 libzfs_handle_t *hdl = zhp->zfs_hdl;
978
979 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
980 assert(fromsnap_obj == 0 || !fromorigin);
981
982 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
983 zc.zc_obj = fromorigin;
984 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
985 zc.zc_fromobj = fromsnap_obj;
986 zc.zc_guid = 1; /* estimate flag */
987 zc.zc_flags = flags;
988
989 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
990 char errbuf[1024];
991 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
992 "warning: cannot estimate space for '%s'"), zhp->zfs_name);
993
994 switch (errno) {
995 case EXDEV:
996 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
997 "not an earlier snapshot from the same fs"));
998 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
999
1000 case ENOENT:
1001 if (zfs_dataset_exists(hdl, zc.zc_name,
1002 ZFS_TYPE_SNAPSHOT)) {
1003 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1004 "incremental source (@%s) does not exist"),
1005 zc.zc_value);
1006 }
1007 return (zfs_error(hdl, EZFS_NOENT, errbuf));
1008
1009 case EDQUOT:
1010 case EFBIG:
1011 case EIO:
1012 case ENOLINK:
1013 case ENOSPC:
1014 case ENOSTR:
1015 case ENXIO:
1016 case EPIPE:
1017 case ERANGE:
1018 case EFAULT:
1019 case EROFS:
1020 zfs_error_aux(hdl, strerror(errno));
1021 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1022
1023 default:
1024 return (zfs_standard_error(hdl, errno, errbuf));
1025 }
1026 }
1027
1028 *sizep = zc.zc_objset_type;
1029
1030 return (0);
1031 }
1032
1033 /*
1034 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
1035 * NULL) to the file descriptor specified by outfd.
1036 */
1037 static int
1038 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
1039 boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
1040 nvlist_t *debugnv)
1041 {
1042 zfs_cmd_t zc = {"\0"};
1043 libzfs_handle_t *hdl = zhp->zfs_hdl;
1044 nvlist_t *thisdbg;
1045
1046 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1047 assert(fromsnap_obj == 0 || !fromorigin);
1048
1049 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1050 zc.zc_cookie = outfd;
1051 zc.zc_obj = fromorigin;
1052 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1053 zc.zc_fromobj = fromsnap_obj;
1054 zc.zc_flags = flags;
1055
1056 VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
1057 if (fromsnap && fromsnap[0] != '\0') {
1058 VERIFY(0 == nvlist_add_string(thisdbg,
1059 "fromsnap", fromsnap));
1060 }
1061
1062 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1063 char errbuf[1024];
1064 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1065 "warning: cannot send '%s'"), zhp->zfs_name);
1066
1067 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
1068 if (debugnv) {
1069 VERIFY(0 == nvlist_add_nvlist(debugnv,
1070 zhp->zfs_name, thisdbg));
1071 }
1072 nvlist_free(thisdbg);
1073
1074 switch (errno) {
1075 case EXDEV:
1076 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1077 "not an earlier snapshot from the same fs"));
1078 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1079
1080 case ENOENT:
1081 if (zfs_dataset_exists(hdl, zc.zc_name,
1082 ZFS_TYPE_SNAPSHOT)) {
1083 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1084 "incremental source (@%s) does not exist"),
1085 zc.zc_value);
1086 }
1087 return (zfs_error(hdl, EZFS_NOENT, errbuf));
1088
1089 case EDQUOT:
1090 case EFBIG:
1091 case EIO:
1092 case ENOLINK:
1093 case ENOSPC:
1094 case ENOSTR:
1095 case ENXIO:
1096 case EPIPE:
1097 case ERANGE:
1098 case EFAULT:
1099 case EROFS:
1100 zfs_error_aux(hdl, strerror(errno));
1101 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1102
1103 default:
1104 return (zfs_standard_error(hdl, errno, errbuf));
1105 }
1106 }
1107
1108 if (debugnv)
1109 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
1110 nvlist_free(thisdbg);
1111
1112 return (0);
1113 }
1114
1115 static void
1116 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
1117 {
1118 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1119
1120 /*
1121 * zfs_send() only sets snapholds for sends that need them,
1122 * e.g. replication and doall.
1123 */
1124 if (sdd->snapholds == NULL)
1125 return;
1126
1127 fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1128 }
1129
1130 static void *
1131 send_progress_thread(void *arg)
1132 {
1133 progress_arg_t *pa = arg;
1134 zfs_cmd_t zc = {"\0"};
1135 zfs_handle_t *zhp = pa->pa_zhp;
1136 libzfs_handle_t *hdl = zhp->zfs_hdl;
1137 unsigned long long bytes;
1138 char buf[16];
1139 time_t t;
1140 struct tm *tm;
1141
1142 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1143
1144 if (!pa->pa_parsable)
1145 (void) fprintf(stderr, "TIME SENT SNAPSHOT\n");
1146
1147 /*
1148 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1149 */
1150 for (;;) {
1151 (void) sleep(1);
1152
1153 zc.zc_cookie = pa->pa_fd;
1154 if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1155 return ((void *)-1);
1156
1157 (void) time(&t);
1158 tm = localtime(&t);
1159 bytes = zc.zc_cookie;
1160
1161 if (pa->pa_parsable) {
1162 (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1163 tm->tm_hour, tm->tm_min, tm->tm_sec,
1164 bytes, zhp->zfs_name);
1165 } else {
1166 zfs_nicenum(bytes, buf, sizeof (buf));
1167 (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
1168 tm->tm_hour, tm->tm_min, tm->tm_sec,
1169 buf, zhp->zfs_name);
1170 }
1171 }
1172 }
1173
1174 static void
1175 send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1176 uint64_t size, boolean_t parsable)
1177 {
1178 if (parsable) {
1179 if (fromsnap != NULL) {
1180 (void) fprintf(fout, "incremental\t%s\t%s",
1181 fromsnap, tosnap);
1182 } else {
1183 (void) fprintf(fout, "full\t%s",
1184 tosnap);
1185 }
1186 } else {
1187 if (fromsnap != NULL) {
1188 if (strchr(fromsnap, '@') == NULL &&
1189 strchr(fromsnap, '#') == NULL) {
1190 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1191 "send from @%s to %s"),
1192 fromsnap, tosnap);
1193 } else {
1194 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1195 "send from %s to %s"),
1196 fromsnap, tosnap);
1197 }
1198 } else {
1199 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1200 "full send of %s"),
1201 tosnap);
1202 }
1203 }
1204
1205 if (size != 0) {
1206 if (parsable) {
1207 (void) fprintf(fout, "\t%llu",
1208 (longlong_t)size);
1209 } else {
1210 char buf[16];
1211 zfs_nicenum(size, buf, sizeof (buf));
1212 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1213 " estimated size is %s"), buf);
1214 }
1215 }
1216 (void) fprintf(fout, "\n");
1217 }
1218
1219 static int
1220 dump_snapshot(zfs_handle_t *zhp, void *arg)
1221 {
1222 send_dump_data_t *sdd = arg;
1223 progress_arg_t pa = { 0 };
1224 pthread_t tid;
1225 char *thissnap;
1226 enum lzc_send_flags flags = 0;
1227 int err;
1228 boolean_t isfromsnap, istosnap, fromorigin;
1229 boolean_t exclude = B_FALSE;
1230 FILE *fout = sdd->std_out ? stdout : stderr;
1231
1232 err = 0;
1233 thissnap = strchr(zhp->zfs_name, '@') + 1;
1234 isfromsnap = (sdd->fromsnap != NULL &&
1235 strcmp(sdd->fromsnap, thissnap) == 0);
1236
1237 if (!sdd->seenfrom && isfromsnap) {
1238 gather_holds(zhp, sdd);
1239 sdd->seenfrom = B_TRUE;
1240 (void) strlcpy(sdd->prevsnap, thissnap,
1241 sizeof (sdd->prevsnap));
1242 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1243 zfs_close(zhp);
1244 return (0);
1245 }
1246
1247 if (sdd->seento || !sdd->seenfrom) {
1248 zfs_close(zhp);
1249 return (0);
1250 }
1251
1252 istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1253 if (istosnap)
1254 sdd->seento = B_TRUE;
1255
1256 if (sdd->large_block)
1257 flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1258 if (sdd->embed_data)
1259 flags |= LZC_SEND_FLAG_EMBED_DATA;
1260 if (sdd->compress)
1261 flags |= LZC_SEND_FLAG_COMPRESS;
1262
1263 if (!sdd->doall && !isfromsnap && !istosnap) {
1264 if (sdd->replicate) {
1265 char *snapname;
1266 nvlist_t *snapprops;
1267 /*
1268 * Filter out all intermediate snapshots except origin
1269 * snapshots needed to replicate clones.
1270 */
1271 nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1272 zhp->zfs_dmustats.dds_guid, &snapname);
1273
1274 VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1275 "snapprops", &snapprops));
1276 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1277 thissnap, &snapprops));
1278 exclude = !nvlist_exists(snapprops, "is_clone_origin");
1279 } else {
1280 exclude = B_TRUE;
1281 }
1282 }
1283
1284 /*
1285 * If a filter function exists, call it to determine whether
1286 * this snapshot will be sent.
1287 */
1288 if (exclude || (sdd->filter_cb != NULL &&
1289 sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1290 /*
1291 * This snapshot is filtered out. Don't send it, and don't
1292 * set prevsnap_obj, so it will be as if this snapshot didn't
1293 * exist, and the next accepted snapshot will be sent as
1294 * an incremental from the last accepted one, or as the
1295 * first (and full) snapshot in the case of a replication,
1296 * non-incremental send.
1297 */
1298 zfs_close(zhp);
1299 return (0);
1300 }
1301
1302 gather_holds(zhp, sdd);
1303 fromorigin = sdd->prevsnap[0] == '\0' &&
1304 (sdd->fromorigin || sdd->replicate);
1305
1306 if (sdd->verbose) {
1307 uint64_t size = 0;
1308 (void) estimate_ioctl(zhp, sdd->prevsnap_obj,
1309 fromorigin, flags, &size);
1310
1311 send_print_verbose(fout, zhp->zfs_name,
1312 sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1313 size, sdd->parsable);
1314 sdd->size += size;
1315 }
1316
1317 if (!sdd->dryrun) {
1318 /*
1319 * If progress reporting is requested, spawn a new thread to
1320 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1321 */
1322 if (sdd->progress) {
1323 pa.pa_zhp = zhp;
1324 pa.pa_fd = sdd->outfd;
1325 pa.pa_parsable = sdd->parsable;
1326
1327 if ((err = pthread_create(&tid, NULL,
1328 send_progress_thread, &pa))) {
1329 zfs_close(zhp);
1330 return (err);
1331 }
1332 }
1333
1334 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1335 fromorigin, sdd->outfd, flags, sdd->debugnv);
1336
1337 if (sdd->progress) {
1338 (void) pthread_cancel(tid);
1339 (void) pthread_join(tid, NULL);
1340 }
1341 }
1342
1343 (void) strcpy(sdd->prevsnap, thissnap);
1344 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1345 zfs_close(zhp);
1346 return (err);
1347 }
1348
1349 static int
1350 dump_filesystem(zfs_handle_t *zhp, void *arg)
1351 {
1352 int rv = 0;
1353 send_dump_data_t *sdd = arg;
1354 boolean_t missingfrom = B_FALSE;
1355 zfs_cmd_t zc = {"\0"};
1356
1357 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1358 zhp->zfs_name, sdd->tosnap);
1359 if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1360 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1361 "WARNING: could not send %s@%s: does not exist\n"),
1362 zhp->zfs_name, sdd->tosnap);
1363 sdd->err = B_TRUE;
1364 return (0);
1365 }
1366
1367 if (sdd->replicate && sdd->fromsnap) {
1368 /*
1369 * If this fs does not have fromsnap, and we're doing
1370 * recursive, we need to send a full stream from the
1371 * beginning (or an incremental from the origin if this
1372 * is a clone). If we're doing non-recursive, then let
1373 * them get the error.
1374 */
1375 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1376 zhp->zfs_name, sdd->fromsnap);
1377 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1378 ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1379 missingfrom = B_TRUE;
1380 }
1381 }
1382
1383 sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1384 sdd->prevsnap_obj = 0;
1385 if (sdd->fromsnap == NULL || missingfrom)
1386 sdd->seenfrom = B_TRUE;
1387
1388 rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1389 if (!sdd->seenfrom) {
1390 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1391 "WARNING: could not send %s@%s:\n"
1392 "incremental source (%s@%s) does not exist\n"),
1393 zhp->zfs_name, sdd->tosnap,
1394 zhp->zfs_name, sdd->fromsnap);
1395 sdd->err = B_TRUE;
1396 } else if (!sdd->seento) {
1397 if (sdd->fromsnap) {
1398 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1399 "WARNING: could not send %s@%s:\n"
1400 "incremental source (%s@%s) "
1401 "is not earlier than it\n"),
1402 zhp->zfs_name, sdd->tosnap,
1403 zhp->zfs_name, sdd->fromsnap);
1404 } else {
1405 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1406 "WARNING: "
1407 "could not send %s@%s: does not exist\n"),
1408 zhp->zfs_name, sdd->tosnap);
1409 }
1410 sdd->err = B_TRUE;
1411 }
1412
1413 return (rv);
1414 }
1415
1416 static int
1417 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1418 {
1419 send_dump_data_t *sdd = arg;
1420 nvpair_t *fspair;
1421 boolean_t needagain, progress;
1422
1423 if (!sdd->replicate)
1424 return (dump_filesystem(rzhp, sdd));
1425
1426 /* Mark the clone origin snapshots. */
1427 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1428 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1429 nvlist_t *nvfs;
1430 uint64_t origin_guid = 0;
1431
1432 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1433 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1434 if (origin_guid != 0) {
1435 char *snapname;
1436 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1437 origin_guid, &snapname);
1438 if (origin_nv != NULL) {
1439 nvlist_t *snapprops;
1440 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1441 "snapprops", &snapprops));
1442 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1443 snapname, &snapprops));
1444 VERIFY(0 == nvlist_add_boolean(
1445 snapprops, "is_clone_origin"));
1446 }
1447 }
1448 }
1449 again:
1450 needagain = progress = B_FALSE;
1451 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1452 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1453 nvlist_t *fslist, *parent_nv;
1454 char *fsname;
1455 zfs_handle_t *zhp;
1456 int err;
1457 uint64_t origin_guid = 0;
1458 uint64_t parent_guid = 0;
1459
1460 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1461 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1462 continue;
1463
1464 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1465 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1466 (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1467 &parent_guid);
1468
1469 if (parent_guid != 0) {
1470 parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1471 if (!nvlist_exists(parent_nv, "sent")) {
1472 /* parent has not been sent; skip this one */
1473 needagain = B_TRUE;
1474 continue;
1475 }
1476 }
1477
1478 if (origin_guid != 0) {
1479 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1480 origin_guid, NULL);
1481 if (origin_nv != NULL &&
1482 !nvlist_exists(origin_nv, "sent")) {
1483 /*
1484 * origin has not been sent yet;
1485 * skip this clone.
1486 */
1487 needagain = B_TRUE;
1488 continue;
1489 }
1490 }
1491
1492 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1493 if (zhp == NULL)
1494 return (-1);
1495 err = dump_filesystem(zhp, sdd);
1496 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1497 progress = B_TRUE;
1498 zfs_close(zhp);
1499 if (err)
1500 return (err);
1501 }
1502 if (needagain) {
1503 assert(progress);
1504 goto again;
1505 }
1506
1507 /* clean out the sent flags in case we reuse this fss */
1508 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1509 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1510 nvlist_t *fslist;
1511
1512 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1513 (void) nvlist_remove_all(fslist, "sent");
1514 }
1515
1516 return (0);
1517 }
1518
1519 nvlist_t *
1520 zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1521 {
1522 unsigned int version;
1523 int nread, i;
1524 unsigned long long checksum, packed_len;
1525
1526 /*
1527 * Decode token header, which is:
1528 * <token version>-<checksum of payload>-<uncompressed payload length>
1529 * Note that the only supported token version is 1.
1530 */
1531 nread = sscanf(token, "%u-%llx-%llx-",
1532 &version, &checksum, &packed_len);
1533 if (nread != 3) {
1534 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1535 "resume token is corrupt (invalid format)"));
1536 return (NULL);
1537 }
1538
1539 if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1540 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1541 "resume token is corrupt (invalid version %u)"),
1542 version);
1543 return (NULL);
1544 }
1545
1546 /* convert hexadecimal representation to binary */
1547 token = strrchr(token, '-') + 1;
1548 int len = strlen(token) / 2;
1549 unsigned char *compressed = zfs_alloc(hdl, len);
1550 for (i = 0; i < len; i++) {
1551 nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1552 if (nread != 1) {
1553 free(compressed);
1554 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1555 "resume token is corrupt "
1556 "(payload is not hex-encoded)"));
1557 return (NULL);
1558 }
1559 }
1560
1561 /* verify checksum */
1562 zio_cksum_t cksum;
1563 fletcher_4_native_varsize(compressed, len, &cksum);
1564 if (cksum.zc_word[0] != checksum) {
1565 free(compressed);
1566 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1567 "resume token is corrupt (incorrect checksum)"));
1568 return (NULL);
1569 }
1570
1571 /* uncompress */
1572 void *packed = zfs_alloc(hdl, packed_len);
1573 uLongf packed_len_long = packed_len;
1574 if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1575 packed_len_long != packed_len) {
1576 free(packed);
1577 free(compressed);
1578 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1579 "resume token is corrupt (decompression failed)"));
1580 return (NULL);
1581 }
1582
1583 /* unpack nvlist */
1584 nvlist_t *nv;
1585 int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1586 free(packed);
1587 free(compressed);
1588 if (error != 0) {
1589 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1590 "resume token is corrupt (nvlist_unpack failed)"));
1591 return (NULL);
1592 }
1593 return (nv);
1594 }
1595
1596 int
1597 zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1598 const char *resume_token)
1599 {
1600 char errbuf[1024];
1601 char *toname;
1602 char *fromname = NULL;
1603 uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1604 zfs_handle_t *zhp;
1605 int error = 0;
1606 char name[ZFS_MAX_DATASET_NAME_LEN];
1607 enum lzc_send_flags lzc_flags = 0;
1608
1609 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1610 "cannot resume send"));
1611
1612 nvlist_t *resume_nvl =
1613 zfs_send_resume_token_to_nvlist(hdl, resume_token);
1614 if (resume_nvl == NULL) {
1615 /*
1616 * zfs_error_aux has already been set by
1617 * zfs_send_resume_token_to_nvlist
1618 */
1619 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1620 }
1621 if (flags->verbose) {
1622 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1623 "resume token contents:\n"));
1624 nvlist_print(stderr, resume_nvl);
1625 }
1626
1627 if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1628 nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1629 nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1630 nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1631 nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1632 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1633 "resume token is corrupt"));
1634 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1635 }
1636 fromguid = 0;
1637 (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1638
1639 if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
1640 lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1641 if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1642 lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1643 if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
1644 lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1645
1646 if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
1647 if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1648 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1649 "'%s' is no longer the same snapshot used in "
1650 "the initial send"), toname);
1651 } else {
1652 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1653 "'%s' used in the initial send no longer exists"),
1654 toname);
1655 }
1656 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1657 }
1658 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1659 if (zhp == NULL) {
1660 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1661 "unable to access '%s'"), name);
1662 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1663 }
1664
1665 if (fromguid != 0) {
1666 if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
1667 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1668 "incremental source %#llx no longer exists"),
1669 (longlong_t)fromguid);
1670 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1671 }
1672 fromname = name;
1673 }
1674
1675 if (flags->verbose) {
1676 uint64_t size = 0;
1677 error = lzc_send_space(zhp->zfs_name, fromname,
1678 lzc_flags, &size);
1679 if (error == 0)
1680 size = MAX(0, (int64_t)(size - bytes));
1681 send_print_verbose(stderr, zhp->zfs_name, fromname,
1682 size, flags->parsable);
1683 }
1684
1685 if (!flags->dryrun) {
1686 progress_arg_t pa = { 0 };
1687 pthread_t tid;
1688 /*
1689 * If progress reporting is requested, spawn a new thread to
1690 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1691 */
1692 if (flags->progress) {
1693 pa.pa_zhp = zhp;
1694 pa.pa_fd = outfd;
1695 pa.pa_parsable = flags->parsable;
1696
1697 error = pthread_create(&tid, NULL,
1698 send_progress_thread, &pa);
1699 if (error != 0) {
1700 zfs_close(zhp);
1701 return (error);
1702 }
1703 }
1704
1705 error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
1706 lzc_flags, resumeobj, resumeoff);
1707
1708 if (flags->progress) {
1709 (void) pthread_cancel(tid);
1710 (void) pthread_join(tid, NULL);
1711 }
1712
1713 char errbuf[1024];
1714 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1715 "warning: cannot send '%s'"), zhp->zfs_name);
1716
1717 zfs_close(zhp);
1718
1719 switch (error) {
1720 case 0:
1721 return (0);
1722 case EXDEV:
1723 case ENOENT:
1724 case EDQUOT:
1725 case EFBIG:
1726 case EIO:
1727 case ENOLINK:
1728 case ENOSPC:
1729 case ENOSTR:
1730 case ENXIO:
1731 case EPIPE:
1732 case ERANGE:
1733 case EFAULT:
1734 case EROFS:
1735 zfs_error_aux(hdl, strerror(errno));
1736 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1737
1738 default:
1739 return (zfs_standard_error(hdl, errno, errbuf));
1740 }
1741 }
1742
1743
1744 zfs_close(zhp);
1745
1746 return (error);
1747 }
1748
1749 /*
1750 * Generate a send stream for the dataset identified by the argument zhp.
1751 *
1752 * The content of the send stream is the snapshot identified by
1753 * 'tosnap'. Incremental streams are requested in two ways:
1754 * - from the snapshot identified by "fromsnap" (if non-null) or
1755 * - from the origin of the dataset identified by zhp, which must
1756 * be a clone. In this case, "fromsnap" is null and "fromorigin"
1757 * is TRUE.
1758 *
1759 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1760 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1761 * if "replicate" is set. If "doall" is set, dump all the intermediate
1762 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1763 * case too. If "props" is set, send properties.
1764 */
1765 int
1766 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1767 sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1768 void *cb_arg, nvlist_t **debugnvp)
1769 {
1770 char errbuf[1024];
1771 send_dump_data_t sdd = { 0 };
1772 int err = 0;
1773 nvlist_t *fss = NULL;
1774 avl_tree_t *fsavl = NULL;
1775 static uint64_t holdseq;
1776 int spa_version;
1777 pthread_t tid = 0;
1778 int pipefd[2];
1779 dedup_arg_t dda = { 0 };
1780 int featureflags = 0;
1781 FILE *fout;
1782
1783 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1784 "cannot send '%s'"), zhp->zfs_name);
1785
1786 if (fromsnap && fromsnap[0] == '\0') {
1787 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1788 "zero-length incremental source"));
1789 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1790 }
1791
1792 if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1793 uint64_t version;
1794 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1795 if (version >= ZPL_VERSION_SA) {
1796 featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1797 }
1798 }
1799
1800 if (flags->dedup && !flags->dryrun) {
1801 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1802 DMU_BACKUP_FEATURE_DEDUPPROPS);
1803 if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd))) {
1804 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1805 return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1806 errbuf));
1807 }
1808 dda.outputfd = outfd;
1809 dda.inputfd = pipefd[1];
1810 dda.dedup_hdl = zhp->zfs_hdl;
1811 if ((err = pthread_create(&tid, NULL, cksummer, &dda))) {
1812 (void) close(pipefd[0]);
1813 (void) close(pipefd[1]);
1814 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1815 return (zfs_error(zhp->zfs_hdl,
1816 EZFS_THREADCREATEFAILED, errbuf));
1817 }
1818 }
1819
1820 if (flags->replicate || flags->doall || flags->props) {
1821 dmu_replay_record_t drr = { 0 };
1822 char *packbuf = NULL;
1823 size_t buflen = 0;
1824 zio_cksum_t zc;
1825
1826 ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
1827
1828 if (flags->replicate || flags->props) {
1829 nvlist_t *hdrnv;
1830
1831 VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1832 if (fromsnap) {
1833 VERIFY(0 == nvlist_add_string(hdrnv,
1834 "fromsnap", fromsnap));
1835 }
1836 VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1837 if (!flags->replicate) {
1838 VERIFY(0 == nvlist_add_boolean(hdrnv,
1839 "not_recursive"));
1840 }
1841
1842 err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1843 fromsnap, tosnap, flags->replicate, flags->verbose,
1844 &fss, &fsavl);
1845 if (err)
1846 goto err_out;
1847 VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1848 err = nvlist_pack(hdrnv, &packbuf, &buflen,
1849 NV_ENCODE_XDR, 0);
1850 if (debugnvp)
1851 *debugnvp = hdrnv;
1852 else
1853 nvlist_free(hdrnv);
1854 if (err)
1855 goto stderr_out;
1856 }
1857
1858 if (!flags->dryrun) {
1859 /* write first begin record */
1860 drr.drr_type = DRR_BEGIN;
1861 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1862 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1863 drr_versioninfo, DMU_COMPOUNDSTREAM);
1864 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1865 drr_versioninfo, featureflags);
1866 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1867 sizeof (drr.drr_u.drr_begin.drr_toname),
1868 "%s@%s", zhp->zfs_name, tosnap);
1869 drr.drr_payloadlen = buflen;
1870
1871 err = dump_record(&drr, packbuf, buflen, &zc, outfd);
1872 free(packbuf);
1873 if (err != 0)
1874 goto stderr_out;
1875
1876 /* write end record */
1877 bzero(&drr, sizeof (drr));
1878 drr.drr_type = DRR_END;
1879 drr.drr_u.drr_end.drr_checksum = zc;
1880 err = write(outfd, &drr, sizeof (drr));
1881 if (err == -1) {
1882 err = errno;
1883 goto stderr_out;
1884 }
1885
1886 err = 0;
1887 }
1888 }
1889
1890 /* dump each stream */
1891 sdd.fromsnap = fromsnap;
1892 sdd.tosnap = tosnap;
1893 if (tid != 0)
1894 sdd.outfd = pipefd[0];
1895 else
1896 sdd.outfd = outfd;
1897 sdd.replicate = flags->replicate;
1898 sdd.doall = flags->doall;
1899 sdd.fromorigin = flags->fromorigin;
1900 sdd.fss = fss;
1901 sdd.fsavl = fsavl;
1902 sdd.verbose = flags->verbose;
1903 sdd.parsable = flags->parsable;
1904 sdd.progress = flags->progress;
1905 sdd.dryrun = flags->dryrun;
1906 sdd.large_block = flags->largeblock;
1907 sdd.embed_data = flags->embed_data;
1908 sdd.compress = flags->compress;
1909 sdd.filter_cb = filter_func;
1910 sdd.filter_cb_arg = cb_arg;
1911 if (debugnvp)
1912 sdd.debugnv = *debugnvp;
1913 if (sdd.verbose && sdd.dryrun)
1914 sdd.std_out = B_TRUE;
1915 fout = sdd.std_out ? stdout : stderr;
1916
1917 /*
1918 * Some flags require that we place user holds on the datasets that are
1919 * being sent so they don't get destroyed during the send. We can skip
1920 * this step if the pool is imported read-only since the datasets cannot
1921 * be destroyed.
1922 */
1923 if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1924 ZPOOL_PROP_READONLY, NULL) &&
1925 zfs_spa_version(zhp, &spa_version) == 0 &&
1926 spa_version >= SPA_VERSION_USERREFS &&
1927 (flags->doall || flags->replicate)) {
1928 ++holdseq;
1929 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1930 ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1931 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
1932 if (sdd.cleanup_fd < 0) {
1933 err = errno;
1934 goto stderr_out;
1935 }
1936 sdd.snapholds = fnvlist_alloc();
1937 } else {
1938 sdd.cleanup_fd = -1;
1939 sdd.snapholds = NULL;
1940 }
1941 if (flags->verbose || sdd.snapholds != NULL) {
1942 /*
1943 * Do a verbose no-op dry run to get all the verbose output
1944 * or to gather snapshot hold's before generating any data,
1945 * then do a non-verbose real run to generate the streams.
1946 */
1947 sdd.dryrun = B_TRUE;
1948 err = dump_filesystems(zhp, &sdd);
1949
1950 if (err != 0)
1951 goto stderr_out;
1952
1953 if (flags->verbose) {
1954 if (flags->parsable) {
1955 (void) fprintf(fout, "size\t%llu\n",
1956 (longlong_t)sdd.size);
1957 } else {
1958 char buf[16];
1959 zfs_nicenum(sdd.size, buf, sizeof (buf));
1960 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1961 "total estimated size is %s\n"), buf);
1962 }
1963 }
1964
1965 /* Ensure no snaps found is treated as an error. */
1966 if (!sdd.seento) {
1967 err = ENOENT;
1968 goto err_out;
1969 }
1970
1971 /* Skip the second run if dryrun was requested. */
1972 if (flags->dryrun)
1973 goto err_out;
1974
1975 if (sdd.snapholds != NULL) {
1976 err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1977 if (err != 0)
1978 goto stderr_out;
1979
1980 fnvlist_free(sdd.snapholds);
1981 sdd.snapholds = NULL;
1982 }
1983
1984 sdd.dryrun = B_FALSE;
1985 sdd.verbose = B_FALSE;
1986 }
1987
1988 err = dump_filesystems(zhp, &sdd);
1989 fsavl_destroy(fsavl);
1990 nvlist_free(fss);
1991
1992 /* Ensure no snaps found is treated as an error. */
1993 if (err == 0 && !sdd.seento)
1994 err = ENOENT;
1995
1996 if (tid != 0) {
1997 if (err != 0)
1998 (void) pthread_cancel(tid);
1999 (void) close(pipefd[0]);
2000 (void) pthread_join(tid, NULL);
2001 }
2002
2003 if (sdd.cleanup_fd != -1) {
2004 VERIFY(0 == close(sdd.cleanup_fd));
2005 sdd.cleanup_fd = -1;
2006 }
2007
2008 if (!flags->dryrun && (flags->replicate || flags->doall ||
2009 flags->props)) {
2010 /*
2011 * write final end record. NB: want to do this even if
2012 * there was some error, because it might not be totally
2013 * failed.
2014 */
2015 dmu_replay_record_t drr = { 0 };
2016 drr.drr_type = DRR_END;
2017 if (write(outfd, &drr, sizeof (drr)) == -1) {
2018 return (zfs_standard_error(zhp->zfs_hdl,
2019 errno, errbuf));
2020 }
2021 }
2022
2023 return (err || sdd.err);
2024
2025 stderr_out:
2026 err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2027 err_out:
2028 fsavl_destroy(fsavl);
2029 nvlist_free(fss);
2030 fnvlist_free(sdd.snapholds);
2031
2032 if (sdd.cleanup_fd != -1)
2033 VERIFY(0 == close(sdd.cleanup_fd));
2034 if (tid != 0) {
2035 (void) pthread_cancel(tid);
2036 (void) close(pipefd[0]);
2037 (void) pthread_join(tid, NULL);
2038 }
2039 return (err);
2040 }
2041
2042 int
2043 zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
2044 enum lzc_send_flags flags)
2045 {
2046 int err;
2047 libzfs_handle_t *hdl = zhp->zfs_hdl;
2048
2049 char errbuf[1024];
2050 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2051 "warning: cannot send '%s'"), zhp->zfs_name);
2052
2053 err = lzc_send(zhp->zfs_name, from, fd, flags);
2054 if (err != 0) {
2055 switch (errno) {
2056 case EXDEV:
2057 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2058 "not an earlier snapshot from the same fs"));
2059 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2060
2061 case ENOENT:
2062 case ESRCH:
2063 if (lzc_exists(zhp->zfs_name)) {
2064 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2065 "incremental source (%s) does not exist"),
2066 from);
2067 }
2068 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2069
2070 case EBUSY:
2071 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2072 "target is busy; if a filesystem, "
2073 "it must not be mounted"));
2074 return (zfs_error(hdl, EZFS_BUSY, errbuf));
2075
2076 case EDQUOT:
2077 case EFBIG:
2078 case EIO:
2079 case ENOLINK:
2080 case ENOSPC:
2081 case ENOSTR:
2082 case ENXIO:
2083 case EPIPE:
2084 case ERANGE:
2085 case EFAULT:
2086 case EROFS:
2087 zfs_error_aux(hdl, strerror(errno));
2088 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2089
2090 default:
2091 return (zfs_standard_error(hdl, errno, errbuf));
2092 }
2093 }
2094 return (err != 0);
2095 }
2096
2097 /*
2098 * Routines specific to "zfs recv"
2099 */
2100
2101 static int
2102 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2103 boolean_t byteswap, zio_cksum_t *zc)
2104 {
2105 char *cp = buf;
2106 int rv;
2107 int len = ilen;
2108
2109 assert(ilen <= SPA_MAXBLOCKSIZE);
2110
2111 do {
2112 rv = read(fd, cp, len);
2113 cp += rv;
2114 len -= rv;
2115 } while (rv > 0);
2116
2117 if (rv < 0 || len != 0) {
2118 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2119 "failed to read from stream"));
2120 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2121 "cannot receive")));
2122 }
2123
2124 if (zc) {
2125 if (byteswap)
2126 fletcher_4_incremental_byteswap(buf, ilen, zc);
2127 else
2128 fletcher_4_incremental_native(buf, ilen, zc);
2129 }
2130 return (0);
2131 }
2132
2133 static int
2134 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2135 boolean_t byteswap, zio_cksum_t *zc)
2136 {
2137 char *buf;
2138 int err;
2139
2140 buf = zfs_alloc(hdl, len);
2141 if (buf == NULL)
2142 return (ENOMEM);
2143
2144 err = recv_read(hdl, fd, buf, len, byteswap, zc);
2145 if (err != 0) {
2146 free(buf);
2147 return (err);
2148 }
2149
2150 err = nvlist_unpack(buf, len, nvp, 0);
2151 free(buf);
2152 if (err != 0) {
2153 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2154 "stream (malformed nvlist)"));
2155 return (EINVAL);
2156 }
2157 return (0);
2158 }
2159
2160 static int
2161 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2162 int baselen, char *newname, recvflags_t *flags)
2163 {
2164 static int seq;
2165 zfs_cmd_t zc = {"\0"};
2166 int err;
2167 prop_changelist_t *clp;
2168 zfs_handle_t *zhp;
2169
2170 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2171 if (zhp == NULL)
2172 return (-1);
2173 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2174 flags->force ? MS_FORCE : 0);
2175 zfs_close(zhp);
2176 if (clp == NULL)
2177 return (-1);
2178 err = changelist_prefix(clp);
2179 if (err)
2180 return (err);
2181
2182 zc.zc_objset_type = DMU_OST_ZFS;
2183 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2184
2185 if (tryname) {
2186 (void) strcpy(newname, tryname);
2187
2188 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
2189
2190 if (flags->verbose) {
2191 (void) printf("attempting rename %s to %s\n",
2192 zc.zc_name, zc.zc_value);
2193 }
2194 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2195 if (err == 0)
2196 changelist_rename(clp, name, tryname);
2197 } else {
2198 err = ENOENT;
2199 }
2200
2201 if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2202 seq++;
2203
2204 (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2205 "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2206 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
2207
2208 if (flags->verbose) {
2209 (void) printf("failed - trying rename %s to %s\n",
2210 zc.zc_name, zc.zc_value);
2211 }
2212 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2213 if (err == 0)
2214 changelist_rename(clp, name, newname);
2215 if (err && flags->verbose) {
2216 (void) printf("failed (%u) - "
2217 "will try again on next pass\n", errno);
2218 }
2219 err = EAGAIN;
2220 } else if (flags->verbose) {
2221 if (err == 0)
2222 (void) printf("success\n");
2223 else
2224 (void) printf("failed (%u)\n", errno);
2225 }
2226
2227 (void) changelist_postfix(clp);
2228 changelist_free(clp);
2229
2230 return (err);
2231 }
2232
2233 static int
2234 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2235 char *newname, recvflags_t *flags)
2236 {
2237 zfs_cmd_t zc = {"\0"};
2238 int err = 0;
2239 prop_changelist_t *clp;
2240 zfs_handle_t *zhp;
2241 boolean_t defer = B_FALSE;
2242 int spa_version;
2243
2244 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2245 if (zhp == NULL)
2246 return (-1);
2247 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2248 flags->force ? MS_FORCE : 0);
2249 if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2250 zfs_spa_version(zhp, &spa_version) == 0 &&
2251 spa_version >= SPA_VERSION_USERREFS)
2252 defer = B_TRUE;
2253 zfs_close(zhp);
2254 if (clp == NULL)
2255 return (-1);
2256 err = changelist_prefix(clp);
2257 if (err)
2258 return (err);
2259
2260 zc.zc_objset_type = DMU_OST_ZFS;
2261 zc.zc_defer_destroy = defer;
2262 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2263
2264 if (flags->verbose)
2265 (void) printf("attempting destroy %s\n", zc.zc_name);
2266 err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
2267 if (err == 0) {
2268 if (flags->verbose)
2269 (void) printf("success\n");
2270 changelist_remove(clp, zc.zc_name);
2271 }
2272
2273 (void) changelist_postfix(clp);
2274 changelist_free(clp);
2275
2276 /*
2277 * Deferred destroy might destroy the snapshot or only mark it to be
2278 * destroyed later, and it returns success in either case.
2279 */
2280 if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2281 ZFS_TYPE_SNAPSHOT))) {
2282 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2283 }
2284
2285 return (err);
2286 }
2287
2288 typedef struct guid_to_name_data {
2289 uint64_t guid;
2290 boolean_t bookmark_ok;
2291 char *name;
2292 char *skip;
2293 } guid_to_name_data_t;
2294
2295 static int
2296 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
2297 {
2298 guid_to_name_data_t *gtnd = arg;
2299 const char *slash;
2300 int err;
2301
2302 if (gtnd->skip != NULL &&
2303 (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2304 strcmp(slash + 1, gtnd->skip) == 0) {
2305 zfs_close(zhp);
2306 return (0);
2307 }
2308
2309 if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
2310 (void) strcpy(gtnd->name, zhp->zfs_name);
2311 zfs_close(zhp);
2312 return (EEXIST);
2313 }
2314
2315 err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2316 if (err != EEXIST && gtnd->bookmark_ok)
2317 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2318 zfs_close(zhp);
2319 return (err);
2320 }
2321
2322 /*
2323 * Attempt to find the local dataset associated with this guid. In the case of
2324 * multiple matches, we attempt to find the "best" match by searching
2325 * progressively larger portions of the hierarchy. This allows one to send a
2326 * tree of datasets individually and guarantee that we will find the source
2327 * guid within that hierarchy, even if there are multiple matches elsewhere.
2328 */
2329 static int
2330 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
2331 boolean_t bookmark_ok, char *name)
2332 {
2333 char pname[ZFS_MAX_DATASET_NAME_LEN];
2334 guid_to_name_data_t gtnd;
2335
2336 gtnd.guid = guid;
2337 gtnd.bookmark_ok = bookmark_ok;
2338 gtnd.name = name;
2339 gtnd.skip = NULL;
2340
2341 /*
2342 * Search progressively larger portions of the hierarchy, starting
2343 * with the filesystem specified by 'parent'. This will
2344 * select the "most local" version of the origin snapshot in the case
2345 * that there are multiple matching snapshots in the system.
2346 */
2347 (void) strlcpy(pname, parent, sizeof (pname));
2348 char *cp = strrchr(pname, '@');
2349 if (cp == NULL)
2350 cp = strchr(pname, '\0');
2351 for (; cp != NULL; cp = strrchr(pname, '/')) {
2352 /* Chop off the last component and open the parent */
2353 *cp = '\0';
2354 zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
2355
2356 if (zhp == NULL)
2357 continue;
2358 int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
2359 if (err != EEXIST)
2360 err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
2361 if (err != EEXIST && bookmark_ok)
2362 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
2363 zfs_close(zhp);
2364 if (err == EEXIST)
2365 return (0);
2366
2367 /*
2368 * Remember the last portion of the dataset so we skip it next
2369 * time through (as we've already searched that portion of the
2370 * hierarchy).
2371 */
2372 gtnd.skip = strrchr(pname, '/') + 1;
2373 }
2374
2375 return (ENOENT);
2376 }
2377
2378 /*
2379 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
2380 * guid1 is after guid2.
2381 */
2382 static int
2383 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
2384 uint64_t guid1, uint64_t guid2)
2385 {
2386 nvlist_t *nvfs;
2387 char *fsname = NULL, *snapname = NULL;
2388 char buf[ZFS_MAX_DATASET_NAME_LEN];
2389 int rv;
2390 zfs_handle_t *guid1hdl, *guid2hdl;
2391 uint64_t create1, create2;
2392
2393 if (guid2 == 0)
2394 return (0);
2395 if (guid1 == 0)
2396 return (1);
2397
2398 nvfs = fsavl_find(avl, guid1, &snapname);
2399 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2400 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2401 guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2402 if (guid1hdl == NULL)
2403 return (-1);
2404
2405 nvfs = fsavl_find(avl, guid2, &snapname);
2406 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2407 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2408 guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2409 if (guid2hdl == NULL) {
2410 zfs_close(guid1hdl);
2411 return (-1);
2412 }
2413
2414 create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2415 create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2416
2417 if (create1 < create2)
2418 rv = -1;
2419 else if (create1 > create2)
2420 rv = +1;
2421 else
2422 rv = 0;
2423
2424 zfs_close(guid1hdl);
2425 zfs_close(guid2hdl);
2426
2427 return (rv);
2428 }
2429
2430 static int
2431 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2432 recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2433 nvlist_t *renamed)
2434 {
2435 nvlist_t *local_nv, *deleted = NULL;
2436 avl_tree_t *local_avl;
2437 nvpair_t *fselem, *nextfselem;
2438 char *fromsnap;
2439 char newname[ZFS_MAX_DATASET_NAME_LEN];
2440 char guidname[32];
2441 int error;
2442 boolean_t needagain, progress, recursive;
2443 char *s1, *s2;
2444
2445 VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2446
2447 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2448 ENOENT);
2449
2450 if (flags->dryrun)
2451 return (0);
2452
2453 again:
2454 needagain = progress = B_FALSE;
2455
2456 VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2457
2458 if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2459 recursive, B_FALSE, &local_nv, &local_avl)) != 0)
2460 return (error);
2461
2462 /*
2463 * Process deletes and renames
2464 */
2465 for (fselem = nvlist_next_nvpair(local_nv, NULL);
2466 fselem; fselem = nextfselem) {
2467 nvlist_t *nvfs, *snaps;
2468 nvlist_t *stream_nvfs = NULL;
2469 nvpair_t *snapelem, *nextsnapelem;
2470 uint64_t fromguid = 0;
2471 uint64_t originguid = 0;
2472 uint64_t stream_originguid = 0;
2473 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2474 char *fsname, *stream_fsname;
2475
2476 nextfselem = nvlist_next_nvpair(local_nv, fselem);
2477
2478 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2479 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2480 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2481 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2482 &parent_fromsnap_guid));
2483 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2484
2485 /*
2486 * First find the stream's fs, so we can check for
2487 * a different origin (due to "zfs promote")
2488 */
2489 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2490 snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2491 uint64_t thisguid;
2492
2493 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2494 stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2495
2496 if (stream_nvfs != NULL)
2497 break;
2498 }
2499
2500 /* check for promote */
2501 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
2502 &stream_originguid);
2503 if (stream_nvfs && originguid != stream_originguid) {
2504 switch (created_before(hdl, local_avl,
2505 stream_originguid, originguid)) {
2506 case 1: {
2507 /* promote it! */
2508 zfs_cmd_t zc = {"\0"};
2509 nvlist_t *origin_nvfs;
2510 char *origin_fsname;
2511
2512 if (flags->verbose)
2513 (void) printf("promoting %s\n", fsname);
2514
2515 origin_nvfs = fsavl_find(local_avl, originguid,
2516 NULL);
2517 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2518 "name", &origin_fsname));
2519 (void) strlcpy(zc.zc_value, origin_fsname,
2520 sizeof (zc.zc_value));
2521 (void) strlcpy(zc.zc_name, fsname,
2522 sizeof (zc.zc_name));
2523 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2524 if (error == 0)
2525 progress = B_TRUE;
2526 break;
2527 }
2528 default:
2529 break;
2530 case -1:
2531 fsavl_destroy(local_avl);
2532 nvlist_free(local_nv);
2533 return (-1);
2534 }
2535 /*
2536 * We had/have the wrong origin, therefore our
2537 * list of snapshots is wrong. Need to handle
2538 * them on the next pass.
2539 */
2540 needagain = B_TRUE;
2541 continue;
2542 }
2543
2544 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2545 snapelem; snapelem = nextsnapelem) {
2546 uint64_t thisguid;
2547 char *stream_snapname;
2548 nvlist_t *found, *props;
2549
2550 nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2551
2552 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2553 found = fsavl_find(stream_avl, thisguid,
2554 &stream_snapname);
2555
2556 /* check for delete */
2557 if (found == NULL) {
2558 char name[ZFS_MAX_DATASET_NAME_LEN];
2559
2560 if (!flags->force)
2561 continue;
2562
2563 (void) snprintf(name, sizeof (name), "%s@%s",
2564 fsname, nvpair_name(snapelem));
2565
2566 error = recv_destroy(hdl, name,
2567 strlen(fsname)+1, newname, flags);
2568 if (error)
2569 needagain = B_TRUE;
2570 else
2571 progress = B_TRUE;
2572 sprintf(guidname, "%llu",
2573 (u_longlong_t)thisguid);
2574 nvlist_add_boolean(deleted, guidname);
2575 continue;
2576 }
2577
2578 stream_nvfs = found;
2579
2580 if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2581 &props) && 0 == nvlist_lookup_nvlist(props,
2582 stream_snapname, &props)) {
2583 zfs_cmd_t zc = {"\0"};
2584
2585 zc.zc_cookie = B_TRUE; /* received */
2586 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2587 "%s@%s", fsname, nvpair_name(snapelem));
2588 if (zcmd_write_src_nvlist(hdl, &zc,
2589 props) == 0) {
2590 (void) zfs_ioctl(hdl,
2591 ZFS_IOC_SET_PROP, &zc);
2592 zcmd_free_nvlists(&zc);
2593 }
2594 }
2595
2596 /* check for different snapname */
2597 if (strcmp(nvpair_name(snapelem),
2598 stream_snapname) != 0) {
2599 char name[ZFS_MAX_DATASET_NAME_LEN];
2600 char tryname[ZFS_MAX_DATASET_NAME_LEN];
2601
2602 (void) snprintf(name, sizeof (name), "%s@%s",
2603 fsname, nvpair_name(snapelem));
2604 (void) snprintf(tryname, sizeof (name), "%s@%s",
2605 fsname, stream_snapname);
2606
2607 error = recv_rename(hdl, name, tryname,
2608 strlen(fsname)+1, newname, flags);
2609 if (error)
2610 needagain = B_TRUE;
2611 else
2612 progress = B_TRUE;
2613 }
2614
2615 if (strcmp(stream_snapname, fromsnap) == 0)
2616 fromguid = thisguid;
2617 }
2618
2619 /* check for delete */
2620 if (stream_nvfs == NULL) {
2621 if (!flags->force)
2622 continue;
2623
2624 error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2625 newname, flags);
2626 if (error)
2627 needagain = B_TRUE;
2628 else
2629 progress = B_TRUE;
2630 sprintf(guidname, "%llu",
2631 (u_longlong_t)parent_fromsnap_guid);
2632 nvlist_add_boolean(deleted, guidname);
2633 continue;
2634 }
2635
2636 if (fromguid == 0) {
2637 if (flags->verbose) {
2638 (void) printf("local fs %s does not have "
2639 "fromsnap (%s in stream); must have "
2640 "been deleted locally; ignoring\n",
2641 fsname, fromsnap);
2642 }
2643 continue;
2644 }
2645
2646 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2647 "name", &stream_fsname));
2648 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2649 "parentfromsnap", &stream_parent_fromsnap_guid));
2650
2651 s1 = strrchr(fsname, '/');
2652 s2 = strrchr(stream_fsname, '/');
2653
2654 /*
2655 * Check if we're going to rename based on parent guid change
2656 * and the current parent guid was also deleted. If it was then
2657 * rename will fail and is likely unneeded, so avoid this and
2658 * force an early retry to determine the new
2659 * parent_fromsnap_guid.
2660 */
2661 if (stream_parent_fromsnap_guid != 0 &&
2662 parent_fromsnap_guid != 0 &&
2663 stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2664 sprintf(guidname, "%llu",
2665 (u_longlong_t)parent_fromsnap_guid);
2666 if (nvlist_exists(deleted, guidname)) {
2667 progress = B_TRUE;
2668 needagain = B_TRUE;
2669 goto doagain;
2670 }
2671 }
2672
2673 /*
2674 * Check for rename. If the exact receive path is specified, it
2675 * does not count as a rename, but we still need to check the
2676 * datasets beneath it.
2677 */
2678 if ((stream_parent_fromsnap_guid != 0 &&
2679 parent_fromsnap_guid != 0 &&
2680 stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2681 ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2682 (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2683 nvlist_t *parent;
2684 char tryname[ZFS_MAX_DATASET_NAME_LEN];
2685
2686 parent = fsavl_find(local_avl,
2687 stream_parent_fromsnap_guid, NULL);
2688 /*
2689 * NB: parent might not be found if we used the
2690 * tosnap for stream_parent_fromsnap_guid,
2691 * because the parent is a newly-created fs;
2692 * we'll be able to rename it after we recv the
2693 * new fs.
2694 */
2695 if (parent != NULL) {
2696 char *pname;
2697
2698 VERIFY(0 == nvlist_lookup_string(parent, "name",
2699 &pname));
2700 (void) snprintf(tryname, sizeof (tryname),
2701 "%s%s", pname, strrchr(stream_fsname, '/'));
2702 } else {
2703 tryname[0] = '\0';
2704 if (flags->verbose) {
2705 (void) printf("local fs %s new parent "
2706 "not found\n", fsname);
2707 }
2708 }
2709
2710 newname[0] = '\0';
2711
2712 error = recv_rename(hdl, fsname, tryname,
2713 strlen(tofs)+1, newname, flags);
2714
2715 if (renamed != NULL && newname[0] != '\0') {
2716 VERIFY(0 == nvlist_add_boolean(renamed,
2717 newname));
2718 }
2719
2720 if (error)
2721 needagain = B_TRUE;
2722 else
2723 progress = B_TRUE;
2724 }
2725 }
2726
2727 doagain:
2728 fsavl_destroy(local_avl);
2729 nvlist_free(local_nv);
2730 nvlist_free(deleted);
2731
2732 if (needagain && progress) {
2733 /* do another pass to fix up temporary names */
2734 if (flags->verbose)
2735 (void) printf("another pass:\n");
2736 goto again;
2737 }
2738
2739 return (needagain);
2740 }
2741
2742 static int
2743 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2744 recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2745 char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2746 {
2747 nvlist_t *stream_nv = NULL;
2748 avl_tree_t *stream_avl = NULL;
2749 char *fromsnap = NULL;
2750 char *sendsnap = NULL;
2751 char *cp;
2752 char tofs[ZFS_MAX_DATASET_NAME_LEN];
2753 char sendfs[ZFS_MAX_DATASET_NAME_LEN];
2754 char errbuf[1024];
2755 dmu_replay_record_t drre;
2756 int error;
2757 boolean_t anyerr = B_FALSE;
2758 boolean_t softerr = B_FALSE;
2759 boolean_t recursive;
2760
2761 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2762 "cannot receive"));
2763
2764 assert(drr->drr_type == DRR_BEGIN);
2765 assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2766 assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2767 DMU_COMPOUNDSTREAM);
2768
2769 /*
2770 * Read in the nvlist from the stream.
2771 */
2772 if (drr->drr_payloadlen != 0) {
2773 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2774 &stream_nv, flags->byteswap, zc);
2775 if (error) {
2776 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2777 goto out;
2778 }
2779 }
2780
2781 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2782 ENOENT);
2783
2784 if (recursive && strchr(destname, '@')) {
2785 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2786 "cannot specify snapshot name for multi-snapshot stream"));
2787 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2788 goto out;
2789 }
2790
2791 /*
2792 * Read in the end record and verify checksum.
2793 */
2794 if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2795 flags->byteswap, NULL)))
2796 goto out;
2797 if (flags->byteswap) {
2798 drre.drr_type = BSWAP_32(drre.drr_type);
2799 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2800 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2801 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2802 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2803 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2804 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2805 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2806 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2807 }
2808 if (drre.drr_type != DRR_END) {
2809 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2810 goto out;
2811 }
2812 if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2813 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2814 "incorrect header checksum"));
2815 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2816 goto out;
2817 }
2818
2819 (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2820
2821 if (drr->drr_payloadlen != 0) {
2822 nvlist_t *stream_fss;
2823
2824 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2825 &stream_fss));
2826 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2827 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2828 "couldn't allocate avl tree"));
2829 error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2830 goto out;
2831 }
2832
2833 if (fromsnap != NULL) {
2834 nvlist_t *renamed = NULL;
2835 nvpair_t *pair = NULL;
2836
2837 (void) strlcpy(tofs, destname, sizeof (tofs));
2838 if (flags->isprefix) {
2839 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2840 int i;
2841
2842 if (flags->istail) {
2843 cp = strrchr(drrb->drr_toname, '/');
2844 if (cp == NULL) {
2845 (void) strlcat(tofs, "/",
2846 sizeof (tofs));
2847 i = 0;
2848 } else {
2849 i = (cp - drrb->drr_toname);
2850 }
2851 } else {
2852 i = strcspn(drrb->drr_toname, "/@");
2853 }
2854 /* zfs_receive_one() will create_parents() */
2855 (void) strlcat(tofs, &drrb->drr_toname[i],
2856 sizeof (tofs));
2857 *strchr(tofs, '@') = '\0';
2858 }
2859
2860 if (recursive && !flags->dryrun && !flags->nomount) {
2861 VERIFY(0 == nvlist_alloc(&renamed,
2862 NV_UNIQUE_NAME, 0));
2863 }
2864
2865 softerr = recv_incremental_replication(hdl, tofs, flags,
2866 stream_nv, stream_avl, renamed);
2867
2868 /* Unmount renamed filesystems before receiving. */
2869 while ((pair = nvlist_next_nvpair(renamed,
2870 pair)) != NULL) {
2871 zfs_handle_t *zhp;
2872 prop_changelist_t *clp = NULL;
2873
2874 zhp = zfs_open(hdl, nvpair_name(pair),
2875 ZFS_TYPE_FILESYSTEM);
2876 if (zhp != NULL) {
2877 clp = changelist_gather(zhp,
2878 ZFS_PROP_MOUNTPOINT, 0, 0);
2879 zfs_close(zhp);
2880 if (clp != NULL) {
2881 softerr |=
2882 changelist_prefix(clp);
2883 changelist_free(clp);
2884 }
2885 }
2886 }
2887
2888 nvlist_free(renamed);
2889 }
2890 }
2891
2892 /*
2893 * Get the fs specified by the first path in the stream (the top level
2894 * specified by 'zfs send') and pass it to each invocation of
2895 * zfs_receive_one().
2896 */
2897 (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2898 sizeof (sendfs));
2899 if ((cp = strchr(sendfs, '@')) != NULL) {
2900 *cp = '\0';
2901 /*
2902 * Find the "sendsnap", the final snapshot in a replication
2903 * stream. zfs_receive_one() handles certain errors
2904 * differently, depending on if the contained stream is the
2905 * last one or not.
2906 */
2907 sendsnap = (cp + 1);
2908 }
2909
2910 /* Finally, receive each contained stream */
2911 do {
2912 /*
2913 * we should figure out if it has a recoverable
2914 * error, in which case do a recv_skip() and drive on.
2915 * Note, if we fail due to already having this guid,
2916 * zfs_receive_one() will take care of it (ie,
2917 * recv_skip() and return 0).
2918 */
2919 error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
2920 sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2921 action_handlep, sendsnap);
2922 if (error == ENODATA) {
2923 error = 0;
2924 break;
2925 }
2926 anyerr |= error;
2927 } while (error == 0);
2928
2929 if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2930 /*
2931 * Now that we have the fs's they sent us, try the
2932 * renames again.
2933 */
2934 softerr = recv_incremental_replication(hdl, tofs, flags,
2935 stream_nv, stream_avl, NULL);
2936 }
2937
2938 out:
2939 fsavl_destroy(stream_avl);
2940 nvlist_free(stream_nv);
2941 if (softerr)
2942 error = -2;
2943 if (anyerr)
2944 error = -1;
2945 return (error);
2946 }
2947
2948 static void
2949 trunc_prop_errs(int truncated)
2950 {
2951 ASSERT(truncated != 0);
2952
2953 if (truncated == 1)
2954 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2955 "1 more property could not be set\n"));
2956 else
2957 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2958 "%d more properties could not be set\n"), truncated);
2959 }
2960
2961 static int
2962 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2963 {
2964 dmu_replay_record_t *drr;
2965 void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
2966 char errbuf[1024];
2967
2968 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2969 "cannot receive:"));
2970
2971 /* XXX would be great to use lseek if possible... */
2972 drr = buf;
2973
2974 while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2975 byteswap, NULL) == 0) {
2976 if (byteswap)
2977 drr->drr_type = BSWAP_32(drr->drr_type);
2978
2979 switch (drr->drr_type) {
2980 case DRR_BEGIN:
2981 if (drr->drr_payloadlen != 0) {
2982 (void) recv_read(hdl, fd, buf,
2983 drr->drr_payloadlen, B_FALSE, NULL);
2984 }
2985 break;
2986
2987 case DRR_END:
2988 free(buf);
2989 return (0);
2990
2991 case DRR_OBJECT:
2992 if (byteswap) {
2993 drr->drr_u.drr_object.drr_bonuslen =
2994 BSWAP_32(drr->drr_u.drr_object.
2995 drr_bonuslen);
2996 }
2997 (void) recv_read(hdl, fd, buf,
2998 P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2999 B_FALSE, NULL);
3000 break;
3001
3002 case DRR_WRITE:
3003 if (byteswap) {
3004 drr->drr_u.drr_write.drr_logical_size =
3005 BSWAP_64(
3006 drr->drr_u.drr_write.drr_logical_size);
3007 drr->drr_u.drr_write.drr_compressed_size =
3008 BSWAP_64(
3009 drr->drr_u.drr_write.drr_compressed_size);
3010 }
3011 uint64_t payload_size =
3012 DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
3013 (void) recv_read(hdl, fd, buf,
3014 payload_size, B_FALSE, NULL);
3015 break;
3016 case DRR_SPILL:
3017 if (byteswap) {
3018 drr->drr_u.drr_spill.drr_length =
3019 BSWAP_64(drr->drr_u.drr_spill.drr_length);
3020 }
3021 (void) recv_read(hdl, fd, buf,
3022 drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
3023 break;
3024 case DRR_WRITE_EMBEDDED:
3025 if (byteswap) {
3026 drr->drr_u.drr_write_embedded.drr_psize =
3027 BSWAP_32(drr->drr_u.drr_write_embedded.
3028 drr_psize);
3029 }
3030 (void) recv_read(hdl, fd, buf,
3031 P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
3032 8), B_FALSE, NULL);
3033 break;
3034 case DRR_WRITE_BYREF:
3035 case DRR_FREEOBJECTS:
3036 case DRR_FREE:
3037 break;
3038
3039 default:
3040 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3041 "invalid record type"));
3042 free(buf);
3043 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3044 }
3045 }
3046
3047 free(buf);
3048 return (-1);
3049 }
3050
3051 static void
3052 recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
3053 boolean_t resumable)
3054 {
3055 char target_fs[ZFS_MAX_DATASET_NAME_LEN];
3056
3057 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3058 "checksum mismatch or incomplete stream"));
3059
3060 if (!resumable)
3061 return;
3062 (void) strlcpy(target_fs, target_snap, sizeof (target_fs));
3063 *strchr(target_fs, '@') = '\0';
3064 zfs_handle_t *zhp = zfs_open(hdl, target_fs,
3065 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3066 if (zhp == NULL)
3067 return;
3068
3069 char token_buf[ZFS_MAXPROPLEN];
3070 int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
3071 token_buf, sizeof (token_buf),
3072 NULL, NULL, 0, B_TRUE);
3073 if (error == 0) {
3074 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3075 "checksum mismatch or incomplete stream.\n"
3076 "Partially received snapshot is saved.\n"
3077 "A resuming stream can be generated on the sending "
3078 "system by running:\n"
3079 " zfs send -t %s"),
3080 token_buf);
3081 }
3082 zfs_close(zhp);
3083 }
3084
3085 /*
3086 * Restores a backup of tosnap from the file descriptor specified by infd.
3087 */
3088 static int
3089 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
3090 const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
3091 dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
3092 avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3093 uint64_t *action_handlep, const char *finalsnap)
3094 {
3095 time_t begin_time;
3096 int ioctl_err, ioctl_errno, err;
3097 char *cp;
3098 struct drr_begin *drrb = &drr->drr_u.drr_begin;
3099 char errbuf[1024];
3100 const char *chopprefix;
3101 boolean_t newfs = B_FALSE;
3102 boolean_t stream_wantsnewfs;
3103 boolean_t newprops = B_FALSE;
3104 uint64_t read_bytes = 0;
3105 uint64_t errflags = 0;
3106 uint64_t parent_snapguid = 0;
3107 prop_changelist_t *clp = NULL;
3108 nvlist_t *snapprops_nvlist = NULL;
3109 zprop_errflags_t prop_errflags;
3110 nvlist_t *prop_errors = NULL;
3111 boolean_t recursive;
3112 char *snapname = NULL;
3113 char destsnap[MAXPATHLEN * 2];
3114 char origin[MAXNAMELEN];
3115 char name[MAXPATHLEN];
3116 nvlist_t *props = NULL;
3117
3118 begin_time = time(NULL);
3119 bzero(origin, MAXNAMELEN);
3120
3121 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3122 "cannot receive"));
3123
3124 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3125 ENOENT);
3126
3127 if (stream_avl != NULL) {
3128 nvlist_t *lookup = NULL;
3129 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
3130 &snapname);
3131
3132 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
3133 &parent_snapguid);
3134 err = nvlist_lookup_nvlist(fs, "props", &props);
3135 if (err) {
3136 VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
3137 newprops = B_TRUE;
3138 }
3139
3140 if (flags->canmountoff) {
3141 VERIFY(0 == nvlist_add_uint64(props,
3142 zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
3143 }
3144 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
3145 VERIFY(0 == nvlist_lookup_nvlist(lookup,
3146 snapname, &snapprops_nvlist));
3147 }
3148 }
3149
3150 cp = NULL;
3151
3152 /*
3153 * Determine how much of the snapshot name stored in the stream
3154 * we are going to tack on to the name they specified on the
3155 * command line, and how much we are going to chop off.
3156 *
3157 * If they specified a snapshot, chop the entire name stored in
3158 * the stream.
3159 */
3160 if (flags->istail) {
3161 /*
3162 * A filesystem was specified with -e. We want to tack on only
3163 * the tail of the sent snapshot path.
3164 */
3165 if (strchr(tosnap, '@')) {
3166 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3167 "argument - snapshot not allowed with -e"));
3168 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3169 goto out;
3170 }
3171
3172 chopprefix = strrchr(sendfs, '/');
3173
3174 if (chopprefix == NULL) {
3175 /*
3176 * The tail is the poolname, so we need to
3177 * prepend a path separator.
3178 */
3179 int len = strlen(drrb->drr_toname);
3180 cp = malloc(len + 2);
3181 cp[0] = '/';
3182 (void) strcpy(&cp[1], drrb->drr_toname);
3183 chopprefix = cp;
3184 } else {
3185 chopprefix = drrb->drr_toname + (chopprefix - sendfs);
3186 }
3187 } else if (flags->isprefix) {
3188 /*
3189 * A filesystem was specified with -d. We want to tack on
3190 * everything but the first element of the sent snapshot path
3191 * (all but the pool name).
3192 */
3193 if (strchr(tosnap, '@')) {
3194 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3195 "argument - snapshot not allowed with -d"));
3196 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3197 goto out;
3198 }
3199
3200 chopprefix = strchr(drrb->drr_toname, '/');
3201 if (chopprefix == NULL)
3202 chopprefix = strchr(drrb->drr_toname, '@');
3203 } else if (strchr(tosnap, '@') == NULL) {
3204 /*
3205 * If a filesystem was specified without -d or -e, we want to
3206 * tack on everything after the fs specified by 'zfs send'.
3207 */
3208 chopprefix = drrb->drr_toname + strlen(sendfs);
3209 } else {
3210 /* A snapshot was specified as an exact path (no -d or -e). */
3211 if (recursive) {
3212 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3213 "cannot specify snapshot name for multi-snapshot "
3214 "stream"));
3215 err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3216 goto out;
3217 }
3218 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
3219 }
3220
3221 ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
3222 ASSERT(chopprefix > drrb->drr_toname);
3223 ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
3224 ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
3225 chopprefix[0] == '\0');
3226
3227 /*
3228 * Determine name of destination snapshot.
3229 */
3230 (void) strlcpy(destsnap, tosnap, sizeof (destsnap));
3231 (void) strlcat(destsnap, chopprefix, sizeof (destsnap));
3232 free(cp);
3233 if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
3234 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3235 goto out;
3236 }
3237
3238 /*
3239 * Determine the name of the origin snapshot.
3240 */
3241 if (originsnap) {
3242 (void) strncpy(origin, originsnap, sizeof (origin));
3243 if (flags->verbose)
3244 (void) printf("using provided clone origin %s\n",
3245 origin);
3246 } else if (drrb->drr_flags & DRR_FLAG_CLONE) {
3247 if (guid_to_name(hdl, destsnap,
3248 drrb->drr_fromguid, B_FALSE, origin) != 0) {
3249 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3250 "local origin for clone %s does not exist"),
3251 destsnap);
3252 err = zfs_error(hdl, EZFS_NOENT, errbuf);
3253 goto out;
3254 }
3255 if (flags->verbose)
3256 (void) printf("found clone origin %s\n", origin);
3257 }
3258
3259 boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3260 DMU_BACKUP_FEATURE_RESUMING;
3261 stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
3262 (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
3263
3264 if (stream_wantsnewfs) {
3265 /*
3266 * if the parent fs does not exist, look for it based on
3267 * the parent snap GUID
3268 */
3269 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3270 "cannot receive new filesystem stream"));
3271
3272 (void) strcpy(name, destsnap);
3273 cp = strrchr(name, '/');
3274 if (cp)
3275 *cp = '\0';
3276 if (cp &&
3277 !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3278 char suffix[ZFS_MAX_DATASET_NAME_LEN];
3279 (void) strcpy(suffix, strrchr(destsnap, '/'));
3280 if (guid_to_name(hdl, name, parent_snapguid,
3281 B_FALSE, destsnap) == 0) {
3282 *strchr(destsnap, '@') = '\0';
3283 (void) strcat(destsnap, suffix);
3284 }
3285 }
3286 } else {
3287 /*
3288 * if the fs does not exist, look for it based on the
3289 * fromsnap GUID
3290 */
3291 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3292 "cannot receive incremental stream"));
3293
3294 (void) strcpy(name, destsnap);
3295 *strchr(name, '@') = '\0';
3296
3297 /*
3298 * If the exact receive path was specified and this is the
3299 * topmost path in the stream, then if the fs does not exist we
3300 * should look no further.
3301 */
3302 if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
3303 strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
3304 !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3305 char snap[ZFS_MAX_DATASET_NAME_LEN];
3306 (void) strcpy(snap, strchr(destsnap, '@'));
3307 if (guid_to_name(hdl, name, drrb->drr_fromguid,
3308 B_FALSE, destsnap) == 0) {
3309 *strchr(destsnap, '@') = '\0';
3310 (void) strcat(destsnap, snap);
3311 }
3312 }
3313 }
3314
3315 (void) strcpy(name, destsnap);
3316 *strchr(name, '@') = '\0';
3317
3318 if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3319 zfs_cmd_t zc = {"\0"};
3320 zfs_handle_t *zhp;
3321
3322 (void) strcpy(zc.zc_name, name);
3323
3324 /*
3325 * Destination fs exists. It must be one of these cases:
3326 * - an incremental send stream
3327 * - the stream specifies a new fs (full stream or clone)
3328 * and they want us to blow away the existing fs (and
3329 * have therefore specified -F and removed any snapshots)
3330 * - we are resuming a failed receive.
3331 */
3332 if (stream_wantsnewfs) {
3333 if (!flags->force) {
3334 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3335 "destination '%s' exists\n"
3336 "must specify -F to overwrite it"), name);
3337 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3338 goto out;
3339 }
3340 if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
3341 &zc) == 0) {
3342 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3343 "destination has snapshots (eg. %s)\n"
3344 "must destroy them to overwrite it"),
3345 name);
3346 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3347 goto out;
3348 }
3349 }
3350
3351 if ((zhp = zfs_open(hdl, name,
3352 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
3353 err = -1;
3354 goto out;
3355 }
3356
3357 if (stream_wantsnewfs &&
3358 zhp->zfs_dmustats.dds_origin[0]) {
3359 zfs_close(zhp);
3360 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3361 "destination '%s' is a clone\n"
3362 "must destroy it to overwrite it"), name);
3363 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3364 goto out;
3365 }
3366
3367 if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
3368 stream_wantsnewfs) {
3369 /* We can't do online recv in this case */
3370 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
3371 if (clp == NULL) {
3372 zfs_close(zhp);
3373 err = -1;
3374 goto out;
3375 }
3376 if (changelist_prefix(clp) != 0) {
3377 changelist_free(clp);
3378 zfs_close(zhp);
3379 err = -1;
3380 goto out;
3381 }
3382 }
3383
3384 /*
3385 * If we are resuming a newfs, set newfs here so that we will
3386 * mount it if the recv succeeds this time. We can tell
3387 * that it was a newfs on the first recv because the fs
3388 * itself will be inconsistent (if the fs existed when we
3389 * did the first recv, we would have received it into
3390 * .../%recv).
3391 */
3392 if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
3393 newfs = B_TRUE;
3394
3395 zfs_close(zhp);
3396 } else {
3397 /*
3398 * Destination filesystem does not exist. Therefore we better
3399 * be creating a new filesystem (either from a full backup, or
3400 * a clone). It would therefore be invalid if the user
3401 * specified only the pool name (i.e. if the destination name
3402 * contained no slash character).
3403 */
3404 cp = strrchr(name, '/');
3405
3406 if (!stream_wantsnewfs || cp == NULL) {
3407 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3408 "destination '%s' does not exist"), name);
3409 err = zfs_error(hdl, EZFS_NOENT, errbuf);
3410 goto out;
3411 }
3412
3413 /*
3414 * Trim off the final dataset component so we perform the
3415 * recvbackup ioctl to the filesystems's parent.
3416 */
3417 *cp = '\0';
3418
3419 if (flags->isprefix && !flags->istail && !flags->dryrun &&
3420 create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
3421 err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3422 goto out;
3423 }
3424
3425 newfs = B_TRUE;
3426 }
3427
3428 if (flags->verbose) {
3429 (void) printf("%s %s stream of %s into %s\n",
3430 flags->dryrun ? "would receive" : "receiving",
3431 drrb->drr_fromguid ? "incremental" : "full",
3432 drrb->drr_toname, destsnap);
3433 (void) fflush(stdout);
3434 }
3435
3436 if (flags->dryrun) {
3437 err = recv_skip(hdl, infd, flags->byteswap);
3438 goto out;
3439 }
3440
3441 err = ioctl_err = lzc_receive_one(destsnap, props, origin,
3442 flags->force, flags->resumable, infd, drr_noswap, cleanup_fd,
3443 &read_bytes, &errflags, action_handlep, &prop_errors);
3444 ioctl_errno = ioctl_err;
3445 prop_errflags = errflags;
3446
3447 if (err == 0) {
3448 nvpair_t *prop_err = NULL;
3449
3450 while ((prop_err = nvlist_next_nvpair(prop_errors,
3451 prop_err)) != NULL) {
3452 char tbuf[1024];
3453 zfs_prop_t prop;
3454 int intval;
3455
3456 prop = zfs_name_to_prop(nvpair_name(prop_err));
3457 (void) nvpair_value_int32(prop_err, &intval);
3458 if (strcmp(nvpair_name(prop_err),
3459 ZPROP_N_MORE_ERRORS) == 0) {
3460 trunc_prop_errs(intval);
3461 break;
3462 } else if (snapname == NULL || finalsnap == NULL ||
3463 strcmp(finalsnap, snapname) == 0 ||
3464 strcmp(nvpair_name(prop_err),
3465 zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
3466 /*
3467 * Skip the special case of, for example,
3468 * "refquota", errors on intermediate
3469 * snapshots leading up to a final one.
3470 * That's why we have all of the checks above.
3471 *
3472 * See zfs_ioctl.c's extract_delay_props() for
3473 * a list of props which can fail on
3474 * intermediate snapshots, but shouldn't
3475 * affect the overall receive.
3476 */
3477 (void) snprintf(tbuf, sizeof (tbuf),
3478 dgettext(TEXT_DOMAIN,
3479 "cannot receive %s property on %s"),
3480 nvpair_name(prop_err), name);
3481 zfs_setprop_error(hdl, prop, intval, tbuf);
3482 }
3483 }
3484 }
3485
3486 if (err == 0 && snapprops_nvlist) {
3487 zfs_cmd_t zc = {"\0"};
3488
3489 (void) strcpy(zc.zc_name, destsnap);
3490 zc.zc_cookie = B_TRUE; /* received */
3491 if (zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist) == 0) {
3492 (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
3493 zcmd_free_nvlists(&zc);
3494 }
3495 }
3496
3497 if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3498 /*
3499 * It may be that this snapshot already exists,
3500 * in which case we want to consume & ignore it
3501 * rather than failing.
3502 */
3503 avl_tree_t *local_avl;
3504 nvlist_t *local_nv, *fs;
3505 cp = strchr(destsnap, '@');
3506
3507 /*
3508 * XXX Do this faster by just iterating over snaps in
3509 * this fs. Also if zc_value does not exist, we will
3510 * get a strange "does not exist" error message.
3511 */
3512 *cp = '\0';
3513 if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE,
3514 B_FALSE, &local_nv, &local_avl) == 0) {
3515 *cp = '@';
3516 fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3517 fsavl_destroy(local_avl);
3518 nvlist_free(local_nv);
3519
3520 if (fs != NULL) {
3521 if (flags->verbose) {
3522 (void) printf("snap %s already exists; "
3523 "ignoring\n", destsnap);
3524 }
3525 err = ioctl_err = recv_skip(hdl, infd,
3526 flags->byteswap);
3527 }
3528 }
3529 *cp = '@';
3530 }
3531
3532 if (ioctl_err != 0) {
3533 switch (ioctl_errno) {
3534 case ENODEV:
3535 cp = strchr(destsnap, '@');
3536 *cp = '\0';
3537 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3538 "most recent snapshot of %s does not\n"
3539 "match incremental source"), destsnap);
3540 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3541 *cp = '@';
3542 break;
3543 case ETXTBSY:
3544 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3545 "destination %s has been modified\n"
3546 "since most recent snapshot"), name);
3547 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3548 break;
3549 case EEXIST:
3550 cp = strchr(destsnap, '@');
3551 if (newfs) {
3552 /* it's the containing fs that exists */
3553 *cp = '\0';
3554 }
3555 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3556 "destination already exists"));
3557 (void) zfs_error_fmt(hdl, EZFS_EXISTS,
3558 dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3559 destsnap);
3560 *cp = '@';
3561 break;
3562 case EINVAL:
3563 if (flags->resumable)
3564 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3565 "kernel modules must be upgraded to "
3566 "receive this stream."));
3567 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3568 break;
3569 case ECKSUM:
3570 recv_ecksum_set_aux(hdl, destsnap, flags->resumable);
3571 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3572 break;
3573 case ENOTSUP:
3574 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3575 "pool must be upgraded to receive this stream."));
3576 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3577 break;
3578 case EDQUOT:
3579 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3580 "destination %s space quota exceeded"), name);
3581 (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3582 break;
3583 default:
3584 (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3585 }
3586 }
3587
3588 /*
3589 * Mount the target filesystem (if created). Also mount any
3590 * children of the target filesystem if we did a replication
3591 * receive (indicated by stream_avl being non-NULL).
3592 */
3593 cp = strchr(destsnap, '@');
3594 if (cp && (ioctl_err == 0 || !newfs)) {
3595 zfs_handle_t *h;
3596
3597 *cp = '\0';
3598 h = zfs_open(hdl, destsnap,
3599 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3600 if (h != NULL) {
3601 if (h->zfs_type == ZFS_TYPE_VOLUME) {
3602 *cp = '@';
3603 } else if (newfs || stream_avl) {
3604 /*
3605 * Track the first/top of hierarchy fs,
3606 * for mounting and sharing later.
3607 */
3608 if (top_zfs && *top_zfs == NULL)
3609 *top_zfs = zfs_strdup(hdl, destsnap);
3610 }
3611 zfs_close(h);
3612 }
3613 *cp = '@';
3614 }
3615
3616 if (clp) {
3617 if (!flags->nomount)
3618 err |= changelist_postfix(clp);
3619 changelist_free(clp);
3620 }
3621
3622 if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3623 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3624 "failed to clear unreceived properties on %s"), name);
3625 (void) fprintf(stderr, "\n");
3626 }
3627 if (prop_errflags & ZPROP_ERR_NORESTORE) {
3628 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3629 "failed to restore original properties on %s"), name);
3630 (void) fprintf(stderr, "\n");
3631 }
3632
3633 if (err || ioctl_err) {
3634 err = -1;
3635 goto out;
3636 }
3637
3638 if (flags->verbose) {
3639 char buf1[64];
3640 char buf2[64];
3641 uint64_t bytes = read_bytes;
3642 time_t delta = time(NULL) - begin_time;
3643 if (delta == 0)
3644 delta = 1;
3645 zfs_nicenum(bytes, buf1, sizeof (buf1));
3646 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3647
3648 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3649 buf1, delta, buf2);
3650 }
3651
3652 err = 0;
3653 out:
3654 if (prop_errors != NULL)
3655 nvlist_free(prop_errors);
3656
3657 if (newprops)
3658 nvlist_free(props);
3659
3660 return (err);
3661 }
3662
3663 static int
3664 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
3665 const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
3666 nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3667 uint64_t *action_handlep, const char *finalsnap)
3668 {
3669 int err;
3670 dmu_replay_record_t drr, drr_noswap;
3671 struct drr_begin *drrb = &drr.drr_u.drr_begin;
3672 char errbuf[1024];
3673 zio_cksum_t zcksum = { { 0 } };
3674 uint64_t featureflags;
3675 int hdrtype;
3676
3677 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3678 "cannot receive"));
3679
3680 if (flags->isprefix &&
3681 !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3682 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3683 "(%s) does not exist"), tosnap);
3684 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3685 }
3686 if (originsnap &&
3687 !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
3688 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
3689 "(%s) does not exist"), originsnap);
3690 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3691 }
3692
3693 /* read in the BEGIN record */
3694 if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3695 &zcksum)))
3696 return (err);
3697
3698 if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3699 /* It's the double end record at the end of a package */
3700 return (ENODATA);
3701 }
3702
3703 /* the kernel needs the non-byteswapped begin record */
3704 drr_noswap = drr;
3705
3706 flags->byteswap = B_FALSE;
3707 if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3708 /*
3709 * We computed the checksum in the wrong byteorder in
3710 * recv_read() above; do it again correctly.
3711 */
3712 bzero(&zcksum, sizeof (zio_cksum_t));
3713 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3714 flags->byteswap = B_TRUE;
3715
3716 drr.drr_type = BSWAP_32(drr.drr_type);
3717 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3718 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3719 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3720 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3721 drrb->drr_type = BSWAP_32(drrb->drr_type);
3722 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3723 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3724 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3725 }
3726
3727 if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3728 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3729 "stream (bad magic number)"));
3730 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3731 }
3732
3733 featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3734 hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3735
3736 if (!DMU_STREAM_SUPPORTED(featureflags) ||
3737 (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3738 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3739 "stream has unsupported feature, feature flags = %lx"),
3740 featureflags);
3741 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3742 }
3743
3744 if (strchr(drrb->drr_toname, '@') == NULL) {
3745 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3746 "stream (bad snapshot name)"));
3747 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3748 }
3749
3750 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3751 char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
3752 if (sendfs == NULL) {
3753 /*
3754 * We were not called from zfs_receive_package(). Get
3755 * the fs specified by 'zfs send'.
3756 */
3757 char *cp;
3758 (void) strlcpy(nonpackage_sendfs,
3759 drr.drr_u.drr_begin.drr_toname,
3760 sizeof (nonpackage_sendfs));
3761 if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3762 *cp = '\0';
3763 sendfs = nonpackage_sendfs;
3764 VERIFY(finalsnap == NULL);
3765 }
3766 return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
3767 &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
3768 cleanup_fd, action_handlep, finalsnap));
3769 } else {
3770 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3771 DMU_COMPOUNDSTREAM);
3772 return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
3773 &zcksum, top_zfs, cleanup_fd, action_handlep));
3774 }
3775 }
3776
3777 /*
3778 * Restores a backup of tosnap from the file descriptor specified by infd.
3779 * Return 0 on total success, -2 if some things couldn't be
3780 * destroyed/renamed/promoted, -1 if some things couldn't be received.
3781 * (-1 will override -2, if -1 and the resumable flag was specified the
3782 * transfer can be resumed if the sending side supports it).
3783 */
3784 int
3785 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
3786 recvflags_t *flags, int infd, avl_tree_t *stream_avl)
3787 {
3788 char *top_zfs = NULL;
3789 int err;
3790 int cleanup_fd;
3791 uint64_t action_handle = 0;
3792 struct stat sb;
3793 char *originsnap = NULL;
3794
3795 /*
3796 * The only way fstat can fail is if we do not have a valid file
3797 * descriptor.
3798 */
3799 if (fstat(infd, &sb) == -1) {
3800 perror("fstat");
3801 return (-2);
3802 }
3803
3804 #ifdef __linux__
3805 #ifndef F_SETPIPE_SZ
3806 #define F_SETPIPE_SZ (F_SETLEASE + 7)
3807 #endif /* F_SETPIPE_SZ */
3808
3809 #ifndef F_GETPIPE_SZ
3810 #define F_GETPIPE_SZ (F_GETLEASE + 7)
3811 #endif /* F_GETPIPE_SZ */
3812
3813 /*
3814 * It is not uncommon for gigabytes to be processed in zfs receive.
3815 * Speculatively increase the buffer size via Linux-specific fcntl()
3816 * call.
3817 */
3818 if (S_ISFIFO(sb.st_mode)) {
3819 FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "r");
3820
3821 if (procf != NULL) {
3822 unsigned long max_psize;
3823 long cur_psize;
3824 if (fscanf(procf, "%lu", &max_psize) > 0) {
3825 cur_psize = fcntl(infd, F_GETPIPE_SZ);
3826 if (cur_psize > 0 &&
3827 max_psize > (unsigned long) cur_psize)
3828 (void) fcntl(infd, F_SETPIPE_SZ,
3829 max_psize);
3830 }
3831 fclose(procf);
3832 }
3833 }
3834 #endif /* __linux__ */
3835
3836 if (props) {
3837 err = nvlist_lookup_string(props, "origin", &originsnap);
3838 if (err && err != ENOENT)
3839 return (err);
3840 }
3841
3842 cleanup_fd = open(ZFS_DEV, O_RDWR);
3843 VERIFY(cleanup_fd >= 0);
3844
3845 err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
3846 stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL);
3847
3848 VERIFY(0 == close(cleanup_fd));
3849
3850 if (err == 0 && !flags->nomount && top_zfs) {
3851 zfs_handle_t *zhp = NULL;
3852 prop_changelist_t *clp = NULL;
3853
3854 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3855 if (zhp != NULL) {
3856 clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3857 CL_GATHER_MOUNT_ALWAYS, 0);
3858 zfs_close(zhp);
3859 if (clp != NULL) {
3860 /* mount and share received datasets */
3861 err = changelist_postfix(clp);
3862 changelist_free(clp);
3863 }
3864 }
3865 if (zhp == NULL || clp == NULL || err)
3866 err = -1;
3867 }
3868 if (top_zfs)
3869 free(top_zfs);
3870
3871 return (err);
3872 }