]> git.proxmox.com Git - mirror_zfs-debian.git/blob - lib/libzfs_core/libzfs_core.c
New upstream version 0.7.2
[mirror_zfs-debian.git] / lib / libzfs_core / libzfs_core.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 * Copyright (c) 2017 Datto Inc.
26 * Copyright 2017 RackTop Systems.
27 */
28
29 /*
30 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
31 * It has the following characteristics:
32 *
33 * - Thread Safe. libzfs_core is accessible concurrently from multiple
34 * threads. This is accomplished primarily by avoiding global data
35 * (e.g. caching). Since it's thread-safe, there is no reason for a
36 * process to have multiple libzfs "instances". Therefore, we store
37 * our few pieces of data (e.g. the file descriptor) in global
38 * variables. The fd is reference-counted so that the libzfs_core
39 * library can be "initialized" multiple times (e.g. by different
40 * consumers within the same process).
41 *
42 * - Committed Interface. The libzfs_core interface will be committed,
43 * therefore consumers can compile against it and be confident that
44 * their code will continue to work on future releases of this code.
45 * Currently, the interface is Evolving (not Committed), but we intend
46 * to commit to it once it is more complete and we determine that it
47 * meets the needs of all consumers.
48 *
49 * - Programmatic Error Handling. libzfs_core communicates errors with
50 * defined error numbers, and doesn't print anything to stdout/stderr.
51 *
52 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
53 * to/from the kernel ioctls. There is generally a 1:1 correspondence
54 * between libzfs_core functions and ioctls to /dev/zfs.
55 *
56 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
57 * with kernel ioctls, and kernel ioctls are generally atomic, each
58 * libzfs_core function is atomic. For example, creating multiple
59 * snapshots with a single call to lzc_snapshot() is atomic -- it
60 * can't fail with only some of the requested snapshots created, even
61 * in the event of power loss or system crash.
62 *
63 * - Continued libzfs Support. Some higher-level operations (e.g.
64 * support for "zfs send -R") are too complicated to fit the scope of
65 * libzfs_core. This functionality will continue to live in libzfs.
66 * Where appropriate, libzfs will use the underlying atomic operations
67 * of libzfs_core. For example, libzfs may implement "zfs send -R |
68 * zfs receive" by using individual "send one snapshot", rename,
69 * destroy, and "receive one snapshot" operations in libzfs_core.
70 * /sbin/zfs and /sbin/zpool will link with both libzfs and
71 * libzfs_core. Other consumers should aim to use only libzfs_core,
72 * since that will be the supported, stable interface going forwards.
73 */
74
75 #include <libzfs_core.h>
76 #include <ctype.h>
77 #include <unistd.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <pthread.h>
83 #include <sys/nvpair.h>
84 #include <sys/param.h>
85 #include <sys/types.h>
86 #include <sys/stat.h>
87 #include <sys/zfs_ioctl.h>
88
/*
 * Process-wide library state.  g_lock is held by libzfs_core_init() and
 * libzfs_core_fini() while they inspect or change g_fd and g_refcount.
 */
static int g_fd = -1;		/* descriptor for /dev/zfs, -1 when not open */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;		/* successful inits not yet undone by fini */

/*
 * Take a reference on the library, opening /dev/zfs when the reference
 * count is zero.
 *
 * Returns 0 on success.  If /dev/zfs cannot be opened, the errno from
 * open(2) is returned and no reference is taken.
 */
int
libzfs_core_init(void)
{
	int error = 0;

	(void) pthread_mutex_lock(&g_lock);
	if (g_refcount == 0) {
		g_fd = open("/dev/zfs", O_RDWR);
		if (g_fd < 0) {
			/*
			 * Capture errno right away: the unlock call below
			 * is not guaranteed to leave it untouched.
			 */
			error = errno;
		}
	}
	if (error == 0)
		g_refcount++;
	(void) pthread_mutex_unlock(&g_lock);

	return (error);
}
108
109 void
110 libzfs_core_fini(void)
111 {
112 (void) pthread_mutex_lock(&g_lock);
113 ASSERT3S(g_refcount, >, 0);
114
115 if (g_refcount > 0)
116 g_refcount--;
117
118 if (g_refcount == 0 && g_fd != -1) {
119 (void) close(g_fd);
120 g_fd = -1;
121 }
122 (void) pthread_mutex_unlock(&g_lock);
123 }
124
125 static int
126 lzc_ioctl(zfs_ioc_t ioc, const char *name,
127 nvlist_t *source, nvlist_t **resultp)
128 {
129 zfs_cmd_t zc = {"\0"};
130 int error = 0;
131 char *packed = NULL;
132 size_t size = 0;
133
134 ASSERT3S(g_refcount, >, 0);
135 VERIFY3S(g_fd, !=, -1);
136
137 if (name != NULL)
138 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
139
140 if (source != NULL) {
141 packed = fnvlist_pack(source, &size);
142 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
143 zc.zc_nvlist_src_size = size;
144 }
145
146 if (resultp != NULL) {
147 *resultp = NULL;
148 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
149 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
150 malloc(zc.zc_nvlist_dst_size);
151 if (zc.zc_nvlist_dst == (uint64_t)0) {
152 error = ENOMEM;
153 goto out;
154 }
155 }
156
157 while (ioctl(g_fd, ioc, &zc) != 0) {
158 if (errno == ENOMEM && resultp != NULL) {
159 free((void *)(uintptr_t)zc.zc_nvlist_dst);
160 zc.zc_nvlist_dst_size *= 2;
161 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
162 malloc(zc.zc_nvlist_dst_size);
163 if (zc.zc_nvlist_dst == (uint64_t)0) {
164 error = ENOMEM;
165 goto out;
166 }
167 } else {
168 error = errno;
169 break;
170 }
171 }
172 if (zc.zc_nvlist_dst_filled) {
173 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
174 zc.zc_nvlist_dst_size);
175 }
176
177 out:
178 fnvlist_pack_free(packed, size);
179 free((void *)(uintptr_t)zc.zc_nvlist_dst);
180 return (error);
181 }
182
183 int
184 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
185 {
186 int error;
187 nvlist_t *args = fnvlist_alloc();
188 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
189 if (props != NULL)
190 fnvlist_add_nvlist(args, "props", props);
191 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
192 nvlist_free(args);
193 return (error);
194 }
195
196 int
197 lzc_clone(const char *fsname, const char *origin,
198 nvlist_t *props)
199 {
200 int error;
201 nvlist_t *args = fnvlist_alloc();
202 fnvlist_add_string(args, "origin", origin);
203 if (props != NULL)
204 fnvlist_add_nvlist(args, "props", props);
205 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
206 nvlist_free(args);
207 return (error);
208 }
209
210 int
211 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
212 {
213 /*
214 * The promote ioctl is still legacy, so we need to construct our
215 * own zfs_cmd_t rather than using lzc_ioctl().
216 */
217 zfs_cmd_t zc = { "\0" };
218
219 ASSERT3S(g_refcount, >, 0);
220 VERIFY3S(g_fd, !=, -1);
221
222 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
223 if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
224 int error = errno;
225 if (error == EEXIST && snapnamebuf != NULL)
226 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
227 return (error);
228 }
229 return (0);
230 }
231
232 /*
233 * Creates snapshots.
234 *
235 * The keys in the snaps nvlist are the snapshots to be created.
236 * They must all be in the same pool.
237 *
238 * The props nvlist is properties to set. Currently only user properties
239 * are supported. { user:prop_name -> string value }
240 *
241 * The returned results nvlist will have an entry for each snapshot that failed.
242 * The value will be the (int32) error code.
243 *
244 * The return value will be 0 if all snapshots were created, otherwise it will
245 * be the errno of a (unspecified) snapshot that failed.
246 */
247 int
248 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
249 {
250 nvpair_t *elem;
251 nvlist_t *args;
252 int error;
253 char pool[ZFS_MAX_DATASET_NAME_LEN];
254
255 *errlist = NULL;
256
257 /* determine the pool name */
258 elem = nvlist_next_nvpair(snaps, NULL);
259 if (elem == NULL)
260 return (0);
261 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
262 pool[strcspn(pool, "/@")] = '\0';
263
264 args = fnvlist_alloc();
265 fnvlist_add_nvlist(args, "snaps", snaps);
266 if (props != NULL)
267 fnvlist_add_nvlist(args, "props", props);
268
269 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
270 nvlist_free(args);
271
272 return (error);
273 }
274
275 /*
276 * Destroys snapshots.
277 *
278 * The keys in the snaps nvlist are the snapshots to be destroyed.
279 * They must all be in the same pool.
280 *
281 * Snapshots that do not exist will be silently ignored.
282 *
283 * If 'defer' is not set, and a snapshot has user holds or clones, the
284 * destroy operation will fail and none of the snapshots will be
285 * destroyed.
286 *
287 * If 'defer' is set, and a snapshot has user holds or clones, it will be
288 * marked for deferred destruction, and will be destroyed when the last hold
289 * or clone is removed/destroyed.
290 *
291 * The return value will be 0 if all snapshots were destroyed (or marked for
292 * later destruction if 'defer' is set) or didn't exist to begin with.
293 *
294 * Otherwise the return value will be the errno of a (unspecified) snapshot
295 * that failed, no snapshots will be destroyed, and the errlist will have an
296 * entry for each snapshot that failed. The value in the errlist will be
297 * the (int32) error code.
298 */
299 int
300 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
301 {
302 nvpair_t *elem;
303 nvlist_t *args;
304 int error;
305 char pool[ZFS_MAX_DATASET_NAME_LEN];
306
307 /* determine the pool name */
308 elem = nvlist_next_nvpair(snaps, NULL);
309 if (elem == NULL)
310 return (0);
311 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
312 pool[strcspn(pool, "/@")] = '\0';
313
314 args = fnvlist_alloc();
315 fnvlist_add_nvlist(args, "snaps", snaps);
316 if (defer)
317 fnvlist_add_boolean(args, "defer");
318
319 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
320 nvlist_free(args);
321
322 return (error);
323 }
324
325 int
326 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
327 uint64_t *usedp)
328 {
329 nvlist_t *args;
330 nvlist_t *result;
331 int err;
332 char fs[ZFS_MAX_DATASET_NAME_LEN];
333 char *atp;
334
335 /* determine the fs name */
336 (void) strlcpy(fs, firstsnap, sizeof (fs));
337 atp = strchr(fs, '@');
338 if (atp == NULL)
339 return (EINVAL);
340 *atp = '\0';
341
342 args = fnvlist_alloc();
343 fnvlist_add_string(args, "firstsnap", firstsnap);
344
345 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
346 nvlist_free(args);
347 if (err == 0)
348 *usedp = fnvlist_lookup_uint64(result, "used");
349 fnvlist_free(result);
350
351 return (err);
352 }
353
354 boolean_t
355 lzc_exists(const char *dataset)
356 {
357 /*
358 * The objset_stats ioctl is still legacy, so we need to construct our
359 * own zfs_cmd_t rather than using lzc_ioctl().
360 */
361 zfs_cmd_t zc = {"\0"};
362
363 ASSERT3S(g_refcount, >, 0);
364 VERIFY3S(g_fd, !=, -1);
365
366 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
367 return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
368 }
369
370 /*
371 * outnvl is unused.
372 * It was added to preserve the function signature in case it is
373 * needed in the future.
374 */
375 /*ARGSUSED*/
376 int
377 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
378 {
379 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
380 }
381
382 /*
383 * Create "user holds" on snapshots. If there is a hold on a snapshot,
384 * the snapshot can not be destroyed. (However, it can be marked for deletion
385 * by lzc_destroy_snaps(defer=B_TRUE).)
386 *
387 * The keys in the nvlist are snapshot names.
388 * The snapshots must all be in the same pool.
389 * The value is the name of the hold (string type).
390 *
391 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
392 * In this case, when the cleanup_fd is closed (including on process
393 * termination), the holds will be released. If the system is shut down
394 * uncleanly, the holds will be released when the pool is next opened
395 * or imported.
396 *
397 * Holds for snapshots which don't exist will be skipped and have an entry
398 * added to errlist, but will not cause an overall failure.
399 *
400 * The return value will be 0 if all holds, for snapshots that existed,
401 * were successfully created.
402 *
403 * Otherwise the return value will be the errno of a (unspecified) hold that
404 * failed and no holds will be created.
405 *
406 * In all cases the errlist will have an entry for each hold that failed
407 * (name = snapshot), with its value being the error code (int32).
408 */
409 int
410 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
411 {
412 char pool[ZFS_MAX_DATASET_NAME_LEN];
413 nvlist_t *args;
414 nvpair_t *elem;
415 int error;
416
417 /* determine the pool name */
418 elem = nvlist_next_nvpair(holds, NULL);
419 if (elem == NULL)
420 return (0);
421 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
422 pool[strcspn(pool, "/@")] = '\0';
423
424 args = fnvlist_alloc();
425 fnvlist_add_nvlist(args, "holds", holds);
426 if (cleanup_fd != -1)
427 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
428
429 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
430 nvlist_free(args);
431 return (error);
432 }
433
434 /*
435 * Release "user holds" on snapshots. If the snapshot has been marked for
436 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
437 * any clones, and all the user holds are removed, then the snapshot will be
438 * destroyed.
439 *
440 * The keys in the nvlist are snapshot names.
441 * The snapshots must all be in the same pool.
442 * The value is an nvlist whose keys are the holds to remove.
443 *
444 * Holds which failed to release because they didn't exist will have an entry
445 * added to errlist, but will not cause an overall failure.
446 *
447 * The return value will be 0 if the nvl holds was empty or all holds that
448 * existed, were successfully removed.
449 *
450 * Otherwise the return value will be the errno of a (unspecified) hold that
451 * failed to release and no holds will be released.
452 *
453 * In all cases the errlist will have an entry for each hold that failed to
454 * to release.
455 */
456 int
457 lzc_release(nvlist_t *holds, nvlist_t **errlist)
458 {
459 char pool[ZFS_MAX_DATASET_NAME_LEN];
460 nvpair_t *elem;
461
462 /* determine the pool name */
463 elem = nvlist_next_nvpair(holds, NULL);
464 if (elem == NULL)
465 return (0);
466 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
467 pool[strcspn(pool, "/@")] = '\0';
468
469 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
470 }
471
472 /*
473 * Retrieve list of user holds on the specified snapshot.
474 *
475 * On success, *holdsp will be set to an nvlist which the caller must free.
476 * The keys are the names of the holds, and the value is the creation time
477 * of the hold (uint64) in seconds since the epoch.
478 */
479 int
480 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
481 {
482 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
483 }
484
485 /*
486 * Generate a zfs send stream for the specified snapshot and write it to
487 * the specified file descriptor.
488 *
489 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
490 *
491 * If "from" is NULL, a full (non-incremental) stream will be sent.
492 * If "from" is non-NULL, it must be the full name of a snapshot or
493 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
494 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
495 * bookmark must represent an earlier point in the history of "snapname").
496 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
497 * or it can be the origin of "snapname"'s filesystem, or an earlier
498 * snapshot in the origin, etc.
499 *
500 * "fd" is the file descriptor to write the send stream to.
501 *
502 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
503 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
504 * records with drr_blksz > 128K.
505 *
506 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
507 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
508 * which the receiving system must support (as indicated by support
509 * for the "embedded_data" feature).
510 */
511 int
512 lzc_send(const char *snapname, const char *from, int fd,
513 enum lzc_send_flags flags)
514 {
515 return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
516 }
517
518 int
519 lzc_send_resume(const char *snapname, const char *from, int fd,
520 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
521 {
522 nvlist_t *args;
523 int err;
524
525 args = fnvlist_alloc();
526 fnvlist_add_int32(args, "fd", fd);
527 if (from != NULL)
528 fnvlist_add_string(args, "fromsnap", from);
529 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
530 fnvlist_add_boolean(args, "largeblockok");
531 if (flags & LZC_SEND_FLAG_EMBED_DATA)
532 fnvlist_add_boolean(args, "embedok");
533 if (flags & LZC_SEND_FLAG_COMPRESS)
534 fnvlist_add_boolean(args, "compressok");
535 if (resumeobj != 0 || resumeoff != 0) {
536 fnvlist_add_uint64(args, "resume_object", resumeobj);
537 fnvlist_add_uint64(args, "resume_offset", resumeoff);
538 }
539 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
540 nvlist_free(args);
541 return (err);
542 }
543
544 /*
545 * "from" can be NULL, a snapshot, or a bookmark.
546 *
547 * If from is NULL, a full (non-incremental) stream will be estimated. This
548 * is calculated very efficiently.
549 *
550 * If from is a snapshot, lzc_send_space uses the deadlists attached to
551 * each snapshot to efficiently estimate the stream size.
552 *
553 * If from is a bookmark, the indirect blocks in the destination snapshot
554 * are traversed, looking for blocks with a birth time since the creation TXG of
555 * the snapshot this bookmark was created from. This will result in
556 * significantly more I/O and be less efficient than a send space estimation on
557 * an equivalent snapshot.
558 */
559 int
560 lzc_send_space(const char *snapname, const char *from,
561 enum lzc_send_flags flags, uint64_t *spacep)
562 {
563 nvlist_t *args;
564 nvlist_t *result;
565 int err;
566
567 args = fnvlist_alloc();
568 if (from != NULL)
569 fnvlist_add_string(args, "from", from);
570 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
571 fnvlist_add_boolean(args, "largeblockok");
572 if (flags & LZC_SEND_FLAG_EMBED_DATA)
573 fnvlist_add_boolean(args, "embedok");
574 if (flags & LZC_SEND_FLAG_COMPRESS)
575 fnvlist_add_boolean(args, "compressok");
576 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
577 nvlist_free(args);
578 if (err == 0)
579 *spacep = fnvlist_lookup_uint64(result, "space");
580 nvlist_free(result);
581 return (err);
582 }
583
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short reads.
 *
 * Returns 0 once the buffer is full.  Returns EIO if read(2) fails (other
 * than being interrupted by a signal, which is retried), if end-of-file is
 * reached first, or if 'ilen' is negative.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int remaining = ilen;

	while (remaining > 0) {
		ssize_t rv = read(fd, cp, remaining);

		if (rv < 0) {
			/* An interrupted read transferred nothing; retry. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;		/* EOF before the buffer was filled */

		cp += rv;
		remaining -= (int)rv;
	}

	return (remaining == 0 ? 0 : EIO);
}
602
603 /*
604 * Linux adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy
605 * ZFS_IOC_RECV user/kernel interface. The new interface supports all stream
606 * options but is currently only used for resumable streams. This way updated
607 * user space utilities will interoperate with older kernel modules.
608 *
609 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
610 */
611 static int
612 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
613 const char *origin, boolean_t force, boolean_t resumable, int input_fd,
614 const dmu_replay_record_t *begin_record, int cleanup_fd,
615 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
616 nvlist_t **errors)
617 {
618 dmu_replay_record_t drr;
619 char fsname[MAXPATHLEN];
620 char *atp;
621 int error;
622
623 ASSERT3S(g_refcount, >, 0);
624 VERIFY3S(g_fd, !=, -1);
625
626 /* Set 'fsname' to the name of containing filesystem */
627 (void) strlcpy(fsname, snapname, sizeof (fsname));
628 atp = strchr(fsname, '@');
629 if (atp == NULL)
630 return (EINVAL);
631 *atp = '\0';
632
633 /* If the fs does not exist, try its parent. */
634 if (!lzc_exists(fsname)) {
635 char *slashp = strrchr(fsname, '/');
636 if (slashp == NULL)
637 return (ENOENT);
638 *slashp = '\0';
639 }
640
641 /*
642 * The begin_record is normally a non-byteswapped BEGIN record.
643 * For resumable streams it may be set to any non-byteswapped
644 * dmu_replay_record_t.
645 */
646 if (begin_record == NULL) {
647 error = recv_read(input_fd, &drr, sizeof (drr));
648 if (error != 0)
649 return (error);
650 } else {
651 drr = *begin_record;
652 }
653
654 if (resumable) {
655 nvlist_t *outnvl = NULL;
656 nvlist_t *innvl = fnvlist_alloc();
657
658 fnvlist_add_string(innvl, "snapname", snapname);
659
660 if (recvdprops != NULL)
661 fnvlist_add_nvlist(innvl, "props", recvdprops);
662
663 if (localprops != NULL)
664 fnvlist_add_nvlist(innvl, "localprops", localprops);
665
666 if (origin != NULL && strlen(origin))
667 fnvlist_add_string(innvl, "origin", origin);
668
669 fnvlist_add_byte_array(innvl, "begin_record",
670 (uchar_t *)&drr, sizeof (drr));
671
672 fnvlist_add_int32(innvl, "input_fd", input_fd);
673
674 if (force)
675 fnvlist_add_boolean(innvl, "force");
676
677 if (resumable)
678 fnvlist_add_boolean(innvl, "resumable");
679
680 if (cleanup_fd >= 0)
681 fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);
682
683 if (action_handle != NULL)
684 fnvlist_add_uint64(innvl, "action_handle",
685 *action_handle);
686
687 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
688
689 if (error == 0 && read_bytes != NULL)
690 error = nvlist_lookup_uint64(outnvl, "read_bytes",
691 read_bytes);
692
693 if (error == 0 && errflags != NULL)
694 error = nvlist_lookup_uint64(outnvl, "error_flags",
695 errflags);
696
697 if (error == 0 && action_handle != NULL)
698 error = nvlist_lookup_uint64(outnvl, "action_handle",
699 action_handle);
700
701 if (error == 0 && errors != NULL) {
702 nvlist_t *nvl;
703 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
704 if (error == 0)
705 *errors = fnvlist_dup(nvl);
706 }
707
708 fnvlist_free(innvl);
709 fnvlist_free(outnvl);
710 } else {
711 zfs_cmd_t zc = {"\0"};
712 char *packed = NULL;
713 size_t size;
714
715 ASSERT3S(g_refcount, >, 0);
716
717 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_value));
718 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
719
720 if (recvdprops != NULL) {
721 packed = fnvlist_pack(recvdprops, &size);
722 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
723 zc.zc_nvlist_src_size = size;
724 }
725
726 if (localprops != NULL) {
727 packed = fnvlist_pack(localprops, &size);
728 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
729 zc.zc_nvlist_conf_size = size;
730 }
731
732 if (origin != NULL)
733 (void) strlcpy(zc.zc_string, origin,
734 sizeof (zc.zc_string));
735
736 ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
737 zc.zc_begin_record = drr.drr_u.drr_begin;
738 zc.zc_guid = force;
739 zc.zc_cookie = input_fd;
740 zc.zc_cleanup_fd = -1;
741 zc.zc_action_handle = 0;
742
743 if (cleanup_fd >= 0)
744 zc.zc_cleanup_fd = cleanup_fd;
745
746 if (action_handle != NULL)
747 zc.zc_action_handle = *action_handle;
748
749 zc.zc_nvlist_dst_size = 128 * 1024;
750 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
751 malloc(zc.zc_nvlist_dst_size);
752
753 error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
754 if (error != 0) {
755 error = errno;
756 } else {
757 if (read_bytes != NULL)
758 *read_bytes = zc.zc_cookie;
759
760 if (errflags != NULL)
761 *errflags = zc.zc_obj;
762
763 if (action_handle != NULL)
764 *action_handle = zc.zc_action_handle;
765
766 if (errors != NULL)
767 VERIFY0(nvlist_unpack(
768 (void *)(uintptr_t)zc.zc_nvlist_dst,
769 zc.zc_nvlist_dst_size, errors, KM_SLEEP));
770 }
771
772 if (packed != NULL)
773 fnvlist_pack_free(packed, size);
774 free((void *)(uintptr_t)zc.zc_nvlist_dst);
775 }
776
777 return (error);
778 }
779
780 /*
781 * The simplest receive case: receive from the specified fd, creating the
782 * specified snapshot. Apply the specified properties as "received" properties
783 * (which can be overridden by locally-set properties). If the stream is a
784 * clone, its origin snapshot must be specified by 'origin'. The 'force'
785 * flag will cause the target filesystem to be rolled back or destroyed if
786 * necessary to receive.
787 *
788 * Return 0 on success or an errno on failure.
789 *
790 * Note: this interface does not work on dedup'd streams
791 * (those with DMU_BACKUP_FEATURE_DEDUP).
792 */
793 int
794 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
795 boolean_t force, int fd)
796 {
797 return (recv_impl(snapname, props, NULL, origin, force, B_FALSE, fd,
798 NULL, -1, NULL, NULL, NULL, NULL));
799 }
800
801 /*
802 * Like lzc_receive, but if the receive fails due to premature stream
803 * termination, the intermediate state will be preserved on disk. In this
804 * case, ECKSUM will be returned. The receive may subsequently be resumed
805 * with a resuming send stream generated by lzc_send_resume().
806 */
807 int
808 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
809 boolean_t force, int fd)
810 {
811 return (recv_impl(snapname, props, NULL, origin, force, B_TRUE, fd,
812 NULL, -1, NULL, NULL, NULL, NULL));
813 }
814
815 /*
816 * Like lzc_receive, but allows the caller to read the begin record and then to
817 * pass it in. That could be useful if the caller wants to derive, for example,
818 * the snapname or the origin parameters based on the information contained in
819 * the begin record.
820 * The begin record must be in its original form as read from the stream,
821 * in other words, it should not be byteswapped.
822 *
823 * The 'resumable' parameter allows to obtain the same behavior as with
824 * lzc_receive_resumable.
825 */
826 int
827 lzc_receive_with_header(const char *snapname, nvlist_t *props,
828 const char *origin, boolean_t force, boolean_t resumable, int fd,
829 const dmu_replay_record_t *begin_record)
830 {
831 if (begin_record == NULL)
832 return (EINVAL);
833 return (recv_impl(snapname, props, NULL, origin, force, resumable, fd,
834 begin_record, -1, NULL, NULL, NULL, NULL));
835 }
836
837 /*
838 * Like lzc_receive, but allows the caller to pass all supported arguments
839 * and retrieve all values returned. The only additional input parameter
840 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
841 *
842 * The following parameters all provide return values. Several may be set
843 * in the failure case and will contain additional information.
844 *
845 * The 'read_bytes' value will be set to the total number of bytes read.
846 *
847 * The 'errflags' value will contain zprop_errflags_t flags which are
848 * used to describe any failures.
849 *
850 * The 'action_handle' is used to pass the handle for this guid/ds mapping.
851 * It should be set to zero on first call and will contain an updated handle
852 * on success, it should be passed in subsequent calls.
853 *
854 * The 'errors' nvlist contains an entry for each unapplied received
855 * property. Callers are responsible for freeing this nvlist.
856 */
857 int lzc_receive_one(const char *snapname, nvlist_t *props,
858 const char *origin, boolean_t force, boolean_t resumable, int input_fd,
859 const dmu_replay_record_t *begin_record, int cleanup_fd,
860 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
861 nvlist_t **errors)
862 {
863 return (recv_impl(snapname, props, NULL, origin, force, resumable,
864 input_fd, begin_record, cleanup_fd, read_bytes, errflags,
865 action_handle, errors));
866 }
867
868 /*
869 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
870 * argument.
871 *
872 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
873 * exclude ('zfs receive -x') properties. Callers are responsible for freeing
874 * this nvlist
875 */
876 int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
877 nvlist_t *cmdprops, const char *origin, boolean_t force,
878 boolean_t resumable, int input_fd, const dmu_replay_record_t *begin_record,
879 int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
880 uint64_t *action_handle, nvlist_t **errors)
881 {
882 return (recv_impl(snapname, props, cmdprops, origin, force, resumable,
883 input_fd, begin_record, cleanup_fd, read_bytes, errflags,
884 action_handle, errors));
885 }
886
887 /*
888 * Roll back this filesystem or volume to its most recent snapshot.
889 * If snapnamebuf is not NULL, it will be filled in with the name
890 * of the most recent snapshot.
891 * Note that the latest snapshot may change if a new one is concurrently
892 * created or the current one is destroyed. lzc_rollback_to can be used
893 * to roll back to a specific latest snapshot.
894 *
895 * Return 0 on success or an errno on failure.
896 */
897 int
898 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
899 {
900 nvlist_t *args;
901 nvlist_t *result;
902 int err;
903
904 args = fnvlist_alloc();
905 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
906 nvlist_free(args);
907 if (err == 0 && snapnamebuf != NULL) {
908 const char *snapname = fnvlist_lookup_string(result, "target");
909 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
910 }
911 nvlist_free(result);
912
913 return (err);
914 }
915
916 /*
917 * Roll back this filesystem or volume to the specified snapshot,
918 * if possible.
919 *
920 * Return 0 on success or an errno on failure.
921 */
922 int
923 lzc_rollback_to(const char *fsname, const char *snapname)
924 {
925 nvlist_t *args;
926 nvlist_t *result;
927 int err;
928
929 args = fnvlist_alloc();
930 fnvlist_add_string(args, "target", snapname);
931 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
932 nvlist_free(args);
933 nvlist_free(result);
934 return (err);
935 }
936
937 /*
938 * Creates bookmarks.
939 *
940 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
941 * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and
942 * snapshots must be in the same pool.
943 *
944 * The returned results nvlist will have an entry for each bookmark that failed.
945 * The value will be the (int32) error code.
946 *
947 * The return value will be 0 if all bookmarks were created, otherwise it will
948 * be the errno of a (undetermined) bookmarks that failed.
949 */
950 int
951 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
952 {
953 nvpair_t *elem;
954 int error;
955 char pool[ZFS_MAX_DATASET_NAME_LEN];
956
957 /* determine the pool name */
958 elem = nvlist_next_nvpair(bookmarks, NULL);
959 if (elem == NULL)
960 return (0);
961 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
962 pool[strcspn(pool, "/#")] = '\0';
963
964 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
965
966 return (error);
967 }
968
969 /*
970 * Retrieve bookmarks.
971 *
972 * Retrieve the list of bookmarks for the given file system. The props
973 * parameter is an nvlist of property names (with no values) that will be
974 * returned for each bookmark.
975 *
976 * The following are valid properties on bookmarks, all of which are numbers
977 * (represented as uint64 in the nvlist)
978 *
979 * "guid" - globally unique identifier of the snapshot it refers to
980 * "createtxg" - txg when the snapshot it refers to was created
981 * "creation" - timestamp when the snapshot it refers to was created
982 *
983 * The format of the returned nvlist as follows:
984 * <short name of bookmark> -> {
985 * <name of property> -> {
986 * "value" -> uint64
987 * }
988 * }
989 */
990 int
991 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
992 {
993 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
994 }
995
996 /*
997 * Destroys bookmarks.
998 *
999 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1000 * They must all be in the same pool. Bookmarks are specified as
1001 * <fs>#<bmark>.
1002 *
1003 * Bookmarks that do not exist will be silently ignored.
1004 *
1005 * The return value will be 0 if all bookmarks that existed were destroyed.
1006 *
1007 * Otherwise the return value will be the errno of a (undetermined) bookmark
1008 * that failed, no bookmarks will be destroyed, and the errlist will have an
1009 * entry for each bookmarks that failed. The value in the errlist will be
1010 * the (int32) error code.
1011 */
1012 int
1013 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1014 {
1015 nvpair_t *elem;
1016 int error;
1017 char pool[ZFS_MAX_DATASET_NAME_LEN];
1018
1019 /* determine the pool name */
1020 elem = nvlist_next_nvpair(bmarks, NULL);
1021 if (elem == NULL)
1022 return (0);
1023 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1024 pool[strcspn(pool, "/#")] = '\0';
1025
1026 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1027
1028 return (error);
1029 }