]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libzfs_core/libzfs_core.c
Change U16 to U32 due to atomic_inc_32_nv
[mirror_zfs.git] / lib / libzfs_core / libzfs_core.c
CommitLineData
6f1ffb06
MA
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
5dc8b736 23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
95fd54a1 24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
6f1ffb06
MA
25 */
26
27/*
28 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
29 * It has the following characteristics:
30 *
31 * - Thread Safe. libzfs_core is accessible concurrently from multiple
32 * threads. This is accomplished primarily by avoiding global data
33 * (e.g. caching). Since it's thread-safe, there is no reason for a
34 * process to have multiple libzfs "instances". Therefore, we store
35 * our few pieces of data (e.g. the file descriptor) in global
36 * variables. The fd is reference-counted so that the libzfs_core
37 * library can be "initialized" multiple times (e.g. by different
38 * consumers within the same process).
39 *
40 * - Committed Interface. The libzfs_core interface will be committed,
41 * therefore consumers can compile against it and be confident that
42 * their code will continue to work on future releases of this code.
43 * Currently, the interface is Evolving (not Committed), but we intend
44 * to commit to it once it is more complete and we determine that it
45 * meets the needs of all consumers.
46 *
b8fce77b 47 * - Programmatic Error Handling. libzfs_core communicates errors with
6f1ffb06
MA
48 * defined error numbers, and doesn't print anything to stdout/stderr.
49 *
50 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
51 * to/from the kernel ioctls. There is generally a 1:1 correspondence
52 * between libzfs_core functions and ioctls to /dev/zfs.
53 *
54 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
55 * with kernel ioctls, and kernel ioctls are generally atomic, each
56 * libzfs_core function is atomic. For example, creating multiple
57 * snapshots with a single call to lzc_snapshot() is atomic -- it
58 * can't fail with only some of the requested snapshots created, even
59 * in the event of power loss or system crash.
60 *
61 * - Continued libzfs Support. Some higher-level operations (e.g.
62 * support for "zfs send -R") are too complicated to fit the scope of
63 * libzfs_core. This functionality will continue to live in libzfs.
64 * Where appropriate, libzfs will use the underlying atomic operations
65 * of libzfs_core. For example, libzfs may implement "zfs send -R |
66 * zfs receive" by using individual "send one snapshot", rename,
67 * destroy, and "receive one snapshot" operations in libzfs_core.
68 * /sbin/zfs and /sbin/zpool will link with both libzfs and
69 * libzfs_core. Other consumers should aim to use only libzfs_core,
70 * since that will be the supported, stable interface going forwards.
71 */
72
73#include <libzfs_core.h>
74#include <ctype.h>
75#include <unistd.h>
76#include <stdlib.h>
77#include <string.h>
78#include <errno.h>
79#include <fcntl.h>
80#include <pthread.h>
81#include <sys/nvpair.h>
82#include <sys/param.h>
83#include <sys/types.h>
84#include <sys/stat.h>
85#include <sys/zfs_ioctl.h>
86
/* Descriptor for /dev/zfs shared by every caller; -1 while it is closed. */
static int g_fd = -1;

/* Held while g_fd and g_refcount are inspected or changed by init/fini. */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;

/* Number of libzfs_core_init() calls not yet undone by libzfs_core_fini(). */
static int g_refcount = 0;
90
91int
92libzfs_core_init(void)
93{
94 (void) pthread_mutex_lock(&g_lock);
95 if (g_refcount == 0) {
96 g_fd = open("/dev/zfs", O_RDWR);
97 if (g_fd < 0) {
98 (void) pthread_mutex_unlock(&g_lock);
99 return (errno);
100 }
101 }
102 g_refcount++;
103 (void) pthread_mutex_unlock(&g_lock);
104 return (0);
105}
106
107void
108libzfs_core_fini(void)
109{
110 (void) pthread_mutex_lock(&g_lock);
111 ASSERT3S(g_refcount, >, 0);
e2454897
GM
112
113 if (g_refcount > 0)
114 g_refcount--;
115
116 if (g_refcount == 0 && g_fd != -1) {
6f1ffb06 117 (void) close(g_fd);
e2454897
GM
118 g_fd = -1;
119 }
6f1ffb06
MA
120 (void) pthread_mutex_unlock(&g_lock);
121}
122
123static int
124lzc_ioctl(zfs_ioc_t ioc, const char *name,
125 nvlist_t *source, nvlist_t **resultp)
126{
13fe0198 127 zfs_cmd_t zc = {"\0"};
6f1ffb06
MA
128 int error = 0;
129 char *packed;
130 size_t size;
131
132 ASSERT3S(g_refcount, >, 0);
e2454897 133 VERIFY3S(g_fd, !=, -1);
6f1ffb06
MA
134
135 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
136
137 packed = fnvlist_pack(source, &size);
138 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
139 zc.zc_nvlist_src_size = size;
140
141 if (resultp != NULL) {
13fe0198 142 *resultp = NULL;
6f1ffb06
MA
143 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
144 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
145 malloc(zc.zc_nvlist_dst_size);
146 if (zc.zc_nvlist_dst == (uint64_t)0) {
147 error = ENOMEM;
148 goto out;
149 }
150 }
151
152 while (ioctl(g_fd, ioc, &zc) != 0) {
153 if (errno == ENOMEM && resultp != NULL) {
154 free((void *)(uintptr_t)zc.zc_nvlist_dst);
155 zc.zc_nvlist_dst_size *= 2;
156 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
157 malloc(zc.zc_nvlist_dst_size);
158 if (zc.zc_nvlist_dst == (uint64_t)0) {
159 error = ENOMEM;
160 goto out;
161 }
162 } else {
163 error = errno;
164 break;
165 }
166 }
167 if (zc.zc_nvlist_dst_filled) {
168 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
169 zc.zc_nvlist_dst_size);
6f1ffb06
MA
170 }
171
172out:
173 fnvlist_pack_free(packed, size);
174 free((void *)(uintptr_t)zc.zc_nvlist_dst);
175 return (error);
176}
177
178int
e67a7ffb 179lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
6f1ffb06
MA
180{
181 int error;
182 nvlist_t *args = fnvlist_alloc();
e67a7ffb 183 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
6f1ffb06
MA
184 if (props != NULL)
185 fnvlist_add_nvlist(args, "props", props);
186 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
187 nvlist_free(args);
188 return (error);
189}
190
191int
192lzc_clone(const char *fsname, const char *origin,
193 nvlist_t *props)
194{
195 int error;
196 nvlist_t *args = fnvlist_alloc();
197 fnvlist_add_string(args, "origin", origin);
198 if (props != NULL)
199 fnvlist_add_nvlist(args, "props", props);
200 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
201 nvlist_free(args);
202 return (error);
203}
204
205/*
206 * Creates snapshots.
207 *
208 * The keys in the snaps nvlist are the snapshots to be created.
209 * They must all be in the same pool.
210 *
211 * The props nvlist is properties to set. Currently only user properties
212 * are supported. { user:prop_name -> string value }
213 *
214 * The returned results nvlist will have an entry for each snapshot that failed.
215 * The value will be the (int32) error code.
216 *
217 * The return value will be 0 if all snapshots were created, otherwise it will
13fe0198 218 * be the errno of a (unspecified) snapshot that failed.
6f1ffb06
MA
219 */
220int
221lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
222{
223 nvpair_t *elem;
224 nvlist_t *args;
225 int error;
eca7b760 226 char pool[ZFS_MAX_DATASET_NAME_LEN];
6f1ffb06
MA
227
228 *errlist = NULL;
229
230 /* determine the pool name */
231 elem = nvlist_next_nvpair(snaps, NULL);
232 if (elem == NULL)
233 return (0);
234 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
235 pool[strcspn(pool, "/@")] = '\0';
236
237 args = fnvlist_alloc();
238 fnvlist_add_nvlist(args, "snaps", snaps);
239 if (props != NULL)
240 fnvlist_add_nvlist(args, "props", props);
241
242 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
243 nvlist_free(args);
244
245 return (error);
246}
247
248/*
249 * Destroys snapshots.
250 *
251 * The keys in the snaps nvlist are the snapshots to be destroyed.
252 * They must all be in the same pool.
253 *
254 * Snapshots that do not exist will be silently ignored.
255 *
256 * If 'defer' is not set, and a snapshot has user holds or clones, the
257 * destroy operation will fail and none of the snapshots will be
258 * destroyed.
259 *
260 * If 'defer' is set, and a snapshot has user holds or clones, it will be
261 * marked for deferred destruction, and will be destroyed when the last hold
262 * or clone is removed/destroyed.
263 *
264 * The return value will be 0 if all snapshots were destroyed (or marked for
1a077756 265 * later destruction if 'defer' is set) or didn't exist to begin with.
6f1ffb06 266 *
13fe0198 267 * Otherwise the return value will be the errno of a (unspecified) snapshot
6f1ffb06
MA
268 * that failed, no snapshots will be destroyed, and the errlist will have an
269 * entry for each snapshot that failed. The value in the errlist will be
270 * the (int32) error code.
271 */
272int
273lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
274{
275 nvpair_t *elem;
276 nvlist_t *args;
277 int error;
eca7b760 278 char pool[ZFS_MAX_DATASET_NAME_LEN];
6f1ffb06
MA
279
280 /* determine the pool name */
281 elem = nvlist_next_nvpair(snaps, NULL);
282 if (elem == NULL)
283 return (0);
284 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
285 pool[strcspn(pool, "/@")] = '\0';
286
287 args = fnvlist_alloc();
288 fnvlist_add_nvlist(args, "snaps", snaps);
289 if (defer)
290 fnvlist_add_boolean(args, "defer");
291
292 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
293 nvlist_free(args);
294
295 return (error);
6f1ffb06
MA
296}
297
298int
299lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
300 uint64_t *usedp)
301{
302 nvlist_t *args;
303 nvlist_t *result;
304 int err;
eca7b760 305 char fs[ZFS_MAX_DATASET_NAME_LEN];
6f1ffb06
MA
306 char *atp;
307
308 /* determine the fs name */
309 (void) strlcpy(fs, firstsnap, sizeof (fs));
310 atp = strchr(fs, '@');
311 if (atp == NULL)
312 return (EINVAL);
313 *atp = '\0';
314
315 args = fnvlist_alloc();
316 fnvlist_add_string(args, "firstsnap", firstsnap);
317
318 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
319 nvlist_free(args);
320 if (err == 0)
321 *usedp = fnvlist_lookup_uint64(result, "used");
322 fnvlist_free(result);
323
324 return (err);
325}
326
327boolean_t
328lzc_exists(const char *dataset)
329{
330 /*
331 * The objset_stats ioctl is still legacy, so we need to construct our
332 * own zfs_cmd_t rather than using zfsc_ioctl().
333 */
13fe0198 334 zfs_cmd_t zc = {"\0"};
6f1ffb06 335
e2454897
GM
336 ASSERT3S(g_refcount, >, 0);
337 VERIFY3S(g_fd, !=, -1);
338
6f1ffb06
MA
339 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
340 return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
341}
342
13fe0198
MA
343/*
344 * Create "user holds" on snapshots. If there is a hold on a snapshot,
345 * the snapshot can not be destroyed. (However, it can be marked for deletion
346 * by lzc_destroy_snaps(defer=B_TRUE).)
347 *
348 * The keys in the nvlist are snapshot names.
349 * The snapshots must all be in the same pool.
350 * The value is the name of the hold (string type).
351 *
352 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
353 * In this case, when the cleanup_fd is closed (including on process
354 * termination), the holds will be released. If the system is shut down
355 * uncleanly, the holds will be released when the pool is next opened
356 * or imported.
357 *
95fd54a1 358 * Holds for snapshots which don't exist will be skipped and have an entry
1a077756 359 * added to errlist, but will not cause an overall failure.
95fd54a1 360 *
1a077756 361 * The return value will be 0 if all holds, for snapshots that existed,
b8fce77b 362 * were successfully created.
95fd54a1
SH
363 *
364 * Otherwise the return value will be the errno of a (unspecified) hold that
365 * failed and no holds will be created.
366 *
367 * In all cases the errlist will have an entry for each hold that failed
368 * (name = snapshot), with its value being the error code (int32).
13fe0198
MA
369 */
370int
371lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
372{
eca7b760 373 char pool[ZFS_MAX_DATASET_NAME_LEN];
13fe0198
MA
374 nvlist_t *args;
375 nvpair_t *elem;
376 int error;
377
378 /* determine the pool name */
379 elem = nvlist_next_nvpair(holds, NULL);
380 if (elem == NULL)
381 return (0);
382 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
383 pool[strcspn(pool, "/@")] = '\0';
384
385 args = fnvlist_alloc();
386 fnvlist_add_nvlist(args, "holds", holds);
387 if (cleanup_fd != -1)
388 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
389
390 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
391 nvlist_free(args);
392 return (error);
393}
394
395/*
396 * Release "user holds" on snapshots. If the snapshot has been marked for
397 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
398 * any clones, and all the user holds are removed, then the snapshot will be
399 * destroyed.
400 *
401 * The keys in the nvlist are snapshot names.
402 * The snapshots must all be in the same pool.
d5884c34 403 * The value is an nvlist whose keys are the holds to remove.
13fe0198 404 *
95fd54a1 405 * Holds which failed to release because they didn't exist will have an entry
1a077756 406 * added to errlist, but will not cause an overall failure.
95fd54a1
SH
407 *
408 * The return value will be 0 if the nvl holds was empty or all holds that
1a077756 409 * existed, were successfully removed.
95fd54a1
SH
410 *
411 * Otherwise the return value will be the errno of a (unspecified) hold that
412 * failed to release and no holds will be released.
413 *
414 * In all cases the errlist will have an entry for each hold that failed to
415 * to release.
13fe0198
MA
416 */
417int
418lzc_release(nvlist_t *holds, nvlist_t **errlist)
419{
eca7b760 420 char pool[ZFS_MAX_DATASET_NAME_LEN];
13fe0198
MA
421 nvpair_t *elem;
422
423 /* determine the pool name */
424 elem = nvlist_next_nvpair(holds, NULL);
425 if (elem == NULL)
426 return (0);
427 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
428 pool[strcspn(pool, "/@")] = '\0';
429
430 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
431}
432
433/*
434 * Retrieve list of user holds on the specified snapshot.
435 *
d5884c34 436 * On success, *holdsp will be set to an nvlist which the caller must free.
13fe0198
MA
437 * The keys are the names of the holds, and the value is the creation time
438 * of the hold (uint64) in seconds since the epoch.
439 */
440int
441lzc_get_holds(const char *snapname, nvlist_t **holdsp)
442{
443 int error;
444 nvlist_t *innvl = fnvlist_alloc();
445 error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
446 fnvlist_free(innvl);
447 return (error);
448}
449
6f1ffb06 450/*
9b67f605
MA
451 * Generate a zfs send stream for the specified snapshot and write it to
452 * the specified file descriptor.
da536844
MA
453 *
454 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
455 *
456 * If "from" is NULL, a full (non-incremental) stream will be sent.
457 * If "from" is non-NULL, it must be the full name of a snapshot or
458 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
459 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
460 * bookmark must represent an earlier point in the history of "snapname").
461 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
462 * or it can be the origin of "snapname"'s filesystem, or an earlier
463 * snapshot in the origin, etc.
464 *
465 * "fd" is the file descriptor to write the send stream to.
9b67f605 466 *
f1512ee6
MA
467 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
468 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
469 * records with drr_blksz > 128K.
470 *
9b67f605
MA
471 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
472 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
473 * which the receiving system must support (as indicated by support
474 * for the "embedded_data" feature).
6f1ffb06
MA
475 */
476int
9b67f605
MA
477lzc_send(const char *snapname, const char *from, int fd,
478 enum lzc_send_flags flags)
47dfff3b
MA
479{
480 return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
481}
482
483int
484lzc_send_resume(const char *snapname, const char *from, int fd,
485 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
6f1ffb06
MA
486{
487 nvlist_t *args;
488 int err;
489
490 args = fnvlist_alloc();
491 fnvlist_add_int32(args, "fd", fd);
da536844
MA
492 if (from != NULL)
493 fnvlist_add_string(args, "fromsnap", from);
f1512ee6
MA
494 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
495 fnvlist_add_boolean(args, "largeblockok");
2aa34383
DK
496 if (flags & LZC_SEND_FLAG_COMPRESS)
497 fnvlist_add_boolean(args, "compressok");
9b67f605
MA
498 if (flags & LZC_SEND_FLAG_EMBED_DATA)
499 fnvlist_add_boolean(args, "embedok");
47dfff3b
MA
500 if (resumeobj != 0 || resumeoff != 0) {
501 fnvlist_add_uint64(args, "resume_object", resumeobj);
502 fnvlist_add_uint64(args, "resume_offset", resumeoff);
503 }
6f1ffb06
MA
504 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
505 nvlist_free(args);
506 return (err);
507}
508
509/*
5dc8b736
MG
510 * "from" can be NULL, a snapshot, or a bookmark.
511 *
512 * If from is NULL, a full (non-incremental) stream will be estimated. This
513 * is calculated very efficiently.
514 *
515 * If from is a snapshot, lzc_send_space uses the deadlists attached to
516 * each snapshot to efficiently estimate the stream size.
517 *
518 * If from is a bookmark, the indirect blocks in the destination snapshot
519 * are traversed, looking for blocks with a birth time since the creation TXG of
520 * the snapshot this bookmark was created from. This will result in
521 * significantly more I/O and be less efficient than a send space estimation on
522 * an equivalent snapshot.
6f1ffb06
MA
523 */
524int
2aa34383
DK
525lzc_send_space(const char *snapname, const char *from,
526 enum lzc_send_flags flags, uint64_t *spacep)
6f1ffb06
MA
527{
528 nvlist_t *args;
529 nvlist_t *result;
530 int err;
531
532 args = fnvlist_alloc();
5dc8b736
MG
533 if (from != NULL)
534 fnvlist_add_string(args, "from", from);
2aa34383
DK
535 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
536 fnvlist_add_boolean(args, "largeblockok");
537 if (flags & LZC_SEND_FLAG_EMBED_DATA)
538 fnvlist_add_boolean(args, "embedok");
539 if (flags & LZC_SEND_FLAG_COMPRESS)
540 fnvlist_add_boolean(args, "compressok");
6f1ffb06
MA
541 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
542 nvlist_free(args);
543 if (err == 0)
544 *spacep = fnvlist_lookup_uint64(result, "space");
545 nvlist_free(result);
546 return (err);
547}
548
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf'.
 *
 * Short reads are continued until the buffer is full, and a read that was
 * interrupted by a signal (EINTR) is retried.
 *
 * Returns 0 once all 'ilen' bytes have been read (trivially so when ilen
 * is 0), or EIO if read() fails, end-of-file is reached early, or 'ilen'
 * is negative.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0) {
			/* An interrupted read is not a stream error. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;		/* EOF before the buffer was filled */

		cp += rv;
		len -= (int)rv;
	}

	return (len == 0 ? 0 : EIO);
}
567
43e52edd
BB
568/*
569 * Linux adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy
570 * ZFS_IOC_RECV user/kernel interface. The new interface supports all stream
571 * options but is currently only used for resumable streams. This way updated
572 * user space utilities will interoperate with older kernel modules.
573 *
574 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
575 */
47dfff3b 576static int
fd41e935 577recv_impl(const char *snapname, nvlist_t *props, const char *origin,
43e52edd
BB
578 boolean_t force, boolean_t resumable, int input_fd,
579 const dmu_replay_record_t *begin_record, int cleanup_fd,
580 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
581 nvlist_t **errors)
6f1ffb06 582{
43e52edd
BB
583 dmu_replay_record_t drr;
584 char fsname[MAXPATHLEN];
6f1ffb06 585 char *atp;
6f1ffb06
MA
586 int error;
587
e2454897
GM
588 ASSERT3S(g_refcount, >, 0);
589 VERIFY3S(g_fd, !=, -1);
590
43e52edd
BB
591 /* Set 'fsname' to the name of containing filesystem */
592 (void) strlcpy(fsname, snapname, sizeof (fsname));
593 atp = strchr(fsname, '@');
6f1ffb06
MA
594 if (atp == NULL)
595 return (EINVAL);
596 *atp = '\0';
597
43e52edd
BB
598 /* If the fs does not exist, try its parent. */
599 if (!lzc_exists(fsname)) {
600 char *slashp = strrchr(fsname, '/');
6f1ffb06
MA
601 if (slashp == NULL)
602 return (ENOENT);
603 *slashp = '\0';
43e52edd 604 }
6f1ffb06 605
43e52edd
BB
606 /*
607 * The begin_record is normally a non-byteswapped BEGIN record.
608 * For resumable streams it may be set to any non-byteswapped
609 * dmu_replay_record_t.
610 */
611 if (begin_record == NULL) {
612 error = recv_read(input_fd, &drr, sizeof (drr));
613 if (error != 0)
614 return (error);
615 } else {
616 drr = *begin_record;
6f1ffb06
MA
617 }
618
43e52edd
BB
619 if (resumable) {
620 nvlist_t *outnvl = NULL;
621 nvlist_t *innvl = fnvlist_alloc();
6f1ffb06 622
43e52edd 623 fnvlist_add_string(innvl, "snapname", snapname);
6f1ffb06 624
43e52edd
BB
625 if (props != NULL)
626 fnvlist_add_nvlist(innvl, "props", props);
6f1ffb06 627
43e52edd
BB
628 if (origin != NULL && strlen(origin))
629 fnvlist_add_string(innvl, "origin", origin);
630
631 fnvlist_add_byte_array(innvl, "begin_record",
02730c33 632 (uchar_t *)&drr, sizeof (drr));
43e52edd
BB
633
634 fnvlist_add_int32(innvl, "input_fd", input_fd);
635
636 if (force)
637 fnvlist_add_boolean(innvl, "force");
638
639 if (resumable)
640 fnvlist_add_boolean(innvl, "resumable");
641
642 if (cleanup_fd >= 0)
643 fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);
644
645 if (action_handle != NULL)
646 fnvlist_add_uint64(innvl, "action_handle",
647 *action_handle);
648
649 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
650
651 if (error == 0 && read_bytes != NULL)
652 error = nvlist_lookup_uint64(outnvl, "read_bytes",
653 read_bytes);
654
655 if (error == 0 && errflags != NULL)
656 error = nvlist_lookup_uint64(outnvl, "error_flags",
657 errflags);
658
659 if (error == 0 && action_handle != NULL)
660 error = nvlist_lookup_uint64(outnvl, "action_handle",
661 action_handle);
662
663 if (error == 0 && errors != NULL) {
664 nvlist_t *nvl;
665 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
666 if (error == 0)
667 *errors = fnvlist_dup(nvl);
668 }
669
670 fnvlist_free(innvl);
671 fnvlist_free(outnvl);
fd41e935 672 } else {
43e52edd
BB
673 zfs_cmd_t zc = {"\0"};
674 char *packed = NULL;
675 size_t size;
6f1ffb06 676
43e52edd 677 ASSERT3S(g_refcount, >, 0);
6f1ffb06 678
43e52edd
BB
679 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_value));
680 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
6f1ffb06 681
43e52edd
BB
682 if (props != NULL) {
683 packed = fnvlist_pack(props, &size);
684 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
685 zc.zc_nvlist_src_size = size;
686 }
47dfff3b 687
43e52edd
BB
688 if (origin != NULL)
689 (void) strlcpy(zc.zc_string, origin,
690 sizeof (zc.zc_string));
6f1ffb06 691
43e52edd
BB
692 ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
693 zc.zc_begin_record = drr.drr_u.drr_begin;
694 zc.zc_guid = force;
695 zc.zc_cookie = input_fd;
696 zc.zc_cleanup_fd = -1;
697 zc.zc_action_handle = 0;
698
699 if (cleanup_fd >= 0)
700 zc.zc_cleanup_fd = cleanup_fd;
701
702 if (action_handle != NULL)
703 zc.zc_action_handle = *action_handle;
704
705 zc.zc_nvlist_dst_size = 128 * 1024;
706 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
707 malloc(zc.zc_nvlist_dst_size);
708
709 error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
710 if (error != 0) {
711 error = errno;
712 } else {
713 if (read_bytes != NULL)
714 *read_bytes = zc.zc_cookie;
715
716 if (errflags != NULL)
717 *errflags = zc.zc_obj;
718
719 if (action_handle != NULL)
720 *action_handle = zc.zc_action_handle;
721
722 if (errors != NULL)
723 VERIFY0(nvlist_unpack(
724 (void *)(uintptr_t)zc.zc_nvlist_dst,
725 zc.zc_nvlist_dst_size, errors, KM_SLEEP));
726 }
727
728 if (packed != NULL)
729 fnvlist_pack_free(packed, size);
730 free((void *)(uintptr_t)zc.zc_nvlist_dst);
731 }
6f1ffb06 732
6f1ffb06
MA
733 return (error);
734}
46ba1e59 735
47dfff3b
MA
736/*
737 * The simplest receive case: receive from the specified fd, creating the
738 * specified snapshot. Apply the specified properties as "received" properties
739 * (which can be overridden by locally-set properties). If the stream is a
740 * clone, its origin snapshot must be specified by 'origin'. The 'force'
741 * flag will cause the target filesystem to be rolled back or destroyed if
742 * necessary to receive.
743 *
744 * Return 0 on success or an errno on failure.
745 *
746 * Note: this interface does not work on dedup'd streams
747 * (those with DMU_BACKUP_FEATURE_DEDUP).
748 */
749int
750lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
751 boolean_t force, int fd)
752{
43e52edd
BB
753 return (recv_impl(snapname, props, origin, force, B_FALSE, fd,
754 NULL, -1, NULL, NULL, NULL, NULL));
47dfff3b
MA
755}
756
757/*
758 * Like lzc_receive, but if the receive fails due to premature stream
759 * termination, the intermediate state will be preserved on disk. In this
760 * case, ECKSUM will be returned. The receive may subsequently be resumed
761 * with a resuming send stream generated by lzc_send_resume().
762 */
763int
764lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
765 boolean_t force, int fd)
766{
43e52edd
BB
767 return (recv_impl(snapname, props, origin, force, B_TRUE, fd,
768 NULL, -1, NULL, NULL, NULL, NULL));
fd41e935
BB
769}
770
771/*
772 * Like lzc_receive, but allows the caller to read the begin record and then to
773 * pass it in. That could be useful if the caller wants to derive, for example,
774 * the snapname or the origin parameters based on the information contained in
775 * the begin record.
776 * The begin record must be in its original form as read from the stream,
777 * in other words, it should not be byteswapped.
778 *
779 * The 'resumable' parameter allows to obtain the same behavior as with
780 * lzc_receive_resumable.
781 */
782int
783lzc_receive_with_header(const char *snapname, nvlist_t *props,
784 const char *origin, boolean_t force, boolean_t resumable, int fd,
785 const dmu_replay_record_t *begin_record)
786{
787 if (begin_record == NULL)
788 return (EINVAL);
789 return (recv_impl(snapname, props, origin, force, resumable, fd,
43e52edd
BB
790 begin_record, -1, NULL, NULL, NULL, NULL));
791}
792
793/*
794 * Like lzc_receive, but allows the caller to pass all supported arguments
795 * and retrieve all values returned. The only additional input parameter
796 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
797 *
798 * The following parameters all provide return values. Several may be set
799 * in the failure case and will contain additional information.
800 *
801 * The 'read_bytes' value will be set to the total number of bytes read.
802 *
803 * The 'errflags' value will contain zprop_errflags_t flags which are
804 * used to describe any failures.
805 *
806 * The 'action_handle' is used to pass the handle for this guid/ds mapping.
807 * It should be set to zero on first call and will contain an updated handle
808 * on success, it should be passed in subsequent calls.
809 *
810 * The 'errors' nvlist contains an entry for each unapplied received
811 * property. Callers are responsible for freeing this nvlist.
812 */
813int lzc_receive_one(const char *snapname, nvlist_t *props,
814 const char *origin, boolean_t force, boolean_t resumable, int input_fd,
815 const dmu_replay_record_t *begin_record, int cleanup_fd,
816 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
817 nvlist_t **errors)
818{
819 return (recv_impl(snapname, props, origin, force, resumable,
820 input_fd, begin_record, cleanup_fd, read_bytes, errflags,
821 action_handle, errors));
47dfff3b
MA
822}
823
46ba1e59
MA
824/*
825 * Roll back this filesystem or volume to its most recent snapshot.
826 * If snapnamebuf is not NULL, it will be filled in with the name
827 * of the most recent snapshot.
828 *
829 * Return 0 on success or an errno on failure.
830 */
831int
832lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
833{
834 nvlist_t *args;
835 nvlist_t *result;
836 int err;
837
838 args = fnvlist_alloc();
839 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
840 nvlist_free(args);
841 if (err == 0 && snapnamebuf != NULL) {
842 const char *snapname = fnvlist_lookup_string(result, "target");
843 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
844 }
bb7ffdaf
GM
845 nvlist_free(result);
846
46ba1e59
MA
847 return (err);
848}
da536844
MA
849
850/*
851 * Creates bookmarks.
852 *
853 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
854 * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and
855 * snapshots must be in the same pool.
856 *
857 * The returned results nvlist will have an entry for each bookmark that failed.
858 * The value will be the (int32) error code.
859 *
860 * The return value will be 0 if all bookmarks were created, otherwise it will
861 * be the errno of a (undetermined) bookmarks that failed.
862 */
863int
864lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
865{
866 nvpair_t *elem;
867 int error;
eca7b760 868 char pool[ZFS_MAX_DATASET_NAME_LEN];
da536844
MA
869
870 /* determine the pool name */
871 elem = nvlist_next_nvpair(bookmarks, NULL);
872 if (elem == NULL)
873 return (0);
874 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
875 pool[strcspn(pool, "/#")] = '\0';
876
877 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
878
879 return (error);
880}
881
882/*
883 * Retrieve bookmarks.
884 *
885 * Retrieve the list of bookmarks for the given file system. The props
886 * parameter is an nvlist of property names (with no values) that will be
887 * returned for each bookmark.
888 *
889 * The following are valid properties on bookmarks, all of which are numbers
890 * (represented as uint64 in the nvlist)
891 *
892 * "guid" - globally unique identifier of the snapshot it refers to
893 * "createtxg" - txg when the snapshot it refers to was created
894 * "creation" - timestamp when the snapshot it refers to was created
895 *
896 * The format of the returned nvlist as follows:
897 * <short name of bookmark> -> {
898 * <name of property> -> {
899 * "value" -> uint64
900 * }
901 * }
902 */
903int
904lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
905{
906 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
907}
908
909/*
910 * Destroys bookmarks.
911 *
912 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
913 * They must all be in the same pool. Bookmarks are specified as
914 * <fs>#<bmark>.
915 *
916 * Bookmarks that do not exist will be silently ignored.
917 *
918 * The return value will be 0 if all bookmarks that existed were destroyed.
919 *
920 * Otherwise the return value will be the errno of a (undetermined) bookmark
921 * that failed, no bookmarks will be destroyed, and the errlist will have an
922 * entry for each bookmarks that failed. The value in the errlist will be
923 * the (int32) error code.
924 */
925int
926lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
927{
928 nvpair_t *elem;
929 int error;
eca7b760 930 char pool[ZFS_MAX_DATASET_NAME_LEN];
da536844
MA
931
932 /* determine the pool name */
933 elem = nvlist_next_nvpair(bmarks, NULL);
934 if (elem == NULL)
935 return (0);
936 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
937 pool[strcspn(pool, "/#")] = '\0';
938
939 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
940
941 return (error);
942}