]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libzfs_core/libzfs_core.c
OpenZFS 9102 - zfs should be able to initialize storage devices
[mirror_zfs.git] / lib / libzfs_core / libzfs_core.c
CommitLineData
6f1ffb06
MA
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
b83a0e2d 23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
95fd54a1 24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
bec1067d 25 * Copyright (c) 2017 Datto Inc.
d12f91fd 26 * Copyright 2017 RackTop Systems.
d3f2cd7e 27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
6f1ffb06
MA
28 */
29
30/*
31 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
32 * It has the following characteristics:
33 *
34 * - Thread Safe. libzfs_core is accessible concurrently from multiple
35 * threads. This is accomplished primarily by avoiding global data
36 * (e.g. caching). Since it's thread-safe, there is no reason for a
37 * process to have multiple libzfs "instances". Therefore, we store
38 * our few pieces of data (e.g. the file descriptor) in global
39 * variables. The fd is reference-counted so that the libzfs_core
40 * library can be "initialized" multiple times (e.g. by different
41 * consumers within the same process).
42 *
43 * - Committed Interface. The libzfs_core interface will be committed,
44 * therefore consumers can compile against it and be confident that
45 * their code will continue to work on future releases of this code.
46 * Currently, the interface is Evolving (not Committed), but we intend
47 * to commit to it once it is more complete and we determine that it
48 * meets the needs of all consumers.
49 *
b8fce77b 50 * - Programmatic Error Handling. libzfs_core communicates errors with
6f1ffb06
MA
51 * defined error numbers, and doesn't print anything to stdout/stderr.
52 *
53 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
54 * to/from the kernel ioctls. There is generally a 1:1 correspondence
55 * between libzfs_core functions and ioctls to /dev/zfs.
56 *
57 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
58 * with kernel ioctls, and kernel ioctls are generally atomic, each
59 * libzfs_core function is atomic. For example, creating multiple
60 * snapshots with a single call to lzc_snapshot() is atomic -- it
61 * can't fail with only some of the requested snapshots created, even
62 * in the event of power loss or system crash.
63 *
64 * - Continued libzfs Support. Some higher-level operations (e.g.
65 * support for "zfs send -R") are too complicated to fit the scope of
66 * libzfs_core. This functionality will continue to live in libzfs.
67 * Where appropriate, libzfs will use the underlying atomic operations
68 * of libzfs_core. For example, libzfs may implement "zfs send -R |
69 * zfs receive" by using individual "send one snapshot", rename,
70 * destroy, and "receive one snapshot" operations in libzfs_core.
71 * /sbin/zfs and /sbin/zpool will link with both libzfs and
72 * libzfs_core. Other consumers should aim to use only libzfs_core,
73 * since that will be the supported, stable interface going forwards.
74 */
75
76#include <libzfs_core.h>
77#include <ctype.h>
78#include <unistd.h>
79#include <stdlib.h>
80#include <string.h>
b83a0e2d
DB
81#ifdef ZFS_DEBUG
82#include <stdio.h>
83#endif
6f1ffb06
MA
84#include <errno.h>
85#include <fcntl.h>
86#include <pthread.h>
87#include <sys/nvpair.h>
88#include <sys/param.h>
89#include <sys/types.h>
90#include <sys/stat.h>
91#include <sys/zfs_ioctl.h>
92
e2454897 93static int g_fd = -1;
6f1ffb06
MA
94static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
95static int g_refcount;
96
b83a0e2d
DB
97#ifdef ZFS_DEBUG
98static zfs_ioc_t fail_ioc_cmd;
99static zfs_errno_t fail_ioc_err;
100
101static void
102libzfs_core_debug_ioc(void)
103{
104 /*
105 * To test running newer user space binaries with kernel's
106 * that don't yet support an ioctl or a new ioctl arg we
107 * provide an override to intentionally fail an ioctl.
108 *
109 * USAGE:
110 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
111 *
112 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
113 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
114 *
115 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
116 * cannot checkpoint 'tank': the loaded zfs module does not support
117 * this operation. A reboot may be required to enable this operation.
118 */
119 if (fail_ioc_cmd == 0) {
120 char *ioc_test = getenv("ZFS_IOC_TEST");
121 unsigned int ioc_num = 0, ioc_err = 0;
122
123 if (ioc_test != NULL &&
124 sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 &&
125 ioc_num < ZFS_IOC_LAST) {
126 fail_ioc_cmd = ioc_num;
127 fail_ioc_err = ioc_err;
128 }
129 }
130}
131#endif
132
6f1ffb06
MA
133int
134libzfs_core_init(void)
135{
136 (void) pthread_mutex_lock(&g_lock);
137 if (g_refcount == 0) {
138 g_fd = open("/dev/zfs", O_RDWR);
139 if (g_fd < 0) {
140 (void) pthread_mutex_unlock(&g_lock);
141 return (errno);
142 }
143 }
144 g_refcount++;
b83a0e2d
DB
145
146#ifdef ZFS_DEBUG
147 libzfs_core_debug_ioc();
148#endif
6f1ffb06
MA
149 (void) pthread_mutex_unlock(&g_lock);
150 return (0);
151}
152
153void
154libzfs_core_fini(void)
155{
156 (void) pthread_mutex_lock(&g_lock);
157 ASSERT3S(g_refcount, >, 0);
e2454897
GM
158
159 if (g_refcount > 0)
160 g_refcount--;
161
162 if (g_refcount == 0 && g_fd != -1) {
6f1ffb06 163 (void) close(g_fd);
e2454897
GM
164 g_fd = -1;
165 }
6f1ffb06
MA
166 (void) pthread_mutex_unlock(&g_lock);
167}
168
169static int
170lzc_ioctl(zfs_ioc_t ioc, const char *name,
171 nvlist_t *source, nvlist_t **resultp)
172{
13fe0198 173 zfs_cmd_t zc = {"\0"};
6f1ffb06 174 int error = 0;
bec1067d
AP
175 char *packed = NULL;
176 size_t size = 0;
6f1ffb06
MA
177
178 ASSERT3S(g_refcount, >, 0);
e2454897 179 VERIFY3S(g_fd, !=, -1);
6f1ffb06 180
b83a0e2d
DB
181#ifdef ZFS_DEBUG
182 if (ioc == fail_ioc_cmd)
183 return (fail_ioc_err);
184#endif
185
bec1067d
AP
186 if (name != NULL)
187 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
6f1ffb06 188
bec1067d
AP
189 if (source != NULL) {
190 packed = fnvlist_pack(source, &size);
191 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
192 zc.zc_nvlist_src_size = size;
193 }
6f1ffb06
MA
194
195 if (resultp != NULL) {
13fe0198 196 *resultp = NULL;
234c91c5
CW
197 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
198 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
199 ZCP_ARG_MEMLIMIT);
200 } else {
201 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
202 }
6f1ffb06
MA
203 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
204 malloc(zc.zc_nvlist_dst_size);
205 if (zc.zc_nvlist_dst == (uint64_t)0) {
206 error = ENOMEM;
207 goto out;
208 }
209 }
210
211 while (ioctl(g_fd, ioc, &zc) != 0) {
d99a0153
CW
212 /*
213 * If ioctl exited with ENOMEM, we retry the ioctl after
214 * increasing the size of the destination nvlist.
215 *
234c91c5 216 * Channel programs that exit with ENOMEM ran over the
d99a0153
CW
217 * lua memory sandbox; they should not be retried.
218 */
219 if (errno == ENOMEM && resultp != NULL &&
220 ioc != ZFS_IOC_CHANNEL_PROGRAM) {
6f1ffb06
MA
221 free((void *)(uintptr_t)zc.zc_nvlist_dst);
222 zc.zc_nvlist_dst_size *= 2;
223 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
224 malloc(zc.zc_nvlist_dst_size);
225 if (zc.zc_nvlist_dst == (uint64_t)0) {
226 error = ENOMEM;
227 goto out;
228 }
229 } else {
230 error = errno;
231 break;
232 }
233 }
234 if (zc.zc_nvlist_dst_filled) {
235 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
236 zc.zc_nvlist_dst_size);
6f1ffb06
MA
237 }
238
239out:
b5256303
TC
240 if (packed != NULL)
241 fnvlist_pack_free(packed, size);
6f1ffb06
MA
242 free((void *)(uintptr_t)zc.zc_nvlist_dst);
243 return (error);
244}
245
246int
b5256303
TC
247lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
248 uint8_t *wkeydata, uint_t wkeylen)
6f1ffb06
MA
249{
250 int error;
b5256303 251 nvlist_t *hidden_args = NULL;
6f1ffb06 252 nvlist_t *args = fnvlist_alloc();
b5256303 253
e67a7ffb 254 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
6f1ffb06
MA
255 if (props != NULL)
256 fnvlist_add_nvlist(args, "props", props);
b5256303
TC
257
258 if (wkeydata != NULL) {
259 hidden_args = fnvlist_alloc();
260 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
261 wkeylen);
262 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
263 }
264
6f1ffb06 265 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
b5256303 266 nvlist_free(hidden_args);
6f1ffb06
MA
267 nvlist_free(args);
268 return (error);
269}
270
271int
b5256303 272lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
6f1ffb06
MA
273{
274 int error;
b5256303 275 nvlist_t *hidden_args = NULL;
6f1ffb06 276 nvlist_t *args = fnvlist_alloc();
b5256303 277
6f1ffb06
MA
278 fnvlist_add_string(args, "origin", origin);
279 if (props != NULL)
280 fnvlist_add_nvlist(args, "props", props);
281 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
b5256303 282 nvlist_free(hidden_args);
6f1ffb06
MA
283 nvlist_free(args);
284 return (error);
285}
286
d12f91fd
GDN
287int
288lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
289{
290 /*
291 * The promote ioctl is still legacy, so we need to construct our
292 * own zfs_cmd_t rather than using lzc_ioctl().
293 */
294 zfs_cmd_t zc = { "\0" };
295
296 ASSERT3S(g_refcount, >, 0);
297 VERIFY3S(g_fd, !=, -1);
298
299 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
300 if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
301 int error = errno;
302 if (error == EEXIST && snapnamebuf != NULL)
303 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
304 return (error);
305 }
306 return (0);
307}
308
a1d477c2
MA
309int
310lzc_remap(const char *fsname)
311{
312 int error;
313 nvlist_t *args = fnvlist_alloc();
314 error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL);
315 nvlist_free(args);
316 return (error);
317}
318
dc1c630b
AG
319int
320lzc_rename(const char *source, const char *target)
321{
322 zfs_cmd_t zc = { "\0" };
323 int error;
324 ASSERT3S(g_refcount, >, 0);
325 VERIFY3S(g_fd, !=, -1);
326 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
327 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
328 error = ioctl(g_fd, ZFS_IOC_RENAME, &zc);
329 if (error != 0)
330 error = errno;
331 return (error);
332}
333int
334lzc_destroy(const char *fsname)
335{
336 int error;
337 nvlist_t *args = fnvlist_alloc();
338 error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
339 nvlist_free(args);
340 return (error);
341}
342
6f1ffb06
MA
343/*
344 * Creates snapshots.
345 *
346 * The keys in the snaps nvlist are the snapshots to be created.
347 * They must all be in the same pool.
348 *
349 * The props nvlist is properties to set. Currently only user properties
350 * are supported. { user:prop_name -> string value }
351 *
352 * The returned results nvlist will have an entry for each snapshot that failed.
353 * The value will be the (int32) error code.
354 *
355 * The return value will be 0 if all snapshots were created, otherwise it will
13fe0198 356 * be the errno of a (unspecified) snapshot that failed.
6f1ffb06
MA
357 */
358int
359lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
360{
361 nvpair_t *elem;
362 nvlist_t *args;
363 int error;
eca7b760 364 char pool[ZFS_MAX_DATASET_NAME_LEN];
6f1ffb06
MA
365
366 *errlist = NULL;
367
368 /* determine the pool name */
369 elem = nvlist_next_nvpair(snaps, NULL);
370 if (elem == NULL)
371 return (0);
372 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
373 pool[strcspn(pool, "/@")] = '\0';
374
375 args = fnvlist_alloc();
376 fnvlist_add_nvlist(args, "snaps", snaps);
377 if (props != NULL)
378 fnvlist_add_nvlist(args, "props", props);
379
380 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
381 nvlist_free(args);
382
383 return (error);
384}
385
386/*
387 * Destroys snapshots.
388 *
389 * The keys in the snaps nvlist are the snapshots to be destroyed.
390 * They must all be in the same pool.
391 *
392 * Snapshots that do not exist will be silently ignored.
393 *
394 * If 'defer' is not set, and a snapshot has user holds or clones, the
395 * destroy operation will fail and none of the snapshots will be
396 * destroyed.
397 *
398 * If 'defer' is set, and a snapshot has user holds or clones, it will be
399 * marked for deferred destruction, and will be destroyed when the last hold
400 * or clone is removed/destroyed.
401 *
402 * The return value will be 0 if all snapshots were destroyed (or marked for
1a077756 403 * later destruction if 'defer' is set) or didn't exist to begin with.
6f1ffb06 404 *
13fe0198 405 * Otherwise the return value will be the errno of a (unspecified) snapshot
6f1ffb06
MA
406 * that failed, no snapshots will be destroyed, and the errlist will have an
407 * entry for each snapshot that failed. The value in the errlist will be
408 * the (int32) error code.
409 */
410int
411lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
412{
413 nvpair_t *elem;
414 nvlist_t *args;
415 int error;
eca7b760 416 char pool[ZFS_MAX_DATASET_NAME_LEN];
6f1ffb06
MA
417
418 /* determine the pool name */
419 elem = nvlist_next_nvpair(snaps, NULL);
420 if (elem == NULL)
421 return (0);
422 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
423 pool[strcspn(pool, "/@")] = '\0';
424
425 args = fnvlist_alloc();
426 fnvlist_add_nvlist(args, "snaps", snaps);
427 if (defer)
428 fnvlist_add_boolean(args, "defer");
429
430 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
431 nvlist_free(args);
432
433 return (error);
6f1ffb06
MA
434}
435
436int
437lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
438 uint64_t *usedp)
439{
440 nvlist_t *args;
441 nvlist_t *result;
442 int err;
eca7b760 443 char fs[ZFS_MAX_DATASET_NAME_LEN];
6f1ffb06
MA
444 char *atp;
445
446 /* determine the fs name */
447 (void) strlcpy(fs, firstsnap, sizeof (fs));
448 atp = strchr(fs, '@');
449 if (atp == NULL)
450 return (EINVAL);
451 *atp = '\0';
452
453 args = fnvlist_alloc();
454 fnvlist_add_string(args, "firstsnap", firstsnap);
455
456 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
457 nvlist_free(args);
458 if (err == 0)
459 *usedp = fnvlist_lookup_uint64(result, "used");
460 fnvlist_free(result);
461
462 return (err);
463}
464
465boolean_t
466lzc_exists(const char *dataset)
467{
468 /*
469 * The objset_stats ioctl is still legacy, so we need to construct our
d12f91fd 470 * own zfs_cmd_t rather than using lzc_ioctl().
6f1ffb06 471 */
13fe0198 472 zfs_cmd_t zc = {"\0"};
6f1ffb06 473
e2454897
GM
474 ASSERT3S(g_refcount, >, 0);
475 VERIFY3S(g_fd, !=, -1);
476
6f1ffb06
MA
477 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
478 return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
479}
480
bec1067d
AP
481/*
482 * outnvl is unused.
483 * It was added to preserve the function signature in case it is
484 * needed in the future.
485 */
486/*ARGSUSED*/
487int
488lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
489{
490 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
491}
492
13fe0198
MA
493/*
494 * Create "user holds" on snapshots. If there is a hold on a snapshot,
495 * the snapshot can not be destroyed. (However, it can be marked for deletion
496 * by lzc_destroy_snaps(defer=B_TRUE).)
497 *
498 * The keys in the nvlist are snapshot names.
499 * The snapshots must all be in the same pool.
500 * The value is the name of the hold (string type).
501 *
502 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
503 * In this case, when the cleanup_fd is closed (including on process
504 * termination), the holds will be released. If the system is shut down
505 * uncleanly, the holds will be released when the pool is next opened
506 * or imported.
507 *
95fd54a1 508 * Holds for snapshots which don't exist will be skipped and have an entry
1a077756 509 * added to errlist, but will not cause an overall failure.
95fd54a1 510 *
1a077756 511 * The return value will be 0 if all holds, for snapshots that existed,
b8fce77b 512 * were successfully created.
95fd54a1
SH
513 *
514 * Otherwise the return value will be the errno of a (unspecified) hold that
515 * failed and no holds will be created.
516 *
517 * In all cases the errlist will have an entry for each hold that failed
518 * (name = snapshot), with its value being the error code (int32).
13fe0198
MA
519 */
520int
521lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
522{
eca7b760 523 char pool[ZFS_MAX_DATASET_NAME_LEN];
13fe0198
MA
524 nvlist_t *args;
525 nvpair_t *elem;
526 int error;
527
528 /* determine the pool name */
529 elem = nvlist_next_nvpair(holds, NULL);
530 if (elem == NULL)
531 return (0);
532 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
533 pool[strcspn(pool, "/@")] = '\0';
534
535 args = fnvlist_alloc();
536 fnvlist_add_nvlist(args, "holds", holds);
537 if (cleanup_fd != -1)
538 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
539
540 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
541 nvlist_free(args);
542 return (error);
543}
544
545/*
546 * Release "user holds" on snapshots. If the snapshot has been marked for
547 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
548 * any clones, and all the user holds are removed, then the snapshot will be
549 * destroyed.
550 *
551 * The keys in the nvlist are snapshot names.
552 * The snapshots must all be in the same pool.
d5884c34 553 * The value is an nvlist whose keys are the holds to remove.
13fe0198 554 *
95fd54a1 555 * Holds which failed to release because they didn't exist will have an entry
1a077756 556 * added to errlist, but will not cause an overall failure.
95fd54a1
SH
557 *
558 * The return value will be 0 if the nvl holds was empty or all holds that
1a077756 559 * existed, were successfully removed.
95fd54a1
SH
560 *
561 * Otherwise the return value will be the errno of a (unspecified) hold that
562 * failed to release and no holds will be released.
563 *
564 * In all cases the errlist will have an entry for each hold that failed to
565 * release.
13fe0198
MA
566 */
567int
568lzc_release(nvlist_t *holds, nvlist_t **errlist)
569{
eca7b760 570 char pool[ZFS_MAX_DATASET_NAME_LEN];
13fe0198
MA
571 nvpair_t *elem;
572
573 /* determine the pool name */
574 elem = nvlist_next_nvpair(holds, NULL);
575 if (elem == NULL)
576 return (0);
577 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
578 pool[strcspn(pool, "/@")] = '\0';
579
580 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
581}
582
583/*
584 * Retrieve list of user holds on the specified snapshot.
585 *
d5884c34 586 * On success, *holdsp will be set to an nvlist which the caller must free.
13fe0198
MA
587 * The keys are the names of the holds, and the value is the creation time
588 * of the hold (uint64) in seconds since the epoch.
589 */
590int
591lzc_get_holds(const char *snapname, nvlist_t **holdsp)
592{
bec1067d 593 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
13fe0198
MA
594}
595
6f1ffb06 596/*
9b67f605
MA
597 * Generate a zfs send stream for the specified snapshot and write it to
598 * the specified file descriptor.
da536844
MA
599 *
600 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
601 *
602 * If "from" is NULL, a full (non-incremental) stream will be sent.
603 * If "from" is non-NULL, it must be the full name of a snapshot or
604 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
605 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
606 * bookmark must represent an earlier point in the history of "snapname").
607 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
608 * or it can be the origin of "snapname"'s filesystem, or an earlier
609 * snapshot in the origin, etc.
610 *
611 * "fd" is the file descriptor to write the send stream to.
9b67f605 612 *
f1512ee6
MA
613 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
614 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
615 * records with drr_blksz > 128K.
616 *
9b67f605
MA
617 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
618 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
619 * which the receiving system must support (as indicated by support
620 * for the "embedded_data" feature).
85ce3f4f 621 *
622 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
623 * compressed WRITE records for blocks which are compressed on disk and in
624 * memory. If the lz4_compress feature is active on the sending system, then
625 * the receiving system must have that feature enabled as well.
626 *
627 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
628 * datasets, by sending data exactly as it exists on disk. This allows backups
629 * to be taken even if encryption keys are not currently loaded.
6f1ffb06
MA
630 */
631int
9b67f605
MA
632lzc_send(const char *snapname, const char *from, int fd,
633 enum lzc_send_flags flags)
47dfff3b
MA
634{
635 return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
636}
637
638int
639lzc_send_resume(const char *snapname, const char *from, int fd,
640 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
6f1ffb06
MA
641{
642 nvlist_t *args;
643 int err;
644
645 args = fnvlist_alloc();
646 fnvlist_add_int32(args, "fd", fd);
da536844
MA
647 if (from != NULL)
648 fnvlist_add_string(args, "fromsnap", from);
f1512ee6
MA
649 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
650 fnvlist_add_boolean(args, "largeblockok");
9b67f605
MA
651 if (flags & LZC_SEND_FLAG_EMBED_DATA)
652 fnvlist_add_boolean(args, "embedok");
a7004725
DK
653 if (flags & LZC_SEND_FLAG_COMPRESS)
654 fnvlist_add_boolean(args, "compressok");
b5256303
TC
655 if (flags & LZC_SEND_FLAG_RAW)
656 fnvlist_add_boolean(args, "rawok");
47dfff3b
MA
657 if (resumeobj != 0 || resumeoff != 0) {
658 fnvlist_add_uint64(args, "resume_object", resumeobj);
659 fnvlist_add_uint64(args, "resume_offset", resumeoff);
660 }
6f1ffb06
MA
661 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
662 nvlist_free(args);
663 return (err);
664}
665
666/*
5dc8b736
MG
667 * "from" can be NULL, a snapshot, or a bookmark.
668 *
669 * If from is NULL, a full (non-incremental) stream will be estimated. This
670 * is calculated very efficiently.
671 *
672 * If from is a snapshot, lzc_send_space uses the deadlists attached to
673 * each snapshot to efficiently estimate the stream size.
674 *
675 * If from is a bookmark, the indirect blocks in the destination snapshot
676 * are traversed, looking for blocks with a birth time since the creation TXG of
677 * the snapshot this bookmark was created from. This will result in
678 * significantly more I/O and be less efficient than a send space estimation on
679 * an equivalent snapshot.
6f1ffb06
MA
680 */
681int
2aa34383
DK
682lzc_send_space(const char *snapname, const char *from,
683 enum lzc_send_flags flags, uint64_t *spacep)
6f1ffb06
MA
684{
685 nvlist_t *args;
686 nvlist_t *result;
687 int err;
688
689 args = fnvlist_alloc();
5dc8b736
MG
690 if (from != NULL)
691 fnvlist_add_string(args, "from", from);
2aa34383
DK
692 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
693 fnvlist_add_boolean(args, "largeblockok");
694 if (flags & LZC_SEND_FLAG_EMBED_DATA)
695 fnvlist_add_boolean(args, "embedok");
696 if (flags & LZC_SEND_FLAG_COMPRESS)
697 fnvlist_add_boolean(args, "compressok");
cf7684bc 698 if (flags & LZC_SEND_FLAG_RAW)
699 fnvlist_add_boolean(args, "rawok");
6f1ffb06
MA
700 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
701 nvlist_free(args);
702 if (err == 0)
703 *spacep = fnvlist_lookup_uint64(result, "space");
704 nvlist_free(result);
705 return (err);
706}
707
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf'.
 *
 * Short reads are continued until the full length has arrived, and reads
 * interrupted by a signal (EINTR) are retried.  Returns 0 once 'ilen'
 * bytes have been read, or EIO on a read error, on end-of-file before
 * 'ilen' bytes were read, or if 'ilen' is negative.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	if (ilen < 0)
		return (EIO);

	while (len > 0) {
		ssize_t rv = read(fd, cp, (size_t)len);

		if (rv < 0) {
			/* A signal arrived before any data; just try again. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		/* End-of-file with bytes still outstanding. */
		if (rv == 0)
			return (EIO);

		/* Only advance on a successful read, never by -1. */
		cp += rv;
		len -= (int)rv;
	}

	return (0);
}
726
43e52edd 727/*
b5256303
TC
728 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
729 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
730 * stream options but is currently only used for resumable streams. This way
731 * updated user space utilities will interoperate with older kernel modules.
43e52edd
BB
732 *
733 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
734 */
47dfff3b 735static int
a3eeab2d 736recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
d9c460a0
TC
737 uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
738 boolean_t resumable, boolean_t raw, int input_fd,
739 const dmu_replay_record_t *begin_record, int cleanup_fd,
43e52edd
BB
740 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
741 nvlist_t **errors)
6f1ffb06 742{
43e52edd
BB
743 dmu_replay_record_t drr;
744 char fsname[MAXPATHLEN];
6f1ffb06 745 char *atp;
6f1ffb06
MA
746 int error;
747
e2454897
GM
748 ASSERT3S(g_refcount, >, 0);
749 VERIFY3S(g_fd, !=, -1);
750
43e52edd
BB
751 /* Set 'fsname' to the name of containing filesystem */
752 (void) strlcpy(fsname, snapname, sizeof (fsname));
753 atp = strchr(fsname, '@');
6f1ffb06
MA
754 if (atp == NULL)
755 return (EINVAL);
756 *atp = '\0';
757
43e52edd
BB
758 /* If the fs does not exist, try its parent. */
759 if (!lzc_exists(fsname)) {
760 char *slashp = strrchr(fsname, '/');
6f1ffb06
MA
761 if (slashp == NULL)
762 return (ENOENT);
763 *slashp = '\0';
43e52edd 764 }
6f1ffb06 765
43e52edd
BB
766 /*
767 * The begin_record is normally a non-byteswapped BEGIN record.
768 * For resumable streams it may be set to any non-byteswapped
769 * dmu_replay_record_t.
770 */
771 if (begin_record == NULL) {
772 error = recv_read(input_fd, &drr, sizeof (drr));
773 if (error != 0)
774 return (error);
775 } else {
776 drr = *begin_record;
6f1ffb06
MA
777 }
778
d9c460a0
TC
779 /*
780 * Raw receives, resumable receives, and receives that include a
781 * wrapping key all use the new interface.
782 */
783 if (resumable || raw || wkeydata != NULL) {
43e52edd
BB
784 nvlist_t *outnvl = NULL;
785 nvlist_t *innvl = fnvlist_alloc();
6f1ffb06 786
43e52edd 787 fnvlist_add_string(innvl, "snapname", snapname);
6f1ffb06 788
a3eeab2d 789 if (recvdprops != NULL)
790 fnvlist_add_nvlist(innvl, "props", recvdprops);
791
792 if (localprops != NULL)
793 fnvlist_add_nvlist(innvl, "localprops", localprops);
6f1ffb06 794
d9c460a0
TC
795 if (wkeydata != NULL) {
796 /*
797 * wkeydata must be placed in the special
798 * ZPOOL_HIDDEN_ARGS nvlist so that it
799 * will not be printed to the zpool history.
800 */
801 nvlist_t *hidden_args = fnvlist_alloc();
802 fnvlist_add_uint8_array(hidden_args, "wkeydata",
803 wkeydata, wkeylen);
804 fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
805 hidden_args);
806 nvlist_free(hidden_args);
807 }
808
43e52edd
BB
809 if (origin != NULL && strlen(origin))
810 fnvlist_add_string(innvl, "origin", origin);
811
812 fnvlist_add_byte_array(innvl, "begin_record",
02730c33 813 (uchar_t *)&drr, sizeof (drr));
43e52edd
BB
814
815 fnvlist_add_int32(innvl, "input_fd", input_fd);
816
817 if (force)
818 fnvlist_add_boolean(innvl, "force");
819
820 if (resumable)
821 fnvlist_add_boolean(innvl, "resumable");
822
823 if (cleanup_fd >= 0)
824 fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);
825
826 if (action_handle != NULL)
827 fnvlist_add_uint64(innvl, "action_handle",
828 *action_handle);
829
830 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
831
832 if (error == 0 && read_bytes != NULL)
833 error = nvlist_lookup_uint64(outnvl, "read_bytes",
834 read_bytes);
835
836 if (error == 0 && errflags != NULL)
837 error = nvlist_lookup_uint64(outnvl, "error_flags",
838 errflags);
839
840 if (error == 0 && action_handle != NULL)
841 error = nvlist_lookup_uint64(outnvl, "action_handle",
842 action_handle);
843
844 if (error == 0 && errors != NULL) {
845 nvlist_t *nvl;
846 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
847 if (error == 0)
848 *errors = fnvlist_dup(nvl);
849 }
850
851 fnvlist_free(innvl);
852 fnvlist_free(outnvl);
fd41e935 853 } else {
43e52edd
BB
854 zfs_cmd_t zc = {"\0"};
855 char *packed = NULL;
856 size_t size;
6f1ffb06 857
43e52edd 858 ASSERT3S(g_refcount, >, 0);
6f1ffb06 859
43e52edd
BB
860 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_value));
861 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
6f1ffb06 862
a3eeab2d 863 if (recvdprops != NULL) {
864 packed = fnvlist_pack(recvdprops, &size);
43e52edd
BB
865 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
866 zc.zc_nvlist_src_size = size;
867 }
47dfff3b 868
a3eeab2d 869 if (localprops != NULL) {
870 packed = fnvlist_pack(localprops, &size);
871 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
872 zc.zc_nvlist_conf_size = size;
873 }
874
43e52edd
BB
875 if (origin != NULL)
876 (void) strlcpy(zc.zc_string, origin,
877 sizeof (zc.zc_string));
6f1ffb06 878
43e52edd
BB
879 ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
880 zc.zc_begin_record = drr.drr_u.drr_begin;
881 zc.zc_guid = force;
882 zc.zc_cookie = input_fd;
883 zc.zc_cleanup_fd = -1;
884 zc.zc_action_handle = 0;
885
886 if (cleanup_fd >= 0)
887 zc.zc_cleanup_fd = cleanup_fd;
888
889 if (action_handle != NULL)
890 zc.zc_action_handle = *action_handle;
891
892 zc.zc_nvlist_dst_size = 128 * 1024;
893 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
894 malloc(zc.zc_nvlist_dst_size);
895
896 error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
897 if (error != 0) {
898 error = errno;
899 } else {
900 if (read_bytes != NULL)
901 *read_bytes = zc.zc_cookie;
902
903 if (errflags != NULL)
904 *errflags = zc.zc_obj;
905
906 if (action_handle != NULL)
907 *action_handle = zc.zc_action_handle;
908
909 if (errors != NULL)
910 VERIFY0(nvlist_unpack(
911 (void *)(uintptr_t)zc.zc_nvlist_dst,
912 zc.zc_nvlist_dst_size, errors, KM_SLEEP));
913 }
914
915 if (packed != NULL)
916 fnvlist_pack_free(packed, size);
917 free((void *)(uintptr_t)zc.zc_nvlist_dst);
918 }
6f1ffb06 919
6f1ffb06
MA
920 return (error);
921}
46ba1e59 922
47dfff3b
MA
923/*
924 * The simplest receive case: receive from the specified fd, creating the
925 * specified snapshot. Apply the specified properties as "received" properties
926 * (which can be overridden by locally-set properties). If the stream is a
927 * clone, its origin snapshot must be specified by 'origin'. The 'force'
928 * flag will cause the target filesystem to be rolled back or destroyed if
929 * necessary to receive.
930 *
931 * Return 0 on success or an errno on failure.
932 *
933 * Note: this interface does not work on dedup'd streams
934 * (those with DMU_BACKUP_FEATURE_DEDUP).
935 */
936int
937lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
b5256303 938 boolean_t force, boolean_t raw, int fd)
47dfff3b 939{
d9c460a0
TC
940 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
941 B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
47dfff3b
MA
942}
943
944/*
945 * Like lzc_receive, but if the receive fails due to premature stream
946 * termination, the intermediate state will be preserved on disk. In this
947 * case, ECKSUM will be returned. The receive may subsequently be resumed
948 * with a resuming send stream generated by lzc_send_resume().
949 */
950int
951lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
b5256303 952 boolean_t force, boolean_t raw, int fd)
47dfff3b 953{
d9c460a0
TC
954 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
955 B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
fd41e935
BB
956}
957
958/*
959 * Like lzc_receive, but allows the caller to read the begin record and then to
960 * pass it in. That could be useful if the caller wants to derive, for example,
961 * the snapname or the origin parameters based on the information contained in
962 * the begin record.
963 * The begin record must be in its original form as read from the stream,
964 * in other words, it should not be byteswapped.
965 *
966 * The 'resumable' parameter allows the caller to obtain the same behavior
967 * as with lzc_receive_resumable.
968 */
969int
970lzc_receive_with_header(const char *snapname, nvlist_t *props,
b5256303
TC
971 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
972 int fd, const dmu_replay_record_t *begin_record)
fd41e935
BB
973{
974 if (begin_record == NULL)
975 return (EINVAL);
b5256303 976
d9c460a0
TC
977 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
978 resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, NULL));
43e52edd
BB
979}
980
981/*
982 * Like lzc_receive, but allows the caller to pass all supported arguments
983 * and retrieve all values returned. The only additional input parameter
984 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
985 *
986 * The following parameters all provide return values. Several may be set
987 * in the failure case and will contain additional information.
988 *
989 * The 'read_bytes' value will be set to the total number of bytes read.
990 *
991 * The 'errflags' value will contain zprop_errflags_t flags which are
992 * used to describe any failures.
993 *
994 * The 'action_handle' is used to pass the handle for this guid/ds mapping.
995 * It should be set to zero on first call and will contain an updated handle
996 * on success, it should be passed in subsequent calls.
997 *
998 * The 'errors' nvlist contains an entry for each unapplied received
999 * property. Callers are responsible for freeing this nvlist.
1000 */
1001int lzc_receive_one(const char *snapname, nvlist_t *props,
b5256303
TC
1002 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1003 int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
43e52edd
BB
1004 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1005 nvlist_t **errors)
1006{
d9c460a0
TC
1007 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1008 resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes,
1009 errflags, action_handle, errors));
a3eeab2d 1010}
1011
1012/*
1013 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1014 * argument.
1015 *
1016 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1017 * exclude ('zfs receive -x') properties. Callers are responsible for freeing
1018 * this nvlist.
1019 */
1020int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
d9c460a0
TC
1021 nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
1022 boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
b5256303
TC
1023 const dmu_replay_record_t *begin_record, int cleanup_fd,
1024 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1025 nvlist_t **errors)
a3eeab2d 1026{
d9c460a0
TC
1027 return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
1028 force, resumable, raw, input_fd, begin_record, cleanup_fd,
1029 read_bytes, errflags, action_handle, errors));
47dfff3b
MA
1030}
1031
46ba1e59
MA
1032/*
1033 * Roll back this filesystem or volume to its most recent snapshot.
1034 * If snapnamebuf is not NULL, it will be filled in with the name
1035 * of the most recent snapshot.
8ca78ab0
AG
1036 * Note that the latest snapshot may change if a new one is concurrently
1037 * created or the current one is destroyed. lzc_rollback_to can be used
1038 * to roll back to a specific latest snapshot.
46ba1e59
MA
1039 *
1040 * Return 0 on success or an errno on failure.
1041 */
1042int
1043lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
1044{
1045 nvlist_t *args;
1046 nvlist_t *result;
1047 int err;
1048
1049 args = fnvlist_alloc();
1050 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1051 nvlist_free(args);
1052 if (err == 0 && snapnamebuf != NULL) {
1053 const char *snapname = fnvlist_lookup_string(result, "target");
1054 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
1055 }
bb7ffdaf
GM
1056 nvlist_free(result);
1057
46ba1e59
MA
1058 return (err);
1059}
da536844 1060
8ca78ab0
AG
1061/*
1062 * Roll back this filesystem or volume to the specified snapshot,
1063 * if possible.
1064 *
1065 * Return 0 on success or an errno on failure.
1066 */
1067int
1068lzc_rollback_to(const char *fsname, const char *snapname)
1069{
1070 nvlist_t *args;
1071 nvlist_t *result;
1072 int err;
1073
1074 args = fnvlist_alloc();
1075 fnvlist_add_string(args, "target", snapname);
1076 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1077 nvlist_free(args);
1078 nvlist_free(result);
1079 return (err);
1080}
1081
da536844
MA
1082/*
1083 * Creates bookmarks.
1084 *
1085 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
1086 * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and
1087 * snapshots must be in the same pool.
1088 *
1089 * The returned results nvlist will have an entry for each bookmark that failed.
1090 * The value will be the (int32) error code.
1091 *
1092 * The return value will be 0 if all bookmarks were created, otherwise it will
1093 * be the errno of an (undetermined) bookmark that failed.
1094 */
1095int
1096lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
1097{
1098 nvpair_t *elem;
1099 int error;
eca7b760 1100 char pool[ZFS_MAX_DATASET_NAME_LEN];
da536844
MA
1101
1102 /* determine the pool name */
1103 elem = nvlist_next_nvpair(bookmarks, NULL);
1104 if (elem == NULL)
1105 return (0);
1106 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1107 pool[strcspn(pool, "/#")] = '\0';
1108
1109 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
1110
1111 return (error);
1112}
1113
1114/*
1115 * Retrieve bookmarks.
1116 *
1117 * Retrieve the list of bookmarks for the given file system. The props
1118 * parameter is an nvlist of property names (with no values) that will be
1119 * returned for each bookmark.
1120 *
1121 * The following are valid properties on bookmarks, all of which are numbers
1122 * (represented as uint64 in the nvlist)
1123 *
1124 * "guid" - globally unique identifier of the snapshot it refers to
1125 * "createtxg" - txg when the snapshot it refers to was created
1126 * "creation" - timestamp when the snapshot it refers to was created
1127 *
1128 * The format of the returned nvlist as follows:
1129 * <short name of bookmark> -> {
1130 * <name of property> -> {
1131 * "value" -> uint64
1132 * }
1133 * }
1134 */
1135int
1136lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
1137{
1138 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
1139}
1140
1141/*
1142 * Destroys bookmarks.
1143 *
1144 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1145 * They must all be in the same pool. Bookmarks are specified as
1146 * <fs>#<bmark>.
1147 *
1148 * Bookmarks that do not exist will be silently ignored.
1149 *
1150 * The return value will be 0 if all bookmarks that existed were destroyed.
1151 *
1152 * Otherwise the return value will be the errno of an (undetermined) bookmark
1153 * that failed, no bookmarks will be destroyed, and the errlist will have an
1154 * entry for each bookmark that failed. The value in the errlist will be
1155 * the (int32) error code.
1156 */
1157int
1158lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1159{
1160 nvpair_t *elem;
1161 int error;
eca7b760 1162 char pool[ZFS_MAX_DATASET_NAME_LEN];
da536844
MA
1163
1164 /* determine the pool name */
1165 elem = nvlist_next_nvpair(bmarks, NULL);
1166 if (elem == NULL)
1167 return (0);
1168 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1169 pool[strcspn(pool, "/#")] = '\0';
1170
1171 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1172
1173 return (error);
1174}
b5256303 1175
5b72a38d
SD
1176static int
1177lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
1178 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1179{
1180 int error;
1181 nvlist_t *args;
1182
1183 args = fnvlist_alloc();
1184 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
1185 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
1186 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
1187 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
1188 fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
1189 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
1190 fnvlist_free(args);
1191
1192 return (error);
1193}
1194
d99a0153
CW
1195/*
1196 * Executes a channel program.
1197 *
1198 * If this function returns 0 the channel program was successfully loaded and
1199 * ran without failing. Note that individual commands the channel program ran
1200 * may have failed and the channel program is responsible for reporting such
1201 * errors through outnvl if they are important.
1202 *
1203 * This method may also return:
1204 *
1205 * EINVAL The program contains syntax errors, or an invalid memory or time
1206 * limit was given. No part of the channel program was executed.
1207 * If caused by syntax errors, 'outnvl' contains information about the
1208 * errors.
1209 *
1210 * ECHRNG The program was executed, but encountered a runtime error, such as
1211 * calling a function with incorrect arguments, invoking the error()
1212 * function directly, failing an assert() command, etc. Some portion
1213 * of the channel program may have executed and committed changes.
1214 * Information about the failure can be found in 'outnvl'.
1215 *
1216 * ENOMEM The program fully executed, but the output buffer was not large
1217 * enough to store the returned value. No output is returned through
1218 * 'outnvl'.
1219 *
1220 * ENOSPC The program was terminated because it exceeded its memory usage
1221 * limit. Some portion of the channel program may have executed and
1222 * committed changes to disk. No output is returned through 'outnvl'.
1223 *
1224 * ETIME The program was terminated because it exceeded its Lua instruction
1225 * limit. Some portion of the channel program may have executed and
1226 * committed changes to disk. No output is returned through 'outnvl'.
1227 */
1228int
1229lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1230 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1231{
5b72a38d
SD
1232 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1233 memlimit, argnvl, outnvl));
1234}
d99a0153 1235
d2734cce
SD
1236/*
1237 * Creates a checkpoint for the specified pool.
1238 *
1239 * If this function returns 0 the pool was successfully checkpointed.
1240 *
1241 * This method may also return:
1242 *
1243 * ZFS_ERR_CHECKPOINT_EXISTS
1244 * The pool already has a checkpoint. A pool can only have one
1245 * checkpoint at most, at any given time.
1246 *
1247 * ZFS_ERR_DISCARDING_CHECKPOINT
1248 * ZFS is in the middle of discarding a checkpoint for this pool.
1249 * The pool can be checkpointed again once the discard is done.
1250 *
1251 * ZFS_DEVRM_IN_PROGRESS
1252 * A vdev is currently being removed. The pool cannot be
1253 * checkpointed until the device removal is done.
1254 *
1255 * ZFS_VDEV_TOO_BIG
1256 * One or more top-level vdevs exceed the maximum vdev size
1257 * supported for this feature.
1258 */
1259int
1260lzc_pool_checkpoint(const char *pool)
1261{
1262 int error;
1263
1264 nvlist_t *result = NULL;
1265 nvlist_t *args = fnvlist_alloc();
1266
1267 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1268
1269 fnvlist_free(args);
1270 fnvlist_free(result);
1271
1272 return (error);
1273}
1274
1275/*
1276 * Discard the checkpoint from the specified pool.
1277 *
1278 * If this function returns 0 the checkpoint was successfully discarded.
1279 *
1280 * This method may also return:
1281 *
1282 * ZFS_ERR_NO_CHECKPOINT
1283 * The pool does not have a checkpoint.
1284 *
1285 * ZFS_ERR_DISCARDING_CHECKPOINT
1286 * ZFS is already in the middle of discarding the checkpoint.
1287 */
1288int
1289lzc_pool_checkpoint_discard(const char *pool)
1290{
1291 int error;
1292
1293 nvlist_t *result = NULL;
1294 nvlist_t *args = fnvlist_alloc();
1295
1296 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1297
1298 fnvlist_free(args);
1299 fnvlist_free(result);
1300
1301 return (error);
1302}
1303
5b72a38d
SD
1304/*
1305 * Executes a read-only channel program.
1306 *
1307 * A read-only channel program works programmatically the same way as a
1308 * normal channel program executed with lzc_channel_program(). The only
1309 * difference is it runs exclusively in open-context and therefore can
1310 * return faster. The downside to that, is that the program cannot change
1311 * on-disk state by calling functions from the zfs.sync submodule.
1312 *
1313 * The return values of this function (and their meaning) are exactly the
1314 * same as the ones described in lzc_channel_program().
1315 */
1316int
1317lzc_channel_program_nosync(const char *pool, const char *program,
1318 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1319{
1320 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1321 memlimit, argnvl, outnvl));
d99a0153
CW
1322}
1323
b5256303
TC
1324/*
1325 * Performs key management functions
1326 *
85ce3f4f 1327 * crypt_cmd should be a value from dcp_cmd_t. If the command specifies to
1328 * load or change a wrapping key, the key should be specified in the
1329 * hidden_args nvlist so that it is not logged.
b5256303
TC
1330 */
1331int
1332lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
1333 uint_t wkeylen)
1334{
1335 int error;
1336 nvlist_t *ioc_args;
1337 nvlist_t *hidden_args;
1338
1339 if (wkeydata == NULL)
1340 return (EINVAL);
1341
1342 ioc_args = fnvlist_alloc();
1343 hidden_args = fnvlist_alloc();
1344 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
1345 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1346 if (noop)
1347 fnvlist_add_boolean(ioc_args, "noop");
1348 error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
1349 nvlist_free(hidden_args);
1350 nvlist_free(ioc_args);
1351
1352 return (error);
1353}
1354
1355int
1356lzc_unload_key(const char *fsname)
1357{
1358 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
1359}
1360
1361int
1362lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
1363 uint8_t *wkeydata, uint_t wkeylen)
1364{
1365 int error;
1366 nvlist_t *ioc_args = fnvlist_alloc();
1367 nvlist_t *hidden_args = NULL;
1368
1369 fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
1370
1371 if (wkeydata != NULL) {
1372 hidden_args = fnvlist_alloc();
1373 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
1374 wkeylen);
1375 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1376 }
1377
1378 if (props != NULL)
1379 fnvlist_add_nvlist(ioc_args, "props", props);
1380
1381 error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
1382 nvlist_free(hidden_args);
1383 nvlist_free(ioc_args);
d99a0153 1384
b5256303
TC
1385 return (error);
1386}
d3f2cd7e
AB
1387
1388int
1389lzc_reopen(const char *pool_name, boolean_t scrub_restart)
1390{
1391 nvlist_t *args = fnvlist_alloc();
1392 int error;
1393
1394 fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
1395
1396 error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
1397 nvlist_free(args);
1398 return (error);
1399}
619f0976
GW
1400
1401/*
1402 * Changes initializing state.
1403 *
1404 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1405 * The key is ignored.
1406 *
1407 * If there are errors related to vdev arguments, per-vdev errors are returned
1408 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1409 * guid is stringified with PRIu64, and errno is one of the following as
1410 * an int64_t:
1411 * - ENODEV if the device was not found
1412 * - EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1413 * - EROFS if the device is not writeable
1414 * - EBUSY start requested but the device is already being initialized
1415 * - ESRCH cancel/suspend requested but device is not being initialized
1416 *
1417 * If the errlist is empty, then return value will be:
1418 * - EINVAL if one or more arguments was invalid
1419 * - Other spa_open failures
1420 * - 0 if the operation succeeded
1421 */
1422int
1423lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1424 nvlist_t *vdevs, nvlist_t **errlist)
1425{
1426 int error;
1427 nvlist_t *args = fnvlist_alloc();
1428 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1429 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1430
1431 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1432
1433 fnvlist_free(args);
1434
1435 return (error);
1436}