]> git.proxmox.com Git - mirror_zfs.git/blob - lib/libzfs_core/libzfs_core.c
8ee1dd5e5a1c93345a3983fcca1486cff9e1dbf1
[mirror_zfs.git] / lib / libzfs_core / libzfs_core.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 * Copyright (c) 2017 Datto Inc.
26 * Copyright 2017 RackTop Systems.
27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
28 */
29
30 /*
31 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
32 * It has the following characteristics:
33 *
34 * - Thread Safe. libzfs_core is accessible concurrently from multiple
35 * threads. This is accomplished primarily by avoiding global data
36 * (e.g. caching). Since it's thread-safe, there is no reason for a
37 * process to have multiple libzfs "instances". Therefore, we store
38 * our few pieces of data (e.g. the file descriptor) in global
39 * variables. The fd is reference-counted so that the libzfs_core
40 * library can be "initialized" multiple times (e.g. by different
41 * consumers within the same process).
42 *
43 * - Committed Interface. The libzfs_core interface will be committed,
44 * therefore consumers can compile against it and be confident that
45 * their code will continue to work on future releases of this code.
46 * Currently, the interface is Evolving (not Committed), but we intend
47 * to commit to it once it is more complete and we determine that it
48 * meets the needs of all consumers.
49 *
50 * - Programmatic Error Handling. libzfs_core communicates errors with
51 * defined error numbers, and doesn't print anything to stdout/stderr.
52 *
53 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
54 * to/from the kernel ioctls. There is generally a 1:1 correspondence
55 * between libzfs_core functions and ioctls to ZFS_DEV.
56 *
57 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
58 * with kernel ioctls, and kernel ioctls are generally atomic, each
59 * libzfs_core function is atomic. For example, creating multiple
60 * snapshots with a single call to lzc_snapshot() is atomic -- it
61 * can't fail with only some of the requested snapshots created, even
62 * in the event of power loss or system crash.
63 *
64 * - Continued libzfs Support. Some higher-level operations (e.g.
65 * support for "zfs send -R") are too complicated to fit the scope of
66 * libzfs_core. This functionality will continue to live in libzfs.
67 * Where appropriate, libzfs will use the underlying atomic operations
68 * of libzfs_core. For example, libzfs may implement "zfs send -R |
69 * zfs receive" by using individual "send one snapshot", rename,
70 * destroy, and "receive one snapshot" operations in libzfs_core.
71 * /sbin/zfs and /sbin/zpool will link with both libzfs and
72 * libzfs_core. Other consumers should aim to use only libzfs_core,
73 * since that will be the supported, stable interface going forwards.
74 */
75
76 #include <libzfs_core.h>
77 #include <ctype.h>
78 #include <unistd.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #ifdef ZFS_DEBUG
82 #include <stdio.h>
83 #endif
84 #include <errno.h>
85 #include <fcntl.h>
86 #include <pthread.h>
87 #include <sys/nvpair.h>
88 #include <sys/param.h>
89 #include <sys/types.h>
90 #include <sys/stat.h>
91 #include <sys/zfs_ioctl.h>
92
93 static int g_fd = -1;
94 static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
95 static int g_refcount;
96
97 #ifdef ZFS_DEBUG
98 static zfs_ioc_t fail_ioc_cmd;
99 static zfs_errno_t fail_ioc_err;
100
101 static void
102 libzfs_core_debug_ioc(void)
103 {
104 /*
105 * To test running newer user space binaries with kernel's
106 * that don't yet support an ioctl or a new ioctl arg we
107 * provide an override to intentionally fail an ioctl.
108 *
109 * USAGE:
110 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
111 *
112 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
113 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
114 *
115 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
116 * cannot checkpoint 'tank': the loaded zfs module does not support
117 * this operation. A reboot may be required to enable this operation.
118 */
119 if (fail_ioc_cmd == 0) {
120 char *ioc_test = getenv("ZFS_IOC_TEST");
121 unsigned int ioc_num = 0, ioc_err = 0;
122
123 if (ioc_test != NULL &&
124 sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 &&
125 ioc_num < ZFS_IOC_LAST) {
126 fail_ioc_cmd = ioc_num;
127 fail_ioc_err = ioc_err;
128 }
129 }
130 }
131 #endif
132
133 int
134 libzfs_core_init(void)
135 {
136 (void) pthread_mutex_lock(&g_lock);
137 if (g_refcount == 0) {
138 g_fd = open(ZFS_DEV, O_RDWR);
139 if (g_fd < 0) {
140 (void) pthread_mutex_unlock(&g_lock);
141 return (errno);
142 }
143 }
144 g_refcount++;
145
146 #ifdef ZFS_DEBUG
147 libzfs_core_debug_ioc();
148 #endif
149 (void) pthread_mutex_unlock(&g_lock);
150 return (0);
151 }
152
153 void
154 libzfs_core_fini(void)
155 {
156 (void) pthread_mutex_lock(&g_lock);
157 ASSERT3S(g_refcount, >, 0);
158
159 if (g_refcount > 0)
160 g_refcount--;
161
162 if (g_refcount == 0 && g_fd != -1) {
163 (void) close(g_fd);
164 g_fd = -1;
165 }
166 (void) pthread_mutex_unlock(&g_lock);
167 }
168
169 static int
170 lzc_ioctl(zfs_ioc_t ioc, const char *name,
171 nvlist_t *source, nvlist_t **resultp)
172 {
173 zfs_cmd_t zc = {"\0"};
174 int error = 0;
175 char *packed = NULL;
176 size_t size = 0;
177
178 ASSERT3S(g_refcount, >, 0);
179 VERIFY3S(g_fd, !=, -1);
180
181 #ifdef ZFS_DEBUG
182 if (ioc == fail_ioc_cmd)
183 return (fail_ioc_err);
184 #endif
185
186 if (name != NULL)
187 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
188
189 if (source != NULL) {
190 packed = fnvlist_pack(source, &size);
191 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
192 zc.zc_nvlist_src_size = size;
193 }
194
195 if (resultp != NULL) {
196 *resultp = NULL;
197 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
198 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
199 ZCP_ARG_MEMLIMIT);
200 } else {
201 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
202 }
203 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
204 malloc(zc.zc_nvlist_dst_size);
205 if (zc.zc_nvlist_dst == (uint64_t)0) {
206 error = ENOMEM;
207 goto out;
208 }
209 }
210
211 while (ioctl(g_fd, ioc, &zc) != 0) {
212 /*
213 * If ioctl exited with ENOMEM, we retry the ioctl after
214 * increasing the size of the destination nvlist.
215 *
216 * Channel programs that exit with ENOMEM ran over the
217 * lua memory sandbox; they should not be retried.
218 */
219 if (errno == ENOMEM && resultp != NULL &&
220 ioc != ZFS_IOC_CHANNEL_PROGRAM) {
221 free((void *)(uintptr_t)zc.zc_nvlist_dst);
222 zc.zc_nvlist_dst_size *= 2;
223 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
224 malloc(zc.zc_nvlist_dst_size);
225 if (zc.zc_nvlist_dst == (uint64_t)0) {
226 error = ENOMEM;
227 goto out;
228 }
229 } else {
230 error = errno;
231 break;
232 }
233 }
234 if (zc.zc_nvlist_dst_filled) {
235 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
236 zc.zc_nvlist_dst_size);
237 }
238
239 out:
240 if (packed != NULL)
241 fnvlist_pack_free(packed, size);
242 free((void *)(uintptr_t)zc.zc_nvlist_dst);
243 return (error);
244 }
245
246 int
247 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
248 uint8_t *wkeydata, uint_t wkeylen)
249 {
250 int error;
251 nvlist_t *hidden_args = NULL;
252 nvlist_t *args = fnvlist_alloc();
253
254 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
255 if (props != NULL)
256 fnvlist_add_nvlist(args, "props", props);
257
258 if (wkeydata != NULL) {
259 hidden_args = fnvlist_alloc();
260 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
261 wkeylen);
262 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
263 }
264
265 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
266 nvlist_free(hidden_args);
267 nvlist_free(args);
268 return (error);
269 }
270
271 int
272 lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
273 {
274 int error;
275 nvlist_t *hidden_args = NULL;
276 nvlist_t *args = fnvlist_alloc();
277
278 fnvlist_add_string(args, "origin", origin);
279 if (props != NULL)
280 fnvlist_add_nvlist(args, "props", props);
281 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
282 nvlist_free(hidden_args);
283 nvlist_free(args);
284 return (error);
285 }
286
287 int
288 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
289 {
290 /*
291 * The promote ioctl is still legacy, so we need to construct our
292 * own zfs_cmd_t rather than using lzc_ioctl().
293 */
294 zfs_cmd_t zc = { "\0" };
295
296 ASSERT3S(g_refcount, >, 0);
297 VERIFY3S(g_fd, !=, -1);
298
299 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
300 if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
301 int error = errno;
302 if (error == EEXIST && snapnamebuf != NULL)
303 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
304 return (error);
305 }
306 return (0);
307 }
308
309 int
310 lzc_remap(const char *fsname)
311 {
312 int error;
313 nvlist_t *args = fnvlist_alloc();
314 error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL);
315 nvlist_free(args);
316 return (error);
317 }
318
319 int
320 lzc_rename(const char *source, const char *target)
321 {
322 zfs_cmd_t zc = { "\0" };
323 int error;
324 ASSERT3S(g_refcount, >, 0);
325 VERIFY3S(g_fd, !=, -1);
326 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
327 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
328 error = ioctl(g_fd, ZFS_IOC_RENAME, &zc);
329 if (error != 0)
330 error = errno;
331 return (error);
332 }
333 int
334 lzc_destroy(const char *fsname)
335 {
336 int error;
337 nvlist_t *args = fnvlist_alloc();
338 error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
339 nvlist_free(args);
340 return (error);
341 }
342
343 /*
344 * Creates snapshots.
345 *
346 * The keys in the snaps nvlist are the snapshots to be created.
347 * They must all be in the same pool.
348 *
349 * The props nvlist is properties to set. Currently only user properties
350 * are supported. { user:prop_name -> string value }
351 *
352 * The returned results nvlist will have an entry for each snapshot that failed.
353 * The value will be the (int32) error code.
354 *
355 * The return value will be 0 if all snapshots were created, otherwise it will
356 * be the errno of a (unspecified) snapshot that failed.
357 */
358 int
359 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
360 {
361 nvpair_t *elem;
362 nvlist_t *args;
363 int error;
364 char pool[ZFS_MAX_DATASET_NAME_LEN];
365
366 *errlist = NULL;
367
368 /* determine the pool name */
369 elem = nvlist_next_nvpair(snaps, NULL);
370 if (elem == NULL)
371 return (0);
372 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
373 pool[strcspn(pool, "/@")] = '\0';
374
375 args = fnvlist_alloc();
376 fnvlist_add_nvlist(args, "snaps", snaps);
377 if (props != NULL)
378 fnvlist_add_nvlist(args, "props", props);
379
380 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
381 nvlist_free(args);
382
383 return (error);
384 }
385
386 /*
387 * Destroys snapshots.
388 *
389 * The keys in the snaps nvlist are the snapshots to be destroyed.
390 * They must all be in the same pool.
391 *
392 * Snapshots that do not exist will be silently ignored.
393 *
394 * If 'defer' is not set, and a snapshot has user holds or clones, the
395 * destroy operation will fail and none of the snapshots will be
396 * destroyed.
397 *
398 * If 'defer' is set, and a snapshot has user holds or clones, it will be
399 * marked for deferred destruction, and will be destroyed when the last hold
400 * or clone is removed/destroyed.
401 *
402 * The return value will be 0 if all snapshots were destroyed (or marked for
403 * later destruction if 'defer' is set) or didn't exist to begin with.
404 *
405 * Otherwise the return value will be the errno of a (unspecified) snapshot
406 * that failed, no snapshots will be destroyed, and the errlist will have an
407 * entry for each snapshot that failed. The value in the errlist will be
408 * the (int32) error code.
409 */
410 int
411 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
412 {
413 nvpair_t *elem;
414 nvlist_t *args;
415 int error;
416 char pool[ZFS_MAX_DATASET_NAME_LEN];
417
418 /* determine the pool name */
419 elem = nvlist_next_nvpair(snaps, NULL);
420 if (elem == NULL)
421 return (0);
422 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
423 pool[strcspn(pool, "/@")] = '\0';
424
425 args = fnvlist_alloc();
426 fnvlist_add_nvlist(args, "snaps", snaps);
427 if (defer)
428 fnvlist_add_boolean(args, "defer");
429
430 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
431 nvlist_free(args);
432
433 return (error);
434 }
435
436 int
437 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
438 uint64_t *usedp)
439 {
440 nvlist_t *args;
441 nvlist_t *result;
442 int err;
443 char fs[ZFS_MAX_DATASET_NAME_LEN];
444 char *atp;
445
446 /* determine the fs name */
447 (void) strlcpy(fs, firstsnap, sizeof (fs));
448 atp = strchr(fs, '@');
449 if (atp == NULL)
450 return (EINVAL);
451 *atp = '\0';
452
453 args = fnvlist_alloc();
454 fnvlist_add_string(args, "firstsnap", firstsnap);
455
456 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
457 nvlist_free(args);
458 if (err == 0)
459 *usedp = fnvlist_lookup_uint64(result, "used");
460 fnvlist_free(result);
461
462 return (err);
463 }
464
465 boolean_t
466 lzc_exists(const char *dataset)
467 {
468 /*
469 * The objset_stats ioctl is still legacy, so we need to construct our
470 * own zfs_cmd_t rather than using lzc_ioctl().
471 */
472 zfs_cmd_t zc = {"\0"};
473
474 ASSERT3S(g_refcount, >, 0);
475 VERIFY3S(g_fd, !=, -1);
476
477 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
478 return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
479 }
480
481 /*
482 * outnvl is unused.
483 * It was added to preserve the function signature in case it is
484 * needed in the future.
485 */
486 /*ARGSUSED*/
487 int
488 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
489 {
490 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
491 }
492
493 /*
494 * Create "user holds" on snapshots. If there is a hold on a snapshot,
495 * the snapshot can not be destroyed. (However, it can be marked for deletion
496 * by lzc_destroy_snaps(defer=B_TRUE).)
497 *
498 * The keys in the nvlist are snapshot names.
499 * The snapshots must all be in the same pool.
500 * The value is the name of the hold (string type).
501 *
502 * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
503 * In this case, when the cleanup_fd is closed (including on process
504 * termination), the holds will be released. If the system is shut down
505 * uncleanly, the holds will be released when the pool is next opened
506 * or imported.
507 *
508 * Holds for snapshots which don't exist will be skipped and have an entry
509 * added to errlist, but will not cause an overall failure.
510 *
511 * The return value will be 0 if all holds, for snapshots that existed,
512 * were successfully created.
513 *
514 * Otherwise the return value will be the errno of a (unspecified) hold that
515 * failed and no holds will be created.
516 *
517 * In all cases the errlist will have an entry for each hold that failed
518 * (name = snapshot), with its value being the error code (int32).
519 */
520 int
521 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
522 {
523 char pool[ZFS_MAX_DATASET_NAME_LEN];
524 nvlist_t *args;
525 nvpair_t *elem;
526 int error;
527
528 /* determine the pool name */
529 elem = nvlist_next_nvpair(holds, NULL);
530 if (elem == NULL)
531 return (0);
532 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
533 pool[strcspn(pool, "/@")] = '\0';
534
535 args = fnvlist_alloc();
536 fnvlist_add_nvlist(args, "holds", holds);
537 if (cleanup_fd != -1)
538 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
539
540 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
541 nvlist_free(args);
542 return (error);
543 }
544
545 /*
546 * Release "user holds" on snapshots. If the snapshot has been marked for
547 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
548 * any clones, and all the user holds are removed, then the snapshot will be
549 * destroyed.
550 *
551 * The keys in the nvlist are snapshot names.
552 * The snapshots must all be in the same pool.
553 * The value is an nvlist whose keys are the holds to remove.
554 *
555 * Holds which failed to release because they didn't exist will have an entry
556 * added to errlist, but will not cause an overall failure.
557 *
558 * The return value will be 0 if the nvl holds was empty or all holds that
559 * existed, were successfully removed.
560 *
561 * Otherwise the return value will be the errno of a (unspecified) hold that
562 * failed to release and no holds will be released.
563 *
564 * In all cases the errlist will have an entry for each hold that failed to
565 * release.
566 */
567 int
568 lzc_release(nvlist_t *holds, nvlist_t **errlist)
569 {
570 char pool[ZFS_MAX_DATASET_NAME_LEN];
571 nvpair_t *elem;
572
573 /* determine the pool name */
574 elem = nvlist_next_nvpair(holds, NULL);
575 if (elem == NULL)
576 return (0);
577 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
578 pool[strcspn(pool, "/@")] = '\0';
579
580 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
581 }
582
583 /*
584 * Retrieve list of user holds on the specified snapshot.
585 *
586 * On success, *holdsp will be set to an nvlist which the caller must free.
587 * The keys are the names of the holds, and the value is the creation time
588 * of the hold (uint64) in seconds since the epoch.
589 */
590 int
591 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
592 {
593 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
594 }
595
596 /*
597 * Generate a zfs send stream for the specified snapshot and write it to
598 * the specified file descriptor.
599 *
600 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
601 *
602 * If "from" is NULL, a full (non-incremental) stream will be sent.
603 * If "from" is non-NULL, it must be the full name of a snapshot or
604 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
605 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
606 * bookmark must represent an earlier point in the history of "snapname").
607 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
608 * or it can be the origin of "snapname"'s filesystem, or an earlier
609 * snapshot in the origin, etc.
610 *
611 * "fd" is the file descriptor to write the send stream to.
612 *
613 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
614 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
615 * records with drr_blksz > 128K.
616 *
617 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
618 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
619 * which the receiving system must support (as indicated by support
620 * for the "embedded_data" feature).
621 *
622 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
623 * compressed WRITE records for blocks which are compressed on disk and in
624 * memory. If the lz4_compress feature is active on the sending system, then
625 * the receiving system must have that feature enabled as well.
626 *
627 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
628 * datasets, by sending data exactly as it exists on disk. This allows backups
629 * to be taken even if encryption keys are not currently loaded.
630 */
631 int
632 lzc_send(const char *snapname, const char *from, int fd,
633 enum lzc_send_flags flags)
634 {
635 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
636 NULL));
637 }
638
639 int
640 lzc_send_redacted(const char *snapname, const char *from, int fd,
641 enum lzc_send_flags flags, const char *redactbook)
642 {
643 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
644 redactbook));
645 }
646
647 int
648 lzc_send_resume(const char *snapname, const char *from, int fd,
649 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
650 {
651 return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
652 resumeoff, NULL));
653 }
654
655 /*
656 * snapname: The name of the "tosnap", or the snapshot whose contents we are
657 * sending.
658 * from: The name of the "fromsnap", or the incremental source.
659 * fd: File descriptor to write the stream to.
660 * flags: flags that determine features to be used by the stream.
661 * resumeobj: Object to resume from, for resuming send
662 * resumeoff: Offset to resume from, for resuming send.
663 * redactnv: nvlist of string -> boolean(ignored) containing the names of all
664 * the snapshots that we should redact with respect to.
665 * redactbook: Name of the redaction bookmark to create.
666 */
667 int
668 lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
669 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
670 const char *redactbook)
671 {
672 nvlist_t *args;
673 int err;
674
675 args = fnvlist_alloc();
676 fnvlist_add_int32(args, "fd", fd);
677 if (from != NULL)
678 fnvlist_add_string(args, "fromsnap", from);
679 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
680 fnvlist_add_boolean(args, "largeblockok");
681 if (flags & LZC_SEND_FLAG_EMBED_DATA)
682 fnvlist_add_boolean(args, "embedok");
683 if (flags & LZC_SEND_FLAG_COMPRESS)
684 fnvlist_add_boolean(args, "compressok");
685 if (flags & LZC_SEND_FLAG_RAW)
686 fnvlist_add_boolean(args, "rawok");
687 if (resumeobj != 0 || resumeoff != 0) {
688 fnvlist_add_uint64(args, "resume_object", resumeobj);
689 fnvlist_add_uint64(args, "resume_offset", resumeoff);
690 }
691 if (redactbook != NULL)
692 fnvlist_add_string(args, "redactbook", redactbook);
693
694 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
695 nvlist_free(args);
696 return (err);
697 }
698
699 /*
700 * "from" can be NULL, a snapshot, or a bookmark.
701 *
702 * If from is NULL, a full (non-incremental) stream will be estimated. This
703 * is calculated very efficiently.
704 *
705 * If from is a snapshot, lzc_send_space uses the deadlists attached to
706 * each snapshot to efficiently estimate the stream size.
707 *
708 * If from is a bookmark, the indirect blocks in the destination snapshot
709 * are traversed, looking for blocks with a birth time since the creation TXG of
710 * the snapshot this bookmark was created from. This will result in
711 * significantly more I/O and be less efficient than a send space estimation on
712 * an equivalent snapshot. This process is also used if redact_snaps is
713 * non-null.
714 */
715 int
716 lzc_send_space_resume_redacted(const char *snapname, const char *from,
717 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
718 uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
719 {
720 nvlist_t *args;
721 nvlist_t *result;
722 int err;
723
724 args = fnvlist_alloc();
725 if (from != NULL)
726 fnvlist_add_string(args, "from", from);
727 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
728 fnvlist_add_boolean(args, "largeblockok");
729 if (flags & LZC_SEND_FLAG_EMBED_DATA)
730 fnvlist_add_boolean(args, "embedok");
731 if (flags & LZC_SEND_FLAG_COMPRESS)
732 fnvlist_add_boolean(args, "compressok");
733 if (flags & LZC_SEND_FLAG_RAW)
734 fnvlist_add_boolean(args, "rawok");
735 if (resumeobj != 0 || resumeoff != 0) {
736 fnvlist_add_uint64(args, "resume_object", resumeobj);
737 fnvlist_add_uint64(args, "resume_offset", resumeoff);
738 fnvlist_add_uint64(args, "bytes", resume_bytes);
739 }
740 if (redactbook != NULL)
741 fnvlist_add_string(args, "redactbook", redactbook);
742 if (fd != -1)
743 fnvlist_add_int32(args, "fd", fd);
744
745 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
746 nvlist_free(args);
747 if (err == 0)
748 *spacep = fnvlist_lookup_uint64(result, "space");
749 nvlist_free(result);
750 return (err);
751 }
752
753 int
754 lzc_send_space(const char *snapname, const char *from,
755 enum lzc_send_flags flags, uint64_t *spacep)
756 {
757 return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
758 NULL, -1, spacep));
759 }
760
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short
 * reads and retrying reads interrupted by a signal.
 *
 * Returns 0 once the whole buffer has been filled, or EIO on a read
 * error or if end-of-file is reached first.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	/* Stop as soon as the buffer is full; no trailing zero-length read. */
	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0) {
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;	/* premature end-of-file */

		cp += rv;
		len -= (int)rv;
	}

	return (len == 0 ? 0 : EIO);
}
779
780 /*
781 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
782 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
783 * stream options but is currently only used for resumable streams. This way
784 * updated user space utilities will interoperate with older kernel modules.
785 *
786 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
787 */
788 static int
789 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
790 uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
791 boolean_t resumable, boolean_t raw, int input_fd,
792 const dmu_replay_record_t *begin_record, int cleanup_fd,
793 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
794 nvlist_t **errors)
795 {
796 dmu_replay_record_t drr;
797 char fsname[MAXPATHLEN];
798 char *atp;
799 int error;
800 boolean_t payload = B_FALSE;
801
802 ASSERT3S(g_refcount, >, 0);
803 VERIFY3S(g_fd, !=, -1);
804
805 /* Set 'fsname' to the name of containing filesystem */
806 (void) strlcpy(fsname, snapname, sizeof (fsname));
807 atp = strchr(fsname, '@');
808 if (atp == NULL)
809 return (EINVAL);
810 *atp = '\0';
811
812 /* If the fs does not exist, try its parent. */
813 if (!lzc_exists(fsname)) {
814 char *slashp = strrchr(fsname, '/');
815 if (slashp == NULL)
816 return (ENOENT);
817 *slashp = '\0';
818 }
819
820 /*
821 * The begin_record is normally a non-byteswapped BEGIN record.
822 * For resumable streams it may be set to any non-byteswapped
823 * dmu_replay_record_t.
824 */
825 if (begin_record == NULL) {
826 error = recv_read(input_fd, &drr, sizeof (drr));
827 if (error != 0)
828 return (error);
829 } else {
830 drr = *begin_record;
831 payload = (begin_record->drr_payloadlen != 0);
832 }
833
834 /*
835 * All recives with a payload should use the new interface.
836 */
837 if (resumable || raw || wkeydata != NULL || payload) {
838 nvlist_t *outnvl = NULL;
839 nvlist_t *innvl = fnvlist_alloc();
840
841 fnvlist_add_string(innvl, "snapname", snapname);
842
843 if (recvdprops != NULL)
844 fnvlist_add_nvlist(innvl, "props", recvdprops);
845
846 if (localprops != NULL)
847 fnvlist_add_nvlist(innvl, "localprops", localprops);
848
849 if (wkeydata != NULL) {
850 /*
851 * wkeydata must be placed in the special
852 * ZPOOL_HIDDEN_ARGS nvlist so that it
853 * will not be printed to the zpool history.
854 */
855 nvlist_t *hidden_args = fnvlist_alloc();
856 fnvlist_add_uint8_array(hidden_args, "wkeydata",
857 wkeydata, wkeylen);
858 fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
859 hidden_args);
860 nvlist_free(hidden_args);
861 }
862
863 if (origin != NULL && strlen(origin))
864 fnvlist_add_string(innvl, "origin", origin);
865
866 fnvlist_add_byte_array(innvl, "begin_record",
867 (uchar_t *)&drr, sizeof (drr));
868
869 fnvlist_add_int32(innvl, "input_fd", input_fd);
870
871 if (force)
872 fnvlist_add_boolean(innvl, "force");
873
874 if (resumable)
875 fnvlist_add_boolean(innvl, "resumable");
876
877 if (cleanup_fd >= 0)
878 fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);
879
880 if (action_handle != NULL)
881 fnvlist_add_uint64(innvl, "action_handle",
882 *action_handle);
883
884 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
885
886 if (error == 0 && read_bytes != NULL)
887 error = nvlist_lookup_uint64(outnvl, "read_bytes",
888 read_bytes);
889
890 if (error == 0 && errflags != NULL)
891 error = nvlist_lookup_uint64(outnvl, "error_flags",
892 errflags);
893
894 if (error == 0 && action_handle != NULL)
895 error = nvlist_lookup_uint64(outnvl, "action_handle",
896 action_handle);
897
898 if (error == 0 && errors != NULL) {
899 nvlist_t *nvl;
900 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
901 if (error == 0)
902 *errors = fnvlist_dup(nvl);
903 }
904
905 fnvlist_free(innvl);
906 fnvlist_free(outnvl);
907 } else {
908 zfs_cmd_t zc = {"\0"};
909 char *packed = NULL;
910 size_t size;
911
912 ASSERT3S(g_refcount, >, 0);
913
914 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
915 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
916
917 if (recvdprops != NULL) {
918 packed = fnvlist_pack(recvdprops, &size);
919 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
920 zc.zc_nvlist_src_size = size;
921 }
922
923 if (localprops != NULL) {
924 packed = fnvlist_pack(localprops, &size);
925 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
926 zc.zc_nvlist_conf_size = size;
927 }
928
929 if (origin != NULL)
930 (void) strlcpy(zc.zc_string, origin,
931 sizeof (zc.zc_string));
932
933 ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
934 zc.zc_begin_record = drr.drr_u.drr_begin;
935 zc.zc_guid = force;
936 zc.zc_cookie = input_fd;
937 zc.zc_cleanup_fd = -1;
938 zc.zc_action_handle = 0;
939
940 if (cleanup_fd >= 0)
941 zc.zc_cleanup_fd = cleanup_fd;
942
943 if (action_handle != NULL)
944 zc.zc_action_handle = *action_handle;
945
946 zc.zc_nvlist_dst_size = 128 * 1024;
947 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
948 malloc(zc.zc_nvlist_dst_size);
949
950 error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
951 if (error != 0) {
952 error = errno;
953 } else {
954 if (read_bytes != NULL)
955 *read_bytes = zc.zc_cookie;
956
957 if (errflags != NULL)
958 *errflags = zc.zc_obj;
959
960 if (action_handle != NULL)
961 *action_handle = zc.zc_action_handle;
962
963 if (errors != NULL)
964 VERIFY0(nvlist_unpack(
965 (void *)(uintptr_t)zc.zc_nvlist_dst,
966 zc.zc_nvlist_dst_size, errors, KM_SLEEP));
967 }
968
969 if (packed != NULL)
970 fnvlist_pack_free(packed, size);
971 free((void *)(uintptr_t)zc.zc_nvlist_dst);
972 }
973
974 return (error);
975 }
976
977 /*
978 * The simplest receive case: receive from the specified fd, creating the
979 * specified snapshot. Apply the specified properties as "received" properties
980 * (which can be overridden by locally-set properties). If the stream is a
981 * clone, its origin snapshot must be specified by 'origin'. The 'force'
982 * flag will cause the target filesystem to be rolled back or destroyed if
983 * necessary to receive.
984 *
985 * Return 0 on success or an errno on failure.
986 *
987 * Note: this interface does not work on dedup'd streams
988 * (those with DMU_BACKUP_FEATURE_DEDUP).
989 */
990 int
991 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
992 boolean_t force, boolean_t raw, int fd)
993 {
994 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
995 B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
996 }
997
998 /*
999 * Like lzc_receive, but if the receive fails due to premature stream
1000 * termination, the intermediate state will be preserved on disk. In this
1001 * case, ECKSUM will be returned. The receive may subsequently be resumed
1002 * with a resuming send stream generated by lzc_send_resume().
1003 */
1004 int
1005 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
1006 boolean_t force, boolean_t raw, int fd)
1007 {
1008 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1009 B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
1010 }
1011
1012 /*
1013 * Like lzc_receive, but allows the caller to read the begin record and then to
1014 * pass it in. That could be useful if the caller wants to derive, for example,
1015 * the snapname or the origin parameters based on the information contained in
1016 * the begin record.
1017 * The begin record must be in its original form as read from the stream,
1018 * in other words, it should not be byteswapped.
1019 *
1020 * The 'resumable' parameter allows to obtain the same behavior as with
1021 * lzc_receive_resumable.
1022 */
1023 int
1024 lzc_receive_with_header(const char *snapname, nvlist_t *props,
1025 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1026 int fd, const dmu_replay_record_t *begin_record)
1027 {
1028 if (begin_record == NULL)
1029 return (EINVAL);
1030
1031 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1032 resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, NULL));
1033 }
1034
1035 /*
1036 * Like lzc_receive, but allows the caller to pass all supported arguments
1037 * and retrieve all values returned. The only additional input parameter
1038 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
1039 *
1040 * The following parameters all provide return values. Several may be set
1041 * in the failure case and will contain additional information.
1042 *
1043 * The 'read_bytes' value will be set to the total number of bytes read.
1044 *
1045 * The 'errflags' value will contain zprop_errflags_t flags which are
1046 * used to describe any failures.
1047 *
1048 * The 'action_handle' is used to pass the handle for this guid/ds mapping.
1049 * It should be set to zero on first call and will contain an updated handle
1050 * on success, it should be passed in subsequent calls.
1051 *
1052 * The 'errors' nvlist contains an entry for each unapplied received
1053 * property. Callers are responsible for freeing this nvlist.
1054 */
1055 int lzc_receive_one(const char *snapname, nvlist_t *props,
1056 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1057 int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
1058 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1059 nvlist_t **errors)
1060 {
1061 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1062 resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes,
1063 errflags, action_handle, errors));
1064 }
1065
1066 /*
1067 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1068 * argument.
1069 *
1070 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1071 * exclude ('zfs receive -x') properties. Callers are responsible for freeing
1072 * this nvlist
1073 */
1074 int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
1075 nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
1076 boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
1077 const dmu_replay_record_t *begin_record, int cleanup_fd,
1078 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1079 nvlist_t **errors)
1080 {
1081 return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
1082 force, resumable, raw, input_fd, begin_record, cleanup_fd,
1083 read_bytes, errflags, action_handle, errors));
1084 }
1085
1086 /*
1087 * Roll back this filesystem or volume to its most recent snapshot.
1088 * If snapnamebuf is not NULL, it will be filled in with the name
1089 * of the most recent snapshot.
1090 * Note that the latest snapshot may change if a new one is concurrently
1091 * created or the current one is destroyed. lzc_rollback_to can be used
1092 * to roll back to a specific latest snapshot.
1093 *
1094 * Return 0 on success or an errno on failure.
1095 */
1096 int
1097 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
1098 {
1099 nvlist_t *args;
1100 nvlist_t *result;
1101 int err;
1102
1103 args = fnvlist_alloc();
1104 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1105 nvlist_free(args);
1106 if (err == 0 && snapnamebuf != NULL) {
1107 const char *snapname = fnvlist_lookup_string(result, "target");
1108 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
1109 }
1110 nvlist_free(result);
1111
1112 return (err);
1113 }
1114
1115 /*
1116 * Roll back this filesystem or volume to the specified snapshot,
1117 * if possible.
1118 *
1119 * Return 0 on success or an errno on failure.
1120 */
1121 int
1122 lzc_rollback_to(const char *fsname, const char *snapname)
1123 {
1124 nvlist_t *args;
1125 nvlist_t *result;
1126 int err;
1127
1128 args = fnvlist_alloc();
1129 fnvlist_add_string(args, "target", snapname);
1130 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1131 nvlist_free(args);
1132 nvlist_free(result);
1133 return (err);
1134 }
1135
1136 /*
1137 * Creates bookmarks.
1138 *
1139 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
1140 * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and
1141 * snapshots must be in the same pool.
1142 *
1143 * The returned results nvlist will have an entry for each bookmark that failed.
1144 * The value will be the (int32) error code.
1145 *
1146 * The return value will be 0 if all bookmarks were created, otherwise it will
 * be the errno of a (undetermined) bookmark that failed.
1148 */
1149 int
1150 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
1151 {
1152 nvpair_t *elem;
1153 int error;
1154 char pool[ZFS_MAX_DATASET_NAME_LEN];
1155
1156 /* determine the pool name */
1157 elem = nvlist_next_nvpair(bookmarks, NULL);
1158 if (elem == NULL)
1159 return (0);
1160 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1161 pool[strcspn(pool, "/#")] = '\0';
1162
1163 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
1164
1165 return (error);
1166 }
1167
1168 /*
1169 * Retrieve bookmarks.
1170 *
1171 * Retrieve the list of bookmarks for the given file system. The props
1172 * parameter is an nvlist of property names (with no values) that will be
1173 * returned for each bookmark.
1174 *
1175 * The following are valid properties on bookmarks, most of which are numbers
1176 * (represented as uint64 in the nvlist), except redact_snaps, which is a
1177 * uint64 array, and redact_complete, which is a boolean
1178 *
1179 * "guid" - globally unique identifier of the snapshot it refers to
1180 * "createtxg" - txg when the snapshot it refers to was created
1181 * "creation" - timestamp when the snapshot it refers to was created
1182 * "ivsetguid" - IVset guid for identifying encrypted snapshots
1183 * "redact_snaps" - list of guids of the redaction snapshots for the specified
1184 * bookmark. If the bookmark is not a redaction bookmark, the nvlist will
1185 * not contain an entry for this value. If it is redacted with respect to
1186 * no snapshots, it will contain value -> NULL uint64 array
1187 * "redact_complete" - boolean value; true if the redaction bookmark is
1188 * complete, false otherwise.
1189 *
1190 * The format of the returned nvlist as follows:
1191 * <short name of bookmark> -> {
1192 * <name of property> -> {
1193 * "value" -> uint64
1194 * }
1195 * ...
1196 * "redact_snaps" -> {
1197 * "value" -> uint64 array
1198 * }
1199 * "redact_complete" -> {
1200 * "value" -> boolean value
1201 * }
1202 * }
1203 */
1204 int
1205 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
1206 {
1207 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
1208 }
1209
1210 /*
1211 * Get bookmark properties.
1212 *
1213 * Given a bookmark's full name, retrieve all properties for the bookmark.
1214 *
 * The format of the returned property list is as follows:
 * {
 *     <name of property> -> {
 *         "value" -> uint64
 *     }
 *     ...
 *     "redact_snaps" -> {
 *         "value" -> uint64 array
 *     }
 * }
 */
1225 int
1226 lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
1227 {
1228 int error;
1229
1230 nvlist_t *innvl = fnvlist_alloc();
1231 error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
1232 fnvlist_free(innvl);
1233
1234 return (error);
1235 }
1236
1237 /*
1238 * Destroys bookmarks.
1239 *
1240 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1241 * They must all be in the same pool. Bookmarks are specified as
1242 * <fs>#<bmark>.
1243 *
1244 * Bookmarks that do not exist will be silently ignored.
1245 *
1246 * The return value will be 0 if all bookmarks that existed were destroyed.
1247 *
1248 * Otherwise the return value will be the errno of a (undetermined) bookmark
1249 * that failed, no bookmarks will be destroyed, and the errlist will have an
 * entry for each bookmark that failed. The value in the errlist will be
1251 * the (int32) error code.
1252 */
1253 int
1254 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1255 {
1256 nvpair_t *elem;
1257 int error;
1258 char pool[ZFS_MAX_DATASET_NAME_LEN];
1259
1260 /* determine the pool name */
1261 elem = nvlist_next_nvpair(bmarks, NULL);
1262 if (elem == NULL)
1263 return (0);
1264 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1265 pool[strcspn(pool, "/#")] = '\0';
1266
1267 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1268
1269 return (error);
1270 }
1271
1272 static int
1273 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
1274 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1275 {
1276 int error;
1277 nvlist_t *args;
1278
1279 args = fnvlist_alloc();
1280 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
1281 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
1282 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
1283 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
1284 fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
1285 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
1286 fnvlist_free(args);
1287
1288 return (error);
1289 }
1290
1291 /*
1292 * Executes a channel program.
1293 *
1294 * If this function returns 0 the channel program was successfully loaded and
1295 * ran without failing. Note that individual commands the channel program ran
1296 * may have failed and the channel program is responsible for reporting such
1297 * errors through outnvl if they are important.
1298 *
1299 * This method may also return:
1300 *
1301 * EINVAL The program contains syntax errors, or an invalid memory or time
1302 * limit was given. No part of the channel program was executed.
1303 * If caused by syntax errors, 'outnvl' contains information about the
1304 * errors.
1305 *
1306 * ECHRNG The program was executed, but encountered a runtime error, such as
1307 * calling a function with incorrect arguments, invoking the error()
1308 * function directly, failing an assert() command, etc. Some portion
1309 * of the channel program may have executed and committed changes.
1310 * Information about the failure can be found in 'outnvl'.
1311 *
1312 * ENOMEM The program fully executed, but the output buffer was not large
1313 * enough to store the returned value. No output is returned through
1314 * 'outnvl'.
1315 *
1316 * ENOSPC The program was terminated because it exceeded its memory usage
1317 * limit. Some portion of the channel program may have executed and
1318 * committed changes to disk. No output is returned through 'outnvl'.
1319 *
1320 * ETIME The program was terminated because it exceeded its Lua instruction
1321 * limit. Some portion of the channel program may have executed and
1322 * committed changes to disk. No output is returned through 'outnvl'.
1323 */
1324 int
1325 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1326 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1327 {
1328 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1329 memlimit, argnvl, outnvl));
1330 }
1331
1332 /*
1333 * Creates a checkpoint for the specified pool.
1334 *
1335 * If this function returns 0 the pool was successfully checkpointed.
1336 *
1337 * This method may also return:
1338 *
 * ZFS_ERR_CHECKPOINT_EXISTS
 *	The pool already has a checkpoint. A pool can only have one
 *	checkpoint at most, at any given time.
 *
 * ZFS_ERR_DISCARDING_CHECKPOINT
 *	ZFS is in the middle of discarding a checkpoint for this pool.
 *	The pool can be checkpointed again once the discard is done.
 *
 * ZFS_ERR_DEVRM_IN_PROGRESS
 *	A vdev is currently being removed. The pool cannot be
 *	checkpointed until the device removal is done.
 *
 * ZFS_ERR_VDEV_TOO_BIG
 *	One or more top-level vdevs exceed the maximum vdev size
 *	supported for this feature.
1354 */
1355 int
1356 lzc_pool_checkpoint(const char *pool)
1357 {
1358 int error;
1359
1360 nvlist_t *result = NULL;
1361 nvlist_t *args = fnvlist_alloc();
1362
1363 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1364
1365 fnvlist_free(args);
1366 fnvlist_free(result);
1367
1368 return (error);
1369 }
1370
1371 /*
1372 * Discard the checkpoint from the specified pool.
1373 *
1374 * If this function returns 0 the checkpoint was successfully discarded.
1375 *
1376 * This method may also return:
1377 *
1378 * ZFS_ERR_NO_CHECKPOINT
1379 * The pool does not have a checkpoint.
1380 *
1381 * ZFS_ERR_DISCARDING_CHECKPOINT
1382 * ZFS is already in the middle of discarding the checkpoint.
1383 */
1384 int
1385 lzc_pool_checkpoint_discard(const char *pool)
1386 {
1387 int error;
1388
1389 nvlist_t *result = NULL;
1390 nvlist_t *args = fnvlist_alloc();
1391
1392 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1393
1394 fnvlist_free(args);
1395 fnvlist_free(result);
1396
1397 return (error);
1398 }
1399
1400 /*
1401 * Executes a read-only channel program.
1402 *
1403 * A read-only channel program works programmatically the same way as a
1404 * normal channel program executed with lzc_channel_program(). The only
1405 * difference is it runs exclusively in open-context and therefore can
1406 * return faster. The downside to that, is that the program cannot change
1407 * on-disk state by calling functions from the zfs.sync submodule.
1408 *
1409 * The return values of this function (and their meaning) are exactly the
1410 * same as the ones described in lzc_channel_program().
1411 */
1412 int
1413 lzc_channel_program_nosync(const char *pool, const char *program,
1414 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1415 {
1416 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1417 memlimit, argnvl, outnvl));
1418 }
1419
/*
 * Performs key management functions
 *
 * lzc_load_key() takes the wrapping key to load for 'fsname';
 * lzc_change_key() additionally takes a crypt_cmd, which should be a value
 * from dcp_cmd_t. Whenever a command loads or changes a wrapping key, the
 * key is passed in the hidden_args nvlist so that it is not logged.
 */
1427 int
1428 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
1429 uint_t wkeylen)
1430 {
1431 int error;
1432 nvlist_t *ioc_args;
1433 nvlist_t *hidden_args;
1434
1435 if (wkeydata == NULL)
1436 return (EINVAL);
1437
1438 ioc_args = fnvlist_alloc();
1439 hidden_args = fnvlist_alloc();
1440 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
1441 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1442 if (noop)
1443 fnvlist_add_boolean(ioc_args, "noop");
1444 error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
1445 nvlist_free(hidden_args);
1446 nvlist_free(ioc_args);
1447
1448 return (error);
1449 }
1450
1451 int
1452 lzc_unload_key(const char *fsname)
1453 {
1454 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
1455 }
1456
1457 int
1458 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
1459 uint8_t *wkeydata, uint_t wkeylen)
1460 {
1461 int error;
1462 nvlist_t *ioc_args = fnvlist_alloc();
1463 nvlist_t *hidden_args = NULL;
1464
1465 fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
1466
1467 if (wkeydata != NULL) {
1468 hidden_args = fnvlist_alloc();
1469 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
1470 wkeylen);
1471 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1472 }
1473
1474 if (props != NULL)
1475 fnvlist_add_nvlist(ioc_args, "props", props);
1476
1477 error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
1478 nvlist_free(hidden_args);
1479 nvlist_free(ioc_args);
1480
1481 return (error);
1482 }
1483
1484 int
1485 lzc_reopen(const char *pool_name, boolean_t scrub_restart)
1486 {
1487 nvlist_t *args = fnvlist_alloc();
1488 int error;
1489
1490 fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
1491
1492 error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
1493 nvlist_free(args);
1494 return (error);
1495 }
1496
1497 /*
1498 * Changes initializing state.
1499 *
1500 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1501 * The key is ignored.
1502 *
1503 * If there are errors related to vdev arguments, per-vdev errors are returned
1504 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1505 * guid is stringified with PRIu64, and errno is one of the following as
1506 * an int64_t:
1507 * - ENODEV if the device was not found
 * - EINVAL if the device is not a leaf or is not concrete (e.g. missing)
1509 * - EROFS if the device is not writeable
1510 * - EBUSY start requested but the device is already being either
1511 * initialized or trimmed
1512 * - ESRCH cancel/suspend requested but device is not being initialized
1513 *
1514 * If the errlist is empty, then return value will be:
1515 * - EINVAL if one or more arguments was invalid
1516 * - Other spa_open failures
1517 * - 0 if the operation succeeded
1518 */
1519 int
1520 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1521 nvlist_t *vdevs, nvlist_t **errlist)
1522 {
1523 int error;
1524
1525 nvlist_t *args = fnvlist_alloc();
1526 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1527 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1528
1529 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1530
1531 fnvlist_free(args);
1532
1533 return (error);
1534 }
1535
1536 /*
1537 * Changes TRIM state.
1538 *
1539 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1540 * The key is ignored.
1541 *
1542 * If there are errors related to vdev arguments, per-vdev errors are returned
1543 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1544 * guid is stringified with PRIu64, and errno is one of the following as
1545 * an int64_t:
1546 * - ENODEV if the device was not found
 * - EINVAL if the device is not a leaf or is not concrete (e.g. missing)
 * - EROFS if the device is not writeable
 * - EBUSY start requested but the device is already being either trimmed
 *   or initialized
 * - ESRCH cancel/suspend requested but device is not being trimmed
1552 * - EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
1553 *
1554 * If the errlist is empty, then return value will be:
1555 * - EINVAL if one or more arguments was invalid
1556 * - Other spa_open failures
1557 * - 0 if the operation succeeded
1558 */
1559 int
1560 lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
1561 boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
1562 {
1563 int error;
1564
1565 nvlist_t *args = fnvlist_alloc();
1566 fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
1567 fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
1568 fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
1569 fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
1570
1571 error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
1572
1573 fnvlist_free(args);
1574
1575 return (error);
1576 }
1577
1578 /*
1579 * Create a redaction bookmark named bookname by redacting snapshot with respect
1580 * to all the snapshots in snapnv.
1581 */
1582 int
1583 lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
1584 {
1585 nvlist_t *args = fnvlist_alloc();
1586 fnvlist_add_string(args, "bookname", bookname);
1587 fnvlist_add_nvlist(args, "snapnv", snapnv);
1588 int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
1589 fnvlist_free(args);
1590 return (error);
1591 }