]> git.proxmox.com Git - mirror_zfs.git/blob - lib/libzfs_core/libzfs_core.c
Fix typo "/zbin/zpool" -> "/sbin/zpool"
[mirror_zfs.git] / lib / libzfs_core / libzfs_core.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 * Copyright (c) 2017 Datto Inc.
26 * Copyright 2017 RackTop Systems.
27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
28 */
29
30 /*
31 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
32 * It has the following characteristics:
33 *
34 * - Thread Safe. libzfs_core is accessible concurrently from multiple
35 * threads. This is accomplished primarily by avoiding global data
36 * (e.g. caching). Since it's thread-safe, there is no reason for a
37 * process to have multiple libzfs "instances". Therefore, we store
38 * our few pieces of data (e.g. the file descriptor) in global
39 * variables. The fd is reference-counted so that the libzfs_core
40 * library can be "initialized" multiple times (e.g. by different
41 * consumers within the same process).
42 *
43 * - Committed Interface. The libzfs_core interface will be committed,
44 * therefore consumers can compile against it and be confident that
45 * their code will continue to work on future releases of this code.
46 * Currently, the interface is Evolving (not Committed), but we intend
47 * to commit to it once it is more complete and we determine that it
48 * meets the needs of all consumers.
49 *
50 * - Programmatic Error Handling. libzfs_core communicates errors with
51 * defined error numbers, and doesn't print anything to stdout/stderr.
52 *
53 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
54 * to/from the kernel ioctls. There is generally a 1:1 correspondence
55 * between libzfs_core functions and ioctls to /dev/zfs.
56 *
57 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
58 * with kernel ioctls, and kernel ioctls are generally atomic, each
59 * libzfs_core function is atomic. For example, creating multiple
60 * snapshots with a single call to lzc_snapshot() is atomic -- it
61 * can't fail with only some of the requested snapshots created, even
62 * in the event of power loss or system crash.
63 *
64 * - Continued libzfs Support. Some higher-level operations (e.g.
65 * support for "zfs send -R") are too complicated to fit the scope of
66 * libzfs_core. This functionality will continue to live in libzfs.
67 * Where appropriate, libzfs will use the underlying atomic operations
68 * of libzfs_core. For example, libzfs may implement "zfs send -R |
69 * zfs receive" by using individual "send one snapshot", rename,
70 * destroy, and "receive one snapshot" operations in libzfs_core.
71 * /sbin/zfs and /sbin/zpool will link with both libzfs and
72 * libzfs_core. Other consumers should aim to use only libzfs_core,
73 * since that will be the supported, stable interface going forwards.
74 */
75
76 #include <libzfs_core.h>
77 #include <ctype.h>
78 #include <unistd.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #ifdef ZFS_DEBUG
82 #include <stdio.h>
83 #endif
84 #include <errno.h>
85 #include <fcntl.h>
86 #include <pthread.h>
87 #include <sys/nvpair.h>
88 #include <sys/param.h>
89 #include <sys/types.h>
90 #include <sys/stat.h>
91 #include <sys/zfs_ioctl.h>
92
/*
 * Library-wide state shared by every consumer in the process.
 *
 * g_lock     - taken by libzfs_core_init()/libzfs_core_fini() while they
 *              update the two variables below.
 * g_refcount - number of libzfs_core_init() calls not yet matched by
 *              libzfs_core_fini().
 * g_fd       - descriptor for /dev/zfs, or -1 while it is not open.
 */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount = 0;
static int g_fd = -1;
96
#ifdef ZFS_DEBUG
/* ioctl command to fail artificially; 0 means no override is installed. */
static zfs_ioc_t fail_ioc_cmd;
/* Error returned by lzc_ioctl() in place of issuing fail_ioc_cmd. */
static zfs_errno_t fail_ioc_err;

/*
 * Install the ioctl failure override described by the ZFS_IOC_TEST
 * environment variable, unless an override is already installed.
 *
 * To test running newer user space binaries with kernels that don't yet
 * support an ioctl or a new ioctl arg we provide an override to
 * intentionally fail an ioctl.
 *
 * USAGE:
 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
 *
 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
 *
 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
 * cannot checkpoint 'tank': the loaded zfs module does not support
 * this operation. A reboot may be required to enable this operation.
 *
 * A missing or malformed variable, or a command number outside
 * (0, ZFS_IOC_LAST), leaves the override uninstalled.
 */
static void
libzfs_core_debug_ioc(void)
{
	const char *ioc_test;
	int ioc_num = 0;
	int ioc_err = 0;

	if (fail_ioc_cmd != 0)
		return;

	ioc_test = getenv("ZFS_IOC_TEST");
	if (ioc_test == NULL)
		return;

	/*
	 * The "%i" conversion stores through an int pointer, so parse into
	 * signed ints and reject negative command numbers explicitly.
	 */
	if (sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) != 2)
		return;
	if (ioc_num <= 0 || ioc_num >= ZFS_IOC_LAST)
		return;

	fail_ioc_cmd = ioc_num;
	fail_ioc_err = ioc_err;
}
#endif
132
133 int
134 libzfs_core_init(void)
135 {
136 (void) pthread_mutex_lock(&g_lock);
137 if (g_refcount == 0) {
138 g_fd = open("/dev/zfs", O_RDWR);
139 if (g_fd < 0) {
140 (void) pthread_mutex_unlock(&g_lock);
141 return (errno);
142 }
143 }
144 g_refcount++;
145
146 #ifdef ZFS_DEBUG
147 libzfs_core_debug_ioc();
148 #endif
149 (void) pthread_mutex_unlock(&g_lock);
150 return (0);
151 }
152
153 void
154 libzfs_core_fini(void)
155 {
156 (void) pthread_mutex_lock(&g_lock);
157 ASSERT3S(g_refcount, >, 0);
158
159 if (g_refcount > 0)
160 g_refcount--;
161
162 if (g_refcount == 0 && g_fd != -1) {
163 (void) close(g_fd);
164 g_fd = -1;
165 }
166 (void) pthread_mutex_unlock(&g_lock);
167 }
168
169 static int
170 lzc_ioctl(zfs_ioc_t ioc, const char *name,
171 nvlist_t *source, nvlist_t **resultp)
172 {
173 zfs_cmd_t zc = {"\0"};
174 int error = 0;
175 char *packed = NULL;
176 size_t size = 0;
177
178 ASSERT3S(g_refcount, >, 0);
179 VERIFY3S(g_fd, !=, -1);
180
181 #ifdef ZFS_DEBUG
182 if (ioc == fail_ioc_cmd)
183 return (fail_ioc_err);
184 #endif
185
186 if (name != NULL)
187 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
188
189 if (source != NULL) {
190 packed = fnvlist_pack(source, &size);
191 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
192 zc.zc_nvlist_src_size = size;
193 }
194
195 if (resultp != NULL) {
196 *resultp = NULL;
197 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
198 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
199 ZCP_ARG_MEMLIMIT);
200 } else {
201 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
202 }
203 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
204 malloc(zc.zc_nvlist_dst_size);
205 if (zc.zc_nvlist_dst == (uint64_t)0) {
206 error = ENOMEM;
207 goto out;
208 }
209 }
210
211 while (ioctl(g_fd, ioc, &zc) != 0) {
212 /*
213 * If ioctl exited with ENOMEM, we retry the ioctl after
214 * increasing the size of the destination nvlist.
215 *
216 * Channel programs that exit with ENOMEM ran over the
217 * lua memory sandbox; they should not be retried.
218 */
219 if (errno == ENOMEM && resultp != NULL &&
220 ioc != ZFS_IOC_CHANNEL_PROGRAM) {
221 free((void *)(uintptr_t)zc.zc_nvlist_dst);
222 zc.zc_nvlist_dst_size *= 2;
223 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
224 malloc(zc.zc_nvlist_dst_size);
225 if (zc.zc_nvlist_dst == (uint64_t)0) {
226 error = ENOMEM;
227 goto out;
228 }
229 } else {
230 error = errno;
231 break;
232 }
233 }
234 if (zc.zc_nvlist_dst_filled) {
235 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
236 zc.zc_nvlist_dst_size);
237 }
238
239 out:
240 if (packed != NULL)
241 fnvlist_pack_free(packed, size);
242 free((void *)(uintptr_t)zc.zc_nvlist_dst);
243 return (error);
244 }
245
246 int
247 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
248 uint8_t *wkeydata, uint_t wkeylen)
249 {
250 int error;
251 nvlist_t *hidden_args = NULL;
252 nvlist_t *args = fnvlist_alloc();
253
254 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
255 if (props != NULL)
256 fnvlist_add_nvlist(args, "props", props);
257
258 if (wkeydata != NULL) {
259 hidden_args = fnvlist_alloc();
260 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
261 wkeylen);
262 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
263 }
264
265 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
266 nvlist_free(hidden_args);
267 nvlist_free(args);
268 return (error);
269 }
270
271 int
272 lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
273 {
274 int error;
275 nvlist_t *hidden_args = NULL;
276 nvlist_t *args = fnvlist_alloc();
277
278 fnvlist_add_string(args, "origin", origin);
279 if (props != NULL)
280 fnvlist_add_nvlist(args, "props", props);
281 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
282 nvlist_free(hidden_args);
283 nvlist_free(args);
284 return (error);
285 }
286
287 int
288 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
289 {
290 /*
291 * The promote ioctl is still legacy, so we need to construct our
292 * own zfs_cmd_t rather than using lzc_ioctl().
293 */
294 zfs_cmd_t zc = { "\0" };
295
296 ASSERT3S(g_refcount, >, 0);
297 VERIFY3S(g_fd, !=, -1);
298
299 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
300 if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
301 int error = errno;
302 if (error == EEXIST && snapnamebuf != NULL)
303 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
304 return (error);
305 }
306 return (0);
307 }
308
309 int
310 lzc_remap(const char *fsname)
311 {
312 int error;
313 nvlist_t *args = fnvlist_alloc();
314 error = lzc_ioctl(ZFS_IOC_REMAP, fsname, args, NULL);
315 nvlist_free(args);
316 return (error);
317 }
318
319 int
320 lzc_rename(const char *source, const char *target)
321 {
322 zfs_cmd_t zc = { "\0" };
323 int error;
324 ASSERT3S(g_refcount, >, 0);
325 VERIFY3S(g_fd, !=, -1);
326 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
327 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
328 error = ioctl(g_fd, ZFS_IOC_RENAME, &zc);
329 if (error != 0)
330 error = errno;
331 return (error);
332 }
333 int
334 lzc_destroy(const char *fsname)
335 {
336 int error;
337 nvlist_t *args = fnvlist_alloc();
338 error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
339 nvlist_free(args);
340 return (error);
341 }
342
343 /*
344 * Creates snapshots.
345 *
346 * The keys in the snaps nvlist are the snapshots to be created.
347 * They must all be in the same pool.
348 *
349 * The props nvlist is properties to set. Currently only user properties
350 * are supported. { user:prop_name -> string value }
351 *
352 * The returned results nvlist will have an entry for each snapshot that failed.
353 * The value will be the (int32) error code.
354 *
355 * The return value will be 0 if all snapshots were created, otherwise it will
356 * be the errno of a (unspecified) snapshot that failed.
357 */
358 int
359 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
360 {
361 nvpair_t *elem;
362 nvlist_t *args;
363 int error;
364 char pool[ZFS_MAX_DATASET_NAME_LEN];
365
366 *errlist = NULL;
367
368 /* determine the pool name */
369 elem = nvlist_next_nvpair(snaps, NULL);
370 if (elem == NULL)
371 return (0);
372 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
373 pool[strcspn(pool, "/@")] = '\0';
374
375 args = fnvlist_alloc();
376 fnvlist_add_nvlist(args, "snaps", snaps);
377 if (props != NULL)
378 fnvlist_add_nvlist(args, "props", props);
379
380 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
381 nvlist_free(args);
382
383 return (error);
384 }
385
386 /*
387 * Destroys snapshots.
388 *
389 * The keys in the snaps nvlist are the snapshots to be destroyed.
390 * They must all be in the same pool.
391 *
392 * Snapshots that do not exist will be silently ignored.
393 *
394 * If 'defer' is not set, and a snapshot has user holds or clones, the
395 * destroy operation will fail and none of the snapshots will be
396 * destroyed.
397 *
398 * If 'defer' is set, and a snapshot has user holds or clones, it will be
399 * marked for deferred destruction, and will be destroyed when the last hold
400 * or clone is removed/destroyed.
401 *
402 * The return value will be 0 if all snapshots were destroyed (or marked for
403 * later destruction if 'defer' is set) or didn't exist to begin with.
404 *
405 * Otherwise the return value will be the errno of a (unspecified) snapshot
406 * that failed, no snapshots will be destroyed, and the errlist will have an
407 * entry for each snapshot that failed. The value in the errlist will be
408 * the (int32) error code.
409 */
410 int
411 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
412 {
413 nvpair_t *elem;
414 nvlist_t *args;
415 int error;
416 char pool[ZFS_MAX_DATASET_NAME_LEN];
417
418 /* determine the pool name */
419 elem = nvlist_next_nvpair(snaps, NULL);
420 if (elem == NULL)
421 return (0);
422 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
423 pool[strcspn(pool, "/@")] = '\0';
424
425 args = fnvlist_alloc();
426 fnvlist_add_nvlist(args, "snaps", snaps);
427 if (defer)
428 fnvlist_add_boolean(args, "defer");
429
430 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
431 nvlist_free(args);
432
433 return (error);
434 }
435
436 int
437 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
438 uint64_t *usedp)
439 {
440 nvlist_t *args;
441 nvlist_t *result;
442 int err;
443 char fs[ZFS_MAX_DATASET_NAME_LEN];
444 char *atp;
445
446 /* determine the fs name */
447 (void) strlcpy(fs, firstsnap, sizeof (fs));
448 atp = strchr(fs, '@');
449 if (atp == NULL)
450 return (EINVAL);
451 *atp = '\0';
452
453 args = fnvlist_alloc();
454 fnvlist_add_string(args, "firstsnap", firstsnap);
455
456 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
457 nvlist_free(args);
458 if (err == 0)
459 *usedp = fnvlist_lookup_uint64(result, "used");
460 fnvlist_free(result);
461
462 return (err);
463 }
464
465 boolean_t
466 lzc_exists(const char *dataset)
467 {
468 /*
469 * The objset_stats ioctl is still legacy, so we need to construct our
470 * own zfs_cmd_t rather than using lzc_ioctl().
471 */
472 zfs_cmd_t zc = {"\0"};
473
474 ASSERT3S(g_refcount, >, 0);
475 VERIFY3S(g_fd, !=, -1);
476
477 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
478 return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
479 }
480
481 /*
482 * outnvl is unused.
483 * It was added to preserve the function signature in case it is
484 * needed in the future.
485 */
486 /*ARGSUSED*/
487 int
488 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
489 {
490 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
491 }
492
493 /*
494 * Create "user holds" on snapshots. If there is a hold on a snapshot,
495 * the snapshot can not be destroyed. (However, it can be marked for deletion
496 * by lzc_destroy_snaps(defer=B_TRUE).)
497 *
498 * The keys in the nvlist are snapshot names.
499 * The snapshots must all be in the same pool.
500 * The value is the name of the hold (string type).
501 *
502 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
503 * In this case, when the cleanup_fd is closed (including on process
504 * termination), the holds will be released. If the system is shut down
505 * uncleanly, the holds will be released when the pool is next opened
506 * or imported.
507 *
508 * Holds for snapshots which don't exist will be skipped and have an entry
509 * added to errlist, but will not cause an overall failure.
510 *
511 * The return value will be 0 if all holds, for snapshots that existed,
512 * were successfully created.
513 *
514 * Otherwise the return value will be the errno of a (unspecified) hold that
515 * failed and no holds will be created.
516 *
517 * In all cases the errlist will have an entry for each hold that failed
518 * (name = snapshot), with its value being the error code (int32).
519 */
520 int
521 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
522 {
523 char pool[ZFS_MAX_DATASET_NAME_LEN];
524 nvlist_t *args;
525 nvpair_t *elem;
526 int error;
527
528 /* determine the pool name */
529 elem = nvlist_next_nvpair(holds, NULL);
530 if (elem == NULL)
531 return (0);
532 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
533 pool[strcspn(pool, "/@")] = '\0';
534
535 args = fnvlist_alloc();
536 fnvlist_add_nvlist(args, "holds", holds);
537 if (cleanup_fd != -1)
538 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
539
540 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
541 nvlist_free(args);
542 return (error);
543 }
544
545 /*
546 * Release "user holds" on snapshots. If the snapshot has been marked for
547 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
548 * any clones, and all the user holds are removed, then the snapshot will be
549 * destroyed.
550 *
551 * The keys in the nvlist are snapshot names.
552 * The snapshots must all be in the same pool.
553 * The value is an nvlist whose keys are the holds to remove.
554 *
555 * Holds which failed to release because they didn't exist will have an entry
556 * added to errlist, but will not cause an overall failure.
557 *
558 * The return value will be 0 if the nvl holds was empty or all holds that
559 * existed, were successfully removed.
560 *
561 * Otherwise the return value will be the errno of a (unspecified) hold that
562 * failed to release and no holds will be released.
563 *
564 * In all cases the errlist will have an entry for each hold that failed to
565 * to release.
566 */
567 int
568 lzc_release(nvlist_t *holds, nvlist_t **errlist)
569 {
570 char pool[ZFS_MAX_DATASET_NAME_LEN];
571 nvpair_t *elem;
572
573 /* determine the pool name */
574 elem = nvlist_next_nvpair(holds, NULL);
575 if (elem == NULL)
576 return (0);
577 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
578 pool[strcspn(pool, "/@")] = '\0';
579
580 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
581 }
582
583 /*
584 * Retrieve list of user holds on the specified snapshot.
585 *
586 * On success, *holdsp will be set to an nvlist which the caller must free.
587 * The keys are the names of the holds, and the value is the creation time
588 * of the hold (uint64) in seconds since the epoch.
589 */
590 int
591 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
592 {
593 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
594 }
595
596 /*
597 * Generate a zfs send stream for the specified snapshot and write it to
598 * the specified file descriptor.
599 *
600 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
601 *
602 * If "from" is NULL, a full (non-incremental) stream will be sent.
603 * If "from" is non-NULL, it must be the full name of a snapshot or
604 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
605 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
606 * bookmark must represent an earlier point in the history of "snapname").
607 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
608 * or it can be the origin of "snapname"'s filesystem, or an earlier
609 * snapshot in the origin, etc.
610 *
611 * "fd" is the file descriptor to write the send stream to.
612 *
613 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
614 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
615 * records with drr_blksz > 128K.
616 *
617 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
618 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
619 * which the receiving system must support (as indicated by support
620 * for the "embedded_data" feature).
621 *
622 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
623 * compressed WRITE records for blocks which are compressed on disk and in
624 * memory. If the lz4_compress feature is active on the sending system, then
625 * the receiving system must have that feature enabled as well.
626 *
627 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
628 * datasets, by sending data exactly as it exists on disk. This allows backups
629 * to be taken even if encryption keys are not currently loaded.
630 */
631 int
632 lzc_send(const char *snapname, const char *from, int fd,
633 enum lzc_send_flags flags)
634 {
635 return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
636 }
637
638 int
639 lzc_send_resume(const char *snapname, const char *from, int fd,
640 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
641 {
642 nvlist_t *args;
643 int err;
644
645 args = fnvlist_alloc();
646 fnvlist_add_int32(args, "fd", fd);
647 if (from != NULL)
648 fnvlist_add_string(args, "fromsnap", from);
649 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
650 fnvlist_add_boolean(args, "largeblockok");
651 if (flags & LZC_SEND_FLAG_EMBED_DATA)
652 fnvlist_add_boolean(args, "embedok");
653 if (flags & LZC_SEND_FLAG_COMPRESS)
654 fnvlist_add_boolean(args, "compressok");
655 if (flags & LZC_SEND_FLAG_RAW)
656 fnvlist_add_boolean(args, "rawok");
657 if (resumeobj != 0 || resumeoff != 0) {
658 fnvlist_add_uint64(args, "resume_object", resumeobj);
659 fnvlist_add_uint64(args, "resume_offset", resumeoff);
660 }
661 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
662 nvlist_free(args);
663 return (err);
664 }
665
666 /*
667 * "from" can be NULL, a snapshot, or a bookmark.
668 *
669 * If from is NULL, a full (non-incremental) stream will be estimated. This
670 * is calculated very efficiently.
671 *
672 * If from is a snapshot, lzc_send_space uses the deadlists attached to
673 * each snapshot to efficiently estimate the stream size.
674 *
675 * If from is a bookmark, the indirect blocks in the destination snapshot
676 * are traversed, looking for blocks with a birth time since the creation TXG of
677 * the snapshot this bookmark was created from. This will result in
678 * significantly more I/O and be less efficient than a send space estimation on
679 * an equivalent snapshot.
680 */
681 int
682 lzc_send_space(const char *snapname, const char *from,
683 enum lzc_send_flags flags, uint64_t *spacep)
684 {
685 nvlist_t *args;
686 nvlist_t *result;
687 int err;
688
689 args = fnvlist_alloc();
690 if (from != NULL)
691 fnvlist_add_string(args, "from", from);
692 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
693 fnvlist_add_boolean(args, "largeblockok");
694 if (flags & LZC_SEND_FLAG_EMBED_DATA)
695 fnvlist_add_boolean(args, "embedok");
696 if (flags & LZC_SEND_FLAG_COMPRESS)
697 fnvlist_add_boolean(args, "compressok");
698 if (flags & LZC_SEND_FLAG_RAW)
699 fnvlist_add_boolean(args, "rawok");
700 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
701 nvlist_free(args);
702 if (err == 0)
703 *spacep = fnvlist_lookup_uint64(result, "space");
704 nvlist_free(result);
705 return (err);
706 }
707
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf', looping over short reads.
 *
 * Returns 0 once all bytes have been read, or EIO when read(2) fails, end
 * of file arrives first, or 'ilen' is negative.  Reads interrupted by a
 * signal (EINTR) are retried.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0) {
			/* An interrupted read transferred nothing; retry. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;	/* premature end of file */

		/* Only advance on a successful transfer. */
		cp += rv;
		len -= (int)rv;
	}

	if (len != 0)
		return (EIO);

	return (0);
}
726
727 /*
728 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
729 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
730 * stream options but is currently only used for resumable streams. This way
731 * updated user space utilities will interoperate with older kernel modules.
732 *
733 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
734 */
735 static int
736 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
737 uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
738 boolean_t resumable, boolean_t raw, int input_fd,
739 const dmu_replay_record_t *begin_record, int cleanup_fd,
740 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
741 nvlist_t **errors)
742 {
743 dmu_replay_record_t drr;
744 char fsname[MAXPATHLEN];
745 char *atp;
746 int error;
747
748 ASSERT3S(g_refcount, >, 0);
749 VERIFY3S(g_fd, !=, -1);
750
751 /* Set 'fsname' to the name of containing filesystem */
752 (void) strlcpy(fsname, snapname, sizeof (fsname));
753 atp = strchr(fsname, '@');
754 if (atp == NULL)
755 return (EINVAL);
756 *atp = '\0';
757
758 /* If the fs does not exist, try its parent. */
759 if (!lzc_exists(fsname)) {
760 char *slashp = strrchr(fsname, '/');
761 if (slashp == NULL)
762 return (ENOENT);
763 *slashp = '\0';
764 }
765
766 /*
767 * The begin_record is normally a non-byteswapped BEGIN record.
768 * For resumable streams it may be set to any non-byteswapped
769 * dmu_replay_record_t.
770 */
771 if (begin_record == NULL) {
772 error = recv_read(input_fd, &drr, sizeof (drr));
773 if (error != 0)
774 return (error);
775 } else {
776 drr = *begin_record;
777 }
778
779 /*
780 * Raw receives, resumable receives, and receives that include a
781 * wrapping key all use the new interface.
782 */
783 if (resumable || raw || wkeydata != NULL) {
784 nvlist_t *outnvl = NULL;
785 nvlist_t *innvl = fnvlist_alloc();
786
787 fnvlist_add_string(innvl, "snapname", snapname);
788
789 if (recvdprops != NULL)
790 fnvlist_add_nvlist(innvl, "props", recvdprops);
791
792 if (localprops != NULL)
793 fnvlist_add_nvlist(innvl, "localprops", localprops);
794
795 if (wkeydata != NULL) {
796 /*
797 * wkeydata must be placed in the special
798 * ZPOOL_HIDDEN_ARGS nvlist so that it
799 * will not be printed to the zpool history.
800 */
801 nvlist_t *hidden_args = fnvlist_alloc();
802 fnvlist_add_uint8_array(hidden_args, "wkeydata",
803 wkeydata, wkeylen);
804 fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
805 hidden_args);
806 nvlist_free(hidden_args);
807 }
808
809 if (origin != NULL && strlen(origin))
810 fnvlist_add_string(innvl, "origin", origin);
811
812 fnvlist_add_byte_array(innvl, "begin_record",
813 (uchar_t *)&drr, sizeof (drr));
814
815 fnvlist_add_int32(innvl, "input_fd", input_fd);
816
817 if (force)
818 fnvlist_add_boolean(innvl, "force");
819
820 if (resumable)
821 fnvlist_add_boolean(innvl, "resumable");
822
823 if (cleanup_fd >= 0)
824 fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);
825
826 if (action_handle != NULL)
827 fnvlist_add_uint64(innvl, "action_handle",
828 *action_handle);
829
830 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
831
832 if (error == 0 && read_bytes != NULL)
833 error = nvlist_lookup_uint64(outnvl, "read_bytes",
834 read_bytes);
835
836 if (error == 0 && errflags != NULL)
837 error = nvlist_lookup_uint64(outnvl, "error_flags",
838 errflags);
839
840 if (error == 0 && action_handle != NULL)
841 error = nvlist_lookup_uint64(outnvl, "action_handle",
842 action_handle);
843
844 if (error == 0 && errors != NULL) {
845 nvlist_t *nvl;
846 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
847 if (error == 0)
848 *errors = fnvlist_dup(nvl);
849 }
850
851 fnvlist_free(innvl);
852 fnvlist_free(outnvl);
853 } else {
854 zfs_cmd_t zc = {"\0"};
855 char *packed = NULL;
856 size_t size;
857
858 ASSERT3S(g_refcount, >, 0);
859
860 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
861 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
862
863 if (recvdprops != NULL) {
864 packed = fnvlist_pack(recvdprops, &size);
865 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
866 zc.zc_nvlist_src_size = size;
867 }
868
869 if (localprops != NULL) {
870 packed = fnvlist_pack(localprops, &size);
871 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
872 zc.zc_nvlist_conf_size = size;
873 }
874
875 if (origin != NULL)
876 (void) strlcpy(zc.zc_string, origin,
877 sizeof (zc.zc_string));
878
879 ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
880 zc.zc_begin_record = drr.drr_u.drr_begin;
881 zc.zc_guid = force;
882 zc.zc_cookie = input_fd;
883 zc.zc_cleanup_fd = -1;
884 zc.zc_action_handle = 0;
885
886 if (cleanup_fd >= 0)
887 zc.zc_cleanup_fd = cleanup_fd;
888
889 if (action_handle != NULL)
890 zc.zc_action_handle = *action_handle;
891
892 zc.zc_nvlist_dst_size = 128 * 1024;
893 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
894 malloc(zc.zc_nvlist_dst_size);
895
896 error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
897 if (error != 0) {
898 error = errno;
899 } else {
900 if (read_bytes != NULL)
901 *read_bytes = zc.zc_cookie;
902
903 if (errflags != NULL)
904 *errflags = zc.zc_obj;
905
906 if (action_handle != NULL)
907 *action_handle = zc.zc_action_handle;
908
909 if (errors != NULL)
910 VERIFY0(nvlist_unpack(
911 (void *)(uintptr_t)zc.zc_nvlist_dst,
912 zc.zc_nvlist_dst_size, errors, KM_SLEEP));
913 }
914
915 if (packed != NULL)
916 fnvlist_pack_free(packed, size);
917 free((void *)(uintptr_t)zc.zc_nvlist_dst);
918 }
919
920 return (error);
921 }
922
923 /*
924 * The simplest receive case: receive from the specified fd, creating the
925 * specified snapshot. Apply the specified properties as "received" properties
926 * (which can be overridden by locally-set properties). If the stream is a
927 * clone, its origin snapshot must be specified by 'origin'. The 'force'
928 * flag will cause the target filesystem to be rolled back or destroyed if
929 * necessary to receive.
930 *
931 * Return 0 on success or an errno on failure.
932 *
933 * Note: this interface does not work on dedup'd streams
934 * (those with DMU_BACKUP_FEATURE_DEDUP).
935 */
936 int
937 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
938 boolean_t force, boolean_t raw, int fd)
939 {
940 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
941 B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
942 }
943
944 /*
945 * Like lzc_receive, but if the receive fails due to premature stream
946 * termination, the intermediate state will be preserved on disk. In this
947 * case, ECKSUM will be returned. The receive may subsequently be resumed
948 * with a resuming send stream generated by lzc_send_resume().
949 */
950 int
951 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
952 boolean_t force, boolean_t raw, int fd)
953 {
954 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
955 B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
956 }
957
958 /*
959 * Like lzc_receive, but allows the caller to read the begin record and then to
960 * pass it in. That could be useful if the caller wants to derive, for example,
961 * the snapname or the origin parameters based on the information contained in
962 * the begin record.
963 * The begin record must be in its original form as read from the stream,
964 * in other words, it should not be byteswapped.
965 *
966 * The 'resumable' parameter allows to obtain the same behavior as with
967 * lzc_receive_resumable.
968 */
969 int
970 lzc_receive_with_header(const char *snapname, nvlist_t *props,
971 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
972 int fd, const dmu_replay_record_t *begin_record)
973 {
974 if (begin_record == NULL)
975 return (EINVAL);
976
977 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
978 resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, NULL));
979 }
980
981 /*
982 * Like lzc_receive, but allows the caller to pass all supported arguments
983 * and retrieve all values returned. The only additional input parameter
984 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
985 *
986 * The following parameters all provide return values. Several may be set
987 * in the failure case and will contain additional information.
988 *
989 * The 'read_bytes' value will be set to the total number of bytes read.
990 *
991 * The 'errflags' value will contain zprop_errflags_t flags which are
992 * used to describe any failures.
993 *
994 * The 'action_handle' is used to pass the handle for this guid/ds mapping.
995 * It should be set to zero on first call and will contain an updated handle
996 * on success, it should be passed in subsequent calls.
997 *
998 * The 'errors' nvlist contains an entry for each unapplied received
999 * property. Callers are responsible for freeing this nvlist.
1000 */
1001 int lzc_receive_one(const char *snapname, nvlist_t *props,
1002 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1003 int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
1004 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1005 nvlist_t **errors)
1006 {
1007 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1008 resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes,
1009 errflags, action_handle, errors));
1010 }
1011
1012 /*
1013 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1014 * argument.
1015 *
1016 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1017 * exclude ('zfs receive -x') properties. Callers are responsible for freeing
1018 * this nvlist
1019 */
1020 int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
1021 nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
1022 boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
1023 const dmu_replay_record_t *begin_record, int cleanup_fd,
1024 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1025 nvlist_t **errors)
1026 {
1027 return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
1028 force, resumable, raw, input_fd, begin_record, cleanup_fd,
1029 read_bytes, errflags, action_handle, errors));
1030 }
1031
1032 /*
1033 * Roll back this filesystem or volume to its most recent snapshot.
1034 * If snapnamebuf is not NULL, it will be filled in with the name
1035 * of the most recent snapshot.
1036 * Note that the latest snapshot may change if a new one is concurrently
1037 * created or the current one is destroyed. lzc_rollback_to can be used
1038 * to roll back to a specific latest snapshot.
1039 *
1040 * Return 0 on success or an errno on failure.
1041 */
1042 int
1043 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
1044 {
1045 nvlist_t *args;
1046 nvlist_t *result;
1047 int err;
1048
1049 args = fnvlist_alloc();
1050 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1051 nvlist_free(args);
1052 if (err == 0 && snapnamebuf != NULL) {
1053 const char *snapname = fnvlist_lookup_string(result, "target");
1054 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
1055 }
1056 nvlist_free(result);
1057
1058 return (err);
1059 }
1060
1061 /*
1062 * Roll back this filesystem or volume to the specified snapshot,
1063 * if possible.
1064 *
1065 * Return 0 on success or an errno on failure.
1066 */
1067 int
1068 lzc_rollback_to(const char *fsname, const char *snapname)
1069 {
1070 nvlist_t *args;
1071 nvlist_t *result;
1072 int err;
1073
1074 args = fnvlist_alloc();
1075 fnvlist_add_string(args, "target", snapname);
1076 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1077 nvlist_free(args);
1078 nvlist_free(result);
1079 return (err);
1080 }
1081
1082 /*
1083 * Creates bookmarks.
1084 *
1085 * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
1086 * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and
1087 * snapshots must be in the same pool.
1088 *
1089 * The returned results nvlist will have an entry for each bookmark that failed.
1090 * The value will be the (int32) error code.
1091 *
1092 * The return value will be 0 if all bookmarks were created, otherwise it will
1093 * be the errno of a (undetermined) bookmarks that failed.
1094 */
1095 int
1096 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
1097 {
1098 nvpair_t *elem;
1099 int error;
1100 char pool[ZFS_MAX_DATASET_NAME_LEN];
1101
1102 /* determine the pool name */
1103 elem = nvlist_next_nvpair(bookmarks, NULL);
1104 if (elem == NULL)
1105 return (0);
1106 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1107 pool[strcspn(pool, "/#")] = '\0';
1108
1109 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
1110
1111 return (error);
1112 }
1113
1114 /*
1115 * Retrieve bookmarks.
1116 *
1117 * Retrieve the list of bookmarks for the given file system. The props
1118 * parameter is an nvlist of property names (with no values) that will be
1119 * returned for each bookmark.
1120 *
1121 * The following are valid properties on bookmarks, all of which are numbers
1122 * (represented as uint64 in the nvlist)
1123 *
1124 * "guid" - globally unique identifier of the snapshot it refers to
1125 * "createtxg" - txg when the snapshot it refers to was created
1126 * "creation" - timestamp when the snapshot it refers to was created
1127 * "ivsetguid" - IVset guid for identifying encrypted snapshots
1128 *
1129 * The format of the returned nvlist as follows:
1130 * <short name of bookmark> -> {
1131 * <name of property> -> {
1132 * "value" -> uint64
1133 * }
1134 * }
1135 */
1136 int
1137 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
1138 {
1139 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
1140 }
1141
1142 /*
1143 * Destroys bookmarks.
1144 *
1145 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1146 * They must all be in the same pool. Bookmarks are specified as
1147 * <fs>#<bmark>.
1148 *
1149 * Bookmarks that do not exist will be silently ignored.
1150 *
1151 * The return value will be 0 if all bookmarks that existed were destroyed.
1152 *
1153 * Otherwise the return value will be the errno of a (undetermined) bookmark
1154 * that failed, no bookmarks will be destroyed, and the errlist will have an
1155 * entry for each bookmarks that failed. The value in the errlist will be
1156 * the (int32) error code.
1157 */
1158 int
1159 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1160 {
1161 nvpair_t *elem;
1162 int error;
1163 char pool[ZFS_MAX_DATASET_NAME_LEN];
1164
1165 /* determine the pool name */
1166 elem = nvlist_next_nvpair(bmarks, NULL);
1167 if (elem == NULL)
1168 return (0);
1169 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1170 pool[strcspn(pool, "/#")] = '\0';
1171
1172 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1173
1174 return (error);
1175 }
1176
1177 static int
1178 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
1179 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1180 {
1181 int error;
1182 nvlist_t *args;
1183
1184 args = fnvlist_alloc();
1185 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
1186 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
1187 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
1188 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
1189 fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
1190 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
1191 fnvlist_free(args);
1192
1193 return (error);
1194 }
1195
1196 /*
1197 * Executes a channel program.
1198 *
1199 * If this function returns 0 the channel program was successfully loaded and
1200 * ran without failing. Note that individual commands the channel program ran
1201 * may have failed and the channel program is responsible for reporting such
1202 * errors through outnvl if they are important.
1203 *
1204 * This method may also return:
1205 *
1206 * EINVAL The program contains syntax errors, or an invalid memory or time
1207 * limit was given. No part of the channel program was executed.
1208 * If caused by syntax errors, 'outnvl' contains information about the
1209 * errors.
1210 *
1211 * ECHRNG The program was executed, but encountered a runtime error, such as
1212 * calling a function with incorrect arguments, invoking the error()
1213 * function directly, failing an assert() command, etc. Some portion
1214 * of the channel program may have executed and committed changes.
1215 * Information about the failure can be found in 'outnvl'.
1216 *
1217 * ENOMEM The program fully executed, but the output buffer was not large
1218 * enough to store the returned value. No output is returned through
1219 * 'outnvl'.
1220 *
1221 * ENOSPC The program was terminated because it exceeded its memory usage
1222 * limit. Some portion of the channel program may have executed and
1223 * committed changes to disk. No output is returned through 'outnvl'.
1224 *
1225 * ETIME The program was terminated because it exceeded its Lua instruction
1226 * limit. Some portion of the channel program may have executed and
1227 * committed changes to disk. No output is returned through 'outnvl'.
1228 */
1229 int
1230 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1231 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1232 {
1233 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1234 memlimit, argnvl, outnvl));
1235 }
1236
1237 /*
1238 * Creates a checkpoint for the specified pool.
1239 *
1240 * If this function returns 0 the pool was successfully checkpointed.
1241 *
1242 * This method may also return:
1243 *
1244 * ZFS_ERR_CHECKPOINT_EXISTS
1245 * The pool already has a checkpoint. A pools can only have one
1246 * checkpoint at most, at any given time.
1247 *
1248 * ZFS_ERR_DISCARDING_CHECKPOINT
1249 * ZFS is in the middle of discarding a checkpoint for this pool.
1250 * The pool can be checkpointed again once the discard is done.
1251 *
1252 * ZFS_DEVRM_IN_PROGRESS
1253 * A vdev is currently being removed. The pool cannot be
1254 * checkpointed until the device removal is done.
1255 *
1256 * ZFS_VDEV_TOO_BIG
1257 * One or more top-level vdevs exceed the maximum vdev size
1258 * supported for this feature.
1259 */
1260 int
1261 lzc_pool_checkpoint(const char *pool)
1262 {
1263 int error;
1264
1265 nvlist_t *result = NULL;
1266 nvlist_t *args = fnvlist_alloc();
1267
1268 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1269
1270 fnvlist_free(args);
1271 fnvlist_free(result);
1272
1273 return (error);
1274 }
1275
1276 /*
1277 * Discard the checkpoint from the specified pool.
1278 *
1279 * If this function returns 0 the checkpoint was successfully discarded.
1280 *
1281 * This method may also return:
1282 *
1283 * ZFS_ERR_NO_CHECKPOINT
1284 * The pool does not have a checkpoint.
1285 *
1286 * ZFS_ERR_DISCARDING_CHECKPOINT
1287 * ZFS is already in the middle of discarding the checkpoint.
1288 */
1289 int
1290 lzc_pool_checkpoint_discard(const char *pool)
1291 {
1292 int error;
1293
1294 nvlist_t *result = NULL;
1295 nvlist_t *args = fnvlist_alloc();
1296
1297 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1298
1299 fnvlist_free(args);
1300 fnvlist_free(result);
1301
1302 return (error);
1303 }
1304
1305 /*
1306 * Executes a read-only channel program.
1307 *
1308 * A read-only channel program works programmatically the same way as a
1309 * normal channel program executed with lzc_channel_program(). The only
1310 * difference is it runs exclusively in open-context and therefore can
1311 * return faster. The downside to that, is that the program cannot change
1312 * on-disk state by calling functions from the zfs.sync submodule.
1313 *
1314 * The return values of this function (and their meaning) are exactly the
1315 * same as the ones described in lzc_channel_program().
1316 */
1317 int
1318 lzc_channel_program_nosync(const char *pool, const char *program,
1319 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1320 {
1321 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1322 memlimit, argnvl, outnvl));
1323 }
1324
1325 /*
1326 * Performs key management functions
1327 *
1328 * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to
1329 * load or change a wrapping key, the key should be specified in the
1330 * hidden_args nvlist so that it is not logged.
1331 */
1332 int
1333 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
1334 uint_t wkeylen)
1335 {
1336 int error;
1337 nvlist_t *ioc_args;
1338 nvlist_t *hidden_args;
1339
1340 if (wkeydata == NULL)
1341 return (EINVAL);
1342
1343 ioc_args = fnvlist_alloc();
1344 hidden_args = fnvlist_alloc();
1345 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
1346 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1347 if (noop)
1348 fnvlist_add_boolean(ioc_args, "noop");
1349 error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
1350 nvlist_free(hidden_args);
1351 nvlist_free(ioc_args);
1352
1353 return (error);
1354 }
1355
1356 int
1357 lzc_unload_key(const char *fsname)
1358 {
1359 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
1360 }
1361
1362 int
1363 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
1364 uint8_t *wkeydata, uint_t wkeylen)
1365 {
1366 int error;
1367 nvlist_t *ioc_args = fnvlist_alloc();
1368 nvlist_t *hidden_args = NULL;
1369
1370 fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
1371
1372 if (wkeydata != NULL) {
1373 hidden_args = fnvlist_alloc();
1374 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
1375 wkeylen);
1376 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1377 }
1378
1379 if (props != NULL)
1380 fnvlist_add_nvlist(ioc_args, "props", props);
1381
1382 error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
1383 nvlist_free(hidden_args);
1384 nvlist_free(ioc_args);
1385
1386 return (error);
1387 }
1388
1389 int
1390 lzc_reopen(const char *pool_name, boolean_t scrub_restart)
1391 {
1392 nvlist_t *args = fnvlist_alloc();
1393 int error;
1394
1395 fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
1396
1397 error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
1398 nvlist_free(args);
1399 return (error);
1400 }
1401
1402 /*
1403 * Changes initializing state.
1404 *
1405 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1406 * The key is ignored.
1407 *
1408 * If there are errors related to vdev arguments, per-vdev errors are returned
1409 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1410 * guid is stringified with PRIu64, and errno is one of the following as
1411 * an int64_t:
1412 * - ENODEV if the device was not found
1413 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
1414 * - EROFS if the device is not writeable
1415 * - EBUSY start requested but the device is already being either
1416 * initialized or trimmed
1417 * - ESRCH cancel/suspend requested but device is not being initialized
1418 *
1419 * If the errlist is empty, then return value will be:
1420 * - EINVAL if one or more arguments was invalid
1421 * - Other spa_open failures
1422 * - 0 if the operation succeeded
1423 */
1424 int
1425 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1426 nvlist_t *vdevs, nvlist_t **errlist)
1427 {
1428 int error;
1429
1430 nvlist_t *args = fnvlist_alloc();
1431 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1432 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1433
1434 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1435
1436 fnvlist_free(args);
1437
1438 return (error);
1439 }
1440
1441 /*
1442 * Changes TRIM state.
1443 *
1444 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1445 * The key is ignored.
1446 *
1447 * If there are errors related to vdev arguments, per-vdev errors are returned
1448 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1449 * guid is stringified with PRIu64, and errno is one of the following as
1450 * an int64_t:
1451 * - ENODEV if the device was not found
1452 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
1453 * - EROFS if the device is not writeable
1454 * - EBUSY start requested but the device is already being either trimmed
1455 * or initialized
1456 * - ESRCH cancel/suspend requested but device is not being initialized
1457 * - EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
1458 *
1459 * If the errlist is empty, then return value will be:
1460 * - EINVAL if one or more arguments was invalid
1461 * - Other spa_open failures
1462 * - 0 if the operation succeeded
1463 */
1464 int
1465 lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
1466 boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
1467 {
1468 int error;
1469
1470 nvlist_t *args = fnvlist_alloc();
1471 fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
1472 fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
1473 fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
1474 fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
1475
1476 error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
1477
1478 fnvlist_free(args);
1479
1480 return (error);
1481 }