]>
Commit | Line | Data |
---|---|---|
6f1ffb06 MA |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
6f1ffb06 MA |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
196bee4c | 23 | * Copyright (c) 2012, 2020 by Delphix. All rights reserved. |
95fd54a1 | 24 | * Copyright (c) 2013 Steven Hartland. All rights reserved. |
d12f91fd | 25 | * Copyright 2017 RackTop Systems. |
d3f2cd7e | 26 | * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. |
a73f361f | 27 | * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. |
e8cf3a4f | 28 | * Copyright (c) 2019 Datto Inc. |
6f1ffb06 MA |
29 | */ |
30 | ||
31 | /* | |
32 | * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. | |
33 | * It has the following characteristics: | |
34 | * | |
35 | * - Thread Safe. libzfs_core is accessible concurrently from multiple | |
36 | * threads. This is accomplished primarily by avoiding global data | |
37 | * (e.g. caching). Since it's thread-safe, there is no reason for a | |
38 | * process to have multiple libzfs "instances". Therefore, we store | |
39 | * our few pieces of data (e.g. the file descriptor) in global | |
40 | * variables. The fd is reference-counted so that the libzfs_core | |
41 | * library can be "initialized" multiple times (e.g. by different | |
42 | * consumers within the same process). | |
43 | * | |
44 | * - Committed Interface. The libzfs_core interface will be committed, | |
45 | * therefore consumers can compile against it and be confident that | |
46 | * their code will continue to work on future releases of this code. | |
47 | * Currently, the interface is Evolving (not Committed), but we intend | |
48 | * to commit to it once it is more complete and we determine that it | |
49 | * meets the needs of all consumers. | |
50 | * | |
b8fce77b | 51 | * - Programmatic Error Handling. libzfs_core communicates errors with |
6f1ffb06 MA |
52 | * defined error numbers, and doesn't print anything to stdout/stderr. |
53 | * | |
54 | * - Thin Layer. libzfs_core is a thin layer, marshaling arguments | |
55 | * to/from the kernel ioctls. There is generally a 1:1 correspondence | |
fb0be12d | 56 | * between libzfs_core functions and ioctls to ZFS_DEV. |
6f1ffb06 MA |
57 | * |
58 | * - Clear Atomicity. Because libzfs_core functions are generally 1:1 | |
59 | * with kernel ioctls, and kernel ioctls are generally atomic, each | |
60 | * libzfs_core function is atomic. For example, creating multiple | |
61 | * snapshots with a single call to lzc_snapshot() is atomic -- it | |
62 | * can't fail with only some of the requested snapshots created, even | |
63 | * in the event of power loss or system crash. | |
64 | * | |
65 | * - Continued libzfs Support. Some higher-level operations (e.g. | |
66 | * support for "zfs send -R") are too complicated to fit the scope of | |
67 | * libzfs_core. This functionality will continue to live in libzfs. | |
68 | * Where appropriate, libzfs will use the underlying atomic operations | |
69 | * of libzfs_core. For example, libzfs may implement "zfs send -R | | |
70 | * zfs receive" by using individual "send one snapshot", rename, | |
71 | * destroy, and "receive one snapshot" operations in libzfs_core. | |
f8b2ca6b | 72 | * /sbin/zfs and /sbin/zpool will link with both libzfs and |
6f1ffb06 MA |
73 | * libzfs_core. Other consumers should aim to use only libzfs_core, |
74 | * since that will be the supported, stable interface going forwards. | |
75 | */ | |
76 | ||
77 | #include <libzfs_core.h> | |
78 | #include <ctype.h> | |
79 | #include <unistd.h> | |
80 | #include <stdlib.h> | |
81 | #include <string.h> | |
b83a0e2d DB |
82 | #ifdef ZFS_DEBUG |
83 | #include <stdio.h> | |
84 | #endif | |
6f1ffb06 MA |
85 | #include <errno.h> |
86 | #include <fcntl.h> | |
87 | #include <pthread.h> | |
1f2f46be | 88 | #include <libzutil.h> |
6f1ffb06 MA |
89 | #include <sys/nvpair.h> |
90 | #include <sys/param.h> | |
91 | #include <sys/types.h> | |
92 | #include <sys/stat.h> | |
93 | #include <sys/zfs_ioctl.h> | |
fbbea09d AZ |
94 | #if __FreeBSD__ |
95 | #define BIG_PIPE_SIZE (64 * 1024) /* From sys/pipe.h */ | |
96 | #endif | |
6f1ffb06 | 97 | |
e2454897 | 98 | static int g_fd = -1; |
6f1ffb06 MA |
99 | static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; |
100 | static int g_refcount; | |
101 | ||
b83a0e2d | 102 | #ifdef ZFS_DEBUG |
4f7fb135 | 103 | static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST; |
b83a0e2d DB |
104 | static zfs_errno_t fail_ioc_err; |
105 | ||
106 | static void | |
107 | libzfs_core_debug_ioc(void) | |
108 | { | |
109 | /* | |
110 | * To test running newer user space binaries with kernel's | |
111 | * that don't yet support an ioctl or a new ioctl arg we | |
112 | * provide an override to intentionally fail an ioctl. | |
113 | * | |
114 | * USAGE: | |
115 | * The override variable, ZFS_IOC_TEST, is of the form "cmd:err" | |
116 | * | |
117 | * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a | |
118 | * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029" | |
119 | * | |
120 | * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank" | |
121 | * cannot checkpoint 'tank': the loaded zfs module does not support | |
122 | * this operation. A reboot may be required to enable this operation. | |
123 | */ | |
4f7fb135 | 124 | if (fail_ioc_cmd == ZFS_IOC_LAST) { |
b83a0e2d DB |
125 | char *ioc_test = getenv("ZFS_IOC_TEST"); |
126 | unsigned int ioc_num = 0, ioc_err = 0; | |
127 | ||
128 | if (ioc_test != NULL && | |
129 | sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 && | |
130 | ioc_num < ZFS_IOC_LAST) { | |
131 | fail_ioc_cmd = ioc_num; | |
132 | fail_ioc_err = ioc_err; | |
133 | } | |
134 | } | |
135 | } | |
136 | #endif | |
137 | ||
6f1ffb06 MA |
138 | int |
139 | libzfs_core_init(void) | |
140 | { | |
141 | (void) pthread_mutex_lock(&g_lock); | |
142 | if (g_refcount == 0) { | |
92ffd87a | 143 | g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC); |
6f1ffb06 MA |
144 | if (g_fd < 0) { |
145 | (void) pthread_mutex_unlock(&g_lock); | |
146 | return (errno); | |
147 | } | |
148 | } | |
149 | g_refcount++; | |
b83a0e2d DB |
150 | |
151 | #ifdef ZFS_DEBUG | |
152 | libzfs_core_debug_ioc(); | |
153 | #endif | |
6f1ffb06 MA |
154 | (void) pthread_mutex_unlock(&g_lock); |
155 | return (0); | |
156 | } | |
157 | ||
158 | void | |
159 | libzfs_core_fini(void) | |
160 | { | |
161 | (void) pthread_mutex_lock(&g_lock); | |
162 | ASSERT3S(g_refcount, >, 0); | |
e2454897 | 163 | |
a0cb347c | 164 | g_refcount--; |
e2454897 GM |
165 | |
166 | if (g_refcount == 0 && g_fd != -1) { | |
6f1ffb06 | 167 | (void) close(g_fd); |
e2454897 GM |
168 | g_fd = -1; |
169 | } | |
6f1ffb06 MA |
170 | (void) pthread_mutex_unlock(&g_lock); |
171 | } | |
172 | ||
173 | static int | |
174 | lzc_ioctl(zfs_ioc_t ioc, const char *name, | |
175 | nvlist_t *source, nvlist_t **resultp) | |
176 | { | |
13fe0198 | 177 | zfs_cmd_t zc = {"\0"}; |
6f1ffb06 | 178 | int error = 0; |
bec1067d AP |
179 | char *packed = NULL; |
180 | size_t size = 0; | |
6f1ffb06 MA |
181 | |
182 | ASSERT3S(g_refcount, >, 0); | |
e2454897 | 183 | VERIFY3S(g_fd, !=, -1); |
6f1ffb06 | 184 | |
b83a0e2d DB |
185 | #ifdef ZFS_DEBUG |
186 | if (ioc == fail_ioc_cmd) | |
187 | return (fail_ioc_err); | |
188 | #endif | |
189 | ||
bec1067d AP |
190 | if (name != NULL) |
191 | (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); | |
6f1ffb06 | 192 | |
bec1067d AP |
193 | if (source != NULL) { |
194 | packed = fnvlist_pack(source, &size); | |
195 | zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; | |
196 | zc.zc_nvlist_src_size = size; | |
197 | } | |
6f1ffb06 MA |
198 | |
199 | if (resultp != NULL) { | |
13fe0198 | 200 | *resultp = NULL; |
234c91c5 CW |
201 | if (ioc == ZFS_IOC_CHANNEL_PROGRAM) { |
202 | zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source, | |
203 | ZCP_ARG_MEMLIMIT); | |
204 | } else { | |
205 | zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); | |
206 | } | |
6f1ffb06 MA |
207 | zc.zc_nvlist_dst = (uint64_t)(uintptr_t) |
208 | malloc(zc.zc_nvlist_dst_size); | |
209 | if (zc.zc_nvlist_dst == (uint64_t)0) { | |
210 | error = ENOMEM; | |
211 | goto out; | |
212 | } | |
213 | } | |
214 | ||
514498fe | 215 | while (lzc_ioctl_fd(g_fd, ioc, &zc) != 0) { |
d99a0153 CW |
216 | /* |
217 | * If ioctl exited with ENOMEM, we retry the ioctl after | |
218 | * increasing the size of the destination nvlist. | |
219 | * | |
234c91c5 | 220 | * Channel programs that exit with ENOMEM ran over the |
d99a0153 CW |
221 | * lua memory sandbox; they should not be retried. |
222 | */ | |
223 | if (errno == ENOMEM && resultp != NULL && | |
224 | ioc != ZFS_IOC_CHANNEL_PROGRAM) { | |
6f1ffb06 MA |
225 | free((void *)(uintptr_t)zc.zc_nvlist_dst); |
226 | zc.zc_nvlist_dst_size *= 2; | |
227 | zc.zc_nvlist_dst = (uint64_t)(uintptr_t) | |
228 | malloc(zc.zc_nvlist_dst_size); | |
229 | if (zc.zc_nvlist_dst == (uint64_t)0) { | |
230 | error = ENOMEM; | |
231 | goto out; | |
232 | } | |
233 | } else { | |
234 | error = errno; | |
235 | break; | |
236 | } | |
237 | } | |
19516b69 | 238 | if (zc.zc_nvlist_dst_filled && resultp != NULL) { |
6f1ffb06 MA |
239 | *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, |
240 | zc.zc_nvlist_dst_size); | |
6f1ffb06 MA |
241 | } |
242 | ||
243 | out: | |
b5256303 TC |
244 | if (packed != NULL) |
245 | fnvlist_pack_free(packed, size); | |
6f1ffb06 MA |
246 | free((void *)(uintptr_t)zc.zc_nvlist_dst); |
247 | return (error); | |
248 | } | |
249 | ||
482eeef8 GA |
250 | int |
251 | lzc_scrub(zfs_ioc_t ioc, const char *name, | |
252 | nvlist_t *source, nvlist_t **resultp) | |
253 | { | |
254 | return (lzc_ioctl(ioc, name, source, resultp)); | |
255 | } | |
256 | ||
6f1ffb06 | 257 | int |
b5256303 TC |
258 | lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, |
259 | uint8_t *wkeydata, uint_t wkeylen) | |
6f1ffb06 MA |
260 | { |
261 | int error; | |
b5256303 | 262 | nvlist_t *hidden_args = NULL; |
6f1ffb06 | 263 | nvlist_t *args = fnvlist_alloc(); |
b5256303 | 264 | |
e67a7ffb | 265 | fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); |
6f1ffb06 MA |
266 | if (props != NULL) |
267 | fnvlist_add_nvlist(args, "props", props); | |
b5256303 TC |
268 | |
269 | if (wkeydata != NULL) { | |
270 | hidden_args = fnvlist_alloc(); | |
271 | fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, | |
272 | wkeylen); | |
273 | fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); | |
274 | } | |
275 | ||
6f1ffb06 | 276 | error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); |
b5256303 | 277 | nvlist_free(hidden_args); |
6f1ffb06 MA |
278 | nvlist_free(args); |
279 | return (error); | |
280 | } | |
281 | ||
282 | int | |
b5256303 | 283 | lzc_clone(const char *fsname, const char *origin, nvlist_t *props) |
6f1ffb06 MA |
284 | { |
285 | int error; | |
b5256303 | 286 | nvlist_t *hidden_args = NULL; |
6f1ffb06 | 287 | nvlist_t *args = fnvlist_alloc(); |
b5256303 | 288 | |
6f1ffb06 MA |
289 | fnvlist_add_string(args, "origin", origin); |
290 | if (props != NULL) | |
291 | fnvlist_add_nvlist(args, "props", props); | |
292 | error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); | |
b5256303 | 293 | nvlist_free(hidden_args); |
6f1ffb06 MA |
294 | nvlist_free(args); |
295 | return (error); | |
296 | } | |
297 | ||
d12f91fd GDN |
298 | int |
299 | lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen) | |
300 | { | |
301 | /* | |
302 | * The promote ioctl is still legacy, so we need to construct our | |
303 | * own zfs_cmd_t rather than using lzc_ioctl(). | |
304 | */ | |
659f4008 | 305 | zfs_cmd_t zc = {"\0"}; |
d12f91fd GDN |
306 | |
307 | ASSERT3S(g_refcount, >, 0); | |
308 | VERIFY3S(g_fd, !=, -1); | |
309 | ||
310 | (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); | |
514498fe | 311 | if (lzc_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) { |
d12f91fd GDN |
312 | int error = errno; |
313 | if (error == EEXIST && snapnamebuf != NULL) | |
314 | (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen); | |
315 | return (error); | |
316 | } | |
317 | return (0); | |
318 | } | |
319 | ||
dc1c630b AG |
320 | int |
321 | lzc_rename(const char *source, const char *target) | |
322 | { | |
659f4008 | 323 | zfs_cmd_t zc = {"\0"}; |
dc1c630b | 324 | int error; |
659f4008 | 325 | |
dc1c630b AG |
326 | ASSERT3S(g_refcount, >, 0); |
327 | VERIFY3S(g_fd, !=, -1); | |
328 | (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name)); | |
329 | (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); | |
514498fe | 330 | error = lzc_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc); |
dc1c630b AG |
331 | if (error != 0) |
332 | error = errno; | |
333 | return (error); | |
334 | } | |
e1af0d0d | 335 | |
dc1c630b AG |
336 | int |
337 | lzc_destroy(const char *fsname) | |
338 | { | |
339 | int error; | |
340 | nvlist_t *args = fnvlist_alloc(); | |
341 | error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL); | |
342 | nvlist_free(args); | |
343 | return (error); | |
344 | } | |
345 | ||
6f1ffb06 MA |
346 | /* |
347 | * Creates snapshots. | |
348 | * | |
349 | * The keys in the snaps nvlist are the snapshots to be created. | |
350 | * They must all be in the same pool. | |
351 | * | |
352 | * The props nvlist is properties to set. Currently only user properties | |
353 | * are supported. { user:prop_name -> string value } | |
354 | * | |
355 | * The returned results nvlist will have an entry for each snapshot that failed. | |
356 | * The value will be the (int32) error code. | |
357 | * | |
358 | * The return value will be 0 if all snapshots were created, otherwise it will | |
13fe0198 | 359 | * be the errno of a (unspecified) snapshot that failed. |
6f1ffb06 MA |
360 | */ |
361 | int | |
362 | lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) | |
363 | { | |
364 | nvpair_t *elem; | |
365 | nvlist_t *args; | |
366 | int error; | |
eca7b760 | 367 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
6f1ffb06 MA |
368 | |
369 | *errlist = NULL; | |
370 | ||
371 | /* determine the pool name */ | |
372 | elem = nvlist_next_nvpair(snaps, NULL); | |
373 | if (elem == NULL) | |
374 | return (0); | |
375 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
376 | pool[strcspn(pool, "/@")] = '\0'; | |
377 | ||
378 | args = fnvlist_alloc(); | |
379 | fnvlist_add_nvlist(args, "snaps", snaps); | |
380 | if (props != NULL) | |
381 | fnvlist_add_nvlist(args, "props", props); | |
382 | ||
383 | error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); | |
384 | nvlist_free(args); | |
385 | ||
386 | return (error); | |
387 | } | |
388 | ||
389 | /* | |
390 | * Destroys snapshots. | |
391 | * | |
392 | * The keys in the snaps nvlist are the snapshots to be destroyed. | |
393 | * They must all be in the same pool. | |
394 | * | |
395 | * Snapshots that do not exist will be silently ignored. | |
396 | * | |
397 | * If 'defer' is not set, and a snapshot has user holds or clones, the | |
398 | * destroy operation will fail and none of the snapshots will be | |
399 | * destroyed. | |
400 | * | |
401 | * If 'defer' is set, and a snapshot has user holds or clones, it will be | |
402 | * marked for deferred destruction, and will be destroyed when the last hold | |
403 | * or clone is removed/destroyed. | |
404 | * | |
405 | * The return value will be 0 if all snapshots were destroyed (or marked for | |
1a077756 | 406 | * later destruction if 'defer' is set) or didn't exist to begin with. |
6f1ffb06 | 407 | * |
13fe0198 | 408 | * Otherwise the return value will be the errno of a (unspecified) snapshot |
6f1ffb06 MA |
409 | * that failed, no snapshots will be destroyed, and the errlist will have an |
410 | * entry for each snapshot that failed. The value in the errlist will be | |
411 | * the (int32) error code. | |
412 | */ | |
413 | int | |
414 | lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) | |
415 | { | |
416 | nvpair_t *elem; | |
417 | nvlist_t *args; | |
418 | int error; | |
eca7b760 | 419 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
6f1ffb06 MA |
420 | |
421 | /* determine the pool name */ | |
422 | elem = nvlist_next_nvpair(snaps, NULL); | |
423 | if (elem == NULL) | |
424 | return (0); | |
425 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
426 | pool[strcspn(pool, "/@")] = '\0'; | |
427 | ||
428 | args = fnvlist_alloc(); | |
429 | fnvlist_add_nvlist(args, "snaps", snaps); | |
430 | if (defer) | |
431 | fnvlist_add_boolean(args, "defer"); | |
432 | ||
433 | error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); | |
434 | nvlist_free(args); | |
435 | ||
436 | return (error); | |
6f1ffb06 MA |
437 | } |
438 | ||
439 | int | |
440 | lzc_snaprange_space(const char *firstsnap, const char *lastsnap, | |
441 | uint64_t *usedp) | |
442 | { | |
443 | nvlist_t *args; | |
444 | nvlist_t *result; | |
445 | int err; | |
eca7b760 | 446 | char fs[ZFS_MAX_DATASET_NAME_LEN]; |
6f1ffb06 MA |
447 | char *atp; |
448 | ||
449 | /* determine the fs name */ | |
450 | (void) strlcpy(fs, firstsnap, sizeof (fs)); | |
451 | atp = strchr(fs, '@'); | |
452 | if (atp == NULL) | |
453 | return (EINVAL); | |
454 | *atp = '\0'; | |
455 | ||
456 | args = fnvlist_alloc(); | |
457 | fnvlist_add_string(args, "firstsnap", firstsnap); | |
458 | ||
459 | err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); | |
460 | nvlist_free(args); | |
461 | if (err == 0) | |
462 | *usedp = fnvlist_lookup_uint64(result, "used"); | |
463 | fnvlist_free(result); | |
464 | ||
465 | return (err); | |
466 | } | |
467 | ||
468 | boolean_t | |
469 | lzc_exists(const char *dataset) | |
470 | { | |
471 | /* | |
472 | * The objset_stats ioctl is still legacy, so we need to construct our | |
d12f91fd | 473 | * own zfs_cmd_t rather than using lzc_ioctl(). |
6f1ffb06 | 474 | */ |
13fe0198 | 475 | zfs_cmd_t zc = {"\0"}; |
6f1ffb06 | 476 | |
e2454897 GM |
477 | ASSERT3S(g_refcount, >, 0); |
478 | VERIFY3S(g_fd, !=, -1); | |
479 | ||
6f1ffb06 | 480 | (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); |
514498fe | 481 | return (lzc_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); |
6f1ffb06 MA |
482 | } |
483 | ||
bec1067d AP |
484 | /* |
485 | * outnvl is unused. | |
486 | * It was added to preserve the function signature in case it is | |
487 | * needed in the future. | |
488 | */ | |
bec1067d AP |
489 | int |
490 | lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) | |
491 | { | |
bfc17897 | 492 | (void) outnvl; |
bec1067d AP |
493 | return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL)); |
494 | } | |
495 | ||
13fe0198 MA |
496 | /* |
497 | * Create "user holds" on snapshots. If there is a hold on a snapshot, | |
498 | * the snapshot can not be destroyed. (However, it can be marked for deletion | |
499 | * by lzc_destroy_snaps(defer=B_TRUE).) | |
500 | * | |
501 | * The keys in the nvlist are snapshot names. | |
502 | * The snapshots must all be in the same pool. | |
503 | * The value is the name of the hold (string type). | |
504 | * | |
fb0be12d | 505 | * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL). |
13fe0198 MA |
506 | * In this case, when the cleanup_fd is closed (including on process |
507 | * termination), the holds will be released. If the system is shut down | |
508 | * uncleanly, the holds will be released when the pool is next opened | |
509 | * or imported. | |
510 | * | |
95fd54a1 | 511 | * Holds for snapshots which don't exist will be skipped and have an entry |
1a077756 | 512 | * added to errlist, but will not cause an overall failure. |
95fd54a1 | 513 | * |
1a077756 | 514 | * The return value will be 0 if all holds, for snapshots that existed, |
b8fce77b | 515 | * were successfully created. |
95fd54a1 SH |
516 | * |
517 | * Otherwise the return value will be the errno of a (unspecified) hold that | |
518 | * failed and no holds will be created. | |
519 | * | |
520 | * In all cases the errlist will have an entry for each hold that failed | |
521 | * (name = snapshot), with its value being the error code (int32). | |
13fe0198 MA |
522 | */ |
523 | int | |
524 | lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) | |
525 | { | |
eca7b760 | 526 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
13fe0198 MA |
527 | nvlist_t *args; |
528 | nvpair_t *elem; | |
529 | int error; | |
530 | ||
531 | /* determine the pool name */ | |
532 | elem = nvlist_next_nvpair(holds, NULL); | |
533 | if (elem == NULL) | |
534 | return (0); | |
535 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
536 | pool[strcspn(pool, "/@")] = '\0'; | |
537 | ||
538 | args = fnvlist_alloc(); | |
539 | fnvlist_add_nvlist(args, "holds", holds); | |
540 | if (cleanup_fd != -1) | |
541 | fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); | |
542 | ||
543 | error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); | |
544 | nvlist_free(args); | |
545 | return (error); | |
546 | } | |
547 | ||
548 | /* | |
549 | * Release "user holds" on snapshots. If the snapshot has been marked for | |
550 | * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have | |
551 | * any clones, and all the user holds are removed, then the snapshot will be | |
552 | * destroyed. | |
553 | * | |
554 | * The keys in the nvlist are snapshot names. | |
555 | * The snapshots must all be in the same pool. | |
d5884c34 | 556 | * The value is an nvlist whose keys are the holds to remove. |
13fe0198 | 557 | * |
95fd54a1 | 558 | * Holds which failed to release because they didn't exist will have an entry |
1a077756 | 559 | * added to errlist, but will not cause an overall failure. |
95fd54a1 SH |
560 | * |
561 | * The return value will be 0 if the nvl holds was empty or all holds that | |
1a077756 | 562 | * existed, were successfully removed. |
95fd54a1 SH |
563 | * |
564 | * Otherwise the return value will be the errno of a (unspecified) hold that | |
565 | * failed to release and no holds will be released. | |
566 | * | |
567 | * In all cases the errlist will have an entry for each hold that failed | |
568 | * to release. | |
13fe0198 MA |
569 | */ |
570 | int | |
571 | lzc_release(nvlist_t *holds, nvlist_t **errlist) | |
572 | { | |
eca7b760 | 573 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
13fe0198 MA |
574 | nvpair_t *elem; |
575 | ||
576 | /* determine the pool name */ | |
577 | elem = nvlist_next_nvpair(holds, NULL); | |
578 | if (elem == NULL) | |
579 | return (0); | |
580 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
581 | pool[strcspn(pool, "/@")] = '\0'; | |
582 | ||
583 | return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); | |
584 | } | |
585 | ||
586 | /* | |
587 | * Retrieve list of user holds on the specified snapshot. | |
588 | * | |
d5884c34 | 589 | * On success, *holdsp will be set to an nvlist which the caller must free. |
13fe0198 MA |
590 | * The keys are the names of the holds, and the value is the creation time |
591 | * of the hold (uint64) in seconds since the epoch. | |
592 | */ | |
593 | int | |
594 | lzc_get_holds(const char *snapname, nvlist_t **holdsp) | |
595 | { | |
bec1067d | 596 | return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp)); |
13fe0198 MA |
597 | } |
598 | ||
fbbea09d | 599 | static unsigned int |
8a3d7735 AZ |
600 | max_pipe_buffer(int infd) |
601 | { | |
602 | #if __linux__ | |
fbbea09d AZ |
603 | static unsigned int max; |
604 | if (max == 0) { | |
605 | max = 1048576; /* fs/pipe.c default */ | |
606 | ||
607 | FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re"); | |
608 | if (procf != NULL) { | |
609 | if (fscanf(procf, "%u", &max) <= 0) { | |
610 | /* ignore error: max untouched if parse fails */ | |
611 | } | |
612 | fclose(procf); | |
8a3d7735 | 613 | } |
8a3d7735 | 614 | } |
fbbea09d AZ |
615 | |
616 | unsigned int cur = fcntl(infd, F_GETPIPE_SZ); | |
a30927f7 RE |
617 | /* |
618 | * Sadly, Linux has an unfixed deadlock if you do SETPIPE_SZ on a pipe | |
619 | * with data in it. | |
620 | * cf. #13232, https://bugzilla.kernel.org/show_bug.cgi?id=212295 | |
621 | * | |
622 | * And since the problem is in waking up the writer, there's nothing | |
623 | * we can do about it from here. | |
624 | * | |
625 | * So if people want to, they can set this, but they | |
626 | * may regret it... | |
627 | */ | |
628 | if (getenv("ZFS_SET_PIPE_MAX") == NULL) | |
629 | return (cur); | |
fbbea09d AZ |
630 | if (cur < max && fcntl(infd, F_SETPIPE_SZ, max) != -1) |
631 | cur = max; | |
632 | return (cur); | |
8a3d7735 AZ |
633 | #else |
634 | /* FreeBSD automatically resizes */ | |
635 | (void) infd; | |
fbbea09d | 636 | return (BIG_PIPE_SIZE); |
8a3d7735 AZ |
637 | #endif |
638 | } | |
639 | ||
3a909fe3 AZ |
640 | #if __linux__ |
641 | struct send_worker_ctx { | |
642 | int from; /* read end of pipe, with send data; closed on exit */ | |
643 | int to; /* original arbitrary output fd; mustn't be a pipe */ | |
644 | }; | |
645 | ||
646 | static void * | |
647 | send_worker(void *arg) | |
648 | { | |
649 | struct send_worker_ctx *ctx = arg; | |
650 | unsigned int bufsiz = max_pipe_buffer(ctx->from); | |
651 | ssize_t rd; | |
652 | ||
6d9bc3ec PD |
653 | for (;;) { |
654 | rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz, | |
655 | SPLICE_F_MOVE | SPLICE_F_MORE); | |
656 | if ((rd == -1 && errno != EINTR) || rd == 0) | |
657 | break; | |
658 | } | |
3a909fe3 AZ |
659 | int err = (rd == -1) ? errno : 0; |
660 | close(ctx->from); | |
661 | return ((void *)(uintptr_t)err); | |
662 | } | |
663 | #endif | |
664 | ||
665 | /* | |
666 | * Since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf | |
667 | * ("fs: don't allow kernel reads and writes without iter ops"), | |
668 | * ZFS_IOC_SEND* will EINVAL when writing to /dev/null, /dev/zero, &c. | |
669 | * | |
670 | * This wrapper transparently executes func() with a pipe | |
671 | * by spawning a thread to copy from that pipe to the original output | |
672 | * in the background. | |
673 | * | |
674 | * Returns the error from func(), if nonzero, | |
675 | * otherwise the error from the thread. | |
676 | * | |
a86e0894 AZ |
677 | * No-op if orig_fd is -1, already a pipe (but the buffer size is bumped), |
678 | * and on not-Linux; as such, it is safe to wrap/call wrapped functions | |
679 | * in a wrapped context. | |
3a909fe3 AZ |
680 | */ |
681 | int | |
682 | lzc_send_wrapper(int (*func)(int, void *), int orig_fd, void *data) | |
683 | { | |
684 | #if __linux__ | |
685 | struct stat sb; | |
686 | if (orig_fd != -1 && fstat(orig_fd, &sb) == -1) | |
687 | return (errno); | |
a86e0894 AZ |
688 | if (orig_fd == -1 || S_ISFIFO(sb.st_mode)) { |
689 | if (orig_fd != -1) | |
690 | (void) max_pipe_buffer(orig_fd); | |
3a909fe3 | 691 | return (func(orig_fd, data)); |
a86e0894 | 692 | } |
3a909fe3 AZ |
693 | if ((fcntl(orig_fd, F_GETFL) & O_ACCMODE) == O_RDONLY) |
694 | return (errno = EBADF); | |
695 | ||
696 | int rw[2]; | |
697 | if (pipe2(rw, O_CLOEXEC) == -1) | |
698 | return (errno); | |
699 | ||
700 | int err; | |
701 | pthread_t send_thread; | |
702 | struct send_worker_ctx ctx = {.from = rw[0], .to = orig_fd}; | |
703 | if ((err = pthread_create(&send_thread, NULL, send_worker, &ctx)) | |
704 | != 0) { | |
705 | close(rw[0]); | |
706 | close(rw[1]); | |
707 | return (errno = err); | |
708 | } | |
709 | ||
710 | err = func(rw[1], data); | |
711 | ||
712 | void *send_err; | |
713 | close(rw[1]); | |
714 | pthread_join(send_thread, &send_err); | |
715 | if (err == 0 && send_err != 0) | |
716 | errno = err = (uintptr_t)send_err; | |
717 | ||
718 | return (err); | |
719 | #else | |
720 | return (func(orig_fd, data)); | |
721 | #endif | |
722 | } | |
723 | ||
6f1ffb06 | 724 | /* |
9b67f605 MA |
725 | * Generate a zfs send stream for the specified snapshot and write it to |
726 | * the specified file descriptor. | |
da536844 MA |
727 | * |
728 | * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap") | |
729 | * | |
730 | * If "from" is NULL, a full (non-incremental) stream will be sent. | |
731 | * If "from" is non-NULL, it must be the full name of a snapshot or | |
732 | * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or | |
733 | * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or | |
734 | * bookmark must represent an earlier point in the history of "snapname"). | |
735 | * It can be an earlier snapshot in the same filesystem or zvol as "snapname", | |
736 | * or it can be the origin of "snapname"'s filesystem, or an earlier | |
737 | * snapshot in the origin, etc. | |
738 | * | |
739 | * "fd" is the file descriptor to write the send stream to. | |
9b67f605 | 740 | * |
f1512ee6 MA |
741 | * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted |
742 | * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT | |
743 | * records with drr_blksz > 128K. | |
744 | * | |
9b67f605 MA |
745 | * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted |
746 | * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, | |
747 | * which the receiving system must support (as indicated by support | |
748 | * for the "embedded_data" feature). | |
85ce3f4f | 749 | * |
750 | * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using | |
751 | * compressed WRITE records for blocks which are compressed on disk and in | |
752 | * memory. If the lz4_compress feature is active on the sending system, then | |
753 | * the receiving system must have that feature enabled as well. | |
754 | * | |
755 | * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted | |
756 | * datasets, by sending data exactly as it exists on disk. This allows backups | |
757 | * to be taken even if encryption keys are not currently loaded. | |
6f1ffb06 MA |
758 | */ |
759 | int | |
9b67f605 MA |
760 | lzc_send(const char *snapname, const char *from, int fd, |
761 | enum lzc_send_flags flags) | |
47dfff3b | 762 | { |
30af21b0 PD |
763 | return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, |
764 | NULL)); | |
765 | } | |
766 | ||
767 | int | |
768 | lzc_send_redacted(const char *snapname, const char *from, int fd, | |
769 | enum lzc_send_flags flags, const char *redactbook) | |
770 | { | |
771 | return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0, | |
772 | redactbook)); | |
47dfff3b MA |
773 | } |
774 | ||
775 | int | |
776 | lzc_send_resume(const char *snapname, const char *from, int fd, | |
777 | enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) | |
30af21b0 PD |
778 | { |
779 | return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj, | |
780 | resumeoff, NULL)); | |
781 | } | |
782 | ||
783 | /* | |
784 | * snapname: The name of the "tosnap", or the snapshot whose contents we are | |
785 | * sending. | |
786 | * from: The name of the "fromsnap", or the incremental source. | |
787 | * fd: File descriptor to write the stream to. | |
788 | * flags: flags that determine features to be used by the stream. | |
789 | * resumeobj: Object to resume from, for resuming send | |
790 | * resumeoff: Offset to resume from, for resuming send. | |
791 | * redactnv: nvlist of string -> boolean(ignored) containing the names of all | |
792 | * the snapshots that we should redact with respect to. | |
793 | * redactbook: Name of the redaction bookmark to create. | |
3a909fe3 AZ |
794 | * |
795 | * Pre-wrapped. | |
30af21b0 | 796 | */ |
3a909fe3 AZ |
797 | static int |
798 | lzc_send_resume_redacted_cb_impl(const char *snapname, const char *from, int fd, | |
30af21b0 PD |
799 | enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, |
800 | const char *redactbook) | |
6f1ffb06 MA |
801 | { |
802 | nvlist_t *args; | |
803 | int err; | |
804 | ||
805 | args = fnvlist_alloc(); | |
806 | fnvlist_add_int32(args, "fd", fd); | |
da536844 MA |
807 | if (from != NULL) |
808 | fnvlist_add_string(args, "fromsnap", from); | |
f1512ee6 MA |
809 | if (flags & LZC_SEND_FLAG_LARGE_BLOCK) |
810 | fnvlist_add_boolean(args, "largeblockok"); | |
9b67f605 MA |
811 | if (flags & LZC_SEND_FLAG_EMBED_DATA) |
812 | fnvlist_add_boolean(args, "embedok"); | |
a7004725 DK |
813 | if (flags & LZC_SEND_FLAG_COMPRESS) |
814 | fnvlist_add_boolean(args, "compressok"); | |
b5256303 TC |
815 | if (flags & LZC_SEND_FLAG_RAW) |
816 | fnvlist_add_boolean(args, "rawok"); | |
ba0ba69e TC |
817 | if (flags & LZC_SEND_FLAG_SAVED) |
818 | fnvlist_add_boolean(args, "savedok"); | |
47dfff3b MA |
819 | if (resumeobj != 0 || resumeoff != 0) { |
820 | fnvlist_add_uint64(args, "resume_object", resumeobj); | |
821 | fnvlist_add_uint64(args, "resume_offset", resumeoff); | |
822 | } | |
30af21b0 PD |
823 | if (redactbook != NULL) |
824 | fnvlist_add_string(args, "redactbook", redactbook); | |
825 | ||
6f1ffb06 MA |
826 | err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); |
827 | nvlist_free(args); | |
828 | return (err); | |
829 | } | |
830 | ||
3a909fe3 AZ |
831 | struct lzc_send_resume_redacted { |
832 | const char *snapname; | |
833 | const char *from; | |
834 | enum lzc_send_flags flags; | |
835 | uint64_t resumeobj; | |
836 | uint64_t resumeoff; | |
837 | const char *redactbook; | |
838 | }; | |
839 | ||
840 | static int | |
841 | lzc_send_resume_redacted_cb(int fd, void *arg) | |
842 | { | |
843 | struct lzc_send_resume_redacted *zsrr = arg; | |
844 | return (lzc_send_resume_redacted_cb_impl(zsrr->snapname, zsrr->from, | |
845 | fd, zsrr->flags, zsrr->resumeobj, zsrr->resumeoff, | |
846 | zsrr->redactbook)); | |
847 | } | |
848 | ||
849 | int | |
850 | lzc_send_resume_redacted(const char *snapname, const char *from, int fd, | |
851 | enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, | |
852 | const char *redactbook) | |
853 | { | |
854 | struct lzc_send_resume_redacted zsrr = { | |
855 | .snapname = snapname, | |
856 | .from = from, | |
857 | .flags = flags, | |
858 | .resumeobj = resumeobj, | |
859 | .resumeoff = resumeoff, | |
860 | .redactbook = redactbook, | |
861 | }; | |
862 | return (lzc_send_wrapper(lzc_send_resume_redacted_cb, fd, &zsrr)); | |
863 | } | |
864 | ||
6f1ffb06 | 865 | /* |
5dc8b736 MG |
866 | * "from" can be NULL, a snapshot, or a bookmark. |
867 | * | |
868 | * If from is NULL, a full (non-incremental) stream will be estimated. This | |
869 | * is calculated very efficiently. | |
870 | * | |
871 | * If from is a snapshot, lzc_send_space uses the deadlists attached to | |
872 | * each snapshot to efficiently estimate the stream size. | |
873 | * | |
874 | * If from is a bookmark, the indirect blocks in the destination snapshot | |
875 | * are traversed, looking for blocks with a birth time since the creation TXG of | |
876 | * the snapshot this bookmark was created from. This will result in | |
877 | * significantly more I/O and be less efficient than a send space estimation on | |
30af21b0 PD |
878 | * an equivalent snapshot. This process is also used if redact_snaps is |
879 | * non-null. | |
3a909fe3 AZ |
880 | * |
881 | * Pre-wrapped. | |
6f1ffb06 | 882 | */ |
3a909fe3 AZ |
883 | static int |
884 | lzc_send_space_resume_redacted_cb_impl(const char *snapname, const char *from, | |
30af21b0 PD |
885 | enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, |
886 | uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep) | |
6f1ffb06 MA |
887 | { |
888 | nvlist_t *args; | |
889 | nvlist_t *result; | |
890 | int err; | |
891 | ||
892 | args = fnvlist_alloc(); | |
5dc8b736 MG |
893 | if (from != NULL) |
894 | fnvlist_add_string(args, "from", from); | |
2aa34383 DK |
895 | if (flags & LZC_SEND_FLAG_LARGE_BLOCK) |
896 | fnvlist_add_boolean(args, "largeblockok"); | |
897 | if (flags & LZC_SEND_FLAG_EMBED_DATA) | |
898 | fnvlist_add_boolean(args, "embedok"); | |
899 | if (flags & LZC_SEND_FLAG_COMPRESS) | |
900 | fnvlist_add_boolean(args, "compressok"); | |
cf7684bc | 901 | if (flags & LZC_SEND_FLAG_RAW) |
902 | fnvlist_add_boolean(args, "rawok"); | |
30af21b0 PD |
903 | if (resumeobj != 0 || resumeoff != 0) { |
904 | fnvlist_add_uint64(args, "resume_object", resumeobj); | |
905 | fnvlist_add_uint64(args, "resume_offset", resumeoff); | |
906 | fnvlist_add_uint64(args, "bytes", resume_bytes); | |
907 | } | |
908 | if (redactbook != NULL) | |
909 | fnvlist_add_string(args, "redactbook", redactbook); | |
910 | if (fd != -1) | |
911 | fnvlist_add_int32(args, "fd", fd); | |
912 | ||
6f1ffb06 MA |
913 | err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); |
914 | nvlist_free(args); | |
915 | if (err == 0) | |
916 | *spacep = fnvlist_lookup_uint64(result, "space"); | |
917 | nvlist_free(result); | |
918 | return (err); | |
919 | } | |
920 | ||
3a909fe3 AZ |
921 | struct lzc_send_space_resume_redacted { |
922 | const char *snapname; | |
923 | const char *from; | |
924 | enum lzc_send_flags flags; | |
925 | uint64_t resumeobj; | |
926 | uint64_t resumeoff; | |
927 | uint64_t resume_bytes; | |
928 | const char *redactbook; | |
929 | uint64_t *spacep; | |
930 | }; | |
931 | ||
932 | static int | |
933 | lzc_send_space_resume_redacted_cb(int fd, void *arg) | |
934 | { | |
935 | struct lzc_send_space_resume_redacted *zssrr = arg; | |
936 | return (lzc_send_space_resume_redacted_cb_impl(zssrr->snapname, | |
937 | zssrr->from, zssrr->flags, zssrr->resumeobj, zssrr->resumeoff, | |
938 | zssrr->resume_bytes, zssrr->redactbook, fd, zssrr->spacep)); | |
939 | } | |
940 | ||
941 | int | |
942 | lzc_send_space_resume_redacted(const char *snapname, const char *from, | |
943 | enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff, | |
944 | uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep) | |
945 | { | |
946 | struct lzc_send_space_resume_redacted zssrr = { | |
947 | .snapname = snapname, | |
948 | .from = from, | |
949 | .flags = flags, | |
950 | .resumeobj = resumeobj, | |
951 | .resumeoff = resumeoff, | |
952 | .resume_bytes = resume_bytes, | |
953 | .redactbook = redactbook, | |
954 | .spacep = spacep, | |
955 | }; | |
956 | return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb, | |
957 | fd, &zssrr)); | |
958 | } | |
959 | ||
30af21b0 PD |
960 | int |
961 | lzc_send_space(const char *snapname, const char *from, | |
962 | enum lzc_send_flags flags, uint64_t *spacep) | |
963 | { | |
964 | return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0, | |
965 | NULL, -1, spacep)); | |
966 | } | |
967 | ||
6f1ffb06 MA |
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf'.
 *
 * Short reads are continued until the request is complete.  A read that is
 * interrupted by a signal (EINTR) is retried.
 *
 * Returns 0 once all 'ilen' bytes have been read, or EIO if read() fails
 * or end-of-file is reached first.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	while (len > 0) {
		ssize_t rv = read(fd, cp, (size_t)len);

		if (rv < 0) {
			/* Nothing was transferred; just try again. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;		/* premature end-of-file */

		/* Only advance on success so 'cp' never leaves 'buf'. */
		cp += rv;
		len -= (int)rv;
	}

	return (len == 0 ? 0 : EIO);
}
986 | ||
43e52edd | 987 | /* |
b5256303 TC |
988 | * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the |
989 | * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all | |
990 | * stream options but is currently only used for resumable streams. This way | |
991 | * updated user space utilities will interoperate with older kernel modules. | |
43e52edd BB |
992 | * |
993 | * Non-Linux OpenZFS platforms have opted to modify the legacy interface. | |
994 | */ | |
47dfff3b | 995 | static int |
a3eeab2d | 996 | recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, |
d9c460a0 | 997 | uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force, |
e8cf3a4f | 998 | boolean_t heal, boolean_t resumable, boolean_t raw, int input_fd, |
196bee4c MA |
999 | const dmu_replay_record_t *begin_record, uint64_t *read_bytes, |
1000 | uint64_t *errflags, nvlist_t **errors) | |
6f1ffb06 | 1001 | { |
43e52edd BB |
1002 | dmu_replay_record_t drr; |
1003 | char fsname[MAXPATHLEN]; | |
6f1ffb06 | 1004 | char *atp; |
6f1ffb06 | 1005 | int error; |
30af21b0 | 1006 | boolean_t payload = B_FALSE; |
6f1ffb06 | 1007 | |
e2454897 GM |
1008 | ASSERT3S(g_refcount, >, 0); |
1009 | VERIFY3S(g_fd, !=, -1); | |
1010 | ||
43e52edd BB |
1011 | /* Set 'fsname' to the name of containing filesystem */ |
1012 | (void) strlcpy(fsname, snapname, sizeof (fsname)); | |
1013 | atp = strchr(fsname, '@'); | |
6f1ffb06 MA |
1014 | if (atp == NULL) |
1015 | return (EINVAL); | |
1016 | *atp = '\0'; | |
1017 | ||
43e52edd BB |
1018 | /* If the fs does not exist, try its parent. */ |
1019 | if (!lzc_exists(fsname)) { | |
1020 | char *slashp = strrchr(fsname, '/'); | |
6f1ffb06 MA |
1021 | if (slashp == NULL) |
1022 | return (ENOENT); | |
1023 | *slashp = '\0'; | |
43e52edd | 1024 | } |
6f1ffb06 | 1025 | |
8a3d7735 AZ |
1026 | /* |
1027 | * It is not uncommon for gigabytes to be processed by zfs receive. | |
1028 | * Speculatively increase the buffer size if supported by the platform. | |
1029 | */ | |
1030 | struct stat sb; | |
1031 | if (fstat(input_fd, &sb) == -1) | |
1032 | return (errno); | |
1033 | if (S_ISFIFO(sb.st_mode)) | |
fbbea09d | 1034 | (void) max_pipe_buffer(input_fd); |
8a3d7735 | 1035 | |
43e52edd BB |
1036 | /* |
1037 | * The begin_record is normally a non-byteswapped BEGIN record. | |
1038 | * For resumable streams it may be set to any non-byteswapped | |
1039 | * dmu_replay_record_t. | |
1040 | */ | |
1041 | if (begin_record == NULL) { | |
1042 | error = recv_read(input_fd, &drr, sizeof (drr)); | |
1043 | if (error != 0) | |
1044 | return (error); | |
1045 | } else { | |
1046 | drr = *begin_record; | |
30af21b0 | 1047 | payload = (begin_record->drr_payloadlen != 0); |
6f1ffb06 MA |
1048 | } |
1049 | ||
d9c460a0 | 1050 | /* |
78595377 | 1051 | * All receives with a payload should use the new interface. |
d9c460a0 | 1052 | */ |
e8cf3a4f | 1053 | if (resumable || heal || raw || wkeydata != NULL || payload) { |
43e52edd BB |
1054 | nvlist_t *outnvl = NULL; |
1055 | nvlist_t *innvl = fnvlist_alloc(); | |
6f1ffb06 | 1056 | |
43e52edd | 1057 | fnvlist_add_string(innvl, "snapname", snapname); |
6f1ffb06 | 1058 | |
a3eeab2d | 1059 | if (recvdprops != NULL) |
1060 | fnvlist_add_nvlist(innvl, "props", recvdprops); | |
1061 | ||
1062 | if (localprops != NULL) | |
1063 | fnvlist_add_nvlist(innvl, "localprops", localprops); | |
6f1ffb06 | 1064 | |
d9c460a0 TC |
1065 | if (wkeydata != NULL) { |
1066 | /* | |
1067 | * wkeydata must be placed in the special | |
1068 | * ZPOOL_HIDDEN_ARGS nvlist so that it | |
1069 | * will not be printed to the zpool history. | |
1070 | */ | |
1071 | nvlist_t *hidden_args = fnvlist_alloc(); | |
1072 | fnvlist_add_uint8_array(hidden_args, "wkeydata", | |
1073 | wkeydata, wkeylen); | |
1074 | fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS, | |
1075 | hidden_args); | |
1076 | nvlist_free(hidden_args); | |
1077 | } | |
1078 | ||
43e52edd BB |
1079 | if (origin != NULL && strlen(origin)) |
1080 | fnvlist_add_string(innvl, "origin", origin); | |
1081 | ||
1082 | fnvlist_add_byte_array(innvl, "begin_record", | |
02730c33 | 1083 | (uchar_t *)&drr, sizeof (drr)); |
43e52edd BB |
1084 | |
1085 | fnvlist_add_int32(innvl, "input_fd", input_fd); | |
1086 | ||
1087 | if (force) | |
1088 | fnvlist_add_boolean(innvl, "force"); | |
1089 | ||
1090 | if (resumable) | |
1091 | fnvlist_add_boolean(innvl, "resumable"); | |
1092 | ||
e8cf3a4f AP |
1093 | if (heal) |
1094 | fnvlist_add_boolean(innvl, "heal"); | |
43e52edd BB |
1095 | |
1096 | error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl); | |
1097 | ||
1098 | if (error == 0 && read_bytes != NULL) | |
1099 | error = nvlist_lookup_uint64(outnvl, "read_bytes", | |
1100 | read_bytes); | |
1101 | ||
1102 | if (error == 0 && errflags != NULL) | |
1103 | error = nvlist_lookup_uint64(outnvl, "error_flags", | |
1104 | errflags); | |
1105 | ||
43e52edd BB |
1106 | if (error == 0 && errors != NULL) { |
1107 | nvlist_t *nvl; | |
1108 | error = nvlist_lookup_nvlist(outnvl, "errors", &nvl); | |
1109 | if (error == 0) | |
1110 | *errors = fnvlist_dup(nvl); | |
1111 | } | |
1112 | ||
1113 | fnvlist_free(innvl); | |
1114 | fnvlist_free(outnvl); | |
fd41e935 | 1115 | } else { |
43e52edd | 1116 | zfs_cmd_t zc = {"\0"}; |
d6df4441 BD |
1117 | char *rp_packed = NULL; |
1118 | char *lp_packed = NULL; | |
43e52edd | 1119 | size_t size; |
6f1ffb06 | 1120 | |
43e52edd | 1121 | ASSERT3S(g_refcount, >, 0); |
6f1ffb06 | 1122 | |
ac4985e4 | 1123 | (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); |
43e52edd | 1124 | (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); |
6f1ffb06 | 1125 | |
a3eeab2d | 1126 | if (recvdprops != NULL) { |
d6df4441 BD |
1127 | rp_packed = fnvlist_pack(recvdprops, &size); |
1128 | zc.zc_nvlist_src = (uint64_t)(uintptr_t)rp_packed; | |
43e52edd BB |
1129 | zc.zc_nvlist_src_size = size; |
1130 | } | |
47dfff3b | 1131 | |
a3eeab2d | 1132 | if (localprops != NULL) { |
d6df4441 BD |
1133 | lp_packed = fnvlist_pack(localprops, &size); |
1134 | zc.zc_nvlist_conf = (uint64_t)(uintptr_t)lp_packed; | |
a3eeab2d | 1135 | zc.zc_nvlist_conf_size = size; |
1136 | } | |
1137 | ||
43e52edd BB |
1138 | if (origin != NULL) |
1139 | (void) strlcpy(zc.zc_string, origin, | |
1140 | sizeof (zc.zc_string)); | |
6f1ffb06 | 1141 | |
43e52edd BB |
1142 | ASSERT3S(drr.drr_type, ==, DRR_BEGIN); |
1143 | zc.zc_begin_record = drr.drr_u.drr_begin; | |
1144 | zc.zc_guid = force; | |
1145 | zc.zc_cookie = input_fd; | |
1146 | zc.zc_cleanup_fd = -1; | |
1147 | zc.zc_action_handle = 0; | |
1148 | ||
43e52edd BB |
1149 | zc.zc_nvlist_dst_size = 128 * 1024; |
1150 | zc.zc_nvlist_dst = (uint64_t)(uintptr_t) | |
1151 | malloc(zc.zc_nvlist_dst_size); | |
1152 | ||
514498fe | 1153 | error = lzc_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc); |
43e52edd BB |
1154 | if (error != 0) { |
1155 | error = errno; | |
1156 | } else { | |
1157 | if (read_bytes != NULL) | |
1158 | *read_bytes = zc.zc_cookie; | |
1159 | ||
1160 | if (errflags != NULL) | |
1161 | *errflags = zc.zc_obj; | |
1162 | ||
43e52edd BB |
1163 | if (errors != NULL) |
1164 | VERIFY0(nvlist_unpack( | |
1165 | (void *)(uintptr_t)zc.zc_nvlist_dst, | |
1166 | zc.zc_nvlist_dst_size, errors, KM_SLEEP)); | |
1167 | } | |
1168 | ||
d6df4441 BD |
1169 | if (rp_packed != NULL) |
1170 | fnvlist_pack_free(rp_packed, size); | |
1171 | if (lp_packed != NULL) | |
1172 | fnvlist_pack_free(lp_packed, size); | |
43e52edd BB |
1173 | free((void *)(uintptr_t)zc.zc_nvlist_dst); |
1174 | } | |
6f1ffb06 | 1175 | |
6f1ffb06 MA |
1176 | return (error); |
1177 | } | |
46ba1e59 | 1178 | |
47dfff3b MA |
1179 | /* |
1180 | * The simplest receive case: receive from the specified fd, creating the | |
1181 | * specified snapshot. Apply the specified properties as "received" properties | |
1182 | * (which can be overridden by locally-set properties). If the stream is a | |
1183 | * clone, its origin snapshot must be specified by 'origin'. The 'force' | |
1184 | * flag will cause the target filesystem to be rolled back or destroyed if | |
1185 | * necessary to receive. | |
1186 | * | |
1187 | * Return 0 on success or an errno on failure. | |
1188 | * | |
1189 | * Note: this interface does not work on dedup'd streams | |
1190 | * (those with DMU_BACKUP_FEATURE_DEDUP). | |
1191 | */ | |
1192 | int | |
1193 | lzc_receive(const char *snapname, nvlist_t *props, const char *origin, | |
b5256303 | 1194 | boolean_t force, boolean_t raw, int fd) |
47dfff3b | 1195 | { |
d9c460a0 | 1196 | return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, |
e8cf3a4f | 1197 | B_FALSE, B_FALSE, raw, fd, NULL, NULL, NULL, NULL)); |
47dfff3b MA |
1198 | } |
1199 | ||
1200 | /* | |
1201 | * Like lzc_receive, but if the receive fails due to premature stream | |
1202 | * termination, the intermediate state will be preserved on disk. In this | |
1203 | * case, ECKSUM will be returned. The receive may subsequently be resumed | |
1204 | * with a resuming send stream generated by lzc_send_resume(). | |
1205 | */ | |
1206 | int | |
1207 | lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, | |
b5256303 | 1208 | boolean_t force, boolean_t raw, int fd) |
47dfff3b | 1209 | { |
d9c460a0 | 1210 | return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, |
e8cf3a4f | 1211 | B_FALSE, B_TRUE, raw, fd, NULL, NULL, NULL, NULL)); |
fd41e935 BB |
1212 | } |
1213 | ||
1214 | /* | |
1215 | * Like lzc_receive, but allows the caller to read the begin record and then to | |
1216 | * pass it in. That could be useful if the caller wants to derive, for example, | |
1217 | * the snapname or the origin parameters based on the information contained in | |
1218 | * the begin record. | |
1219 | * The begin record must be in its original form as read from the stream, | |
1220 | * in other words, it should not be byteswapped. | |
1221 | * | |
1222 | * The 'resumable' parameter allows to obtain the same behavior as with | |
1223 | * lzc_receive_resumable. | |
1224 | */ | |
1225 | int | |
1226 | lzc_receive_with_header(const char *snapname, nvlist_t *props, | |
b5256303 TC |
1227 | const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, |
1228 | int fd, const dmu_replay_record_t *begin_record) | |
fd41e935 BB |
1229 | { |
1230 | if (begin_record == NULL) | |
1231 | return (EINVAL); | |
b5256303 | 1232 | |
d9c460a0 | 1233 | return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, |
e8cf3a4f | 1234 | B_FALSE, resumable, raw, fd, begin_record, NULL, NULL, NULL)); |
43e52edd BB |
1235 | } |
1236 | ||
1237 | /* | |
1238 | * Like lzc_receive, but allows the caller to pass all supported arguments | |
1239 | * and retrieve all values returned. The only additional input parameter | |
1240 | * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor. | |
1241 | * | |
1242 | * The following parameters all provide return values. Several may be set | |
1243 | * in the failure case and will contain additional information. | |
1244 | * | |
1245 | * The 'read_bytes' value will be set to the total number of bytes read. | |
1246 | * | |
1247 | * The 'errflags' value will contain zprop_errflags_t flags which are | |
1248 | * used to describe any failures. | |
1249 | * | |
196bee4c | 1250 | * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored. |
43e52edd BB |
1251 | * |
1252 | * The 'errors' nvlist contains an entry for each unapplied received | |
1253 | * property. Callers are responsible for freeing this nvlist. | |
1254 | */ | |
e1af0d0d RM |
1255 | int |
1256 | lzc_receive_one(const char *snapname, nvlist_t *props, | |
b5256303 TC |
1257 | const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, |
1258 | int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd, | |
43e52edd BB |
1259 | uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, |
1260 | nvlist_t **errors) | |
1261 | { | |
bfc17897 | 1262 | (void) action_handle, (void) cleanup_fd; |
d9c460a0 | 1263 | return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, |
e8cf3a4f | 1264 | B_FALSE, resumable, raw, input_fd, begin_record, |
196bee4c | 1265 | read_bytes, errflags, errors)); |
a3eeab2d | 1266 | } |
1267 | ||
1268 | /* | |
1269 | * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops' | |
1270 | * argument. | |
1271 | * | |
1272 | * The 'cmdprops' nvlist contains both override ('zfs receive -o') and | |
1273 | * exclude ('zfs receive -x') properties. Callers are responsible for freeing | |
1274 | * this nvlist | |
1275 | */ | |
e1af0d0d RM |
1276 | int |
1277 | lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props, | |
d9c460a0 TC |
1278 | nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin, |
1279 | boolean_t force, boolean_t resumable, boolean_t raw, int input_fd, | |
b5256303 TC |
1280 | const dmu_replay_record_t *begin_record, int cleanup_fd, |
1281 | uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, | |
1282 | nvlist_t **errors) | |
a3eeab2d | 1283 | { |
bfc17897 | 1284 | (void) action_handle, (void) cleanup_fd; |
d9c460a0 | 1285 | return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin, |
e8cf3a4f AP |
1286 | force, B_FALSE, resumable, raw, input_fd, begin_record, |
1287 | read_bytes, errflags, errors)); | |
1288 | } | |
1289 | ||
1290 | /* | |
1291 | * Like lzc_receive_with_cmdprops, but allows the caller to pass an additional | |
1292 | * 'heal' argument. | |
1293 | * | |
1294 | * The heal arguments tells us to heal the provided snapshot using the provided | |
1295 | * send stream | |
1296 | */ | |
1297 | int lzc_receive_with_heal(const char *snapname, nvlist_t *props, | |
1298 | nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin, | |
1299 | boolean_t force, boolean_t heal, boolean_t resumable, boolean_t raw, | |
1300 | int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd, | |
1301 | uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, | |
1302 | nvlist_t **errors) | |
1303 | { | |
1304 | (void) action_handle, (void) cleanup_fd; | |
1305 | return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin, | |
1306 | force, heal, resumable, raw, input_fd, begin_record, | |
196bee4c | 1307 | read_bytes, errflags, errors)); |
47dfff3b MA |
1308 | } |
1309 | ||
46ba1e59 MA |
1310 | /* |
1311 | * Roll back this filesystem or volume to its most recent snapshot. | |
1312 | * If snapnamebuf is not NULL, it will be filled in with the name | |
1313 | * of the most recent snapshot. | |
8ca78ab0 AG |
1314 | * Note that the latest snapshot may change if a new one is concurrently |
1315 | * created or the current one is destroyed. lzc_rollback_to can be used | |
1316 | * to roll back to a specific latest snapshot. | |
46ba1e59 MA |
1317 | * |
1318 | * Return 0 on success or an errno on failure. | |
1319 | */ | |
1320 | int | |
1321 | lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) | |
1322 | { | |
1323 | nvlist_t *args; | |
1324 | nvlist_t *result; | |
1325 | int err; | |
1326 | ||
1327 | args = fnvlist_alloc(); | |
1328 | err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); | |
1329 | nvlist_free(args); | |
1330 | if (err == 0 && snapnamebuf != NULL) { | |
1331 | const char *snapname = fnvlist_lookup_string(result, "target"); | |
1332 | (void) strlcpy(snapnamebuf, snapname, snapnamelen); | |
1333 | } | |
bb7ffdaf GM |
1334 | nvlist_free(result); |
1335 | ||
46ba1e59 MA |
1336 | return (err); |
1337 | } | |
da536844 | 1338 | |
8ca78ab0 AG |
1339 | /* |
1340 | * Roll back this filesystem or volume to the specified snapshot, | |
1341 | * if possible. | |
1342 | * | |
1343 | * Return 0 on success or an errno on failure. | |
1344 | */ | |
1345 | int | |
1346 | lzc_rollback_to(const char *fsname, const char *snapname) | |
1347 | { | |
1348 | nvlist_t *args; | |
1349 | nvlist_t *result; | |
1350 | int err; | |
1351 | ||
1352 | args = fnvlist_alloc(); | |
1353 | fnvlist_add_string(args, "target", snapname); | |
1354 | err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); | |
1355 | nvlist_free(args); | |
1356 | nvlist_free(result); | |
1357 | return (err); | |
1358 | } | |
1359 | ||
da536844 | 1360 | /* |
a73f361f | 1361 | * Creates new bookmarks from existing snapshot or bookmark. |
da536844 | 1362 | * |
a73f361f CS |
1363 | * The bookmarks nvlist maps from the full name of the new bookmark to |
1364 | * the full name of the source snapshot or bookmark. | |
1365 | * All the bookmarks and snapshots must be in the same pool. | |
1366 | * The new bookmarks names must be unique. | |
1367 | * => see function dsl_bookmark_create_nvl_validate | |
da536844 MA |
1368 | * |
1369 | * The returned results nvlist will have an entry for each bookmark that failed. | |
1370 | * The value will be the (int32) error code. | |
1371 | * | |
1372 | * The return value will be 0 if all bookmarks were created, otherwise it will | |
1373 | * be the errno of a (undetermined) bookmarks that failed. | |
1374 | */ | |
1375 | int | |
1376 | lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) | |
1377 | { | |
1378 | nvpair_t *elem; | |
1379 | int error; | |
eca7b760 | 1380 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
da536844 | 1381 | |
a73f361f | 1382 | /* determine pool name from first bookmark */ |
da536844 MA |
1383 | elem = nvlist_next_nvpair(bookmarks, NULL); |
1384 | if (elem == NULL) | |
1385 | return (0); | |
1386 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
1387 | pool[strcspn(pool, "/#")] = '\0'; | |
1388 | ||
1389 | error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist); | |
1390 | ||
1391 | return (error); | |
1392 | } | |
1393 | ||
1394 | /* | |
1395 | * Retrieve bookmarks. | |
1396 | * | |
1397 | * Retrieve the list of bookmarks for the given file system. The props | |
1398 | * parameter is an nvlist of property names (with no values) that will be | |
1399 | * returned for each bookmark. | |
1400 | * | |
30af21b0 PD |
1401 | * The following are valid properties on bookmarks, most of which are numbers |
1402 | * (represented as uint64 in the nvlist), except redact_snaps, which is a | |
1403 | * uint64 array, and redact_complete, which is a boolean | |
da536844 MA |
1404 | * |
1405 | * "guid" - globally unique identifier of the snapshot it refers to | |
1406 | * "createtxg" - txg when the snapshot it refers to was created | |
1407 | * "creation" - timestamp when the snapshot it refers to was created | |
f00ab3f2 | 1408 | * "ivsetguid" - IVset guid for identifying encrypted snapshots |
30af21b0 PD |
1409 | * "redact_snaps" - list of guids of the redaction snapshots for the specified |
1410 | * bookmark. If the bookmark is not a redaction bookmark, the nvlist will | |
1411 | * not contain an entry for this value. If it is redacted with respect to | |
1412 | * no snapshots, it will contain value -> NULL uint64 array | |
1413 | * "redact_complete" - boolean value; true if the redaction bookmark is | |
1414 | * complete, false otherwise. | |
da536844 MA |
1415 | * |
1416 | * The format of the returned nvlist as follows: | |
1417 | * <short name of bookmark> -> { | |
1418 | * <name of property> -> { | |
1419 | * "value" -> uint64 | |
1420 | * } | |
30af21b0 PD |
1421 | * ... |
1422 | * "redact_snaps" -> { | |
1423 | * "value" -> uint64 array | |
1424 | * } | |
1425 | * "redact_complete" -> { | |
1426 | * "value" -> boolean value | |
1427 | * } | |
da536844 MA |
1428 | * } |
1429 | */ | |
1430 | int | |
1431 | lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) | |
1432 | { | |
1433 | return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks)); | |
1434 | } | |
1435 | ||
30af21b0 PD |
1436 | /* |
1437 | * Get bookmark properties. | |
1438 | * | |
1439 | * Given a bookmark's full name, retrieve all properties for the bookmark. | |
1440 | * | |
1441 | * The format of the returned property list is as follows: | |
1442 | * { | |
1443 | * <name of property> -> { | |
1444 | * "value" -> uint64 | |
1445 | * } | |
1446 | * ... | |
1447 | * "redact_snaps" -> { | |
1448 | * "value" -> uint64 array | |
1449 | * } | |
1450 | */ | |
1451 | int | |
1452 | lzc_get_bookmark_props(const char *bookmark, nvlist_t **props) | |
1453 | { | |
1454 | int error; | |
1455 | ||
1456 | nvlist_t *innvl = fnvlist_alloc(); | |
1457 | error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props); | |
1458 | fnvlist_free(innvl); | |
1459 | ||
1460 | return (error); | |
1461 | } | |
1462 | ||
da536844 MA |
1463 | /* |
1464 | * Destroys bookmarks. | |
1465 | * | |
1466 | * The keys in the bmarks nvlist are the bookmarks to be destroyed. | |
1467 | * They must all be in the same pool. Bookmarks are specified as | |
1468 | * <fs>#<bmark>. | |
1469 | * | |
1470 | * Bookmarks that do not exist will be silently ignored. | |
1471 | * | |
1472 | * The return value will be 0 if all bookmarks that existed were destroyed. | |
1473 | * | |
1474 | * Otherwise the return value will be the errno of a (undetermined) bookmark | |
1475 | * that failed, no bookmarks will be destroyed, and the errlist will have an | |
1476 | * entry for each bookmarks that failed. The value in the errlist will be | |
1477 | * the (int32) error code. | |
1478 | */ | |
1479 | int | |
1480 | lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) | |
1481 | { | |
1482 | nvpair_t *elem; | |
1483 | int error; | |
eca7b760 | 1484 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
da536844 MA |
1485 | |
1486 | /* determine the pool name */ | |
1487 | elem = nvlist_next_nvpair(bmarks, NULL); | |
1488 | if (elem == NULL) | |
1489 | return (0); | |
1490 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
1491 | pool[strcspn(pool, "/#")] = '\0'; | |
1492 | ||
1493 | error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist); | |
1494 | ||
1495 | return (error); | |
1496 | } | |
b5256303 | 1497 | |
5b72a38d SD |
1498 | static int |
1499 | lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync, | |
1500 | uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) | |
1501 | { | |
1502 | int error; | |
1503 | nvlist_t *args; | |
1504 | ||
1505 | args = fnvlist_alloc(); | |
1506 | fnvlist_add_string(args, ZCP_ARG_PROGRAM, program); | |
1507 | fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl); | |
1508 | fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync); | |
1509 | fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit); | |
1510 | fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit); | |
1511 | error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl); | |
1512 | fnvlist_free(args); | |
1513 | ||
1514 | return (error); | |
1515 | } | |
1516 | ||
d99a0153 CW |
1517 | /* |
1518 | * Executes a channel program. | |
1519 | * | |
1520 | * If this function returns 0 the channel program was successfully loaded and | |
1521 | * ran without failing. Note that individual commands the channel program ran | |
1522 | * may have failed and the channel program is responsible for reporting such | |
1523 | * errors through outnvl if they are important. | |
1524 | * | |
1525 | * This method may also return: | |
1526 | * | |
1527 | * EINVAL The program contains syntax errors, or an invalid memory or time | |
1528 | * limit was given. No part of the channel program was executed. | |
1529 | * If caused by syntax errors, 'outnvl' contains information about the | |
1530 | * errors. | |
1531 | * | |
1532 | * ECHRNG The program was executed, but encountered a runtime error, such as | |
1533 | * calling a function with incorrect arguments, invoking the error() | |
1534 | * function directly, failing an assert() command, etc. Some portion | |
1535 | * of the channel program may have executed and committed changes. | |
1536 | * Information about the failure can be found in 'outnvl'. | |
1537 | * | |
1538 | * ENOMEM The program fully executed, but the output buffer was not large | |
1539 | * enough to store the returned value. No output is returned through | |
1540 | * 'outnvl'. | |
1541 | * | |
1542 | * ENOSPC The program was terminated because it exceeded its memory usage | |
1543 | * limit. Some portion of the channel program may have executed and | |
1544 | * committed changes to disk. No output is returned through 'outnvl'. | |
1545 | * | |
1546 | * ETIME The program was terminated because it exceeded its Lua instruction | |
1547 | * limit. Some portion of the channel program may have executed and | |
1548 | * committed changes to disk. No output is returned through 'outnvl'. | |
1549 | */ | |
1550 | int | |
1551 | lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit, | |
1552 | uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) | |
1553 | { | |
5b72a38d SD |
1554 | return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit, |
1555 | memlimit, argnvl, outnvl)); | |
1556 | } | |
d99a0153 | 1557 | |
d2734cce SD |
1558 | /* |
1559 | * Creates a checkpoint for the specified pool. | |
1560 | * | |
1561 | * If this function returns 0 the pool was successfully checkpointed. | |
1562 | * | |
1563 | * This method may also return: | |
1564 | * | |
1565 | * ZFS_ERR_CHECKPOINT_EXISTS | |
1566 | * The pool already has a checkpoint. A pools can only have one | |
1567 | * checkpoint at most, at any given time. | |
1568 | * | |
1569 | * ZFS_ERR_DISCARDING_CHECKPOINT | |
1570 | * ZFS is in the middle of discarding a checkpoint for this pool. | |
1571 | * The pool can be checkpointed again once the discard is done. | |
1572 | * | |
1573 | * ZFS_DEVRM_IN_PROGRESS | |
1574 | * A vdev is currently being removed. The pool cannot be | |
1575 | * checkpointed until the device removal is done. | |
1576 | * | |
1577 | * ZFS_VDEV_TOO_BIG | |
1578 | * One or more top-level vdevs exceed the maximum vdev size | |
1579 | * supported for this feature. | |
1580 | */ | |
1581 | int | |
1582 | lzc_pool_checkpoint(const char *pool) | |
1583 | { | |
1584 | int error; | |
1585 | ||
1586 | nvlist_t *result = NULL; | |
1587 | nvlist_t *args = fnvlist_alloc(); | |
1588 | ||
1589 | error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result); | |
1590 | ||
1591 | fnvlist_free(args); | |
1592 | fnvlist_free(result); | |
1593 | ||
1594 | return (error); | |
1595 | } | |
1596 | ||
1597 | /* | |
1598 | * Discard the checkpoint from the specified pool. | |
1599 | * | |
1600 | * If this function returns 0 the checkpoint was successfully discarded. | |
1601 | * | |
1602 | * This method may also return: | |
1603 | * | |
1604 | * ZFS_ERR_NO_CHECKPOINT | |
1605 | * The pool does not have a checkpoint. | |
1606 | * | |
1607 | * ZFS_ERR_DISCARDING_CHECKPOINT | |
1608 | * ZFS is already in the middle of discarding the checkpoint. | |
1609 | */ | |
1610 | int | |
1611 | lzc_pool_checkpoint_discard(const char *pool) | |
1612 | { | |
1613 | int error; | |
1614 | ||
1615 | nvlist_t *result = NULL; | |
1616 | nvlist_t *args = fnvlist_alloc(); | |
1617 | ||
1618 | error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result); | |
1619 | ||
1620 | fnvlist_free(args); | |
1621 | fnvlist_free(result); | |
1622 | ||
1623 | return (error); | |
1624 | } | |
1625 | ||
5b72a38d SD |
1626 | /* |
1627 | * Executes a read-only channel program. | |
1628 | * | |
1629 | * A read-only channel program works programmatically the same way as a | |
1630 | * normal channel program executed with lzc_channel_program(). The only | |
1631 | * difference is it runs exclusively in open-context and therefore can | |
1632 | * return faster. The downside to that, is that the program cannot change | |
1633 | * on-disk state by calling functions from the zfs.sync submodule. | |
1634 | * | |
1635 | * The return values of this function (and their meaning) are exactly the | |
1636 | * same as the ones described in lzc_channel_program(). | |
1637 | */ | |
1638 | int | |
1639 | lzc_channel_program_nosync(const char *pool, const char *program, | |
1640 | uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl) | |
1641 | { | |
1642 | return (lzc_channel_program_impl(pool, program, B_FALSE, timeout, | |
1643 | memlimit, argnvl, outnvl)); | |
d99a0153 CW |
1644 | } |
1645 | ||
2a673e76 AJ |
1646 | int |
1647 | lzc_get_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) | |
1648 | { | |
1649 | return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS, poolname, innvl, outnvl)); | |
1650 | } | |
1651 | ||
1652 | int | |
1653 | lzc_set_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl) | |
1654 | { | |
1655 | return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS, poolname, innvl, outnvl)); | |
1656 | } | |
1657 | ||
b5256303 TC |
1658 | /* |
1659 | * Performs key management functions | |
1660 | * | |
85ce3f4f | 1661 | * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to |
1662 | * load or change a wrapping key, the key should be specified in the | |
1663 | * hidden_args nvlist so that it is not logged. | |
b5256303 TC |
1664 | */ |
1665 | int | |
1666 | lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata, | |
1667 | uint_t wkeylen) | |
1668 | { | |
1669 | int error; | |
1670 | nvlist_t *ioc_args; | |
1671 | nvlist_t *hidden_args; | |
1672 | ||
1673 | if (wkeydata == NULL) | |
1674 | return (EINVAL); | |
1675 | ||
1676 | ioc_args = fnvlist_alloc(); | |
1677 | hidden_args = fnvlist_alloc(); | |
1678 | fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen); | |
1679 | fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); | |
1680 | if (noop) | |
1681 | fnvlist_add_boolean(ioc_args, "noop"); | |
1682 | error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL); | |
1683 | nvlist_free(hidden_args); | |
1684 | nvlist_free(ioc_args); | |
1685 | ||
1686 | return (error); | |
1687 | } | |
1688 | ||
1689 | int | |
1690 | lzc_unload_key(const char *fsname) | |
1691 | { | |
1692 | return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL)); | |
1693 | } | |
1694 | ||
1695 | int | |
1696 | lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props, | |
1697 | uint8_t *wkeydata, uint_t wkeylen) | |
1698 | { | |
1699 | int error; | |
1700 | nvlist_t *ioc_args = fnvlist_alloc(); | |
1701 | nvlist_t *hidden_args = NULL; | |
1702 | ||
1703 | fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd); | |
1704 | ||
1705 | if (wkeydata != NULL) { | |
1706 | hidden_args = fnvlist_alloc(); | |
1707 | fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, | |
1708 | wkeylen); | |
1709 | fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); | |
1710 | } | |
1711 | ||
1712 | if (props != NULL) | |
1713 | fnvlist_add_nvlist(ioc_args, "props", props); | |
1714 | ||
1715 | error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL); | |
1716 | nvlist_free(hidden_args); | |
1717 | nvlist_free(ioc_args); | |
d99a0153 | 1718 | |
b5256303 TC |
1719 | return (error); |
1720 | } | |
d3f2cd7e AB |
1721 | |
1722 | int | |
1723 | lzc_reopen(const char *pool_name, boolean_t scrub_restart) | |
1724 | { | |
1725 | nvlist_t *args = fnvlist_alloc(); | |
1726 | int error; | |
1727 | ||
1728 | fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart); | |
1729 | ||
1730 | error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL); | |
1731 | nvlist_free(args); | |
1732 | return (error); | |
1733 | } | |
619f0976 GW |
1734 | |
1735 | /* | |
1736 | * Changes initializing state. | |
1737 | * | |
1738 | * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. | |
1739 | * The key is ignored. | |
1740 | * | |
1741 | * If there are errors related to vdev arguments, per-vdev errors are returned | |
1742 | * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where | |
1743 | * guid is stringified with PRIu64, and errno is one of the following as | |
1744 | * an int64_t: | |
1745 | * - ENODEV if the device was not found | |
1746 | * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing) | |
1747 | * - EROFS if the device is not writeable | |
1b939560 BB |
1748 | * - EBUSY start requested but the device is already being either |
1749 | * initialized or trimmed | |
619f0976 GW |
1750 | * - ESRCH cancel/suspend requested but device is not being initialized |
1751 | * | |
1752 | * If the errlist is empty, then return value will be: | |
1753 | * - EINVAL if one or more arguments was invalid | |
1754 | * - Other spa_open failures | |
1755 | * - 0 if the operation succeeded | |
1756 | */ | |
1757 | int | |
1758 | lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type, | |
1759 | nvlist_t *vdevs, nvlist_t **errlist) | |
1760 | { | |
1761 | int error; | |
1b939560 | 1762 | |
619f0976 GW |
1763 | nvlist_t *args = fnvlist_alloc(); |
1764 | fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type); | |
1765 | fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs); | |
1766 | ||
1767 | error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist); | |
1768 | ||
1769 | fnvlist_free(args); | |
1770 | ||
1771 | return (error); | |
1772 | } | |
1b939560 BB |
1773 | |
1774 | /* | |
1775 | * Changes TRIM state. | |
1776 | * | |
1777 | * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID. | |
1778 | * The key is ignored. | |
1779 | * | |
1780 | * If there are errors related to vdev arguments, per-vdev errors are returned | |
1781 | * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where | |
1782 | * guid is stringified with PRIu64, and errno is one of the following as | |
1783 | * an int64_t: | |
1784 | * - ENODEV if the device was not found | |
1785 | * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing) | |
1786 | * - EROFS if the device is not writeable | |
1787 | * - EBUSY start requested but the device is already being either trimmed | |
1788 | * or initialized | |
1789 | * - ESRCH cancel/suspend requested but device is not being initialized | |
1790 | * - EOPNOTSUPP if the device does not support TRIM (or secure TRIM) | |
1791 | * | |
1792 | * If the errlist is empty, then return value will be: | |
1793 | * - EINVAL if one or more arguments was invalid | |
1794 | * - Other spa_open failures | |
1795 | * - 0 if the operation succeeded | |
1796 | */ | |
1797 | int | |
1798 | lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate, | |
1799 | boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist) | |
1800 | { | |
1801 | int error; | |
1802 | ||
1803 | nvlist_t *args = fnvlist_alloc(); | |
1804 | fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type); | |
1805 | fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs); | |
1806 | fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate); | |
1807 | fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure); | |
1808 | ||
1809 | error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist); | |
1810 | ||
1811 | fnvlist_free(args); | |
1812 | ||
1813 | return (error); | |
1814 | } | |
30af21b0 PD |
1815 | |
1816 | /* | |
1817 | * Create a redaction bookmark named bookname by redacting snapshot with respect | |
1818 | * to all the snapshots in snapnv. | |
1819 | */ | |
1820 | int | |
1821 | lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv) | |
1822 | { | |
1823 | nvlist_t *args = fnvlist_alloc(); | |
1824 | fnvlist_add_string(args, "bookname", bookname); | |
1825 | fnvlist_add_nvlist(args, "snapnv", snapnv); | |
1826 | int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL); | |
1827 | fnvlist_free(args); | |
1828 | return (error); | |
1829 | } | |
e60e158e JG |
1830 | |
1831 | static int | |
1832 | wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag, | |
1833 | uint64_t tag, boolean_t *waited) | |
1834 | { | |
1835 | nvlist_t *args = fnvlist_alloc(); | |
1836 | nvlist_t *result = NULL; | |
1837 | ||
1838 | fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity); | |
1839 | if (use_tag) | |
1840 | fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag); | |
1841 | ||
1842 | int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result); | |
1843 | ||
1844 | if (error == 0 && waited != NULL) | |
1845 | *waited = fnvlist_lookup_boolean_value(result, | |
1846 | ZPOOL_WAIT_WAITED); | |
1847 | ||
1848 | fnvlist_free(args); | |
1849 | fnvlist_free(result); | |
1850 | ||
1851 | return (error); | |
1852 | } | |
1853 | ||
1854 | int | |
1855 | lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited) | |
1856 | { | |
1857 | return (wait_common(pool, activity, B_FALSE, 0, waited)); | |
1858 | } | |
1859 | ||
1860 | int | |
1861 | lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag, | |
1862 | boolean_t *waited) | |
1863 | { | |
1864 | return (wait_common(pool, activity, B_TRUE, tag, waited)); | |
1865 | } | |
5a42ef04 PD |
1866 | |
1867 | int | |
1868 | lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited) | |
1869 | { | |
1870 | nvlist_t *args = fnvlist_alloc(); | |
1871 | nvlist_t *result = NULL; | |
1872 | ||
1873 | fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity); | |
1874 | ||
1875 | int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result); | |
1876 | ||
1877 | if (error == 0 && waited != NULL) | |
1878 | *waited = fnvlist_lookup_boolean_value(result, | |
1879 | ZFS_WAIT_WAITED); | |
1880 | ||
1881 | fnvlist_free(args); | |
1882 | fnvlist_free(result); | |
1883 | ||
1884 | return (error); | |
1885 | } | |
108a454a PD |
1886 | |
1887 | /* | |
1888 | * Set the bootenv contents for the given pool. | |
1889 | */ | |
1890 | int | |
1db9e6e4 | 1891 | lzc_set_bootenv(const char *pool, const nvlist_t *env) |
108a454a | 1892 | { |
1db9e6e4 | 1893 | return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL)); |
108a454a PD |
1894 | } |
1895 | ||
1896 | /* | |
1897 | * Get the contents of the bootenv of the given pool. | |
1898 | */ | |
1899 | int | |
1900 | lzc_get_bootenv(const char *pool, nvlist_t **outnvl) | |
1901 | { | |
1902 | return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl)); | |
1903 | } |