]>
Commit | Line | Data |
---|---|---|
6f1ffb06 MA |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
5dc8b736 | 23 | * Copyright (c) 2012, 2014 by Delphix. All rights reserved. |
95fd54a1 | 24 | * Copyright (c) 2013 Steven Hartland. All rights reserved. |
bec1067d | 25 | * Copyright (c) 2017 Datto Inc. |
6f1ffb06 MA |
26 | */ |
27 | ||
28 | /* | |
29 | * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. | |
30 | * It has the following characteristics: | |
31 | * | |
32 | * - Thread Safe. libzfs_core is accessible concurrently from multiple | |
33 | * threads. This is accomplished primarily by avoiding global data | |
34 | * (e.g. caching). Since it's thread-safe, there is no reason for a | |
35 | * process to have multiple libzfs "instances". Therefore, we store | |
36 | * our few pieces of data (e.g. the file descriptor) in global | |
37 | * variables. The fd is reference-counted so that the libzfs_core | |
38 | * library can be "initialized" multiple times (e.g. by different | |
39 | * consumers within the same process). | |
40 | * | |
41 | * - Committed Interface. The libzfs_core interface will be committed, | |
42 | * therefore consumers can compile against it and be confident that | |
43 | * their code will continue to work on future releases of this code. | |
44 | * Currently, the interface is Evolving (not Committed), but we intend | |
45 | * to commit to it once it is more complete and we determine that it | |
46 | * meets the needs of all consumers. | |
47 | * | |
b8fce77b | 48 | * - Programmatic Error Handling. libzfs_core communicates errors with |
6f1ffb06 MA |
49 | * defined error numbers, and doesn't print anything to stdout/stderr. |
50 | * | |
51 | * - Thin Layer. libzfs_core is a thin layer, marshaling arguments | |
52 | * to/from the kernel ioctls. There is generally a 1:1 correspondence | |
53 | * between libzfs_core functions and ioctls to /dev/zfs. | |
54 | * | |
55 | * - Clear Atomicity. Because libzfs_core functions are generally 1:1 | |
56 | * with kernel ioctls, and kernel ioctls are generally atomic, each | |
57 | * libzfs_core function is atomic. For example, creating multiple | |
58 | * snapshots with a single call to lzc_snapshot() is atomic -- it | |
59 | * can't fail with only some of the requested snapshots created, even | |
60 | * in the event of power loss or system crash. | |
61 | * | |
62 | * - Continued libzfs Support. Some higher-level operations (e.g. | |
63 | * support for "zfs send -R") are too complicated to fit the scope of | |
64 | * libzfs_core. This functionality will continue to live in libzfs. | |
65 | * Where appropriate, libzfs will use the underlying atomic operations | |
66 | * of libzfs_core. For example, libzfs may implement "zfs send -R | | |
67 | * zfs receive" by using individual "send one snapshot", rename, | |
68 | * destroy, and "receive one snapshot" operations in libzfs_core. | |
69 | * /sbin/zfs and /sbin/zpool will link with both libzfs and | |
70 | * libzfs_core. Other consumers should aim to use only libzfs_core, | |
71 | * since that will be the supported, stable interface going forwards. | |
72 | */ | |
73 | ||
74 | #include <libzfs_core.h> | |
75 | #include <ctype.h> | |
76 | #include <unistd.h> | |
77 | #include <stdlib.h> | |
78 | #include <string.h> | |
79 | #include <errno.h> | |
80 | #include <fcntl.h> | |
81 | #include <pthread.h> | |
82 | #include <sys/nvpair.h> | |
83 | #include <sys/param.h> | |
84 | #include <sys/types.h> | |
85 | #include <sys/stat.h> | |
86 | #include <sys/zfs_ioctl.h> | |
87 | ||
/* Descriptor for the ZFS control device; -1 whenever it is not open. */
static int g_fd = -1;
/* Count of libzfs_core_init() calls not yet matched by libzfs_core_fini(). */
static int g_refcount = 0;
/* Held while g_fd and g_refcount are examined or changed by init/fini. */
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
91 | ||
92 | int | |
93 | libzfs_core_init(void) | |
94 | { | |
95 | (void) pthread_mutex_lock(&g_lock); | |
96 | if (g_refcount == 0) { | |
97 | g_fd = open("/dev/zfs", O_RDWR); | |
98 | if (g_fd < 0) { | |
99 | (void) pthread_mutex_unlock(&g_lock); | |
100 | return (errno); | |
101 | } | |
102 | } | |
103 | g_refcount++; | |
104 | (void) pthread_mutex_unlock(&g_lock); | |
105 | return (0); | |
106 | } | |
107 | ||
108 | void | |
109 | libzfs_core_fini(void) | |
110 | { | |
111 | (void) pthread_mutex_lock(&g_lock); | |
112 | ASSERT3S(g_refcount, >, 0); | |
e2454897 GM |
113 | |
114 | if (g_refcount > 0) | |
115 | g_refcount--; | |
116 | ||
117 | if (g_refcount == 0 && g_fd != -1) { | |
6f1ffb06 | 118 | (void) close(g_fd); |
e2454897 GM |
119 | g_fd = -1; |
120 | } | |
6f1ffb06 MA |
121 | (void) pthread_mutex_unlock(&g_lock); |
122 | } | |
123 | ||
124 | static int | |
125 | lzc_ioctl(zfs_ioc_t ioc, const char *name, | |
126 | nvlist_t *source, nvlist_t **resultp) | |
127 | { | |
13fe0198 | 128 | zfs_cmd_t zc = {"\0"}; |
6f1ffb06 | 129 | int error = 0; |
bec1067d AP |
130 | char *packed = NULL; |
131 | size_t size = 0; | |
6f1ffb06 MA |
132 | |
133 | ASSERT3S(g_refcount, >, 0); | |
e2454897 | 134 | VERIFY3S(g_fd, !=, -1); |
6f1ffb06 | 135 | |
bec1067d AP |
136 | if (name != NULL) |
137 | (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); | |
6f1ffb06 | 138 | |
bec1067d AP |
139 | if (source != NULL) { |
140 | packed = fnvlist_pack(source, &size); | |
141 | zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; | |
142 | zc.zc_nvlist_src_size = size; | |
143 | } | |
6f1ffb06 MA |
144 | |
145 | if (resultp != NULL) { | |
13fe0198 | 146 | *resultp = NULL; |
6f1ffb06 MA |
147 | zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); |
148 | zc.zc_nvlist_dst = (uint64_t)(uintptr_t) | |
149 | malloc(zc.zc_nvlist_dst_size); | |
150 | if (zc.zc_nvlist_dst == (uint64_t)0) { | |
151 | error = ENOMEM; | |
152 | goto out; | |
153 | } | |
154 | } | |
155 | ||
156 | while (ioctl(g_fd, ioc, &zc) != 0) { | |
157 | if (errno == ENOMEM && resultp != NULL) { | |
158 | free((void *)(uintptr_t)zc.zc_nvlist_dst); | |
159 | zc.zc_nvlist_dst_size *= 2; | |
160 | zc.zc_nvlist_dst = (uint64_t)(uintptr_t) | |
161 | malloc(zc.zc_nvlist_dst_size); | |
162 | if (zc.zc_nvlist_dst == (uint64_t)0) { | |
163 | error = ENOMEM; | |
164 | goto out; | |
165 | } | |
166 | } else { | |
167 | error = errno; | |
168 | break; | |
169 | } | |
170 | } | |
171 | if (zc.zc_nvlist_dst_filled) { | |
172 | *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, | |
173 | zc.zc_nvlist_dst_size); | |
6f1ffb06 MA |
174 | } |
175 | ||
176 | out: | |
177 | fnvlist_pack_free(packed, size); | |
178 | free((void *)(uintptr_t)zc.zc_nvlist_dst); | |
179 | return (error); | |
180 | } | |
181 | ||
182 | int | |
e67a7ffb | 183 | lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props) |
6f1ffb06 MA |
184 | { |
185 | int error; | |
186 | nvlist_t *args = fnvlist_alloc(); | |
e67a7ffb | 187 | fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); |
6f1ffb06 MA |
188 | if (props != NULL) |
189 | fnvlist_add_nvlist(args, "props", props); | |
190 | error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); | |
191 | nvlist_free(args); | |
192 | return (error); | |
193 | } | |
194 | ||
195 | int | |
196 | lzc_clone(const char *fsname, const char *origin, | |
197 | nvlist_t *props) | |
198 | { | |
199 | int error; | |
200 | nvlist_t *args = fnvlist_alloc(); | |
201 | fnvlist_add_string(args, "origin", origin); | |
202 | if (props != NULL) | |
203 | fnvlist_add_nvlist(args, "props", props); | |
204 | error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); | |
205 | nvlist_free(args); | |
206 | return (error); | |
207 | } | |
208 | ||
209 | /* | |
210 | * Creates snapshots. | |
211 | * | |
212 | * The keys in the snaps nvlist are the snapshots to be created. | |
213 | * They must all be in the same pool. | |
214 | * | |
215 | * The props nvlist is properties to set. Currently only user properties | |
216 | * are supported. { user:prop_name -> string value } | |
217 | * | |
218 | * The returned results nvlist will have an entry for each snapshot that failed. | |
219 | * The value will be the (int32) error code. | |
220 | * | |
221 | * The return value will be 0 if all snapshots were created, otherwise it will | |
13fe0198 | 222 | * be the errno of a (unspecified) snapshot that failed. |
6f1ffb06 MA |
223 | */ |
224 | int | |
225 | lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) | |
226 | { | |
227 | nvpair_t *elem; | |
228 | nvlist_t *args; | |
229 | int error; | |
eca7b760 | 230 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
6f1ffb06 MA |
231 | |
232 | *errlist = NULL; | |
233 | ||
234 | /* determine the pool name */ | |
235 | elem = nvlist_next_nvpair(snaps, NULL); | |
236 | if (elem == NULL) | |
237 | return (0); | |
238 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
239 | pool[strcspn(pool, "/@")] = '\0'; | |
240 | ||
241 | args = fnvlist_alloc(); | |
242 | fnvlist_add_nvlist(args, "snaps", snaps); | |
243 | if (props != NULL) | |
244 | fnvlist_add_nvlist(args, "props", props); | |
245 | ||
246 | error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); | |
247 | nvlist_free(args); | |
248 | ||
249 | return (error); | |
250 | } | |
251 | ||
252 | /* | |
253 | * Destroys snapshots. | |
254 | * | |
255 | * The keys in the snaps nvlist are the snapshots to be destroyed. | |
256 | * They must all be in the same pool. | |
257 | * | |
258 | * Snapshots that do not exist will be silently ignored. | |
259 | * | |
260 | * If 'defer' is not set, and a snapshot has user holds or clones, the | |
261 | * destroy operation will fail and none of the snapshots will be | |
262 | * destroyed. | |
263 | * | |
264 | * If 'defer' is set, and a snapshot has user holds or clones, it will be | |
265 | * marked for deferred destruction, and will be destroyed when the last hold | |
266 | * or clone is removed/destroyed. | |
267 | * | |
268 | * The return value will be 0 if all snapshots were destroyed (or marked for | |
1a077756 | 269 | * later destruction if 'defer' is set) or didn't exist to begin with. |
6f1ffb06 | 270 | * |
13fe0198 | 271 | * Otherwise the return value will be the errno of a (unspecified) snapshot |
6f1ffb06 MA |
272 | * that failed, no snapshots will be destroyed, and the errlist will have an |
273 | * entry for each snapshot that failed. The value in the errlist will be | |
274 | * the (int32) error code. | |
275 | */ | |
276 | int | |
277 | lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) | |
278 | { | |
279 | nvpair_t *elem; | |
280 | nvlist_t *args; | |
281 | int error; | |
eca7b760 | 282 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
6f1ffb06 MA |
283 | |
284 | /* determine the pool name */ | |
285 | elem = nvlist_next_nvpair(snaps, NULL); | |
286 | if (elem == NULL) | |
287 | return (0); | |
288 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
289 | pool[strcspn(pool, "/@")] = '\0'; | |
290 | ||
291 | args = fnvlist_alloc(); | |
292 | fnvlist_add_nvlist(args, "snaps", snaps); | |
293 | if (defer) | |
294 | fnvlist_add_boolean(args, "defer"); | |
295 | ||
296 | error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); | |
297 | nvlist_free(args); | |
298 | ||
299 | return (error); | |
6f1ffb06 MA |
300 | } |
301 | ||
302 | int | |
303 | lzc_snaprange_space(const char *firstsnap, const char *lastsnap, | |
304 | uint64_t *usedp) | |
305 | { | |
306 | nvlist_t *args; | |
307 | nvlist_t *result; | |
308 | int err; | |
eca7b760 | 309 | char fs[ZFS_MAX_DATASET_NAME_LEN]; |
6f1ffb06 MA |
310 | char *atp; |
311 | ||
312 | /* determine the fs name */ | |
313 | (void) strlcpy(fs, firstsnap, sizeof (fs)); | |
314 | atp = strchr(fs, '@'); | |
315 | if (atp == NULL) | |
316 | return (EINVAL); | |
317 | *atp = '\0'; | |
318 | ||
319 | args = fnvlist_alloc(); | |
320 | fnvlist_add_string(args, "firstsnap", firstsnap); | |
321 | ||
322 | err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); | |
323 | nvlist_free(args); | |
324 | if (err == 0) | |
325 | *usedp = fnvlist_lookup_uint64(result, "used"); | |
326 | fnvlist_free(result); | |
327 | ||
328 | return (err); | |
329 | } | |
330 | ||
331 | boolean_t | |
332 | lzc_exists(const char *dataset) | |
333 | { | |
334 | /* | |
335 | * The objset_stats ioctl is still legacy, so we need to construct our | |
336 | * own zfs_cmd_t rather than using zfsc_ioctl(). | |
337 | */ | |
13fe0198 | 338 | zfs_cmd_t zc = {"\0"}; |
6f1ffb06 | 339 | |
e2454897 GM |
340 | ASSERT3S(g_refcount, >, 0); |
341 | VERIFY3S(g_fd, !=, -1); | |
342 | ||
6f1ffb06 MA |
343 | (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); |
344 | return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); | |
345 | } | |
346 | ||
bec1067d AP |
347 | /* |
348 | * outnvl is unused. | |
349 | * It was added to preserve the function signature in case it is | |
350 | * needed in the future. | |
351 | */ | |
352 | /*ARGSUSED*/ | |
353 | int | |
354 | lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl) | |
355 | { | |
356 | return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL)); | |
357 | } | |
358 | ||
13fe0198 MA |
359 | /* |
360 | * Create "user holds" on snapshots. If there is a hold on a snapshot, | |
361 | * the snapshot can not be destroyed. (However, it can be marked for deletion | |
362 | * by lzc_destroy_snaps(defer=B_TRUE).) | |
363 | * | |
364 | * The keys in the nvlist are snapshot names. | |
365 | * The snapshots must all be in the same pool. | |
366 | * The value is the name of the hold (string type). | |
367 | * | |
368 | * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL). | |
369 | * In this case, when the cleanup_fd is closed (including on process | |
370 | * termination), the holds will be released. If the system is shut down | |
371 | * uncleanly, the holds will be released when the pool is next opened | |
372 | * or imported. | |
373 | * | |
95fd54a1 | 374 | * Holds for snapshots which don't exist will be skipped and have an entry |
1a077756 | 375 | * added to errlist, but will not cause an overall failure. |
95fd54a1 | 376 | * |
1a077756 | 377 | * The return value will be 0 if all holds, for snapshots that existed, |
b8fce77b | 378 | * were successfully created. |
95fd54a1 SH |
379 | * |
380 | * Otherwise the return value will be the errno of a (unspecified) hold that | |
381 | * failed and no holds will be created. | |
382 | * | |
383 | * In all cases the errlist will have an entry for each hold that failed | |
384 | * (name = snapshot), with its value being the error code (int32). | |
13fe0198 MA |
385 | */ |
386 | int | |
387 | lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist) | |
388 | { | |
eca7b760 | 389 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
13fe0198 MA |
390 | nvlist_t *args; |
391 | nvpair_t *elem; | |
392 | int error; | |
393 | ||
394 | /* determine the pool name */ | |
395 | elem = nvlist_next_nvpair(holds, NULL); | |
396 | if (elem == NULL) | |
397 | return (0); | |
398 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
399 | pool[strcspn(pool, "/@")] = '\0'; | |
400 | ||
401 | args = fnvlist_alloc(); | |
402 | fnvlist_add_nvlist(args, "holds", holds); | |
403 | if (cleanup_fd != -1) | |
404 | fnvlist_add_int32(args, "cleanup_fd", cleanup_fd); | |
405 | ||
406 | error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist); | |
407 | nvlist_free(args); | |
408 | return (error); | |
409 | } | |
410 | ||
411 | /* | |
412 | * Release "user holds" on snapshots. If the snapshot has been marked for | |
413 | * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have | |
414 | * any clones, and all the user holds are removed, then the snapshot will be | |
415 | * destroyed. | |
416 | * | |
417 | * The keys in the nvlist are snapshot names. | |
418 | * The snapshots must all be in the same pool. | |
d5884c34 | 419 | * The value is an nvlist whose keys are the holds to remove. |
13fe0198 | 420 | * |
95fd54a1 | 421 | * Holds which failed to release because they didn't exist will have an entry |
1a077756 | 422 | * added to errlist, but will not cause an overall failure. |
95fd54a1 SH |
423 | * |
424 | * The return value will be 0 if the nvl holds was empty or all holds that | |
1a077756 | 425 | * existed, were successfully removed. |
95fd54a1 SH |
426 | * |
427 | * Otherwise the return value will be the errno of a (unspecified) hold that | |
428 | * failed to release and no holds will be released. | |
429 | * | |
430 | * In all cases the errlist will have an entry for each hold that failed to | |
431 | * to release. | |
13fe0198 MA |
432 | */ |
433 | int | |
434 | lzc_release(nvlist_t *holds, nvlist_t **errlist) | |
435 | { | |
eca7b760 | 436 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
13fe0198 MA |
437 | nvpair_t *elem; |
438 | ||
439 | /* determine the pool name */ | |
440 | elem = nvlist_next_nvpair(holds, NULL); | |
441 | if (elem == NULL) | |
442 | return (0); | |
443 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
444 | pool[strcspn(pool, "/@")] = '\0'; | |
445 | ||
446 | return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist)); | |
447 | } | |
448 | ||
449 | /* | |
450 | * Retrieve list of user holds on the specified snapshot. | |
451 | * | |
d5884c34 | 452 | * On success, *holdsp will be set to an nvlist which the caller must free. |
13fe0198 MA |
453 | * The keys are the names of the holds, and the value is the creation time |
454 | * of the hold (uint64) in seconds since the epoch. | |
455 | */ | |
456 | int | |
457 | lzc_get_holds(const char *snapname, nvlist_t **holdsp) | |
458 | { | |
bec1067d | 459 | return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp)); |
13fe0198 MA |
460 | } |
461 | ||
6f1ffb06 | 462 | /* |
9b67f605 MA |
463 | * Generate a zfs send stream for the specified snapshot and write it to |
464 | * the specified file descriptor. | |
da536844 MA |
465 | * |
466 | * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap") | |
467 | * | |
468 | * If "from" is NULL, a full (non-incremental) stream will be sent. | |
469 | * If "from" is non-NULL, it must be the full name of a snapshot or | |
470 | * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or | |
471 | * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or | |
472 | * bookmark must represent an earlier point in the history of "snapname"). | |
473 | * It can be an earlier snapshot in the same filesystem or zvol as "snapname", | |
474 | * or it can be the origin of "snapname"'s filesystem, or an earlier | |
475 | * snapshot in the origin, etc. | |
476 | * | |
477 | * "fd" is the file descriptor to write the send stream to. | |
9b67f605 | 478 | * |
f1512ee6 MA |
479 | * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted |
480 | * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT | |
481 | * records with drr_blksz > 128K. | |
482 | * | |
9b67f605 MA |
483 | * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted |
484 | * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, | |
485 | * which the receiving system must support (as indicated by support | |
486 | * for the "embedded_data" feature). | |
6f1ffb06 MA |
487 | */ |
488 | int | |
9b67f605 MA |
489 | lzc_send(const char *snapname, const char *from, int fd, |
490 | enum lzc_send_flags flags) | |
47dfff3b MA |
491 | { |
492 | return (lzc_send_resume(snapname, from, fd, flags, 0, 0)); | |
493 | } | |
494 | ||
495 | int | |
496 | lzc_send_resume(const char *snapname, const char *from, int fd, | |
497 | enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff) | |
6f1ffb06 MA |
498 | { |
499 | nvlist_t *args; | |
500 | int err; | |
501 | ||
502 | args = fnvlist_alloc(); | |
503 | fnvlist_add_int32(args, "fd", fd); | |
da536844 MA |
504 | if (from != NULL) |
505 | fnvlist_add_string(args, "fromsnap", from); | |
f1512ee6 MA |
506 | if (flags & LZC_SEND_FLAG_LARGE_BLOCK) |
507 | fnvlist_add_boolean(args, "largeblockok"); | |
9b67f605 MA |
508 | if (flags & LZC_SEND_FLAG_EMBED_DATA) |
509 | fnvlist_add_boolean(args, "embedok"); | |
a7004725 DK |
510 | if (flags & LZC_SEND_FLAG_COMPRESS) |
511 | fnvlist_add_boolean(args, "compressok"); | |
47dfff3b MA |
512 | if (resumeobj != 0 || resumeoff != 0) { |
513 | fnvlist_add_uint64(args, "resume_object", resumeobj); | |
514 | fnvlist_add_uint64(args, "resume_offset", resumeoff); | |
515 | } | |
6f1ffb06 MA |
516 | err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); |
517 | nvlist_free(args); | |
518 | return (err); | |
519 | } | |
520 | ||
521 | /* | |
5dc8b736 MG |
522 | * "from" can be NULL, a snapshot, or a bookmark. |
523 | * | |
524 | * If from is NULL, a full (non-incremental) stream will be estimated. This | |
525 | * is calculated very efficiently. | |
526 | * | |
527 | * If from is a snapshot, lzc_send_space uses the deadlists attached to | |
528 | * each snapshot to efficiently estimate the stream size. | |
529 | * | |
530 | * If from is a bookmark, the indirect blocks in the destination snapshot | |
531 | * are traversed, looking for blocks with a birth time since the creation TXG of | |
532 | * the snapshot this bookmark was created from. This will result in | |
533 | * significantly more I/O and be less efficient than a send space estimation on | |
534 | * an equivalent snapshot. | |
6f1ffb06 MA |
535 | */ |
536 | int | |
2aa34383 DK |
537 | lzc_send_space(const char *snapname, const char *from, |
538 | enum lzc_send_flags flags, uint64_t *spacep) | |
6f1ffb06 MA |
539 | { |
540 | nvlist_t *args; | |
541 | nvlist_t *result; | |
542 | int err; | |
543 | ||
544 | args = fnvlist_alloc(); | |
5dc8b736 MG |
545 | if (from != NULL) |
546 | fnvlist_add_string(args, "from", from); | |
2aa34383 DK |
547 | if (flags & LZC_SEND_FLAG_LARGE_BLOCK) |
548 | fnvlist_add_boolean(args, "largeblockok"); | |
549 | if (flags & LZC_SEND_FLAG_EMBED_DATA) | |
550 | fnvlist_add_boolean(args, "embedok"); | |
551 | if (flags & LZC_SEND_FLAG_COMPRESS) | |
552 | fnvlist_add_boolean(args, "compressok"); | |
6f1ffb06 MA |
553 | err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); |
554 | nvlist_free(args); | |
555 | if (err == 0) | |
556 | *spacep = fnvlist_lookup_uint64(result, "space"); | |
557 | nvlist_free(result); | |
558 | return (err); | |
559 | } | |
560 | ||
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf'.
 *
 * Short reads are continued until the buffer is full, and a read that is
 * interrupted by a signal (EINTR) is retried.
 *
 * Returns 0 once 'ilen' bytes have been read, or EIO if read() fails or the
 * stream ends before 'ilen' bytes were delivered.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	while (len > 0) {
		ssize_t rv = read(fd, cp, len);

		if (rv < 0) {
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			break;	/* end of stream before the buffer filled */

		/* Only advance on a successful, non-empty read. */
		cp += rv;
		len -= (int)rv;
	}

	return (len == 0 ? 0 : EIO);
}
579 | ||
43e52edd BB |
580 | /* |
581 | * Linux adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy | |
582 | * ZFS_IOC_RECV user/kernel interface. The new interface supports all stream | |
583 | * options but is currently only used for resumable streams. This way updated | |
584 | * user space utilities will interoperate with older kernel modules. | |
585 | * | |
586 | * Non-Linux OpenZFS platforms have opted to modify the legacy interface. | |
587 | */ | |
47dfff3b | 588 | static int |
a3eeab2d | 589 | recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, |
590 | const char *origin, boolean_t force, boolean_t resumable, int input_fd, | |
43e52edd BB |
591 | const dmu_replay_record_t *begin_record, int cleanup_fd, |
592 | uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, | |
593 | nvlist_t **errors) | |
6f1ffb06 | 594 | { |
43e52edd BB |
595 | dmu_replay_record_t drr; |
596 | char fsname[MAXPATHLEN]; | |
6f1ffb06 | 597 | char *atp; |
6f1ffb06 MA |
598 | int error; |
599 | ||
e2454897 GM |
600 | ASSERT3S(g_refcount, >, 0); |
601 | VERIFY3S(g_fd, !=, -1); | |
602 | ||
43e52edd BB |
603 | /* Set 'fsname' to the name of containing filesystem */ |
604 | (void) strlcpy(fsname, snapname, sizeof (fsname)); | |
605 | atp = strchr(fsname, '@'); | |
6f1ffb06 MA |
606 | if (atp == NULL) |
607 | return (EINVAL); | |
608 | *atp = '\0'; | |
609 | ||
43e52edd BB |
610 | /* If the fs does not exist, try its parent. */ |
611 | if (!lzc_exists(fsname)) { | |
612 | char *slashp = strrchr(fsname, '/'); | |
6f1ffb06 MA |
613 | if (slashp == NULL) |
614 | return (ENOENT); | |
615 | *slashp = '\0'; | |
43e52edd | 616 | } |
6f1ffb06 | 617 | |
43e52edd BB |
618 | /* |
619 | * The begin_record is normally a non-byteswapped BEGIN record. | |
620 | * For resumable streams it may be set to any non-byteswapped | |
621 | * dmu_replay_record_t. | |
622 | */ | |
623 | if (begin_record == NULL) { | |
624 | error = recv_read(input_fd, &drr, sizeof (drr)); | |
625 | if (error != 0) | |
626 | return (error); | |
627 | } else { | |
628 | drr = *begin_record; | |
6f1ffb06 MA |
629 | } |
630 | ||
43e52edd BB |
631 | if (resumable) { |
632 | nvlist_t *outnvl = NULL; | |
633 | nvlist_t *innvl = fnvlist_alloc(); | |
6f1ffb06 | 634 | |
43e52edd | 635 | fnvlist_add_string(innvl, "snapname", snapname); |
6f1ffb06 | 636 | |
a3eeab2d | 637 | if (recvdprops != NULL) |
638 | fnvlist_add_nvlist(innvl, "props", recvdprops); | |
639 | ||
640 | if (localprops != NULL) | |
641 | fnvlist_add_nvlist(innvl, "localprops", localprops); | |
6f1ffb06 | 642 | |
43e52edd BB |
643 | if (origin != NULL && strlen(origin)) |
644 | fnvlist_add_string(innvl, "origin", origin); | |
645 | ||
646 | fnvlist_add_byte_array(innvl, "begin_record", | |
02730c33 | 647 | (uchar_t *)&drr, sizeof (drr)); |
43e52edd BB |
648 | |
649 | fnvlist_add_int32(innvl, "input_fd", input_fd); | |
650 | ||
651 | if (force) | |
652 | fnvlist_add_boolean(innvl, "force"); | |
653 | ||
654 | if (resumable) | |
655 | fnvlist_add_boolean(innvl, "resumable"); | |
656 | ||
657 | if (cleanup_fd >= 0) | |
658 | fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd); | |
659 | ||
660 | if (action_handle != NULL) | |
661 | fnvlist_add_uint64(innvl, "action_handle", | |
662 | *action_handle); | |
663 | ||
664 | error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl); | |
665 | ||
666 | if (error == 0 && read_bytes != NULL) | |
667 | error = nvlist_lookup_uint64(outnvl, "read_bytes", | |
668 | read_bytes); | |
669 | ||
670 | if (error == 0 && errflags != NULL) | |
671 | error = nvlist_lookup_uint64(outnvl, "error_flags", | |
672 | errflags); | |
673 | ||
674 | if (error == 0 && action_handle != NULL) | |
675 | error = nvlist_lookup_uint64(outnvl, "action_handle", | |
676 | action_handle); | |
677 | ||
678 | if (error == 0 && errors != NULL) { | |
679 | nvlist_t *nvl; | |
680 | error = nvlist_lookup_nvlist(outnvl, "errors", &nvl); | |
681 | if (error == 0) | |
682 | *errors = fnvlist_dup(nvl); | |
683 | } | |
684 | ||
685 | fnvlist_free(innvl); | |
686 | fnvlist_free(outnvl); | |
fd41e935 | 687 | } else { |
43e52edd BB |
688 | zfs_cmd_t zc = {"\0"}; |
689 | char *packed = NULL; | |
690 | size_t size; | |
6f1ffb06 | 691 | |
43e52edd | 692 | ASSERT3S(g_refcount, >, 0); |
6f1ffb06 | 693 | |
43e52edd BB |
694 | (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_value)); |
695 | (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); | |
6f1ffb06 | 696 | |
a3eeab2d | 697 | if (recvdprops != NULL) { |
698 | packed = fnvlist_pack(recvdprops, &size); | |
43e52edd BB |
699 | zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; |
700 | zc.zc_nvlist_src_size = size; | |
701 | } | |
47dfff3b | 702 | |
a3eeab2d | 703 | if (localprops != NULL) { |
704 | packed = fnvlist_pack(localprops, &size); | |
705 | zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed; | |
706 | zc.zc_nvlist_conf_size = size; | |
707 | } | |
708 | ||
43e52edd BB |
709 | if (origin != NULL) |
710 | (void) strlcpy(zc.zc_string, origin, | |
711 | sizeof (zc.zc_string)); | |
6f1ffb06 | 712 | |
43e52edd BB |
713 | ASSERT3S(drr.drr_type, ==, DRR_BEGIN); |
714 | zc.zc_begin_record = drr.drr_u.drr_begin; | |
715 | zc.zc_guid = force; | |
716 | zc.zc_cookie = input_fd; | |
717 | zc.zc_cleanup_fd = -1; | |
718 | zc.zc_action_handle = 0; | |
719 | ||
720 | if (cleanup_fd >= 0) | |
721 | zc.zc_cleanup_fd = cleanup_fd; | |
722 | ||
723 | if (action_handle != NULL) | |
724 | zc.zc_action_handle = *action_handle; | |
725 | ||
726 | zc.zc_nvlist_dst_size = 128 * 1024; | |
727 | zc.zc_nvlist_dst = (uint64_t)(uintptr_t) | |
728 | malloc(zc.zc_nvlist_dst_size); | |
729 | ||
730 | error = ioctl(g_fd, ZFS_IOC_RECV, &zc); | |
731 | if (error != 0) { | |
732 | error = errno; | |
733 | } else { | |
734 | if (read_bytes != NULL) | |
735 | *read_bytes = zc.zc_cookie; | |
736 | ||
737 | if (errflags != NULL) | |
738 | *errflags = zc.zc_obj; | |
739 | ||
740 | if (action_handle != NULL) | |
741 | *action_handle = zc.zc_action_handle; | |
742 | ||
743 | if (errors != NULL) | |
744 | VERIFY0(nvlist_unpack( | |
745 | (void *)(uintptr_t)zc.zc_nvlist_dst, | |
746 | zc.zc_nvlist_dst_size, errors, KM_SLEEP)); | |
747 | } | |
748 | ||
749 | if (packed != NULL) | |
750 | fnvlist_pack_free(packed, size); | |
751 | free((void *)(uintptr_t)zc.zc_nvlist_dst); | |
752 | } | |
6f1ffb06 | 753 | |
6f1ffb06 MA |
754 | return (error); |
755 | } | |
46ba1e59 | 756 | |
47dfff3b MA |
757 | /* |
758 | * The simplest receive case: receive from the specified fd, creating the | |
759 | * specified snapshot. Apply the specified properties as "received" properties | |
760 | * (which can be overridden by locally-set properties). If the stream is a | |
761 | * clone, its origin snapshot must be specified by 'origin'. The 'force' | |
762 | * flag will cause the target filesystem to be rolled back or destroyed if | |
763 | * necessary to receive. | |
764 | * | |
765 | * Return 0 on success or an errno on failure. | |
766 | * | |
767 | * Note: this interface does not work on dedup'd streams | |
768 | * (those with DMU_BACKUP_FEATURE_DEDUP). | |
769 | */ | |
770 | int | |
771 | lzc_receive(const char *snapname, nvlist_t *props, const char *origin, | |
772 | boolean_t force, int fd) | |
773 | { | |
a3eeab2d | 774 | return (recv_impl(snapname, props, NULL, origin, force, B_FALSE, fd, |
43e52edd | 775 | NULL, -1, NULL, NULL, NULL, NULL)); |
47dfff3b MA |
776 | } |
777 | ||
778 | /* | |
779 | * Like lzc_receive, but if the receive fails due to premature stream | |
780 | * termination, the intermediate state will be preserved on disk. In this | |
781 | * case, ECKSUM will be returned. The receive may subsequently be resumed | |
782 | * with a resuming send stream generated by lzc_send_resume(). | |
783 | */ | |
784 | int | |
785 | lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, | |
786 | boolean_t force, int fd) | |
787 | { | |
a3eeab2d | 788 | return (recv_impl(snapname, props, NULL, origin, force, B_TRUE, fd, |
43e52edd | 789 | NULL, -1, NULL, NULL, NULL, NULL)); |
fd41e935 BB |
790 | } |
791 | ||
792 | /* | |
793 | * Like lzc_receive, but allows the caller to read the begin record and then to | |
794 | * pass it in. That could be useful if the caller wants to derive, for example, | |
795 | * the snapname or the origin parameters based on the information contained in | |
796 | * the begin record. | |
797 | * The begin record must be in its original form as read from the stream, | |
798 | * in other words, it should not be byteswapped. | |
799 | * | |
800 | * The 'resumable' parameter allows to obtain the same behavior as with | |
801 | * lzc_receive_resumable. | |
802 | */ | |
803 | int | |
804 | lzc_receive_with_header(const char *snapname, nvlist_t *props, | |
805 | const char *origin, boolean_t force, boolean_t resumable, int fd, | |
806 | const dmu_replay_record_t *begin_record) | |
807 | { | |
808 | if (begin_record == NULL) | |
809 | return (EINVAL); | |
a3eeab2d | 810 | return (recv_impl(snapname, props, NULL, origin, force, resumable, fd, |
43e52edd BB |
811 | begin_record, -1, NULL, NULL, NULL, NULL)); |
812 | } | |
813 | ||
814 | /* | |
815 | * Like lzc_receive, but allows the caller to pass all supported arguments | |
816 | * and retrieve all values returned. The only additional input parameter | |
817 | * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor. | |
818 | * | |
819 | * The following parameters all provide return values. Several may be set | |
820 | * in the failure case and will contain additional information. | |
821 | * | |
822 | * The 'read_bytes' value will be set to the total number of bytes read. | |
823 | * | |
824 | * The 'errflags' value will contain zprop_errflags_t flags which are | |
825 | * used to describe any failures. | |
826 | * | |
827 | * The 'action_handle' is used to pass the handle for this guid/ds mapping. | |
828 | * It should be set to zero on first call and will contain an updated handle | |
829 | * on success, it should be passed in subsequent calls. | |
830 | * | |
831 | * The 'errors' nvlist contains an entry for each unapplied received | |
832 | * property. Callers are responsible for freeing this nvlist. | |
833 | */ | |
834 | int lzc_receive_one(const char *snapname, nvlist_t *props, | |
835 | const char *origin, boolean_t force, boolean_t resumable, int input_fd, | |
836 | const dmu_replay_record_t *begin_record, int cleanup_fd, | |
837 | uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, | |
838 | nvlist_t **errors) | |
839 | { | |
a3eeab2d | 840 | return (recv_impl(snapname, props, NULL, origin, force, resumable, |
841 | input_fd, begin_record, cleanup_fd, read_bytes, errflags, | |
842 | action_handle, errors)); | |
843 | } | |
844 | ||
845 | /* | |
846 | * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops' | |
847 | * argument. | |
848 | * | |
849 | * The 'cmdprops' nvlist contains both override ('zfs receive -o') and | |
850 | * exclude ('zfs receive -x') properties. Callers are responsible for freeing | |
851 | * this nvlist | |
852 | */ | |
853 | int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props, | |
854 | nvlist_t *cmdprops, const char *origin, boolean_t force, | |
855 | boolean_t resumable, int input_fd, const dmu_replay_record_t *begin_record, | |
856 | int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags, | |
857 | uint64_t *action_handle, nvlist_t **errors) | |
858 | { | |
859 | return (recv_impl(snapname, props, cmdprops, origin, force, resumable, | |
43e52edd BB |
860 | input_fd, begin_record, cleanup_fd, read_bytes, errflags, |
861 | action_handle, errors)); | |
47dfff3b MA |
862 | } |
863 | ||
46ba1e59 MA |
864 | /* |
865 | * Roll back this filesystem or volume to its most recent snapshot. | |
866 | * If snapnamebuf is not NULL, it will be filled in with the name | |
867 | * of the most recent snapshot. | |
868 | * | |
869 | * Return 0 on success or an errno on failure. | |
870 | */ | |
871 | int | |
872 | lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen) | |
873 | { | |
874 | nvlist_t *args; | |
875 | nvlist_t *result; | |
876 | int err; | |
877 | ||
878 | args = fnvlist_alloc(); | |
879 | err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result); | |
880 | nvlist_free(args); | |
881 | if (err == 0 && snapnamebuf != NULL) { | |
882 | const char *snapname = fnvlist_lookup_string(result, "target"); | |
883 | (void) strlcpy(snapnamebuf, snapname, snapnamelen); | |
884 | } | |
bb7ffdaf GM |
885 | nvlist_free(result); |
886 | ||
46ba1e59 MA |
887 | return (err); |
888 | } | |
da536844 MA |
889 | |
890 | /* | |
891 | * Creates bookmarks. | |
892 | * | |
893 | * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to | |
894 | * the name of the snapshot (e.g. "pool/fs@snap"). All the bookmarks and | |
895 | * snapshots must be in the same pool. | |
896 | * | |
897 | * The returned results nvlist will have an entry for each bookmark that failed. | |
898 | * The value will be the (int32) error code. | |
899 | * | |
900 | * The return value will be 0 if all bookmarks were created, otherwise it will | |
901 | * be the errno of a (undetermined) bookmarks that failed. | |
902 | */ | |
903 | int | |
904 | lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) | |
905 | { | |
906 | nvpair_t *elem; | |
907 | int error; | |
eca7b760 | 908 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
da536844 MA |
909 | |
910 | /* determine the pool name */ | |
911 | elem = nvlist_next_nvpair(bookmarks, NULL); | |
912 | if (elem == NULL) | |
913 | return (0); | |
914 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
915 | pool[strcspn(pool, "/#")] = '\0'; | |
916 | ||
917 | error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist); | |
918 | ||
919 | return (error); | |
920 | } | |
921 | ||
922 | /* | |
923 | * Retrieve bookmarks. | |
924 | * | |
925 | * Retrieve the list of bookmarks for the given file system. The props | |
926 | * parameter is an nvlist of property names (with no values) that will be | |
927 | * returned for each bookmark. | |
928 | * | |
929 | * The following are valid properties on bookmarks, all of which are numbers | |
930 | * (represented as uint64 in the nvlist) | |
931 | * | |
932 | * "guid" - globally unique identifier of the snapshot it refers to | |
933 | * "createtxg" - txg when the snapshot it refers to was created | |
934 | * "creation" - timestamp when the snapshot it refers to was created | |
935 | * | |
936 | * The format of the returned nvlist as follows: | |
937 | * <short name of bookmark> -> { | |
938 | * <name of property> -> { | |
939 | * "value" -> uint64 | |
940 | * } | |
941 | * } | |
942 | */ | |
943 | int | |
944 | lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks) | |
945 | { | |
946 | return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks)); | |
947 | } | |
948 | ||
949 | /* | |
950 | * Destroys bookmarks. | |
951 | * | |
952 | * The keys in the bmarks nvlist are the bookmarks to be destroyed. | |
953 | * They must all be in the same pool. Bookmarks are specified as | |
954 | * <fs>#<bmark>. | |
955 | * | |
956 | * Bookmarks that do not exist will be silently ignored. | |
957 | * | |
958 | * The return value will be 0 if all bookmarks that existed were destroyed. | |
959 | * | |
960 | * Otherwise the return value will be the errno of a (undetermined) bookmark | |
961 | * that failed, no bookmarks will be destroyed, and the errlist will have an | |
962 | * entry for each bookmarks that failed. The value in the errlist will be | |
963 | * the (int32) error code. | |
964 | */ | |
965 | int | |
966 | lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist) | |
967 | { | |
968 | nvpair_t *elem; | |
969 | int error; | |
eca7b760 | 970 | char pool[ZFS_MAX_DATASET_NAME_LEN]; |
da536844 MA |
971 | |
972 | /* determine the pool name */ | |
973 | elem = nvlist_next_nvpair(bmarks, NULL); | |
974 | if (elem == NULL) | |
975 | return (0); | |
976 | (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); | |
977 | pool[strcspn(pool, "/#")] = '\0'; | |
978 | ||
979 | error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist); | |
980 | ||
981 | return (error); | |
982 | } |