4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2012 by Delphix. All rights reserved.
27 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
28 * It has the following characteristics:
30 * - Thread Safe. libzfs_core is accessible concurrently from multiple
31 * threads. This is accomplished primarily by avoiding global data
32 * (e.g. caching). Since it's thread-safe, there is no reason for a
33 * process to have multiple libzfs "instances". Therefore, we store
34 * our few pieces of data (e.g. the file descriptor) in global
35 * variables. The fd is reference-counted so that the libzfs_core
36 * library can be "initialized" multiple times (e.g. by different
37 * consumers within the same process).
39 * - Committed Interface. The libzfs_core interface will be committed,
40 * therefore consumers can compile against it and be confident that
41 * their code will continue to work on future releases of this code.
42 * Currently, the interface is Evolving (not Committed), but we intend
43 * to commit to it once it is more complete and we determine that it
44 * meets the needs of all consumers.
46 * - Programmatic Error Handling. libzfs_core communicates errors with
47 * defined error numbers, and doesn't print anything to stdout/stderr.
49 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
50 * to/from the kernel ioctls. There is generally a 1:1 correspondence
51 * between libzfs_core functions and ioctls to /dev/zfs.
53 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
54 * with kernel ioctls, and kernel ioctls are generally atomic, each
55 * libzfs_core function is atomic. For example, creating multiple
56 * snapshots with a single call to lzc_snapshot() is atomic -- it
57 * can't fail with only some of the requested snapshots created, even
58 * in the event of power loss or system crash.
60 * - Continued libzfs Support. Some higher-level operations (e.g.
61 * support for "zfs send -R") are too complicated to fit the scope of
62 * libzfs_core. This functionality will continue to live in libzfs.
63 * Where appropriate, libzfs will use the underlying atomic operations
64 * of libzfs_core. For example, libzfs may implement "zfs send -R |
65 * zfs receive" by using individual "send one snapshot", rename,
66 * destroy, and "receive one snapshot" operations in libzfs_core.
67 * /sbin/zfs and /sbin/zpool will link with both libzfs and
68 * libzfs_core. Other consumers should aim to use only libzfs_core,
69 * since that will be the supported, stable interface going forwards.
72 #include <libzfs_core.h>
80 #include <sys/nvpair.h>
81 #include <sys/param.h>
82 #include <sys/types.h>
84 #include <sys/zfs_ioctl.h>
/* Mutex held by libzfs_core_init() and libzfs_core_fini() while they examine g_refcount. */
87 static pthread_mutex_t g_lock
= PTHREAD_MUTEX_INITIALIZER
;
/*
 * Reference count for the library: libzfs_core_init() opens /dev/zfs when
 * it is 0, and the ioctl paths assert that it is positive before running.
 */
88 static int g_refcount
;
/*
 * Initialize the library.  With g_lock held, if g_refcount is 0 the
 * control device /dev/zfs is opened read-write and the descriptor is
 * stored in g_fd.
 *
 * NOTE(review): two unlock calls are visible (original lines 97 and 102),
 * which looks like an early-exit path plus the normal path; the statements
 * between them (open() failure handling, refcount update, return values)
 * are not visible in this listing -- confirm against the full source.
 */
91 libzfs_core_init(void)
93 (void) pthread_mutex_lock(&g_lock
);
94 if (g_refcount
== 0) {
95 g_fd
= open("/dev/zfs", O_RDWR
);
97 (void) pthread_mutex_unlock(&g_lock
);
102 (void) pthread_mutex_unlock(&g_lock
);
/*
 * Tear-down counterpart of libzfs_core_init().  Takes g_lock and asserts
 * that g_refcount is positive, i.e. that an init call preceded it.
 *
 * NOTE(review): the statements between the assertion and the unlock
 * (presumably the refcount decrement and closing g_fd) are not visible in
 * this listing -- confirm against the full source.
 */
107 libzfs_core_fini(void)
109 (void) pthread_mutex_lock(&g_lock
);
110 ASSERT3S(g_refcount
, >, 0);
114 (void) pthread_mutex_unlock(&g_lock
);
/*
 * Common helper for the nvlist-based ioctls.
 *
 *   ioc     - ioctl number passed to ioctl() on g_fd.
 *   name    - copied (bounded by strlcpy) into zc.zc_name.
 *   source  - input nvlist; packed with fnvlist_pack() and handed to the
 *             kernel through zc_nvlist_src / zc_nvlist_src_size.
 *   resultp - if non-NULL, a destination buffer is allocated for the
 *             kernel's output nvlist, which is unpacked into *resultp
 *             when the kernel reports zc_nvlist_dst_filled.
 *
 * The packed source and the destination buffer are both released at the
 * end of the function.
 *
 * NOTE(review): the bodies of the malloc-failure branches, the non-ENOMEM
 * error branch and the return statement are not visible in this listing.
 */
118 lzc_ioctl(zfs_ioc_t ioc
, const char *name
,
119 nvlist_t
*source
, nvlist_t
**resultp
)
121 zfs_cmd_t zc
= {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"};
/* The library must have been initialized before any ioctl is issued. */
126 ASSERT3S(g_refcount
, >, 0);
128 (void) strlcpy(zc
.zc_name
, name
, sizeof (zc
.zc_name
));
/* The pointer is carried in a 64-bit field, hence the uintptr_t cast. */
130 packed
= fnvlist_pack(source
, &size
);
131 zc
.zc_nvlist_src
= (uint64_t)(uintptr_t)packed
;
132 zc
.zc_nvlist_src_size
= size
;
/*
 * Initial result buffer: the larger of twice the packed source size and
 * 128 * 1024 bytes.
 */
134 if (resultp
!= NULL
) {
135 zc
.zc_nvlist_dst_size
= MAX(size
* 2, 128 * 1024);
136 zc
.zc_nvlist_dst
= (uint64_t)(uintptr_t)
137 malloc(zc
.zc_nvlist_dst_size
);
138 if (zc
.zc_nvlist_dst
== (uint64_t)0) {
/*
 * Retry loop: when the ioctl fails with ENOMEM and a result was
 * requested, the destination buffer is freed, its size doubled, and a
 * new buffer allocated before the ioctl is issued again.
 */
144 while (ioctl(g_fd
, ioc
, &zc
) != 0) {
145 if (errno
== ENOMEM
&& resultp
!= NULL
) {
146 free((void *)(uintptr_t)zc
.zc_nvlist_dst
);
147 zc
.zc_nvlist_dst_size
*= 2;
148 zc
.zc_nvlist_dst
= (uint64_t)(uintptr_t)
149 malloc(zc
.zc_nvlist_dst_size
);
150 if (zc
.zc_nvlist_dst
== (uint64_t)0) {
/*
 * NOTE(review): *resultp is written here without a resultp != NULL test;
 * this presumably relies on the kernel setting zc_nvlist_dst_filled only
 * when a destination buffer was supplied -- confirm.
 */
159 if (zc
.zc_nvlist_dst_filled
) {
160 *resultp
= fnvlist_unpack((void *)(uintptr_t)zc
.zc_nvlist_dst
,
161 zc
.zc_nvlist_dst_size
);
162 } else if (resultp
!= NULL
) {
/* Cleanup: release the packed source and the destination buffer. */
167 fnvlist_pack_free(packed
, size
);
168 free((void *)(uintptr_t)zc
.zc_nvlist_dst
);
/*
 * Create the dataset named by fsname.  The argument nvlist carries the
 * objset type as int32 "type" and the caller's properties as nvlist
 * "props"; it is sent with ZFS_IOC_CREATE and no result nvlist is
 * requested (resultp is NULL).
 *
 * NOTE(review): original line 178, between the two adds, is not visible
 * in this listing (possibly a NULL check on props) -- confirm.
 */
173 lzc_create(const char *fsname
, dmu_objset_type_t type
, nvlist_t
*props
)
176 nvlist_t
*args
= fnvlist_alloc();
177 fnvlist_add_int32(args
, "type", type
);
179 fnvlist_add_nvlist(args
, "props", props
);
180 error
= lzc_ioctl(ZFS_IOC_CREATE
, fsname
, args
, NULL
);
/*
 * Create the dataset fsname as a clone.  The argument nvlist carries the
 * origin as string "origin" and the properties as nvlist "props"; it is
 * sent with ZFS_IOC_CLONE and no result nvlist is requested.
 *
 * NOTE(review): the rest of the parameter list (where props is declared)
 * and original line 192 are not visible in this listing.
 */
186 lzc_clone(const char *fsname
, const char *origin
,
190 nvlist_t
*args
= fnvlist_alloc();
191 fnvlist_add_string(args
, "origin", origin
);
193 fnvlist_add_nvlist(args
, "props", props
);
194 error
= lzc_ioctl(ZFS_IOC_CLONE
, fsname
, args
, NULL
);
202 * The keys in the snaps nvlist are the snapshots to be created.
203 * They must all be in the same pool.
205 * The props nvlist is properties to set. Currently only user properties
206 * are supported. { user:prop_name -> string value }
208 * The returned results nvlist will have an entry for each snapshot that failed.
209 * The value will be the (int32) error code.
211 * The return value will be 0 if all snapshots were created, otherwise it will
212 * be the errno of an (undetermined) snapshot that failed.
/*
 * Implementation notes for lzc_snapshot(): the ioctl is addressed to the
 * pool, whose name is derived from the first key in snaps by copying that
 * key and cutting it at the first '/' or '@'.  The request nvlist carries
 * "snaps" and "props", and errlist is passed to lzc_ioctl() as the result
 * pointer.
 *
 * NOTE(review): original lines 226-227 and 233 are not visible in this
 * listing (possibly an empty-list check and a props NULL check).
 */
215 lzc_snapshot(nvlist_t
*snaps
, nvlist_t
*props
, nvlist_t
**errlist
)
220 char pool
[MAXNAMELEN
];
224 /* determine the pool name */
225 elem
= nvlist_next_nvpair(snaps
, NULL
);
228 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
/* strcspn() yields the index of the first '/' or '@', or the string length if neither occurs. */
229 pool
[strcspn(pool
, "/@")] = '\0';
231 args
= fnvlist_alloc();
232 fnvlist_add_nvlist(args
, "snaps", snaps
);
234 fnvlist_add_nvlist(args
, "props", props
);
236 error
= lzc_ioctl(ZFS_IOC_SNAPSHOT
, pool
, args
, errlist
);
243 * Destroys snapshots.
245 * The keys in the snaps nvlist are the snapshots to be destroyed.
246 * They must all be in the same pool.
248 * Snapshots that do not exist will be silently ignored.
250 * If 'defer' is not set, and a snapshot has user holds or clones, the
251 * destroy operation will fail and none of the snapshots will be destroyed.
254 * If 'defer' is set, and a snapshot has user holds or clones, it will be
255 * marked for deferred destruction, and will be destroyed when the last hold
256 * or clone is removed/destroyed.
258 * The return value will be 0 if all snapshots were destroyed (or marked for
259 * later destruction if 'defer' is set) or didn't exist to begin with.
261 * Otherwise the return value will be the errno of an (undetermined) snapshot
262 * that failed, no snapshots will be destroyed, and the errlist will have an
263 * entry for each snapshot that failed. The value in the errlist will be
264 * the (int32) error code.
/*
 * Implementation notes for lzc_destroy_snaps(): the pool name is derived
 * from the first key in snaps (cut at the first '/' or '@').  The request
 * nvlist carries "snaps" and a boolean "defer" entry, and is sent with
 * ZFS_IOC_DESTROY_SNAPS; errlist is passed as the result pointer.
 *
 * NOTE(review): original line 283 is not visible in this listing; the
 * "defer" entry is presumably added only when the defer argument is set
 * -- confirm.
 */
267 lzc_destroy_snaps(nvlist_t
*snaps
, boolean_t defer
, nvlist_t
**errlist
)
272 char pool
[MAXNAMELEN
];
274 /* determine the pool name */
275 elem
= nvlist_next_nvpair(snaps
, NULL
);
278 (void) strlcpy(pool
, nvpair_name(elem
), sizeof (pool
));
279 pool
[strcspn(pool
, "/@")] = '\0';
281 args
= fnvlist_alloc();
282 fnvlist_add_nvlist(args
, "snaps", snaps
);
284 fnvlist_add_boolean(args
, "defer");
286 error
= lzc_ioctl(ZFS_IOC_DESTROY_SNAPS
, pool
, args
, errlist
);
/*
 * Query space for the snapshot range firstsnap..lastsnap.  The ioctl
 * ZFS_IOC_SPACE_SNAPS is issued against lastsnap with firstsnap passed as
 * string "firstsnap"; the uint64 "used" entry of the result nvlist is
 * stored in *usedp and the result nvlist is then freed.
 *
 * NOTE(review): the remainder of the parameter list, the handling of the
 * '@' lookup in fs, and the error check before "used" is read are not
 * visible in this listing.
 */
294 lzc_snaprange_space(const char *firstsnap
, const char *lastsnap
,
303 /* determine the fs name */
304 (void) strlcpy(fs
, firstsnap
, sizeof (fs
));
305 atp
= strchr(fs
, '@');
310 args
= fnvlist_alloc();
311 fnvlist_add_string(args
, "firstsnap", firstsnap
);
313 err
= lzc_ioctl(ZFS_IOC_SPACE_SNAPS
, lastsnap
, args
, &result
);
316 *usedp
= fnvlist_lookup_uint64(result
, "used");
317 fnvlist_free(result
);
/*
 * Report whether a dataset exists: the name is copied into a
 * zero-initialized zfs_cmd_t and the result is true exactly when the
 * ZFS_IOC_OBJSET_STATS ioctl on g_fd returns 0.  Any ioctl failure,
 * whatever the errno, therefore reads as "does not exist".
 *
 * NOTE(review): the text below mentions zfsc_ioctl(); the nvlist helper
 * visible in this file is named lzc_ioctl().
 */
323 lzc_exists(const char *dataset
)
326 * The objset_stats ioctl is still legacy, so we need to construct our
327 * own zfs_cmd_t rather than using zfsc_ioctl().
329 zfs_cmd_t zc
= {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"};
331 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
332 return (ioctl(g_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) == 0);
336 * If fromsnap is NULL, a full (non-incremental) stream will be sent.
/*
 * Send snapname as a stream to the descriptor fd.  The request nvlist
 * carries the descriptor as int32 "fd" and, only when fromsnap is
 * non-NULL, the string "fromsnap".  It is sent with ZFS_IOC_SEND_NEW and
 * no result nvlist is requested.
 */
339 lzc_send(const char *snapname
, const char *fromsnap
, int fd
)
344 args
= fnvlist_alloc();
345 fnvlist_add_int32(args
, "fd", fd
);
346 if (fromsnap
!= NULL
)
347 fnvlist_add_string(args
, "fromsnap", fromsnap
);
348 err
= lzc_ioctl(ZFS_IOC_SEND_NEW
, snapname
, args
, NULL
);
354 * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
/*
 * Estimate the size of a send stream for snapname.  The string "fromsnap"
 * is added to the request only when fromsnap is non-NULL; the request is
 * sent with ZFS_IOC_SEND_SPACE and the uint64 "space" entry of the result
 * nvlist is stored in *spacep.
 *
 * NOTE(review): the error check between the ioctl and the "space" lookup,
 * and the release of the result nvlist, are not visible in this listing.
 */
357 lzc_send_space(const char *snapname
, const char *fromsnap
, uint64_t *spacep
)
363 args
= fnvlist_alloc();
364 if (fromsnap
!= NULL
)
365 fnvlist_add_string(args
, "fromsnap", fromsnap
);
366 err
= lzc_ioctl(ZFS_IOC_SEND_SPACE
, snapname
, args
, &result
);
369 *spacep
= fnvlist_lookup_uint64(result
, "space");
/*
 * Read helper used by lzc_receive() to fetch the stream's first record
 * from fd.  The final test treats either a negative read() result or a
 * non-zero remaining length as the failure condition.
 *
 * NOTE(review): the loop around read(), the updates to cp and len, and
 * the return statements are not visible in this listing.
 */
375 recv_read(int fd
, void *buf
, int ilen
)
382 rv
= read(fd
, cp
, len
);
387 if (rv
< 0 || len
!= 0)
394 * The simplest receive case: receive from the specified fd, creating the
395 * specified snapshot. Apply the specified properties as "received" properties
396 * (which can be overridden by locally-set properties). If the stream is a
397 * clone, its origin snapshot must be specified by 'origin'. The 'force'
398 * flag will cause the target filesystem to be rolled back or destroyed if
399 * necessary to receive.
401 * Return 0 on success or an errno on failure.
403 * Note: this interface does not work on dedup'd streams
404 * (those with DMU_BACKUP_FEATURE_DEDUP).
/*
 * Implementation notes for lzc_receive(): the zfs_cmd_t is filled in by
 * hand and ZFS_IOC_RECV is issued directly on g_fd.  Visible steps:
 *   - zc_name gets a copy of snapname, which is then searched for '@';
 *     if lzc_exists() reports that name missing, the last '/' is located
 *     so the parent can be tried;
 *   - zc_value gets the full snapshot name;
 *   - props is packed into zc_nvlist_src / zc_nvlist_src_size;
 *   - origin is copied into zc_string;
 *   - a dmu_replay_record_t is read from fd with recv_read() and its
 *     drr_begin member is copied into zc_begin_record;
 *   - zc_cleanup_fd is set to -1.
 *
 * NOTE(review): several statements are not visible in this listing
 * (handling of atp and slashp, the origin NULL check, error checks, the
 * zc_cookie and zc_guid assignments, and the return).
 * NOTE(review): the text below mentions zfsc_ioctl(); the nvlist helper
 * visible in this file is named lzc_ioctl().
 */
407 lzc_receive(const char *snapname
, nvlist_t
*props
, const char *origin
,
408 boolean_t force
, int fd
)
411 * The receive ioctl is still legacy, so we need to construct our own
412 * zfs_cmd_t rather than using zfsc_ioctl().
414 zfs_cmd_t zc
= {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"};
418 dmu_replay_record_t drr
;
421 ASSERT3S(g_refcount
, >, 0);
423 /* zc_name is name of containing filesystem */
424 (void) strlcpy(zc
.zc_name
, snapname
, sizeof (zc
.zc_name
));
425 atp
= strchr(zc
.zc_name
, '@');
430 /* if the fs does not exist, try its parent. */
431 if (!lzc_exists(zc
.zc_name
)) {
432 char *slashp
= strrchr(zc
.zc_name
, '/');
439 /* zc_value is full name of the snapshot to create */
440 (void) strlcpy(zc
.zc_value
, snapname
, sizeof (zc
.zc_value
));
443 /* zc_nvlist_src is props to set */
444 packed
= fnvlist_pack(props
, &size
);
445 zc
.zc_nvlist_src
= (uint64_t)(uintptr_t)packed
;
446 zc
.zc_nvlist_src_size
= size
;
449 /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
451 (void) strlcpy(zc
.zc_string
, origin
, sizeof (zc
.zc_string
));
453 /* zc_begin_record is non-byteswapped BEGIN record */
454 error
= recv_read(fd
, &drr
, sizeof (drr
));
457 zc
.zc_begin_record
= drr
.drr_u
.drr_begin
;
459 /* zc_cookie is fd to read from */
462 /* zc guid is force flag */
465 /* zc_cleanup_fd is unused */
466 zc
.zc_cleanup_fd
= -1;
468 error
= ioctl(g_fd
, ZFS_IOC_RECV
, &zc
);
/*
 * Cleanup.  zc_nvlist_dst is not assigned in any visible line, so with
 * the zero initializer above this free() would receive a null pointer.
 */
474 fnvlist_pack_free(packed
, size
);
475 free((void*)(uintptr_t)zc
.zc_nvlist_dst
);