]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/zfs_ioctl.c
Make use of ZFS_DEBUG consistent within kmod sources
[mirror_zfs.git] / module / zfs / zfs_ioctl.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 Martin Matuska
25 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
26 * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
27 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
28 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
29 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
36 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
37 * Copyright 2017 RackTop Systems.
38 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
39 * Copyright (c) 2019 Datto Inc.
40 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
41 */
42
43 /*
44 * ZFS ioctls.
45 *
46 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
47 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
48 *
49 * There are two ways that we handle ioctls: the legacy way where almost
50 * all of the logic is in the ioctl callback, and the new way where most
51 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
52 *
53 * Non-legacy ioctls should be registered by calling
54 * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked
55 * from userland by lzc_ioctl().
56 *
57 * The registration arguments are as follows:
58 *
59 * const char *name
60 * The name of the ioctl. This is used for history logging. If the
61 * ioctl returns successfully (the callback returns 0), and allow_log
62 * is true, then a history log entry will be recorded with the input &
63 * output nvlists. The log entry can be printed with "zpool history -i".
64 *
65 * zfs_ioc_t ioc
66 * The ioctl request number, which userland will pass to ioctl(2).
67 * We want newer versions of libzfs and libzfs_core to run against
68 * existing zfs kernel modules (i.e. a deferred reboot after an update).
69 * Therefore the ioctl numbers cannot change from release to release.
70 *
71 * zfs_secpolicy_func_t *secpolicy
72 * This function will be called before the zfs_ioc_func_t, to
73 * determine if this operation is permitted. It should return EPERM
74 * on failure, and 0 on success. Checks include determining if the
75 * dataset is visible in this zone, and if the user has either all
76 * zfs privileges in the zone (SYS_MOUNT), or has been granted permission
77 * to do this operation on this dataset with "zfs allow".
78 *
79 * zfs_ioc_namecheck_t namecheck
80 * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
81 * name, a dataset name, or nothing. If the name is not well-formed,
82 * the ioctl will fail and the callback will not be called.
83 * Therefore, the callback can assume that the name is well-formed
84 * (e.g. is null-terminated, doesn't have more than one '@' character,
85 * doesn't have invalid characters).
86 *
87 * zfs_ioc_poolcheck_t pool_check
88 * This specifies requirements on the pool state. If the pool does
89 * not meet them (is suspended or is readonly), the ioctl will fail
90 * and the callback will not be called. If any checks are specified
91 * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
92 * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
93 * POOL_CHECK_READONLY).
94 *
95 * zfs_ioc_key_t *nvl_keys
96 * The list of expected/allowable innvl input keys. This list is used
97 * to validate the nvlist input to the ioctl.
98 *
99 * boolean_t smush_outnvlist
100 * If smush_outnvlist is true, then the output is presumed to be a
101 * list of errors, and it will be "smushed" down to fit into the
102 * caller's buffer, by removing some entries and replacing them with a
103 * single "N_MORE_ERRORS" entry indicating how many were removed. See
104 * nvlist_smush() for details. If smush_outnvlist is false, and the
105 * outnvlist does not fit into the userland-provided buffer, then the
106 * ioctl will fail with ENOMEM.
107 *
108 * zfs_ioc_func_t *func
109 * The callback function that will perform the operation.
110 *
111 * The callback should return 0 on success, or an error number on
112 * failure. If the function fails, the userland ioctl will return -1,
113 * and errno will be set to the callback's return value. The callback
114 * will be called with the following arguments:
115 *
116 * const char *name
117 * The name of the pool or dataset to operate on, from
118 * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the
119 * expected type (pool, dataset, or none).
120 *
121 * nvlist_t *innvl
122 * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or
123 * NULL if no input nvlist was provided. Changes to this nvlist are
124 * ignored. If the input nvlist could not be deserialized, the
125 * ioctl will fail and the callback will not be called.
126 *
127 * nvlist_t *outnvl
128 * The output nvlist, initially empty. The callback can fill it in,
129 * and it will be returned to userland by serializing it into
130 * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization
131 * fails (e.g. because the caller didn't supply a large enough
132 * buffer), then the overall ioctl will fail. See the
133 * 'smush_outnvlist' argument above for additional behaviors.
134 *
135 * There are two typical uses of the output nvlist:
136 * - To return state, e.g. property values. In this case,
137 * smush_outnvlist should be false. If the buffer was not large
138 * enough, the caller will reallocate a larger buffer and try
139 * the ioctl again.
140 *
141 * - To return multiple errors from an ioctl which makes on-disk
142 * changes. In this case, smush_outnvlist should be true.
143 * Ioctls which make on-disk modifications should generally not
144 * use the outnvl if they succeed, because the caller can not
145 * distinguish between the operation failing, and
146 * deserialization failing.
147 *
148 * IOCTL Interface Errors
149 *
150 * The following ioctl input errors can be returned:
151 * ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
152 * ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
153 * ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
154 * ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
155 */
156
157 #include <sys/types.h>
158 #include <sys/param.h>
159 #include <sys/errno.h>
160 #include <sys/uio.h>
161 #include <sys/file.h>
162 #include <sys/kmem.h>
163 #include <sys/cmn_err.h>
164 #include <sys/stat.h>
165 #include <sys/zfs_ioctl.h>
166 #include <sys/zfs_quota.h>
167 #include <sys/zfs_vfsops.h>
168 #include <sys/zfs_znode.h>
169 #include <sys/zap.h>
170 #include <sys/spa.h>
171 #include <sys/spa_impl.h>
172 #include <sys/vdev.h>
173 #include <sys/vdev_impl.h>
174 #include <sys/dmu.h>
175 #include <sys/dsl_dir.h>
176 #include <sys/dsl_dataset.h>
177 #include <sys/dsl_prop.h>
178 #include <sys/dsl_deleg.h>
179 #include <sys/dmu_objset.h>
180 #include <sys/dmu_impl.h>
181 #include <sys/dmu_redact.h>
182 #include <sys/dmu_tx.h>
183 #include <sys/sunddi.h>
184 #include <sys/policy.h>
185 #include <sys/zone.h>
186 #include <sys/nvpair.h>
187 #include <sys/pathname.h>
188 #include <sys/fs/zfs.h>
189 #include <sys/zfs_ctldir.h>
190 #include <sys/zfs_dir.h>
191 #include <sys/zfs_onexit.h>
192 #include <sys/zvol.h>
193 #include <sys/dsl_scan.h>
194 #include <sys/fm/util.h>
195 #include <sys/dsl_crypt.h>
196 #include <sys/rrwlock.h>
197 #include <sys/zfs_file.h>
198
199 #include <sys/dmu_recv.h>
200 #include <sys/dmu_send.h>
201 #include <sys/dmu_recv.h>
202 #include <sys/dsl_destroy.h>
203 #include <sys/dsl_bookmark.h>
204 #include <sys/dsl_userhold.h>
205 #include <sys/zfeature.h>
206 #include <sys/zcp.h>
207 #include <sys/zio_checksum.h>
208 #include <sys/vdev_removal.h>
209 #include <sys/vdev_impl.h>
210 #include <sys/vdev_initialize.h>
211 #include <sys/vdev_trim.h>
212
213 #include "zfs_namecheck.h"
214 #include "zfs_prop.h"
215 #include "zfs_deleg.h"
216 #include "zfs_comutil.h"
217
218 #include <sys/lua/lua.h>
219 #include <sys/lua/lauxlib.h>
220 #include <sys/zfs_ioctl_impl.h>
221
222 kmutex_t zfsdev_state_lock;
223 zfsdev_state_t *zfsdev_state_list;
224
225 /*
226 * Limit maximum nvlist size. We don't want users passing in insane values
227 * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
228 */
229 #define MAX_NVLIST_SRC_SIZE KMALLOC_MAX_SIZE
230
231 uint_t zfs_fsyncer_key;
232 uint_t zfs_allow_log_key;
233
234 /* DATA_TYPE_ANY is used when zkey_type can vary. */
235 #define DATA_TYPE_ANY DATA_TYPE_UNKNOWN
236
237 typedef struct zfs_ioc_vec {
238 zfs_ioc_legacy_func_t *zvec_legacy_func;
239 zfs_ioc_func_t *zvec_func;
240 zfs_secpolicy_func_t *zvec_secpolicy;
241 zfs_ioc_namecheck_t zvec_namecheck;
242 boolean_t zvec_allow_log;
243 zfs_ioc_poolcheck_t zvec_pool_check;
244 boolean_t zvec_smush_outnvlist;
245 const char *zvec_name;
246 const zfs_ioc_key_t *zvec_nvl_keys;
247 size_t zvec_nvl_key_count;
248 } zfs_ioc_vec_t;
249
250 /* This array is indexed by zfs_userquota_prop_t */
251 static const char *userquota_perms[] = {
252 ZFS_DELEG_PERM_USERUSED,
253 ZFS_DELEG_PERM_USERQUOTA,
254 ZFS_DELEG_PERM_GROUPUSED,
255 ZFS_DELEG_PERM_GROUPQUOTA,
256 ZFS_DELEG_PERM_USEROBJUSED,
257 ZFS_DELEG_PERM_USEROBJQUOTA,
258 ZFS_DELEG_PERM_GROUPOBJUSED,
259 ZFS_DELEG_PERM_GROUPOBJQUOTA,
260 ZFS_DELEG_PERM_PROJECTUSED,
261 ZFS_DELEG_PERM_PROJECTQUOTA,
262 ZFS_DELEG_PERM_PROJECTOBJUSED,
263 ZFS_DELEG_PERM_PROJECTOBJQUOTA,
264 };
265
266 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
267 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
268 static int zfs_check_settable(const char *name, nvpair_t *property,
269 cred_t *cr);
270 static int zfs_check_clearable(char *dataset, nvlist_t *props,
271 nvlist_t **errors);
272 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
273 boolean_t *);
274 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
275 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
276
277 static void
278 history_str_free(char *buf)
279 {
280 kmem_free(buf, HIS_MAX_RECORD_LEN);
281 }
282
283 static char *
284 history_str_get(zfs_cmd_t *zc)
285 {
286 char *buf;
287
288 if (zc->zc_history == 0)
289 return (NULL);
290
291 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
292 if (copyinstr((void *)(uintptr_t)zc->zc_history,
293 buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
294 history_str_free(buf);
295 return (NULL);
296 }
297
298 buf[HIS_MAX_RECORD_LEN -1] = '\0';
299
300 return (buf);
301 }
302
303 /*
304 * Check to see if the named dataset is currently defined as bootable
305 */
306 static boolean_t
307 zfs_is_bootfs(const char *name)
308 {
309 objset_t *os;
310
311 if (dmu_objset_hold(name, FTAG, &os) == 0) {
312 boolean_t ret;
313 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
314 dmu_objset_rele(os, FTAG);
315 return (ret);
316 }
317 return (B_FALSE);
318 }
319
320 /*
321 * Return non-zero if the spa version is less than requested version.
322 */
323 static int
324 zfs_earlier_version(const char *name, int version)
325 {
326 spa_t *spa;
327
328 if (spa_open(name, &spa, FTAG) == 0) {
329 if (spa_version(spa) < version) {
330 spa_close(spa, FTAG);
331 return (1);
332 }
333 spa_close(spa, FTAG);
334 }
335 return (0);
336 }
337
338 /*
339 * Return TRUE if the ZPL version is less than requested version.
340 */
341 static boolean_t
342 zpl_earlier_version(const char *name, int version)
343 {
344 objset_t *os;
345 boolean_t rc = B_TRUE;
346
347 if (dmu_objset_hold(name, FTAG, &os) == 0) {
348 uint64_t zplversion;
349
350 if (dmu_objset_type(os) != DMU_OST_ZFS) {
351 dmu_objset_rele(os, FTAG);
352 return (B_TRUE);
353 }
354 /* XXX reading from non-owned objset */
355 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
356 rc = zplversion < version;
357 dmu_objset_rele(os, FTAG);
358 }
359 return (rc);
360 }
361
362 static void
363 zfs_log_history(zfs_cmd_t *zc)
364 {
365 spa_t *spa;
366 char *buf;
367
368 if ((buf = history_str_get(zc)) == NULL)
369 return;
370
371 if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
372 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
373 (void) spa_history_log(spa, buf);
374 spa_close(spa, FTAG);
375 }
376 history_str_free(buf);
377 }
378
379 /*
380 * Policy for top-level read operations (list pools). Requires no privileges,
381 * and can be used in the local zone, as there is no associated dataset.
382 */
383 /* ARGSUSED */
384 static int
385 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
386 {
387 return (0);
388 }
389
390 /*
391 * Policy for dataset read operations (list children, get statistics). Requires
392 * no privileges, but must be visible in the local zone.
393 */
394 /* ARGSUSED */
395 static int
396 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
397 {
398 if (INGLOBALZONE(curproc) ||
399 zone_dataset_visible(zc->zc_name, NULL))
400 return (0);
401
402 return (SET_ERROR(ENOENT));
403 }
404
405 static int
406 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
407 {
408 int writable = 1;
409
410 /*
411 * The dataset must be visible by this zone -- check this first
412 * so they don't see EPERM on something they shouldn't know about.
413 */
414 if (!INGLOBALZONE(curproc) &&
415 !zone_dataset_visible(dataset, &writable))
416 return (SET_ERROR(ENOENT));
417
418 if (INGLOBALZONE(curproc)) {
419 /*
420 * If the fs is zoned, only root can access it from the
421 * global zone.
422 */
423 if (secpolicy_zfs(cr) && zoned)
424 return (SET_ERROR(EPERM));
425 } else {
426 /*
427 * If we are in a local zone, the 'zoned' property must be set.
428 */
429 if (!zoned)
430 return (SET_ERROR(EPERM));
431
432 /* must be writable by this zone */
433 if (!writable)
434 return (SET_ERROR(EPERM));
435 }
436 return (0);
437 }
438
439 static int
440 zfs_dozonecheck(const char *dataset, cred_t *cr)
441 {
442 uint64_t zoned;
443
444 if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
445 &zoned, NULL))
446 return (SET_ERROR(ENOENT));
447
448 return (zfs_dozonecheck_impl(dataset, zoned, cr));
449 }
450
451 static int
452 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
453 {
454 uint64_t zoned;
455
456 if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
457 return (SET_ERROR(ENOENT));
458
459 return (zfs_dozonecheck_impl(dataset, zoned, cr));
460 }
461
462 static int
463 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
464 const char *perm, cred_t *cr)
465 {
466 int error;
467
468 error = zfs_dozonecheck_ds(name, ds, cr);
469 if (error == 0) {
470 error = secpolicy_zfs(cr);
471 if (error != 0)
472 error = dsl_deleg_access_impl(ds, perm, cr);
473 }
474 return (error);
475 }
476
477 static int
478 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
479 {
480 int error;
481 dsl_dataset_t *ds;
482 dsl_pool_t *dp;
483
484 /*
485 * First do a quick check for root in the global zone, which
486 * is allowed to do all write_perms. This ensures that zfs_ioc_*
487 * will get to handle nonexistent datasets.
488 */
489 if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
490 return (0);
491
492 error = dsl_pool_hold(name, FTAG, &dp);
493 if (error != 0)
494 return (error);
495
496 error = dsl_dataset_hold(dp, name, FTAG, &ds);
497 if (error != 0) {
498 dsl_pool_rele(dp, FTAG);
499 return (error);
500 }
501
502 error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
503
504 dsl_dataset_rele(ds, FTAG);
505 dsl_pool_rele(dp, FTAG);
506 return (error);
507 }
508
509 /*
510 * Policy for setting the security label property.
511 *
512 * Returns 0 for success, non-zero for access and other errors.
513 */
514 static int
515 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
516 {
517 #ifdef HAVE_MLSLABEL
518 char ds_hexsl[MAXNAMELEN];
519 bslabel_t ds_sl, new_sl;
520 boolean_t new_default = FALSE;
521 uint64_t zoned;
522 int needed_priv = -1;
523 int error;
524
525 /* First get the existing dataset label. */
526 error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
527 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
528 if (error != 0)
529 return (SET_ERROR(EPERM));
530
531 if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
532 new_default = TRUE;
533
534 /* The label must be translatable */
535 if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
536 return (SET_ERROR(EINVAL));
537
538 /*
539 * In a non-global zone, disallow attempts to set a label that
540 * doesn't match that of the zone; otherwise no other checks
541 * are needed.
542 */
543 if (!INGLOBALZONE(curproc)) {
544 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
545 return (SET_ERROR(EPERM));
546 return (0);
547 }
548
549 /*
550 * For global-zone datasets (i.e., those whose zoned property is
551 * "off", verify that the specified new label is valid for the
552 * global zone.
553 */
554 if (dsl_prop_get_integer(name,
555 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
556 return (SET_ERROR(EPERM));
557 if (!zoned) {
558 if (zfs_check_global_label(name, strval) != 0)
559 return (SET_ERROR(EPERM));
560 }
561
562 /*
563 * If the existing dataset label is nondefault, check if the
564 * dataset is mounted (label cannot be changed while mounted).
565 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
566 * mounted (or isn't a dataset, doesn't exist, ...).
567 */
568 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
569 objset_t *os;
570 static char *setsl_tag = "setsl_tag";
571
572 /*
573 * Try to own the dataset; abort if there is any error,
574 * (e.g., already mounted, in use, or other error).
575 */
576 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
577 setsl_tag, &os);
578 if (error != 0)
579 return (SET_ERROR(EPERM));
580
581 dmu_objset_disown(os, B_TRUE, setsl_tag);
582
583 if (new_default) {
584 needed_priv = PRIV_FILE_DOWNGRADE_SL;
585 goto out_check;
586 }
587
588 if (hexstr_to_label(strval, &new_sl) != 0)
589 return (SET_ERROR(EPERM));
590
591 if (blstrictdom(&ds_sl, &new_sl))
592 needed_priv = PRIV_FILE_DOWNGRADE_SL;
593 else if (blstrictdom(&new_sl, &ds_sl))
594 needed_priv = PRIV_FILE_UPGRADE_SL;
595 } else {
596 /* dataset currently has a default label */
597 if (!new_default)
598 needed_priv = PRIV_FILE_UPGRADE_SL;
599 }
600
601 out_check:
602 if (needed_priv != -1)
603 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
604 return (0);
605 #else
606 return (SET_ERROR(ENOTSUP));
607 #endif /* HAVE_MLSLABEL */
608 }
609
610 static int
611 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
612 cred_t *cr)
613 {
614 char *strval;
615
616 /*
617 * Check permissions for special properties.
618 */
619 switch (prop) {
620 default:
621 break;
622 case ZFS_PROP_ZONED:
623 /*
624 * Disallow setting of 'zoned' from within a local zone.
625 */
626 if (!INGLOBALZONE(curproc))
627 return (SET_ERROR(EPERM));
628 break;
629
630 case ZFS_PROP_QUOTA:
631 case ZFS_PROP_FILESYSTEM_LIMIT:
632 case ZFS_PROP_SNAPSHOT_LIMIT:
633 if (!INGLOBALZONE(curproc)) {
634 uint64_t zoned;
635 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
636 /*
637 * Unprivileged users are allowed to modify the
638 * limit on things *under* (ie. contained by)
639 * the thing they own.
640 */
641 if (dsl_prop_get_integer(dsname,
642 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
643 return (SET_ERROR(EPERM));
644 if (!zoned || strlen(dsname) <= strlen(setpoint))
645 return (SET_ERROR(EPERM));
646 }
647 break;
648
649 case ZFS_PROP_MLSLABEL:
650 if (!is_system_labeled())
651 return (SET_ERROR(EPERM));
652
653 if (nvpair_value_string(propval, &strval) == 0) {
654 int err;
655
656 err = zfs_set_slabel_policy(dsname, strval, CRED());
657 if (err != 0)
658 return (err);
659 }
660 break;
661 }
662
663 return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
664 }
665
666 /* ARGSUSED */
667 static int
668 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
669 {
670 int error;
671
672 error = zfs_dozonecheck(zc->zc_name, cr);
673 if (error != 0)
674 return (error);
675
676 /*
677 * permission to set permissions will be evaluated later in
678 * dsl_deleg_can_allow()
679 */
680 return (0);
681 }
682
683 /* ARGSUSED */
684 static int
685 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
686 {
687 return (zfs_secpolicy_write_perms(zc->zc_name,
688 ZFS_DELEG_PERM_ROLLBACK, cr));
689 }
690
691 /* ARGSUSED */
692 static int
693 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
694 {
695 dsl_pool_t *dp;
696 dsl_dataset_t *ds;
697 char *cp;
698 int error;
699
700 /*
701 * Generate the current snapshot name from the given objsetid, then
702 * use that name for the secpolicy/zone checks.
703 */
704 cp = strchr(zc->zc_name, '@');
705 if (cp == NULL)
706 return (SET_ERROR(EINVAL));
707 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
708 if (error != 0)
709 return (error);
710
711 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
712 if (error != 0) {
713 dsl_pool_rele(dp, FTAG);
714 return (error);
715 }
716
717 dsl_dataset_name(ds, zc->zc_name);
718
719 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
720 ZFS_DELEG_PERM_SEND, cr);
721 dsl_dataset_rele(ds, FTAG);
722 dsl_pool_rele(dp, FTAG);
723
724 return (error);
725 }
726
727 /* ARGSUSED */
728 static int
729 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
730 {
731 return (zfs_secpolicy_write_perms(zc->zc_name,
732 ZFS_DELEG_PERM_SEND, cr));
733 }
734
735 static int
736 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
737 {
738 return (SET_ERROR(ENOTSUP));
739 }
740
741 static int
742 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
743 {
744 return (SET_ERROR(ENOTSUP));
745 }
746
/*
 * Compute the parent of a dataset name by removing the trailing "@bla" or
 * "/bla" component.
 *
 * datasetname is the name to examine; parent is the output buffer and
 * parentsize its size in bytes.  A name that does not fit is truncated to
 * parentsize - 1 bytes before the parent is derived.
 *
 * Returns 0 on success, ENOENT if the (possibly truncated) name contains
 * neither '@' nor '/', and EINVAL if parentsize is not positive.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() leaves the destination unterminated when the source is
	 * at least parentsize bytes long, so terminate explicitly before
	 * the buffer is searched.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
768
769 int
770 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
771 {
772 int error;
773
774 if ((error = zfs_secpolicy_write_perms(name,
775 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
776 return (error);
777
778 return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
779 }
780
781 /* ARGSUSED */
782 static int
783 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
784 {
785 return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
786 }
787
788 /*
789 * Destroying snapshots with delegated permissions requires
790 * descendant mount and destroy permissions.
791 */
792 /* ARGSUSED */
793 static int
794 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
795 {
796 nvlist_t *snaps;
797 nvpair_t *pair, *nextpair;
798 int error = 0;
799
800 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
801
802 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
803 pair = nextpair) {
804 nextpair = nvlist_next_nvpair(snaps, pair);
805 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
806 if (error == ENOENT) {
807 /*
808 * Ignore any snapshots that don't exist (we consider
809 * them "already destroyed"). Remove the name from the
810 * nvl here in case the snapshot is created between
811 * now and when we try to destroy it (in which case
812 * we don't want to destroy it since we haven't
813 * checked for permission).
814 */
815 fnvlist_remove_nvpair(snaps, pair);
816 error = 0;
817 }
818 if (error != 0)
819 break;
820 }
821
822 return (error);
823 }
824
825 int
826 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
827 {
828 char parentname[ZFS_MAX_DATASET_NAME_LEN];
829 int error;
830
831 if ((error = zfs_secpolicy_write_perms(from,
832 ZFS_DELEG_PERM_RENAME, cr)) != 0)
833 return (error);
834
835 if ((error = zfs_secpolicy_write_perms(from,
836 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
837 return (error);
838
839 if ((error = zfs_get_parent(to, parentname,
840 sizeof (parentname))) != 0)
841 return (error);
842
843 if ((error = zfs_secpolicy_write_perms(parentname,
844 ZFS_DELEG_PERM_CREATE, cr)) != 0)
845 return (error);
846
847 if ((error = zfs_secpolicy_write_perms(parentname,
848 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
849 return (error);
850
851 return (error);
852 }
853
854 /* ARGSUSED */
855 static int
856 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
857 {
858 return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
859 }
860
861 /* ARGSUSED */
862 static int
863 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
864 {
865 dsl_pool_t *dp;
866 dsl_dataset_t *clone;
867 int error;
868
869 error = zfs_secpolicy_write_perms(zc->zc_name,
870 ZFS_DELEG_PERM_PROMOTE, cr);
871 if (error != 0)
872 return (error);
873
874 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
875 if (error != 0)
876 return (error);
877
878 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
879
880 if (error == 0) {
881 char parentname[ZFS_MAX_DATASET_NAME_LEN];
882 dsl_dataset_t *origin = NULL;
883 dsl_dir_t *dd;
884 dd = clone->ds_dir;
885
886 error = dsl_dataset_hold_obj(dd->dd_pool,
887 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
888 if (error != 0) {
889 dsl_dataset_rele(clone, FTAG);
890 dsl_pool_rele(dp, FTAG);
891 return (error);
892 }
893
894 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
895 ZFS_DELEG_PERM_MOUNT, cr);
896
897 dsl_dataset_name(origin, parentname);
898 if (error == 0) {
899 error = zfs_secpolicy_write_perms_ds(parentname, origin,
900 ZFS_DELEG_PERM_PROMOTE, cr);
901 }
902 dsl_dataset_rele(clone, FTAG);
903 dsl_dataset_rele(origin, FTAG);
904 }
905 dsl_pool_rele(dp, FTAG);
906 return (error);
907 }
908
909 /* ARGSUSED */
910 static int
911 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
912 {
913 int error;
914
915 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
916 ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
917 return (error);
918
919 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
920 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
921 return (error);
922
923 return (zfs_secpolicy_write_perms(zc->zc_name,
924 ZFS_DELEG_PERM_CREATE, cr));
925 }
926
927 /* ARGSUSED */
928 static int
929 zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
930 {
931 return (zfs_secpolicy_recv(zc, innvl, cr));
932 }
933
934 int
935 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
936 {
937 return (zfs_secpolicy_write_perms(name,
938 ZFS_DELEG_PERM_SNAPSHOT, cr));
939 }
940
941 /*
942 * Check for permission to create each snapshot in the nvlist.
943 */
944 /* ARGSUSED */
945 static int
946 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
947 {
948 nvlist_t *snaps;
949 int error = 0;
950 nvpair_t *pair;
951
952 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
953
954 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
955 pair = nvlist_next_nvpair(snaps, pair)) {
956 char *name = nvpair_name(pair);
957 char *atp = strchr(name, '@');
958
959 if (atp == NULL) {
960 error = SET_ERROR(EINVAL);
961 break;
962 }
963 *atp = '\0';
964 error = zfs_secpolicy_snapshot_perms(name, cr);
965 *atp = '@';
966 if (error != 0)
967 break;
968 }
969 return (error);
970 }
971
972 /*
973 * Check for permission to create each bookmark in the nvlist.
974 */
975 /* ARGSUSED */
976 static int
977 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
978 {
979 int error = 0;
980
981 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
982 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
983 char *name = nvpair_name(pair);
984 char *hashp = strchr(name, '#');
985
986 if (hashp == NULL) {
987 error = SET_ERROR(EINVAL);
988 break;
989 }
990 *hashp = '\0';
991 error = zfs_secpolicy_write_perms(name,
992 ZFS_DELEG_PERM_BOOKMARK, cr);
993 *hashp = '#';
994 if (error != 0)
995 break;
996 }
997 return (error);
998 }
999
1000 /* ARGSUSED */
1001 static int
1002 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1003 {
1004 nvpair_t *pair, *nextpair;
1005 int error = 0;
1006
1007 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1008 pair = nextpair) {
1009 char *name = nvpair_name(pair);
1010 char *hashp = strchr(name, '#');
1011 nextpair = nvlist_next_nvpair(innvl, pair);
1012
1013 if (hashp == NULL) {
1014 error = SET_ERROR(EINVAL);
1015 break;
1016 }
1017
1018 *hashp = '\0';
1019 error = zfs_secpolicy_write_perms(name,
1020 ZFS_DELEG_PERM_DESTROY, cr);
1021 *hashp = '#';
1022 if (error == ENOENT) {
1023 /*
1024 * Ignore any filesystems that don't exist (we consider
1025 * their bookmarks "already destroyed"). Remove
1026 * the name from the nvl here in case the filesystem
1027 * is created between now and when we try to destroy
1028 * the bookmark (in which case we don't want to
1029 * destroy it since we haven't checked for permission).
1030 */
1031 fnvlist_remove_nvpair(innvl, pair);
1032 error = 0;
1033 }
1034 if (error != 0)
1035 break;
1036 }
1037
1038 return (error);
1039 }
1040
1041 /* ARGSUSED */
1042 static int
1043 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1044 {
1045 /*
1046 * Even root must have a proper TSD so that we know what pool
1047 * to log to.
1048 */
1049 if (tsd_get(zfs_allow_log_key) == NULL)
1050 return (SET_ERROR(EPERM));
1051 return (0);
1052 }
1053
1054 static int
1055 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1056 {
1057 char parentname[ZFS_MAX_DATASET_NAME_LEN];
1058 int error;
1059 char *origin;
1060
1061 if ((error = zfs_get_parent(zc->zc_name, parentname,
1062 sizeof (parentname))) != 0)
1063 return (error);
1064
1065 if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1066 (error = zfs_secpolicy_write_perms(origin,
1067 ZFS_DELEG_PERM_CLONE, cr)) != 0)
1068 return (error);
1069
1070 if ((error = zfs_secpolicy_write_perms(parentname,
1071 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1072 return (error);
1073
1074 return (zfs_secpolicy_write_perms(parentname,
1075 ZFS_DELEG_PERM_MOUNT, cr));
1076 }
1077
1078 /*
1079 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
1080 * SYS_CONFIG privilege, which is not available in a local zone.
1081 */
1082 /* ARGSUSED */
1083 int
1084 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1085 {
1086 if (secpolicy_sys_config(cr, B_FALSE) != 0)
1087 return (SET_ERROR(EPERM));
1088
1089 return (0);
1090 }
1091
1092 /*
1093 * Policy for object to name lookups.
1094 */
1095 /* ARGSUSED */
1096 static int
1097 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1098 {
1099 int error;
1100
1101 if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1102 return (0);
1103
1104 error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1105 return (error);
1106 }
1107
1108 /*
1109 * Policy for fault injection. Requires all privileges.
1110 */
1111 /* ARGSUSED */
1112 static int
1113 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1114 {
1115 return (secpolicy_zinject(cr));
1116 }
1117
1118 /* ARGSUSED */
1119 static int
1120 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1121 {
1122 zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1123
1124 if (prop == ZPROP_INVAL) {
1125 if (!zfs_prop_user(zc->zc_value))
1126 return (SET_ERROR(EINVAL));
1127 return (zfs_secpolicy_write_perms(zc->zc_name,
1128 ZFS_DELEG_PERM_USERPROP, cr));
1129 } else {
1130 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1131 NULL, cr));
1132 }
1133 }
1134
1135 static int
1136 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1137 {
1138 int err = zfs_secpolicy_read(zc, innvl, cr);
1139 if (err)
1140 return (err);
1141
1142 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1143 return (SET_ERROR(EINVAL));
1144
1145 if (zc->zc_value[0] == 0) {
1146 /*
1147 * They are asking about a posix uid/gid. If it's
1148 * themself, allow it.
1149 */
1150 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1151 zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1152 zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1153 zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1154 if (zc->zc_guid == crgetuid(cr))
1155 return (0);
1156 } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1157 zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1158 zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1159 zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1160 if (groupmember(zc->zc_guid, cr))
1161 return (0);
1162 }
1163 /* else is for project quota/used */
1164 }
1165
1166 return (zfs_secpolicy_write_perms(zc->zc_name,
1167 userquota_perms[zc->zc_objset_type], cr));
1168 }
1169
1170 static int
1171 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1172 {
1173 int err = zfs_secpolicy_read(zc, innvl, cr);
1174 if (err)
1175 return (err);
1176
1177 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1178 return (SET_ERROR(EINVAL));
1179
1180 return (zfs_secpolicy_write_perms(zc->zc_name,
1181 userquota_perms[zc->zc_objset_type], cr));
1182 }
1183
1184 /* ARGSUSED */
1185 static int
1186 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1187 {
1188 return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1189 NULL, cr));
1190 }
1191
1192 /* ARGSUSED */
1193 static int
1194 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1195 {
1196 nvpair_t *pair;
1197 nvlist_t *holds;
1198 int error;
1199
1200 holds = fnvlist_lookup_nvlist(innvl, "holds");
1201
1202 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1203 pair = nvlist_next_nvpair(holds, pair)) {
1204 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1205 error = dmu_fsname(nvpair_name(pair), fsname);
1206 if (error != 0)
1207 return (error);
1208 error = zfs_secpolicy_write_perms(fsname,
1209 ZFS_DELEG_PERM_HOLD, cr);
1210 if (error != 0)
1211 return (error);
1212 }
1213 return (0);
1214 }
1215
1216 /* ARGSUSED */
1217 static int
1218 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1219 {
1220 nvpair_t *pair;
1221 int error;
1222
1223 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1224 pair = nvlist_next_nvpair(innvl, pair)) {
1225 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1226 error = dmu_fsname(nvpair_name(pair), fsname);
1227 if (error != 0)
1228 return (error);
1229 error = zfs_secpolicy_write_perms(fsname,
1230 ZFS_DELEG_PERM_RELEASE, cr);
1231 if (error != 0)
1232 return (error);
1233 }
1234 return (0);
1235 }
1236
1237 /*
1238 * Policy for allowing temporary snapshots to be taken or released
1239 */
1240 static int
1241 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1242 {
1243 /*
1244 * A temporary snapshot is the same as a snapshot,
1245 * hold, destroy and release all rolled into one.
1246 * Delegated diff alone is sufficient that we allow this.
1247 */
1248 int error;
1249
1250 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1251 ZFS_DELEG_PERM_DIFF, cr)) == 0)
1252 return (0);
1253
1254 error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1255
1256 if (innvl != NULL) {
1257 if (error == 0)
1258 error = zfs_secpolicy_hold(zc, innvl, cr);
1259 if (error == 0)
1260 error = zfs_secpolicy_release(zc, innvl, cr);
1261 if (error == 0)
1262 error = zfs_secpolicy_destroy(zc, innvl, cr);
1263 }
1264 return (error);
1265 }
1266
1267 static int
1268 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1269 {
1270 return (zfs_secpolicy_write_perms(zc->zc_name,
1271 ZFS_DELEG_PERM_LOAD_KEY, cr));
1272 }
1273
1274 static int
1275 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1276 {
1277 return (zfs_secpolicy_write_perms(zc->zc_name,
1278 ZFS_DELEG_PERM_CHANGE_KEY, cr));
1279 }
1280
1281 /*
1282 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1283 */
1284 static int
1285 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1286 {
1287 char *packed;
1288 int error;
1289 nvlist_t *list = NULL;
1290
1291 /*
1292 * Read in and unpack the user-supplied nvlist.
1293 */
1294 if (size == 0)
1295 return (SET_ERROR(EINVAL));
1296
1297 packed = vmem_alloc(size, KM_SLEEP);
1298
1299 if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1300 iflag)) != 0) {
1301 vmem_free(packed, size);
1302 return (SET_ERROR(EFAULT));
1303 }
1304
1305 if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1306 vmem_free(packed, size);
1307 return (error);
1308 }
1309
1310 vmem_free(packed, size);
1311
1312 *nvp = list;
1313 return (0);
1314 }
1315
1316 /*
1317 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1318 * Entries will be removed from the end of the nvlist, and one int32 entry
1319 * named "N_MORE_ERRORS" will be added indicating how many entries were
1320 * removed.
1321 */
1322 static int
1323 nvlist_smush(nvlist_t *errors, size_t max)
1324 {
1325 size_t size;
1326
1327 size = fnvlist_size(errors);
1328
1329 if (size > max) {
1330 nvpair_t *more_errors;
1331 int n = 0;
1332
1333 if (max < 1024)
1334 return (SET_ERROR(ENOMEM));
1335
1336 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1337 more_errors = nvlist_prev_nvpair(errors, NULL);
1338
1339 do {
1340 nvpair_t *pair = nvlist_prev_nvpair(errors,
1341 more_errors);
1342 fnvlist_remove_nvpair(errors, pair);
1343 n++;
1344 size = fnvlist_size(errors);
1345 } while (size > max);
1346
1347 fnvlist_remove_nvpair(errors, more_errors);
1348 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1349 ASSERT3U(fnvlist_size(errors), <=, max);
1350 }
1351
1352 return (0);
1353 }
1354
1355 static int
1356 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1357 {
1358 char *packed = NULL;
1359 int error = 0;
1360 size_t size;
1361
1362 size = fnvlist_size(nvl);
1363
1364 if (size > zc->zc_nvlist_dst_size) {
1365 error = SET_ERROR(ENOMEM);
1366 } else {
1367 packed = fnvlist_pack(nvl, &size);
1368 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1369 size, zc->zc_iflags) != 0)
1370 error = SET_ERROR(EFAULT);
1371 fnvlist_pack_free(packed, size);
1372 }
1373
1374 zc->zc_nvlist_dst_size = size;
1375 zc->zc_nvlist_dst_filled = B_TRUE;
1376 return (error);
1377 }
1378
1379 int
1380 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1381 {
1382 int error = 0;
1383 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1384 return (SET_ERROR(EINVAL));
1385 }
1386
1387 mutex_enter(&os->os_user_ptr_lock);
1388 *zfvp = dmu_objset_get_user(os);
1389 /* bump s_active only when non-zero to prevent umount race */
1390 error = zfs_vfs_ref(zfvp);
1391 mutex_exit(&os->os_user_ptr_lock);
1392 return (error);
1393 }
1394
1395 int
1396 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1397 {
1398 objset_t *os;
1399 int error;
1400
1401 error = dmu_objset_hold(dsname, FTAG, &os);
1402 if (error != 0)
1403 return (error);
1404
1405 error = getzfsvfs_impl(os, zfvp);
1406 dmu_objset_rele(os, FTAG);
1407 return (error);
1408 }
1409
1410 /*
1411 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1412 * case its z_sb will be NULL, and it will be opened as the owner.
1413 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1414 * which prevents all inode ops from running.
1415 */
1416 static int
1417 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1418 {
1419 int error = 0;
1420
1421 if (getzfsvfs(name, zfvp) != 0)
1422 error = zfsvfs_create(name, B_FALSE, zfvp);
1423 if (error == 0) {
1424 rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1425 RW_READER, tag);
1426 if ((*zfvp)->z_unmounted) {
1427 /*
1428 * XXX we could probably try again, since the unmounting
1429 * thread should be just about to disassociate the
1430 * objset from the zfsvfs.
1431 */
1432 rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1433 return (SET_ERROR(EBUSY));
1434 }
1435 }
1436 return (error);
1437 }
1438
1439 static void
1440 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1441 {
1442 rrm_exit(&zfsvfs->z_teardown_lock, tag);
1443
1444 if (zfs_vfs_held(zfsvfs)) {
1445 zfs_vfs_rele(zfsvfs);
1446 } else {
1447 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1448 zfsvfs_free(zfsvfs);
1449 }
1450 }
1451
1452 static int
1453 zfs_ioc_pool_create(zfs_cmd_t *zc)
1454 {
1455 int error;
1456 nvlist_t *config, *props = NULL;
1457 nvlist_t *rootprops = NULL;
1458 nvlist_t *zplprops = NULL;
1459 dsl_crypto_params_t *dcp = NULL;
1460 char *spa_name = zc->zc_name;
1461 boolean_t unload_wkey = B_TRUE;
1462
1463 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1464 zc->zc_iflags, &config)))
1465 return (error);
1466
1467 if (zc->zc_nvlist_src_size != 0 && (error =
1468 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1469 zc->zc_iflags, &props))) {
1470 nvlist_free(config);
1471 return (error);
1472 }
1473
1474 if (props) {
1475 nvlist_t *nvl = NULL;
1476 nvlist_t *hidden_args = NULL;
1477 uint64_t version = SPA_VERSION;
1478 char *tname;
1479
1480 (void) nvlist_lookup_uint64(props,
1481 zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1482 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1483 error = SET_ERROR(EINVAL);
1484 goto pool_props_bad;
1485 }
1486 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1487 if (nvl) {
1488 error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1489 if (error != 0)
1490 goto pool_props_bad;
1491 (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1492 }
1493
1494 (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1495 &hidden_args);
1496 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1497 rootprops, hidden_args, &dcp);
1498 if (error != 0)
1499 goto pool_props_bad;
1500 (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1501
1502 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1503 error = zfs_fill_zplprops_root(version, rootprops,
1504 zplprops, NULL);
1505 if (error != 0)
1506 goto pool_props_bad;
1507
1508 if (nvlist_lookup_string(props,
1509 zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1510 spa_name = tname;
1511 }
1512
1513 error = spa_create(zc->zc_name, config, props, zplprops, dcp);
1514
1515 /*
1516 * Set the remaining root properties
1517 */
1518 if (!error && (error = zfs_set_prop_nvlist(spa_name,
1519 ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
1520 (void) spa_destroy(spa_name);
1521 unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
1522 }
1523
1524 pool_props_bad:
1525 nvlist_free(rootprops);
1526 nvlist_free(zplprops);
1527 nvlist_free(config);
1528 nvlist_free(props);
1529 dsl_crypto_params_free(dcp, unload_wkey && !!error);
1530
1531 return (error);
1532 }
1533
1534 static int
1535 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1536 {
1537 int error;
1538 zfs_log_history(zc);
1539 error = spa_destroy(zc->zc_name);
1540
1541 return (error);
1542 }
1543
1544 static int
1545 zfs_ioc_pool_import(zfs_cmd_t *zc)
1546 {
1547 nvlist_t *config, *props = NULL;
1548 uint64_t guid;
1549 int error;
1550
1551 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1552 zc->zc_iflags, &config)) != 0)
1553 return (error);
1554
1555 if (zc->zc_nvlist_src_size != 0 && (error =
1556 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1557 zc->zc_iflags, &props))) {
1558 nvlist_free(config);
1559 return (error);
1560 }
1561
1562 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1563 guid != zc->zc_guid)
1564 error = SET_ERROR(EINVAL);
1565 else
1566 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1567
1568 if (zc->zc_nvlist_dst != 0) {
1569 int err;
1570
1571 if ((err = put_nvlist(zc, config)) != 0)
1572 error = err;
1573 }
1574
1575 nvlist_free(config);
1576 nvlist_free(props);
1577
1578 return (error);
1579 }
1580
1581 static int
1582 zfs_ioc_pool_export(zfs_cmd_t *zc)
1583 {
1584 int error;
1585 boolean_t force = (boolean_t)zc->zc_cookie;
1586 boolean_t hardforce = (boolean_t)zc->zc_guid;
1587
1588 zfs_log_history(zc);
1589 error = spa_export(zc->zc_name, NULL, force, hardforce);
1590
1591 return (error);
1592 }
1593
1594 static int
1595 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1596 {
1597 nvlist_t *configs;
1598 int error;
1599
1600 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1601 return (SET_ERROR(EEXIST));
1602
1603 error = put_nvlist(zc, configs);
1604
1605 nvlist_free(configs);
1606
1607 return (error);
1608 }
1609
1610 /*
1611 * inputs:
1612 * zc_name name of the pool
1613 *
1614 * outputs:
1615 * zc_cookie real errno
1616 * zc_nvlist_dst config nvlist
1617 * zc_nvlist_dst_size size of config nvlist
1618 */
1619 static int
1620 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1621 {
1622 nvlist_t *config;
1623 int error;
1624 int ret = 0;
1625
1626 error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1627 sizeof (zc->zc_value));
1628
1629 if (config != NULL) {
1630 ret = put_nvlist(zc, config);
1631 nvlist_free(config);
1632
1633 /*
1634 * The config may be present even if 'error' is non-zero.
1635 * In this case we return success, and preserve the real errno
1636 * in 'zc_cookie'.
1637 */
1638 zc->zc_cookie = error;
1639 } else {
1640 ret = error;
1641 }
1642
1643 return (ret);
1644 }
1645
1646 /*
1647 * Try to import the given pool, returning pool stats as appropriate so that
1648 * user land knows which devices are available and overall pool health.
1649 */
1650 static int
1651 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1652 {
1653 nvlist_t *tryconfig, *config = NULL;
1654 int error;
1655
1656 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1657 zc->zc_iflags, &tryconfig)) != 0)
1658 return (error);
1659
1660 config = spa_tryimport(tryconfig);
1661
1662 nvlist_free(tryconfig);
1663
1664 if (config == NULL)
1665 return (SET_ERROR(EINVAL));
1666
1667 error = put_nvlist(zc, config);
1668 nvlist_free(config);
1669
1670 return (error);
1671 }
1672
1673 /*
1674 * inputs:
1675 * zc_name name of the pool
1676 * zc_cookie scan func (pool_scan_func_t)
1677 * zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
1678 */
1679 static int
1680 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1681 {
1682 spa_t *spa;
1683 int error;
1684
1685 if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1686 return (SET_ERROR(EINVAL));
1687
1688 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1689 return (error);
1690
1691 if (zc->zc_flags == POOL_SCRUB_PAUSE)
1692 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1693 else if (zc->zc_cookie == POOL_SCAN_NONE)
1694 error = spa_scan_stop(spa);
1695 else
1696 error = spa_scan(spa, zc->zc_cookie);
1697
1698 spa_close(spa, FTAG);
1699
1700 return (error);
1701 }
1702
1703 static int
1704 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1705 {
1706 spa_t *spa;
1707 int error;
1708
1709 error = spa_open(zc->zc_name, &spa, FTAG);
1710 if (error == 0) {
1711 spa_freeze(spa);
1712 spa_close(spa, FTAG);
1713 }
1714 return (error);
1715 }
1716
1717 static int
1718 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1719 {
1720 spa_t *spa;
1721 int error;
1722
1723 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1724 return (error);
1725
1726 if (zc->zc_cookie < spa_version(spa) ||
1727 !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1728 spa_close(spa, FTAG);
1729 return (SET_ERROR(EINVAL));
1730 }
1731
1732 spa_upgrade(spa, zc->zc_cookie);
1733 spa_close(spa, FTAG);
1734
1735 return (error);
1736 }
1737
1738 static int
1739 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1740 {
1741 spa_t *spa;
1742 char *hist_buf;
1743 uint64_t size;
1744 int error;
1745
1746 if ((size = zc->zc_history_len) == 0)
1747 return (SET_ERROR(EINVAL));
1748
1749 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1750 return (error);
1751
1752 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1753 spa_close(spa, FTAG);
1754 return (SET_ERROR(ENOTSUP));
1755 }
1756
1757 hist_buf = vmem_alloc(size, KM_SLEEP);
1758 if ((error = spa_history_get(spa, &zc->zc_history_offset,
1759 &zc->zc_history_len, hist_buf)) == 0) {
1760 error = ddi_copyout(hist_buf,
1761 (void *)(uintptr_t)zc->zc_history,
1762 zc->zc_history_len, zc->zc_iflags);
1763 }
1764
1765 spa_close(spa, FTAG);
1766 vmem_free(hist_buf, size);
1767 return (error);
1768 }
1769
1770 static int
1771 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1772 {
1773 spa_t *spa;
1774 int error;
1775
1776 error = spa_open(zc->zc_name, &spa, FTAG);
1777 if (error == 0) {
1778 error = spa_change_guid(spa);
1779 spa_close(spa, FTAG);
1780 }
1781 return (error);
1782 }
1783
1784 static int
1785 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1786 {
1787 return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1788 }
1789
1790 /*
1791 * inputs:
1792 * zc_name name of filesystem
1793 * zc_obj object to find
1794 *
1795 * outputs:
1796 * zc_value name of object
1797 */
1798 static int
1799 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1800 {
1801 objset_t *os;
1802 int error;
1803
1804 /* XXX reading from objset not owned */
1805 if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1806 FTAG, &os)) != 0)
1807 return (error);
1808 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1809 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1810 return (SET_ERROR(EINVAL));
1811 }
1812 error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1813 sizeof (zc->zc_value));
1814 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1815
1816 return (error);
1817 }
1818
1819 /*
1820 * inputs:
1821 * zc_name name of filesystem
1822 * zc_obj object to find
1823 *
1824 * outputs:
1825 * zc_stat stats on object
1826 * zc_value path to object
1827 */
1828 static int
1829 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1830 {
1831 objset_t *os;
1832 int error;
1833
1834 /* XXX reading from objset not owned */
1835 if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1836 FTAG, &os)) != 0)
1837 return (error);
1838 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1839 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1840 return (SET_ERROR(EINVAL));
1841 }
1842 error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1843 sizeof (zc->zc_value));
1844 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1845
1846 return (error);
1847 }
1848
1849 static int
1850 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1851 {
1852 spa_t *spa;
1853 int error;
1854 nvlist_t *config;
1855
1856 error = spa_open(zc->zc_name, &spa, FTAG);
1857 if (error != 0)
1858 return (error);
1859
1860 error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1861 zc->zc_iflags, &config);
1862 if (error == 0) {
1863 error = spa_vdev_add(spa, config);
1864 nvlist_free(config);
1865 }
1866 spa_close(spa, FTAG);
1867 return (error);
1868 }
1869
1870 /*
1871 * inputs:
1872 * zc_name name of the pool
1873 * zc_guid guid of vdev to remove
1874 * zc_cookie cancel removal
1875 */
1876 static int
1877 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1878 {
1879 spa_t *spa;
1880 int error;
1881
1882 error = spa_open(zc->zc_name, &spa, FTAG);
1883 if (error != 0)
1884 return (error);
1885 if (zc->zc_cookie != 0) {
1886 error = spa_vdev_remove_cancel(spa);
1887 } else {
1888 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1889 }
1890 spa_close(spa, FTAG);
1891 return (error);
1892 }
1893
1894 static int
1895 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1896 {
1897 spa_t *spa;
1898 int error;
1899 vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1900
1901 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1902 return (error);
1903 switch (zc->zc_cookie) {
1904 case VDEV_STATE_ONLINE:
1905 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1906 break;
1907
1908 case VDEV_STATE_OFFLINE:
1909 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1910 break;
1911
1912 case VDEV_STATE_FAULTED:
1913 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1914 zc->zc_obj != VDEV_AUX_EXTERNAL &&
1915 zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
1916 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1917
1918 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1919 break;
1920
1921 case VDEV_STATE_DEGRADED:
1922 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1923 zc->zc_obj != VDEV_AUX_EXTERNAL)
1924 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1925
1926 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1927 break;
1928
1929 default:
1930 error = SET_ERROR(EINVAL);
1931 }
1932 zc->zc_cookie = newstate;
1933 spa_close(spa, FTAG);
1934 return (error);
1935 }
1936
1937 static int
1938 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1939 {
1940 spa_t *spa;
1941 nvlist_t *config;
1942 int replacing = zc->zc_cookie;
1943 int rebuild = zc->zc_simple;
1944 int error;
1945
1946 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1947 return (error);
1948
1949 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1950 zc->zc_iflags, &config)) == 0) {
1951 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
1952 rebuild);
1953 nvlist_free(config);
1954 }
1955
1956 spa_close(spa, FTAG);
1957 return (error);
1958 }
1959
1960 static int
1961 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1962 {
1963 spa_t *spa;
1964 int error;
1965
1966 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1967 return (error);
1968
1969 error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1970
1971 spa_close(spa, FTAG);
1972 return (error);
1973 }
1974
1975 static int
1976 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1977 {
1978 spa_t *spa;
1979 nvlist_t *config, *props = NULL;
1980 int error;
1981 boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1982
1983 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1984 return (error);
1985
1986 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1987 zc->zc_iflags, &config))) {
1988 spa_close(spa, FTAG);
1989 return (error);
1990 }
1991
1992 if (zc->zc_nvlist_src_size != 0 && (error =
1993 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1994 zc->zc_iflags, &props))) {
1995 spa_close(spa, FTAG);
1996 nvlist_free(config);
1997 return (error);
1998 }
1999
2000 error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2001
2002 spa_close(spa, FTAG);
2003
2004 nvlist_free(config);
2005 nvlist_free(props);
2006
2007 return (error);
2008 }
2009
2010 static int
2011 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2012 {
2013 spa_t *spa;
2014 char *path = zc->zc_value;
2015 uint64_t guid = zc->zc_guid;
2016 int error;
2017
2018 error = spa_open(zc->zc_name, &spa, FTAG);
2019 if (error != 0)
2020 return (error);
2021
2022 error = spa_vdev_setpath(spa, guid, path);
2023 spa_close(spa, FTAG);
2024 return (error);
2025 }
2026
2027 static int
2028 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2029 {
2030 spa_t *spa;
2031 char *fru = zc->zc_value;
2032 uint64_t guid = zc->zc_guid;
2033 int error;
2034
2035 error = spa_open(zc->zc_name, &spa, FTAG);
2036 if (error != 0)
2037 return (error);
2038
2039 error = spa_vdev_setfru(spa, guid, fru);
2040 spa_close(spa, FTAG);
2041 return (error);
2042 }
2043
2044 static int
2045 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2046 {
2047 int error = 0;
2048 nvlist_t *nv;
2049
2050 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2051
2052 if (zc->zc_nvlist_dst != 0 &&
2053 (error = dsl_prop_get_all(os, &nv)) == 0) {
2054 dmu_objset_stats(os, nv);
2055 /*
2056 * NB: zvol_get_stats() will read the objset contents,
2057 * which we aren't supposed to do with a
2058 * DS_MODE_USER hold, because it could be
2059 * inconsistent. So this is a bit of a workaround...
2060 * XXX reading without owning
2061 */
2062 if (!zc->zc_objset_stats.dds_inconsistent &&
2063 dmu_objset_type(os) == DMU_OST_ZVOL) {
2064 error = zvol_get_stats(os, nv);
2065 if (error == EIO) {
2066 nvlist_free(nv);
2067 return (error);
2068 }
2069 VERIFY0(error);
2070 }
2071 if (error == 0)
2072 error = put_nvlist(zc, nv);
2073 nvlist_free(nv);
2074 }
2075
2076 return (error);
2077 }
2078
2079 /*
2080 * inputs:
2081 * zc_name name of filesystem
2082 * zc_nvlist_dst_size size of buffer for property nvlist
2083 *
2084 * outputs:
2085 * zc_objset_stats stats
2086 * zc_nvlist_dst property nvlist
2087 * zc_nvlist_dst_size size of property nvlist
2088 */
2089 static int
2090 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2091 {
2092 objset_t *os;
2093 int error;
2094
2095 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2096 if (error == 0) {
2097 error = zfs_ioc_objset_stats_impl(zc, os);
2098 dmu_objset_rele(os, FTAG);
2099 }
2100
2101 return (error);
2102 }
2103
2104 /*
2105 * inputs:
2106 * zc_name name of filesystem
2107 * zc_nvlist_dst_size size of buffer for property nvlist
2108 *
2109 * outputs:
2110 * zc_nvlist_dst received property nvlist
2111 * zc_nvlist_dst_size size of received property nvlist
2112 *
2113 * Gets received properties (distinct from local properties on or after
2114 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2115 * local property values.
2116 */
2117 static int
2118 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2119 {
2120 int error = 0;
2121 nvlist_t *nv;
2122
2123 /*
2124 * Without this check, we would return local property values if the
2125 * caller has not already received properties on or after
2126 * SPA_VERSION_RECVD_PROPS.
2127 */
2128 if (!dsl_prop_get_hasrecvd(zc->zc_name))
2129 return (SET_ERROR(ENOTSUP));
2130
2131 if (zc->zc_nvlist_dst != 0 &&
2132 (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2133 error = put_nvlist(zc, nv);
2134 nvlist_free(nv);
2135 }
2136
2137 return (error);
2138 }
2139
2140 static int
2141 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2142 {
2143 uint64_t value;
2144 int error;
2145
2146 /*
2147 * zfs_get_zplprop() will either find a value or give us
2148 * the default value (if there is one).
2149 */
2150 if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2151 return (error);
2152 VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2153 return (0);
2154 }
2155
2156 /*
2157 * inputs:
2158 * zc_name name of filesystem
2159 * zc_nvlist_dst_size size of buffer for zpl property nvlist
2160 *
2161 * outputs:
2162 * zc_nvlist_dst zpl property nvlist
2163 * zc_nvlist_dst_size size of zpl property nvlist
2164 */
2165 static int
2166 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2167 {
2168 objset_t *os;
2169 int err;
2170
2171 /* XXX reading without owning */
2172 if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2173 return (err);
2174
2175 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2176
2177 /*
2178 * NB: nvl_add_zplprop() will read the objset contents,
2179 * which we aren't supposed to do with a DS_MODE_USER
2180 * hold, because it could be inconsistent.
2181 */
2182 if (zc->zc_nvlist_dst != 0 &&
2183 !zc->zc_objset_stats.dds_inconsistent &&
2184 dmu_objset_type(os) == DMU_OST_ZFS) {
2185 nvlist_t *nv;
2186
2187 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2188 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2189 (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2190 (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2191 (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2192 err = put_nvlist(zc, nv);
2193 nvlist_free(nv);
2194 } else {
2195 err = SET_ERROR(ENOENT);
2196 }
2197 dmu_objset_rele(os, FTAG);
2198 return (err);
2199 }
2200
2201 /*
2202 * inputs:
2203 * zc_name name of filesystem
2204 * zc_cookie zap cursor
2205 * zc_nvlist_dst_size size of buffer for property nvlist
2206 *
2207 * outputs:
2208 * zc_name name of next filesystem
2209 * zc_cookie zap cursor
2210 * zc_objset_stats stats
2211 * zc_nvlist_dst property nvlist
2212 * zc_nvlist_dst_size size of property nvlist
2213 */
2214 static int
2215 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2216 {
2217 objset_t *os;
2218 int error;
2219 char *p;
2220 size_t orig_len = strlen(zc->zc_name);
2221
2222 top:
2223 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2224 if (error == ENOENT)
2225 error = SET_ERROR(ESRCH);
2226 return (error);
2227 }
2228
2229 p = strrchr(zc->zc_name, '/');
2230 if (p == NULL || p[1] != '\0')
2231 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2232 p = zc->zc_name + strlen(zc->zc_name);
2233
2234 do {
2235 error = dmu_dir_list_next(os,
2236 sizeof (zc->zc_name) - (p - zc->zc_name), p,
2237 NULL, &zc->zc_cookie);
2238 if (error == ENOENT)
2239 error = SET_ERROR(ESRCH);
2240 } while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
2241 dmu_objset_rele(os, FTAG);
2242
2243 /*
2244 * If it's an internal dataset (ie. with a '$' in its name),
2245 * don't try to get stats for it, otherwise we'll return ENOENT.
2246 */
2247 if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2248 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2249 if (error == ENOENT) {
2250 /* We lost a race with destroy, get the next one. */
2251 zc->zc_name[orig_len] = '\0';
2252 goto top;
2253 }
2254 }
2255 return (error);
2256 }
2257
2258 /*
2259 * inputs:
2260 * zc_name name of filesystem
2261 * zc_cookie zap cursor
2262 * zc_nvlist_src iteration range nvlist
2263 * zc_nvlist_src_size size of iteration range nvlist
2264 *
2265 * outputs:
2266 * zc_name name of next snapshot
2267 * zc_objset_stats stats
2268 * zc_nvlist_dst property nvlist
2269 * zc_nvlist_dst_size size of property nvlist
2270 */
2271 static int
2272 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2273 {
2274 int error;
2275 objset_t *os, *ossnap;
2276 dsl_dataset_t *ds;
2277 uint64_t min_txg = 0, max_txg = 0;
2278
2279 if (zc->zc_nvlist_src_size != 0) {
2280 nvlist_t *props = NULL;
2281 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2282 zc->zc_iflags, &props);
2283 if (error != 0)
2284 return (error);
2285 (void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
2286 &min_txg);
2287 (void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
2288 &max_txg);
2289 nvlist_free(props);
2290 }
2291
2292 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2293 if (error != 0) {
2294 return (error == ENOENT ? SET_ERROR(ESRCH) : error);
2295 }
2296
2297 /*
2298 * A dataset name of maximum length cannot have any snapshots,
2299 * so exit immediately.
2300 */
2301 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2302 ZFS_MAX_DATASET_NAME_LEN) {
2303 dmu_objset_rele(os, FTAG);
2304 return (SET_ERROR(ESRCH));
2305 }
2306
2307 while (error == 0) {
2308 if (issig(JUSTLOOKING) && issig(FORREAL)) {
2309 error = SET_ERROR(EINTR);
2310 break;
2311 }
2312
2313 error = dmu_snapshot_list_next(os,
2314 sizeof (zc->zc_name) - strlen(zc->zc_name),
2315 zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
2316 &zc->zc_cookie, NULL);
2317 if (error == ENOENT) {
2318 error = SET_ERROR(ESRCH);
2319 break;
2320 } else if (error != 0) {
2321 break;
2322 }
2323
2324 error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
2325 FTAG, &ds);
2326 if (error != 0)
2327 break;
2328
2329 if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
2330 (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
2331 dsl_dataset_rele(ds, FTAG);
2332 /* undo snapshot name append */
2333 *(strchr(zc->zc_name, '@') + 1) = '\0';
2334 /* skip snapshot */
2335 continue;
2336 }
2337
2338 if (zc->zc_simple) {
2339 dsl_dataset_rele(ds, FTAG);
2340 break;
2341 }
2342
2343 if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
2344 dsl_dataset_rele(ds, FTAG);
2345 break;
2346 }
2347 if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
2348 dsl_dataset_rele(ds, FTAG);
2349 break;
2350 }
2351 dsl_dataset_rele(ds, FTAG);
2352 break;
2353 }
2354
2355 dmu_objset_rele(os, FTAG);
2356 /* if we failed, undo the @ that we tacked on to zc_name */
2357 if (error != 0)
2358 *strchr(zc->zc_name, '@') = '\0';
2359 return (error);
2360 }
2361
2362 static int
2363 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2364 {
2365 const char *propname = nvpair_name(pair);
2366 uint64_t *valary;
2367 unsigned int vallen;
2368 const char *domain;
2369 char *dash;
2370 zfs_userquota_prop_t type;
2371 uint64_t rid;
2372 uint64_t quota;
2373 zfsvfs_t *zfsvfs;
2374 int err;
2375
2376 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2377 nvlist_t *attrs;
2378 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2379 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2380 &pair) != 0)
2381 return (SET_ERROR(EINVAL));
2382 }
2383
2384 /*
2385 * A correctly constructed propname is encoded as
2386 * userquota@<rid>-<domain>.
2387 */
2388 if ((dash = strchr(propname, '-')) == NULL ||
2389 nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2390 vallen != 3)
2391 return (SET_ERROR(EINVAL));
2392
2393 domain = dash + 1;
2394 type = valary[0];
2395 rid = valary[1];
2396 quota = valary[2];
2397
2398 err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2399 if (err == 0) {
2400 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2401 zfsvfs_rele(zfsvfs, FTAG);
2402 }
2403
2404 return (err);
2405 }
2406
2407 /*
2408 * If the named property is one that has a special function to set its value,
2409 * return 0 on success and a positive error code on failure; otherwise if it is
2410 * not one of the special properties handled by this function, return -1.
2411 *
2412 * XXX: It would be better for callers of the property interface if we handled
2413 * these special cases in dsl_prop.c (in the dsl layer).
2414 */
2415 static int
2416 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2417 nvpair_t *pair)
2418 {
2419 const char *propname = nvpair_name(pair);
2420 zfs_prop_t prop = zfs_name_to_prop(propname);
2421 uint64_t intval = 0;
2422 char *strval = NULL;
2423 int err = -1;
2424
2425 if (prop == ZPROP_INVAL) {
2426 if (zfs_prop_userquota(propname))
2427 return (zfs_prop_set_userquota(dsname, pair));
2428 return (-1);
2429 }
2430
2431 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2432 nvlist_t *attrs;
2433 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2434 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2435 &pair) == 0);
2436 }
2437
2438 /* all special properties are numeric except for keylocation */
2439 if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2440 strval = fnvpair_value_string(pair);
2441 } else {
2442 intval = fnvpair_value_uint64(pair);
2443 }
2444
2445 switch (prop) {
2446 case ZFS_PROP_QUOTA:
2447 err = dsl_dir_set_quota(dsname, source, intval);
2448 break;
2449 case ZFS_PROP_REFQUOTA:
2450 err = dsl_dataset_set_refquota(dsname, source, intval);
2451 break;
2452 case ZFS_PROP_FILESYSTEM_LIMIT:
2453 case ZFS_PROP_SNAPSHOT_LIMIT:
2454 if (intval == UINT64_MAX) {
2455 /* clearing the limit, just do it */
2456 err = 0;
2457 } else {
2458 err = dsl_dir_activate_fs_ss_limit(dsname);
2459 }
2460 /*
2461 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2462 * default path to set the value in the nvlist.
2463 */
2464 if (err == 0)
2465 err = -1;
2466 break;
2467 case ZFS_PROP_KEYLOCATION:
2468 err = dsl_crypto_can_set_keylocation(dsname, strval);
2469
2470 /*
2471 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2472 * default path to set the value in the nvlist.
2473 */
2474 if (err == 0)
2475 err = -1;
2476 break;
2477 case ZFS_PROP_RESERVATION:
2478 err = dsl_dir_set_reservation(dsname, source, intval);
2479 break;
2480 case ZFS_PROP_REFRESERVATION:
2481 err = dsl_dataset_set_refreservation(dsname, source, intval);
2482 break;
2483 case ZFS_PROP_VOLSIZE:
2484 err = zvol_set_volsize(dsname, intval);
2485 break;
2486 case ZFS_PROP_SNAPDEV:
2487 err = zvol_set_snapdev(dsname, source, intval);
2488 break;
2489 case ZFS_PROP_VOLMODE:
2490 err = zvol_set_volmode(dsname, source, intval);
2491 break;
2492 case ZFS_PROP_VERSION:
2493 {
2494 zfsvfs_t *zfsvfs;
2495
2496 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2497 break;
2498
2499 err = zfs_set_version(zfsvfs, intval);
2500 zfsvfs_rele(zfsvfs, FTAG);
2501
2502 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2503 zfs_cmd_t *zc;
2504
2505 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2506 (void) strlcpy(zc->zc_name, dsname,
2507 sizeof (zc->zc_name));
2508 (void) zfs_ioc_userspace_upgrade(zc);
2509 (void) zfs_ioc_id_quota_upgrade(zc);
2510 kmem_free(zc, sizeof (zfs_cmd_t));
2511 }
2512 break;
2513 }
2514 default:
2515 err = -1;
2516 }
2517
2518 return (err);
2519 }
2520
2521 /*
2522 * This function is best effort. If it fails to set any of the given properties,
2523 * it continues to set as many as it can and returns the last error
2524 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2525 * with the list of names of all the properties that failed along with the
2526 * corresponding error numbers.
2527 *
2528 * If every property is set successfully, zero is returned and errlist is not
2529 * modified.
2530 */
2531 int
2532 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2533 nvlist_t *errlist)
2534 {
2535 nvpair_t *pair;
2536 nvpair_t *propval;
2537 int rv = 0;
2538 uint64_t intval;
2539 char *strval;
2540
2541 nvlist_t *genericnvl = fnvlist_alloc();
2542 nvlist_t *retrynvl = fnvlist_alloc();
2543 retry:
2544 pair = NULL;
2545 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2546 const char *propname = nvpair_name(pair);
2547 zfs_prop_t prop = zfs_name_to_prop(propname);
2548 int err = 0;
2549
2550 /* decode the property value */
2551 propval = pair;
2552 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2553 nvlist_t *attrs;
2554 attrs = fnvpair_value_nvlist(pair);
2555 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2556 &propval) != 0)
2557 err = SET_ERROR(EINVAL);
2558 }
2559
2560 /* Validate value type */
2561 if (err == 0 && source == ZPROP_SRC_INHERITED) {
2562 /* inherited properties are expected to be booleans */
2563 if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
2564 err = SET_ERROR(EINVAL);
2565 } else if (err == 0 && prop == ZPROP_INVAL) {
2566 if (zfs_prop_user(propname)) {
2567 if (nvpair_type(propval) != DATA_TYPE_STRING)
2568 err = SET_ERROR(EINVAL);
2569 } else if (zfs_prop_userquota(propname)) {
2570 if (nvpair_type(propval) !=
2571 DATA_TYPE_UINT64_ARRAY)
2572 err = SET_ERROR(EINVAL);
2573 } else {
2574 err = SET_ERROR(EINVAL);
2575 }
2576 } else if (err == 0) {
2577 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2578 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2579 err = SET_ERROR(EINVAL);
2580 } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2581 const char *unused;
2582
2583 intval = fnvpair_value_uint64(propval);
2584
2585 switch (zfs_prop_get_type(prop)) {
2586 case PROP_TYPE_NUMBER:
2587 break;
2588 case PROP_TYPE_STRING:
2589 err = SET_ERROR(EINVAL);
2590 break;
2591 case PROP_TYPE_INDEX:
2592 if (zfs_prop_index_to_string(prop,
2593 intval, &unused) != 0)
2594 err = SET_ERROR(EINVAL);
2595 break;
2596 default:
2597 cmn_err(CE_PANIC,
2598 "unknown property type");
2599 }
2600 } else {
2601 err = SET_ERROR(EINVAL);
2602 }
2603 }
2604
2605 /* Validate permissions */
2606 if (err == 0)
2607 err = zfs_check_settable(dsname, pair, CRED());
2608
2609 if (err == 0) {
2610 if (source == ZPROP_SRC_INHERITED)
2611 err = -1; /* does not need special handling */
2612 else
2613 err = zfs_prop_set_special(dsname, source,
2614 pair);
2615 if (err == -1) {
2616 /*
2617 * For better performance we build up a list of
2618 * properties to set in a single transaction.
2619 */
2620 err = nvlist_add_nvpair(genericnvl, pair);
2621 } else if (err != 0 && nvl != retrynvl) {
2622 /*
2623 * This may be a spurious error caused by
2624 * receiving quota and reservation out of order.
2625 * Try again in a second pass.
2626 */
2627 err = nvlist_add_nvpair(retrynvl, pair);
2628 }
2629 }
2630
2631 if (err != 0) {
2632 if (errlist != NULL)
2633 fnvlist_add_int32(errlist, propname, err);
2634 rv = err;
2635 }
2636 }
2637
2638 if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2639 nvl = retrynvl;
2640 goto retry;
2641 }
2642
2643 if (!nvlist_empty(genericnvl) &&
2644 dsl_props_set(dsname, source, genericnvl) != 0) {
2645 /*
2646 * If this fails, we still want to set as many properties as we
2647 * can, so try setting them individually.
2648 */
2649 pair = NULL;
2650 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2651 const char *propname = nvpair_name(pair);
2652 int err = 0;
2653
2654 propval = pair;
2655 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2656 nvlist_t *attrs;
2657 attrs = fnvpair_value_nvlist(pair);
2658 propval = fnvlist_lookup_nvpair(attrs,
2659 ZPROP_VALUE);
2660 }
2661
2662 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2663 strval = fnvpair_value_string(propval);
2664 err = dsl_prop_set_string(dsname, propname,
2665 source, strval);
2666 } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
2667 err = dsl_prop_inherit(dsname, propname,
2668 source);
2669 } else {
2670 intval = fnvpair_value_uint64(propval);
2671 err = dsl_prop_set_int(dsname, propname, source,
2672 intval);
2673 }
2674
2675 if (err != 0) {
2676 if (errlist != NULL) {
2677 fnvlist_add_int32(errlist, propname,
2678 err);
2679 }
2680 rv = err;
2681 }
2682 }
2683 }
2684 nvlist_free(genericnvl);
2685 nvlist_free(retrynvl);
2686
2687 return (rv);
2688 }
2689
2690 /*
2691 * Check that all the properties are valid user properties.
2692 */
2693 static int
2694 zfs_check_userprops(nvlist_t *nvl)
2695 {
2696 nvpair_t *pair = NULL;
2697
2698 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2699 const char *propname = nvpair_name(pair);
2700
2701 if (!zfs_prop_user(propname) ||
2702 nvpair_type(pair) != DATA_TYPE_STRING)
2703 return (SET_ERROR(EINVAL));
2704
2705 if (strlen(propname) >= ZAP_MAXNAMELEN)
2706 return (SET_ERROR(ENAMETOOLONG));
2707
2708 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2709 return (SET_ERROR(E2BIG));
2710 }
2711 return (0);
2712 }
2713
2714 static void
2715 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2716 {
2717 nvpair_t *pair;
2718
2719 VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2720
2721 pair = NULL;
2722 while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2723 if (nvlist_exists(skipped, nvpair_name(pair)))
2724 continue;
2725
2726 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2727 }
2728 }
2729
2730 static int
2731 clear_received_props(const char *dsname, nvlist_t *props,
2732 nvlist_t *skipped)
2733 {
2734 int err = 0;
2735 nvlist_t *cleared_props = NULL;
2736 props_skip(props, skipped, &cleared_props);
2737 if (!nvlist_empty(cleared_props)) {
2738 /*
2739 * Acts on local properties until the dataset has received
2740 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2741 */
2742 zprop_source_t flags = (ZPROP_SRC_NONE |
2743 (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2744 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2745 }
2746 nvlist_free(cleared_props);
2747 return (err);
2748 }
2749
2750 /*
2751 * inputs:
2752 * zc_name name of filesystem
2753 * zc_value name of property to set
2754 * zc_nvlist_src{_size} nvlist of properties to apply
2755 * zc_cookie received properties flag
2756 *
2757 * outputs:
2758 * zc_nvlist_dst{_size} error for each unapplied received property
2759 */
2760 static int
2761 zfs_ioc_set_prop(zfs_cmd_t *zc)
2762 {
2763 nvlist_t *nvl;
2764 boolean_t received = zc->zc_cookie;
2765 zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2766 ZPROP_SRC_LOCAL);
2767 nvlist_t *errors;
2768 int error;
2769
2770 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2771 zc->zc_iflags, &nvl)) != 0)
2772 return (error);
2773
2774 if (received) {
2775 nvlist_t *origprops;
2776
2777 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2778 (void) clear_received_props(zc->zc_name,
2779 origprops, nvl);
2780 nvlist_free(origprops);
2781 }
2782
2783 error = dsl_prop_set_hasrecvd(zc->zc_name);
2784 }
2785
2786 errors = fnvlist_alloc();
2787 if (error == 0)
2788 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2789
2790 if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2791 (void) put_nvlist(zc, errors);
2792 }
2793
2794 nvlist_free(errors);
2795 nvlist_free(nvl);
2796 return (error);
2797 }
2798
2799 /*
2800 * inputs:
2801 * zc_name name of filesystem
2802 * zc_value name of property to inherit
2803 * zc_cookie revert to received value if TRUE
2804 *
2805 * outputs: none
2806 */
2807 static int
2808 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2809 {
2810 const char *propname = zc->zc_value;
2811 zfs_prop_t prop = zfs_name_to_prop(propname);
2812 boolean_t received = zc->zc_cookie;
2813 zprop_source_t source = (received
2814 ? ZPROP_SRC_NONE /* revert to received value, if any */
2815 : ZPROP_SRC_INHERITED); /* explicitly inherit */
2816 nvlist_t *dummy;
2817 nvpair_t *pair;
2818 zprop_type_t type;
2819 int err;
2820
2821 if (!received) {
2822 /*
2823 * Only check this in the non-received case. We want to allow
2824 * 'inherit -S' to revert non-inheritable properties like quota
2825 * and reservation to the received or default values even though
2826 * they are not considered inheritable.
2827 */
2828 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2829 return (SET_ERROR(EINVAL));
2830 }
2831
2832 if (prop == ZPROP_INVAL) {
2833 if (!zfs_prop_user(propname))
2834 return (SET_ERROR(EINVAL));
2835
2836 type = PROP_TYPE_STRING;
2837 } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
2838 return (SET_ERROR(EINVAL));
2839 } else {
2840 type = zfs_prop_get_type(prop);
2841 }
2842
2843 /*
2844 * zfs_prop_set_special() expects properties in the form of an
2845 * nvpair with type info.
2846 */
2847 dummy = fnvlist_alloc();
2848
2849 switch (type) {
2850 case PROP_TYPE_STRING:
2851 VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2852 break;
2853 case PROP_TYPE_NUMBER:
2854 case PROP_TYPE_INDEX:
2855 VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2856 break;
2857 default:
2858 err = SET_ERROR(EINVAL);
2859 goto errout;
2860 }
2861
2862 pair = nvlist_next_nvpair(dummy, NULL);
2863 if (pair == NULL) {
2864 err = SET_ERROR(EINVAL);
2865 } else {
2866 err = zfs_prop_set_special(zc->zc_name, source, pair);
2867 if (err == -1) /* property is not "special", needs handling */
2868 err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
2869 source);
2870 }
2871
2872 errout:
2873 nvlist_free(dummy);
2874 return (err);
2875 }
2876
2877 static int
2878 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2879 {
2880 nvlist_t *props;
2881 spa_t *spa;
2882 int error;
2883 nvpair_t *pair;
2884
2885 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2886 zc->zc_iflags, &props)))
2887 return (error);
2888
2889 /*
2890 * If the only property is the configfile, then just do a spa_lookup()
2891 * to handle the faulted case.
2892 */
2893 pair = nvlist_next_nvpair(props, NULL);
2894 if (pair != NULL && strcmp(nvpair_name(pair),
2895 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2896 nvlist_next_nvpair(props, pair) == NULL) {
2897 mutex_enter(&spa_namespace_lock);
2898 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2899 spa_configfile_set(spa, props, B_FALSE);
2900 spa_write_cachefile(spa, B_FALSE, B_TRUE);
2901 }
2902 mutex_exit(&spa_namespace_lock);
2903 if (spa != NULL) {
2904 nvlist_free(props);
2905 return (0);
2906 }
2907 }
2908
2909 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2910 nvlist_free(props);
2911 return (error);
2912 }
2913
2914 error = spa_prop_set(spa, props);
2915
2916 nvlist_free(props);
2917 spa_close(spa, FTAG);
2918
2919 return (error);
2920 }
2921
2922 static int
2923 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2924 {
2925 spa_t *spa;
2926 int error;
2927 nvlist_t *nvp = NULL;
2928
2929 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2930 /*
2931 * If the pool is faulted, there may be properties we can still
2932 * get (such as altroot and cachefile), so attempt to get them
2933 * anyway.
2934 */
2935 mutex_enter(&spa_namespace_lock);
2936 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2937 error = spa_prop_get(spa, &nvp);
2938 mutex_exit(&spa_namespace_lock);
2939 } else {
2940 error = spa_prop_get(spa, &nvp);
2941 spa_close(spa, FTAG);
2942 }
2943
2944 if (error == 0 && zc->zc_nvlist_dst != 0)
2945 error = put_nvlist(zc, nvp);
2946 else
2947 error = SET_ERROR(EFAULT);
2948
2949 nvlist_free(nvp);
2950 return (error);
2951 }
2952
2953 /*
2954 * inputs:
2955 * zc_name name of filesystem
2956 * zc_nvlist_src{_size} nvlist of delegated permissions
2957 * zc_perm_action allow/unallow flag
2958 *
2959 * outputs: none
2960 */
2961 static int
2962 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2963 {
2964 int error;
2965 nvlist_t *fsaclnv = NULL;
2966
2967 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2968 zc->zc_iflags, &fsaclnv)) != 0)
2969 return (error);
2970
2971 /*
2972 * Verify nvlist is constructed correctly
2973 */
2974 if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2975 nvlist_free(fsaclnv);
2976 return (SET_ERROR(EINVAL));
2977 }
2978
2979 /*
2980 * If we don't have PRIV_SYS_MOUNT, then validate
2981 * that user is allowed to hand out each permission in
2982 * the nvlist(s)
2983 */
2984
2985 error = secpolicy_zfs(CRED());
2986 if (error != 0) {
2987 if (zc->zc_perm_action == B_FALSE) {
2988 error = dsl_deleg_can_allow(zc->zc_name,
2989 fsaclnv, CRED());
2990 } else {
2991 error = dsl_deleg_can_unallow(zc->zc_name,
2992 fsaclnv, CRED());
2993 }
2994 }
2995
2996 if (error == 0)
2997 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2998
2999 nvlist_free(fsaclnv);
3000 return (error);
3001 }
3002
3003 /*
3004 * inputs:
3005 * zc_name name of filesystem
3006 *
3007 * outputs:
3008 * zc_nvlist_src{_size} nvlist of delegated permissions
3009 */
3010 static int
3011 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3012 {
3013 nvlist_t *nvp;
3014 int error;
3015
3016 if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3017 error = put_nvlist(zc, nvp);
3018 nvlist_free(nvp);
3019 }
3020
3021 return (error);
3022 }
3023
3024 /* ARGSUSED */
3025 static void
3026 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3027 {
3028 zfs_creat_t *zct = arg;
3029
3030 zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3031 }
3032
3033 #define ZFS_PROP_UNDEFINED ((uint64_t)-1)
3034
3035 /*
3036 * inputs:
3037 * os parent objset pointer (NULL if root fs)
3038 * fuids_ok fuids allowed in this version of the spa?
3039 * sa_ok SAs allowed in this version of the spa?
3040 * createprops list of properties requested by creator
3041 *
3042 * outputs:
3043 * zplprops values for the zplprops we attach to the master node object
3044 * is_ci true if requested file system will be purely case-insensitive
3045 *
3046 * Determine the settings for utf8only, normalization and
3047 * casesensitivity. Specific values may have been requested by the
3048 * creator and/or we can inherit values from the parent dataset. If
3049 * the file system is of too early a vintage, a creator can not
3050 * request settings for these properties, even if the requested
3051 * setting is the default value. We don't actually want to create dsl
3052 * properties for these, so remove them from the source nvlist after
3053 * processing.
3054 */
3055 static int
3056 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3057 boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3058 nvlist_t *zplprops, boolean_t *is_ci)
3059 {
3060 uint64_t sense = ZFS_PROP_UNDEFINED;
3061 uint64_t norm = ZFS_PROP_UNDEFINED;
3062 uint64_t u8 = ZFS_PROP_UNDEFINED;
3063 int error;
3064
3065 ASSERT(zplprops != NULL);
3066
3067 /* parent dataset must be a filesystem */
3068 if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3069 return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3070
3071 /*
3072 * Pull out creator prop choices, if any.
3073 */
3074 if (createprops) {
3075 (void) nvlist_lookup_uint64(createprops,
3076 zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3077 (void) nvlist_lookup_uint64(createprops,
3078 zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3079 (void) nvlist_remove_all(createprops,
3080 zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3081 (void) nvlist_lookup_uint64(createprops,
3082 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3083 (void) nvlist_remove_all(createprops,
3084 zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3085 (void) nvlist_lookup_uint64(createprops,
3086 zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3087 (void) nvlist_remove_all(createprops,
3088 zfs_prop_to_name(ZFS_PROP_CASE));
3089 }
3090
3091 /*
3092 * If the zpl version requested is whacky or the file system
3093 * or pool is version is too "young" to support normalization
3094 * and the creator tried to set a value for one of the props,
3095 * error out.
3096 */
3097 if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3098 (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3099 (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3100 (zplver < ZPL_VERSION_NORMALIZATION &&
3101 (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3102 sense != ZFS_PROP_UNDEFINED)))
3103 return (SET_ERROR(ENOTSUP));
3104
3105 /*
3106 * Put the version in the zplprops
3107 */
3108 VERIFY(nvlist_add_uint64(zplprops,
3109 zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3110
3111 if (norm == ZFS_PROP_UNDEFINED &&
3112 (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3113 return (error);
3114 VERIFY(nvlist_add_uint64(zplprops,
3115 zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3116
3117 /*
3118 * If we're normalizing, names must always be valid UTF-8 strings.
3119 */
3120 if (norm)
3121 u8 = 1;
3122 if (u8 == ZFS_PROP_UNDEFINED &&
3123 (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3124 return (error);
3125 VERIFY(nvlist_add_uint64(zplprops,
3126 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3127
3128 if (sense == ZFS_PROP_UNDEFINED &&
3129 (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3130 return (error);
3131 VERIFY(nvlist_add_uint64(zplprops,
3132 zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3133
3134 if (is_ci)
3135 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3136
3137 return (0);
3138 }
3139
3140 static int
3141 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3142 nvlist_t *zplprops, boolean_t *is_ci)
3143 {
3144 boolean_t fuids_ok, sa_ok;
3145 uint64_t zplver = ZPL_VERSION;
3146 objset_t *os = NULL;
3147 char parentname[ZFS_MAX_DATASET_NAME_LEN];
3148 spa_t *spa;
3149 uint64_t spa_vers;
3150 int error;
3151
3152 zfs_get_parent(dataset, parentname, sizeof (parentname));
3153
3154 if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3155 return (error);
3156
3157 spa_vers = spa_version(spa);
3158 spa_close(spa, FTAG);
3159
3160 zplver = zfs_zpl_version_map(spa_vers);
3161 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3162 sa_ok = (zplver >= ZPL_VERSION_SA);
3163
3164 /*
3165 * Open parent object set so we can inherit zplprop values.
3166 */
3167 if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3168 return (error);
3169
3170 error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3171 zplprops, is_ci);
3172 dmu_objset_rele(os, FTAG);
3173 return (error);
3174 }
3175
3176 static int
3177 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3178 nvlist_t *zplprops, boolean_t *is_ci)
3179 {
3180 boolean_t fuids_ok;
3181 boolean_t sa_ok;
3182 uint64_t zplver = ZPL_VERSION;
3183 int error;
3184
3185 zplver = zfs_zpl_version_map(spa_vers);
3186 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3187 sa_ok = (zplver >= ZPL_VERSION_SA);
3188
3189 error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3190 createprops, zplprops, is_ci);
3191 return (error);
3192 }
3193
3194 /*
3195 * innvl: {
3196 * "type" -> dmu_objset_type_t (int32)
3197 * (optional) "props" -> { prop -> value }
3198 * (optional) "hidden_args" -> { "wkeydata" -> value }
3199 * raw uint8_t array of encryption wrapping key data (32 bytes)
3200 * }
3201 *
3202 * outnvl: propname -> error code (int32)
3203 */
3204
3205 static const zfs_ioc_key_t zfs_keys_create[] = {
3206 {"type", DATA_TYPE_INT32, 0},
3207 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3208 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3209 };
3210
3211 static int
3212 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3213 {
3214 int error = 0;
3215 zfs_creat_t zct = { 0 };
3216 nvlist_t *nvprops = NULL;
3217 nvlist_t *hidden_args = NULL;
3218 void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3219 dmu_objset_type_t type;
3220 boolean_t is_insensitive = B_FALSE;
3221 dsl_crypto_params_t *dcp = NULL;
3222
3223 type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3224 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3225 (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3226
3227 switch (type) {
3228 case DMU_OST_ZFS:
3229 cbfunc = zfs_create_cb;
3230 break;
3231
3232 case DMU_OST_ZVOL:
3233 cbfunc = zvol_create_cb;
3234 break;
3235
3236 default:
3237 cbfunc = NULL;
3238 break;
3239 }
3240 if (strchr(fsname, '@') ||
3241 strchr(fsname, '%'))
3242 return (SET_ERROR(EINVAL));
3243
3244 zct.zct_props = nvprops;
3245
3246 if (cbfunc == NULL)
3247 return (SET_ERROR(EINVAL));
3248
3249 if (type == DMU_OST_ZVOL) {
3250 uint64_t volsize, volblocksize;
3251
3252 if (nvprops == NULL)
3253 return (SET_ERROR(EINVAL));
3254 if (nvlist_lookup_uint64(nvprops,
3255 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3256 return (SET_ERROR(EINVAL));
3257
3258 if ((error = nvlist_lookup_uint64(nvprops,
3259 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3260 &volblocksize)) != 0 && error != ENOENT)
3261 return (SET_ERROR(EINVAL));
3262
3263 if (error != 0)
3264 volblocksize = zfs_prop_default_numeric(
3265 ZFS_PROP_VOLBLOCKSIZE);
3266
3267 if ((error = zvol_check_volblocksize(fsname,
3268 volblocksize)) != 0 ||
3269 (error = zvol_check_volsize(volsize,
3270 volblocksize)) != 0)
3271 return (error);
3272 } else if (type == DMU_OST_ZFS) {
3273 int error;
3274
3275 /*
3276 * We have to have normalization and
3277 * case-folding flags correct when we do the
3278 * file system creation, so go figure them out
3279 * now.
3280 */
3281 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3282 NV_UNIQUE_NAME, KM_SLEEP) == 0);
3283 error = zfs_fill_zplprops(fsname, nvprops,
3284 zct.zct_zplprops, &is_insensitive);
3285 if (error != 0) {
3286 nvlist_free(zct.zct_zplprops);
3287 return (error);
3288 }
3289 }
3290
3291 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3292 hidden_args, &dcp);
3293 if (error != 0) {
3294 nvlist_free(zct.zct_zplprops);
3295 return (error);
3296 }
3297
3298 error = dmu_objset_create(fsname, type,
3299 is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3300
3301 nvlist_free(zct.zct_zplprops);
3302 dsl_crypto_params_free(dcp, !!error);
3303
3304 /*
3305 * It would be nice to do this atomically.
3306 */
3307 if (error == 0) {
3308 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3309 nvprops, outnvl);
3310 if (error != 0) {
3311 spa_t *spa;
3312 int error2;
3313
3314 /*
3315 * Volumes will return EBUSY and cannot be destroyed
3316 * until all asynchronous minor handling (e.g. from
3317 * setting the volmode property) has completed. Wait for
3318 * the spa_zvol_taskq to drain then retry.
3319 */
3320 error2 = dsl_destroy_head(fsname);
3321 while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3322 error2 = spa_open(fsname, &spa, FTAG);
3323 if (error2 == 0) {
3324 taskq_wait(spa->spa_zvol_taskq);
3325 spa_close(spa, FTAG);
3326 }
3327 error2 = dsl_destroy_head(fsname);
3328 }
3329 }
3330 }
3331 return (error);
3332 }
3333
3334 /*
3335 * innvl: {
3336 * "origin" -> name of origin snapshot
3337 * (optional) "props" -> { prop -> value }
3338 * (optional) "hidden_args" -> { "wkeydata" -> value }
3339 * raw uint8_t array of encryption wrapping key data (32 bytes)
3340 * }
3341 *
3342 * outputs:
3343 * outnvl: propname -> error code (int32)
3344 */
3345 static const zfs_ioc_key_t zfs_keys_clone[] = {
3346 {"origin", DATA_TYPE_STRING, 0},
3347 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3348 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3349 };
3350
3351 static int
3352 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3353 {
3354 int error = 0;
3355 nvlist_t *nvprops = NULL;
3356 char *origin_name;
3357
3358 origin_name = fnvlist_lookup_string(innvl, "origin");
3359 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3360
3361 if (strchr(fsname, '@') ||
3362 strchr(fsname, '%'))
3363 return (SET_ERROR(EINVAL));
3364
3365 if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3366 return (SET_ERROR(EINVAL));
3367
3368 error = dmu_objset_clone(fsname, origin_name);
3369
3370 /*
3371 * It would be nice to do this atomically.
3372 */
3373 if (error == 0) {
3374 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3375 nvprops, outnvl);
3376 if (error != 0)
3377 (void) dsl_destroy_head(fsname);
3378 }
3379 return (error);
3380 }
3381
3382 static const zfs_ioc_key_t zfs_keys_remap[] = {
3383 /* no nvl keys */
3384 };
3385
3386 /* ARGSUSED */
3387 static int
3388 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3389 {
3390 /* This IOCTL is no longer supported. */
3391 return (0);
3392 }
3393
3394 /*
3395 * innvl: {
3396 * "snaps" -> { snapshot1, snapshot2 }
3397 * (optional) "props" -> { prop -> value (string) }
3398 * }
3399 *
3400 * outnvl: snapshot -> error code (int32)
3401 */
3402 static const zfs_ioc_key_t zfs_keys_snapshot[] = {
3403 {"snaps", DATA_TYPE_NVLIST, 0},
3404 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3405 };
3406
3407 static int
3408 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3409 {
3410 nvlist_t *snaps;
3411 nvlist_t *props = NULL;
3412 int error, poollen;
3413 nvpair_t *pair;
3414
3415 (void) nvlist_lookup_nvlist(innvl, "props", &props);
3416 if (!nvlist_empty(props) &&
3417 zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3418 return (SET_ERROR(ENOTSUP));
3419 if ((error = zfs_check_userprops(props)) != 0)
3420 return (error);
3421
3422 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3423 poollen = strlen(poolname);
3424 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3425 pair = nvlist_next_nvpair(snaps, pair)) {
3426 const char *name = nvpair_name(pair);
3427 char *cp = strchr(name, '@');
3428
3429 /*
3430 * The snap name must contain an @, and the part after it must
3431 * contain only valid characters.
3432 */
3433 if (cp == NULL ||
3434 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3435 return (SET_ERROR(EINVAL));
3436
3437 /*
3438 * The snap must be in the specified pool.
3439 */
3440 if (strncmp(name, poolname, poollen) != 0 ||
3441 (name[poollen] != '/' && name[poollen] != '@'))
3442 return (SET_ERROR(EXDEV));
3443
3444 /*
3445 * Check for permission to set the properties on the fs.
3446 */
3447 if (!nvlist_empty(props)) {
3448 *cp = '\0';
3449 error = zfs_secpolicy_write_perms(name,
3450 ZFS_DELEG_PERM_USERPROP, CRED());
3451 *cp = '@';
3452 if (error != 0)
3453 return (error);
3454 }
3455
3456 /* This must be the only snap of this fs. */
3457 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3458 pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3459 if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3460 == 0) {
3461 return (SET_ERROR(EXDEV));
3462 }
3463 }
3464 }
3465
3466 error = dsl_dataset_snapshot(snaps, props, outnvl);
3467
3468 return (error);
3469 }
3470
3471 /*
3472 * innvl: "message" -> string
3473 */
3474 static const zfs_ioc_key_t zfs_keys_log_history[] = {
3475 {"message", DATA_TYPE_STRING, 0},
3476 };
3477
3478 /* ARGSUSED */
3479 static int
3480 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3481 {
3482 char *message;
3483 spa_t *spa;
3484 int error;
3485 char *poolname;
3486
3487 /*
3488 * The poolname in the ioctl is not set, we get it from the TSD,
3489 * which was set at the end of the last successful ioctl that allows
3490 * logging. The secpolicy func already checked that it is set.
3491 * Only one log ioctl is allowed after each successful ioctl, so
3492 * we clear the TSD here.
3493 */
3494 poolname = tsd_get(zfs_allow_log_key);
3495 if (poolname == NULL)
3496 return (SET_ERROR(EINVAL));
3497 (void) tsd_set(zfs_allow_log_key, NULL);
3498 error = spa_open(poolname, &spa, FTAG);
3499 kmem_strfree(poolname);
3500 if (error != 0)
3501 return (error);
3502
3503 message = fnvlist_lookup_string(innvl, "message");
3504
3505 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3506 spa_close(spa, FTAG);
3507 return (SET_ERROR(ENOTSUP));
3508 }
3509
3510 error = spa_history_log(spa, message);
3511 spa_close(spa, FTAG);
3512 return (error);
3513 }
3514
3515 /*
3516 * This ioctl is used to set the bootenv configuration on the current
3517 * pool. This configuration is stored in the second padding area of the label,
3518 * and it is used by the GRUB bootloader used on Linux to store the contents
3519 * of the grubenv file. The file is stored as raw ASCII, and is protected by
3520 * an embedded checksum. By default, GRUB will check if the boot filesystem
3521 * supports storing the environment data in a special location, and if so,
3522 * will invoke filesystem specific logic to retrieve it. This can be overridden
3523 * by a variable, should the user so desire.
3524 */
3525 /* ARGSUSED */
3526 static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
3527 {"envmap", DATA_TYPE_STRING, 0},
3528 };
3529
3530 static int
3531 zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3532 {
3533 char *envmap;
3534 int error;
3535 spa_t *spa;
3536
3537 envmap = fnvlist_lookup_string(innvl, "envmap");
3538 if ((error = spa_open(name, &spa, FTAG)) != 0)
3539 return (error);
3540 spa_vdev_state_enter(spa, SCL_ALL);
3541 error = vdev_label_write_bootenv(spa->spa_root_vdev, envmap);
3542 (void) spa_vdev_state_exit(spa, NULL, 0);
3543 spa_close(spa, FTAG);
3544 return (error);
3545 }
3546
3547 static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
3548 /* no nvl keys */
3549 };
3550
3551 /* ARGSUSED */
3552 static int
3553 zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3554 {
3555 spa_t *spa;
3556 int error;
3557
3558 if ((error = spa_open(name, &spa, FTAG)) != 0)
3559 return (error);
3560 spa_vdev_state_enter(spa, SCL_ALL);
3561 error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
3562 (void) spa_vdev_state_exit(spa, NULL, 0);
3563 spa_close(spa, FTAG);
3564 return (error);
3565 }
3566
3567 /*
3568 * The dp_config_rwlock must not be held when calling this, because the
3569 * unmount may need to write out data.
3570 *
3571 * This function is best-effort. Callers must deal gracefully if it
3572 * remains mounted (or is remounted after this call).
3573 *
3574 * Returns 0 if the argument is not a snapshot, or it is not currently a
3575 * filesystem, or we were able to unmount it. Returns error code otherwise.
3576 */
3577 void
3578 zfs_unmount_snap(const char *snapname)
3579 {
3580 if (strchr(snapname, '@') == NULL)
3581 return;
3582
3583 (void) zfsctl_snapshot_unmount((char *)snapname, MNT_FORCE);
3584 }
3585
/*
 * Callback-shaped wrapper around zfs_unmount_snap(). 'arg' is not used and
 * the result is always 0.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	const int always_ok = 0;

	zfs_unmount_snap(snapname);

	return (always_ok);
}
3593
3594 /*
3595 * When a clone is destroyed, its origin may also need to be destroyed,
3596 * in which case it must be unmounted. This routine will do that unmount
3597 * if necessary.
3598 */
3599 void
3600 zfs_destroy_unmount_origin(const char *fsname)
3601 {
3602 int error;
3603 objset_t *os;
3604 dsl_dataset_t *ds;
3605
3606 error = dmu_objset_hold(fsname, FTAG, &os);
3607 if (error != 0)
3608 return;
3609 ds = dmu_objset_ds(os);
3610 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3611 char originname[ZFS_MAX_DATASET_NAME_LEN];
3612 dsl_dataset_name(ds->ds_prev, originname);
3613 dmu_objset_rele(os, FTAG);
3614 zfs_unmount_snap(originname);
3615 } else {
3616 dmu_objset_rele(os, FTAG);
3617 }
3618 }
3619
3620 /*
3621 * innvl: {
3622 * "snaps" -> { snapshot1, snapshot2 }
3623 * (optional boolean) "defer"
3624 * }
3625 *
3626 * outnvl: snapshot -> error code (int32)
3627 */
3628 static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
3629 {"snaps", DATA_TYPE_NVLIST, 0},
3630 {"defer", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
3631 };
3632
3633 /* ARGSUSED */
3634 static int
3635 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3636 {
3637 int poollen;
3638 nvlist_t *snaps;
3639 nvpair_t *pair;
3640 boolean_t defer;
3641 spa_t *spa;
3642
3643 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3644 defer = nvlist_exists(innvl, "defer");
3645
3646 poollen = strlen(poolname);
3647 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3648 pair = nvlist_next_nvpair(snaps, pair)) {
3649 const char *name = nvpair_name(pair);
3650
3651 /*
3652 * The snap must be in the specified pool to prevent the
3653 * invalid removal of zvol minors below.
3654 */
3655 if (strncmp(name, poolname, poollen) != 0 ||
3656 (name[poollen] != '/' && name[poollen] != '@'))
3657 return (SET_ERROR(EXDEV));
3658
3659 zfs_unmount_snap(nvpair_name(pair));
3660 if (spa_open(name, &spa, FTAG) == 0) {
3661 zvol_remove_minors(spa, name, B_TRUE);
3662 spa_close(spa, FTAG);
3663 }
3664 }
3665
3666 return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3667 }
3668
3669 /*
3670 * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
3671 * All bookmarks and snapshots must be in the same pool.
3672 * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
3673 *
3674 * innvl: {
3675 * new_bookmark1 -> existing_snapshot,
3676 * new_bookmark2 -> existing_bookmark,
3677 * }
3678 *
3679 * outnvl: bookmark -> error code (int32)
3680 *
3681 */
3682 static const zfs_ioc_key_t zfs_keys_bookmark[] = {
3683 {"<bookmark>...", DATA_TYPE_STRING, ZK_WILDCARDLIST},
3684 };
3685
3686 /* ARGSUSED */
3687 static int
3688 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3689 {
3690 return (dsl_bookmark_create(innvl, outnvl));
3691 }
3692
3693 /*
3694 * innvl: {
3695 * property 1, property 2, ...
3696 * }
3697 *
3698 * outnvl: {
3699 * bookmark name 1 -> { property 1, property 2, ... },
3700 * bookmark name 2 -> { property 1, property 2, ... }
3701 * }
3702 *
3703 */
3704 static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
3705 {"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
3706 };
3707
3708 static int
3709 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3710 {
3711 return (dsl_get_bookmarks(fsname, innvl, outnvl));
3712 }
3713
3714 /*
3715 * innvl is not used.
3716 *
3717 * outnvl: {
3718 * property 1, property 2, ...
3719 * }
3720 *
3721 */
3722 static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
3723 /* no nvl keys */
3724 };
3725
3726 /* ARGSUSED */
3727 static int
3728 zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
3729 nvlist_t *outnvl)
3730 {
3731 char fsname[ZFS_MAX_DATASET_NAME_LEN];
3732 char *bmname;
3733
3734 bmname = strchr(bookmark, '#');
3735 if (bmname == NULL)
3736 return (SET_ERROR(EINVAL));
3737 bmname++;
3738
3739 (void) strlcpy(fsname, bookmark, sizeof (fsname));
3740 *(strchr(fsname, '#')) = '\0';
3741
3742 return (dsl_get_bookmark_props(fsname, bmname, outnvl));
3743 }
3744
3745 /*
3746 * innvl: {
3747 * bookmark name 1, bookmark name 2
3748 * }
3749 *
3750 * outnvl: bookmark -> error code (int32)
3751 *
3752 */
3753 static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
3754 {"<bookmark>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST},
3755 };
3756
3757 static int
3758 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3759 nvlist_t *outnvl)
3760 {
3761 int error, poollen;
3762
3763 poollen = strlen(poolname);
3764 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3765 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3766 const char *name = nvpair_name(pair);
3767 const char *cp = strchr(name, '#');
3768
3769 /*
3770 * The bookmark name must contain an #, and the part after it
3771 * must contain only valid characters.
3772 */
3773 if (cp == NULL ||
3774 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3775 return (SET_ERROR(EINVAL));
3776
3777 /*
3778 * The bookmark must be in the specified pool.
3779 */
3780 if (strncmp(name, poolname, poollen) != 0 ||
3781 (name[poollen] != '/' && name[poollen] != '#'))
3782 return (SET_ERROR(EXDEV));
3783 }
3784
3785 error = dsl_bookmark_destroy(innvl, outnvl);
3786 return (error);
3787 }
3788
3789 static const zfs_ioc_key_t zfs_keys_channel_program[] = {
3790 {"program", DATA_TYPE_STRING, 0},
3791 {"arg", DATA_TYPE_ANY, 0},
3792 {"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
3793 {"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
3794 {"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
3795 };
3796
3797 static int
3798 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3799 nvlist_t *outnvl)
3800 {
3801 char *program;
3802 uint64_t instrlimit, memlimit;
3803 boolean_t sync_flag;
3804 nvpair_t *nvarg = NULL;
3805
3806 program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
3807 if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3808 sync_flag = B_TRUE;
3809 }
3810 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3811 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3812 }
3813 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3814 memlimit = ZCP_DEFAULT_MEMLIMIT;
3815 }
3816 nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
3817
3818 if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3819 return (SET_ERROR(EINVAL));
3820 if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3821 return (SET_ERROR(EINVAL));
3822
3823 return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3824 nvarg, outnvl));
3825 }
3826
3827 /*
3828 * innvl: unused
3829 * outnvl: empty
3830 */
3831 static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
3832 /* no nvl keys */
3833 };
3834
3835 /* ARGSUSED */
3836 static int
3837 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3838 {
3839 return (spa_checkpoint(poolname));
3840 }
3841
3842 /*
3843 * innvl: unused
3844 * outnvl: empty
3845 */
3846 static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
3847 /* no nvl keys */
3848 };
3849
3850 /* ARGSUSED */
3851 static int
3852 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3853 nvlist_t *outnvl)
3854 {
3855 return (spa_checkpoint_discard(poolname));
3856 }
3857
3858 /*
3859 * inputs:
3860 * zc_name name of dataset to destroy
3861 * zc_defer_destroy mark for deferred destroy
3862 *
3863 * outputs: none
3864 */
3865 static int
3866 zfs_ioc_destroy(zfs_cmd_t *zc)
3867 {
3868 objset_t *os;
3869 dmu_objset_type_t ost;
3870 int err;
3871
3872 err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3873 if (err != 0)
3874 return (err);
3875 ost = dmu_objset_type(os);
3876 dmu_objset_rele(os, FTAG);
3877
3878 if (ost == DMU_OST_ZFS)
3879 zfs_unmount_snap(zc->zc_name);
3880
3881 if (strchr(zc->zc_name, '@')) {
3882 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3883 } else {
3884 err = dsl_destroy_head(zc->zc_name);
3885 if (err == EEXIST) {
3886 /*
3887 * It is possible that the given DS may have
3888 * hidden child (%recv) datasets - "leftovers"
3889 * resulting from the previously interrupted
3890 * 'zfs receive'.
3891 *
3892 * 6 extra bytes for /%recv
3893 */
3894 char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3895
3896 if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
3897 zc->zc_name, recv_clone_name) >=
3898 sizeof (namebuf))
3899 return (SET_ERROR(EINVAL));
3900
3901 /*
3902 * Try to remove the hidden child (%recv) and after
3903 * that try to remove the target dataset.
3904 * If the hidden child (%recv) does not exist
3905 * the original error (EEXIST) will be returned
3906 */
3907 err = dsl_destroy_head(namebuf);
3908 if (err == 0)
3909 err = dsl_destroy_head(zc->zc_name);
3910 else if (err == ENOENT)
3911 err = SET_ERROR(EEXIST);
3912 }
3913 }
3914
3915 return (err);
3916 }
3917
3918 /*
3919 * innvl: {
3920 * "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
3921 * "initialize_vdevs": { -> guids to initialize (nvlist)
3922 * "vdev_path_1": vdev_guid_1, (uint64),
3923 * "vdev_path_2": vdev_guid_2, (uint64),
3924 * ...
3925 * },
3926 * }
3927 *
3928 * outnvl: {
3929 * "initialize_vdevs": { -> initialization errors (nvlist)
3930 * "vdev_path_1": errno, see function body for possible errnos (uint64)
3931 * "vdev_path_2": errno, ... (uint64)
3932 * ...
3933 * }
3934 * }
3935 *
3936 * EINVAL is returned for an unknown commands or if any of the provided vdev
3937 * guids have be specified with a type other than uint64.
3938 */
3939 static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
3940 {ZPOOL_INITIALIZE_COMMAND, DATA_TYPE_UINT64, 0},
3941 {ZPOOL_INITIALIZE_VDEVS, DATA_TYPE_NVLIST, 0}
3942 };
3943
3944 static int
3945 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3946 {
3947 uint64_t cmd_type;
3948 if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3949 &cmd_type) != 0) {
3950 return (SET_ERROR(EINVAL));
3951 }
3952
3953 if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3954 cmd_type == POOL_INITIALIZE_START ||
3955 cmd_type == POOL_INITIALIZE_SUSPEND)) {
3956 return (SET_ERROR(EINVAL));
3957 }
3958
3959 nvlist_t *vdev_guids;
3960 if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3961 &vdev_guids) != 0) {
3962 return (SET_ERROR(EINVAL));
3963 }
3964
3965 for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3966 pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3967 uint64_t vdev_guid;
3968 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
3969 return (SET_ERROR(EINVAL));
3970 }
3971 }
3972
3973 spa_t *spa;
3974 int error = spa_open(poolname, &spa, FTAG);
3975 if (error != 0)
3976 return (error);
3977
3978 nvlist_t *vdev_errlist = fnvlist_alloc();
3979 int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
3980 vdev_errlist);
3981
3982 if (fnvlist_size(vdev_errlist) > 0) {
3983 fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3984 vdev_errlist);
3985 }
3986 fnvlist_free(vdev_errlist);
3987
3988 spa_close(spa, FTAG);
3989 return (total_errors > 0 ? EINVAL : 0);
3990 }
3991
3992 /*
3993 * innvl: {
3994 * "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
3995 * "trim_vdevs": { -> guids to TRIM (nvlist)
3996 * "vdev_path_1": vdev_guid_1, (uint64),
3997 * "vdev_path_2": vdev_guid_2, (uint64),
3998 * ...
3999 * },
4000 * "trim_rate" -> Target TRIM rate in bytes/sec.
4001 * "trim_secure" -> Set to request a secure TRIM.
4002 * }
4003 *
4004 * outnvl: {
4005 * "trim_vdevs": { -> TRIM errors (nvlist)
4006 * "vdev_path_1": errno, see function body for possible errnos (uint64)
4007 * "vdev_path_2": errno, ... (uint64)
4008 * ...
4009 * }
4010 * }
4011 *
4012 * EINVAL is returned for an unknown commands or if any of the provided vdev
4013 * guids have be specified with a type other than uint64.
4014 */
4015 static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
4016 {ZPOOL_TRIM_COMMAND, DATA_TYPE_UINT64, 0},
4017 {ZPOOL_TRIM_VDEVS, DATA_TYPE_NVLIST, 0},
4018 {ZPOOL_TRIM_RATE, DATA_TYPE_UINT64, ZK_OPTIONAL},
4019 {ZPOOL_TRIM_SECURE, DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
4020 };
4021
4022 static int
4023 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4024 {
4025 uint64_t cmd_type;
4026 if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4027 return (SET_ERROR(EINVAL));
4028
4029 if (!(cmd_type == POOL_TRIM_CANCEL ||
4030 cmd_type == POOL_TRIM_START ||
4031 cmd_type == POOL_TRIM_SUSPEND)) {
4032 return (SET_ERROR(EINVAL));
4033 }
4034
4035 nvlist_t *vdev_guids;
4036 if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4037 return (SET_ERROR(EINVAL));
4038
4039 for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4040 pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4041 uint64_t vdev_guid;
4042 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4043 return (SET_ERROR(EINVAL));
4044 }
4045 }
4046
4047 /* Optional, defaults to maximum rate when not provided */
4048 uint64_t rate;
4049 if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4050 rate = 0;
4051
4052 /* Optional, defaults to standard TRIM when not provided */
4053 boolean_t secure;
4054 if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4055 &secure) != 0) {
4056 secure = B_FALSE;
4057 }
4058
4059 spa_t *spa;
4060 int error = spa_open(poolname, &spa, FTAG);
4061 if (error != 0)
4062 return (error);
4063
4064 nvlist_t *vdev_errlist = fnvlist_alloc();
4065 int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4066 rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4067
4068 if (fnvlist_size(vdev_errlist) > 0)
4069 fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4070
4071 fnvlist_free(vdev_errlist);
4072
4073 spa_close(spa, FTAG);
4074 return (total_errors > 0 ? EINVAL : 0);
4075 }
4076
4077 /*
4078 * This ioctl waits for activity of a particular type to complete. If there is
4079 * no activity of that type in progress, it returns immediately, and the
4080 * returned value "waited" is false. If there is activity in progress, and no
4081 * tag is passed in, the ioctl blocks until all activity of that type is
4082 * complete, and then returns with "waited" set to true.
4083 *
4084 * If a tag is provided, it identifies a particular instance of an activity to
4085 * wait for. Currently, this is only valid for use with 'initialize', because
4086 * that is the only activity for which there can be multiple instances running
4087 * concurrently. In the case of 'initialize', the tag corresponds to the guid of
4088 * the vdev on which to wait.
4089 *
4090 * If a thread waiting in the ioctl receives a signal, the call will return
4091 * immediately, and the return value will be EINTR.
4092 *
4093 * innvl: {
4094 * "wait_activity" -> int32_t
4095 * (optional) "wait_tag" -> uint64_t
4096 * }
4097 *
4098 * outnvl: "waited" -> boolean_t
4099 */
4100 static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
4101 {ZPOOL_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
4102 {ZPOOL_WAIT_TAG, DATA_TYPE_UINT64, ZK_OPTIONAL},
4103 };
4104
4105 static int
4106 zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4107 {
4108 int32_t activity;
4109 uint64_t tag;
4110 boolean_t waited;
4111 int error;
4112
4113 if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
4114 return (EINVAL);
4115
4116 if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
4117 error = spa_wait_tag(name, activity, tag, &waited);
4118 else
4119 error = spa_wait(name, activity, &waited);
4120
4121 if (error == 0)
4122 fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
4123
4124 return (error);
4125 }
4126
4127 /*
4128 * This ioctl waits for activity of a particular type to complete. If there is
4129 * no activity of that type in progress, it returns immediately, and the
4130 * returned value "waited" is false. If there is activity in progress, and no
4131 * tag is passed in, the ioctl blocks until all activity of that type is
4132 * complete, and then returns with "waited" set to true.
4133 *
4134 * If a thread waiting in the ioctl receives a signal, the call will return
4135 * immediately, and the return value will be EINTR.
4136 *
4137 * innvl: {
4138 * "wait_activity" -> int32_t
4139 * }
4140 *
4141 * outnvl: "waited" -> boolean_t
4142 */
4143 static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
4144 {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
4145 };
4146
4147 static int
4148 zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4149 {
4150 int32_t activity;
4151 boolean_t waited = B_FALSE;
4152 int error;
4153 dsl_pool_t *dp;
4154 dsl_dir_t *dd;
4155 dsl_dataset_t *ds;
4156
4157 if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
4158 return (SET_ERROR(EINVAL));
4159
4160 if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
4161 return (SET_ERROR(EINVAL));
4162
4163 if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
4164 return (error);
4165
4166 if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
4167 dsl_pool_rele(dp, FTAG);
4168 return (error);
4169 }
4170
4171 dd = ds->ds_dir;
4172 mutex_enter(&dd->dd_activity_lock);
4173 dd->dd_activity_waiters++;
4174
4175 /*
4176 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
4177 * aren't evicted while we're waiting. Normally this is prevented by
4178 * holding the pool, but we can't do that while we're waiting since
4179 * that would prevent TXGs from syncing out. Some of the functionality
4180 * of long-holds (e.g. preventing deletion) is unnecessary for this
4181 * case, since we would cancel the waiters before proceeding with a
4182 * deletion. An alternative mechanism for keeping the dataset around
4183 * could be developed but this is simpler.
4184 */
4185 dsl_dataset_long_hold(ds, FTAG);
4186 dsl_pool_rele(dp, FTAG);
4187
4188 error = dsl_dir_wait(dd, ds, activity, &waited);
4189
4190 dsl_dataset_long_rele(ds, FTAG);
4191 dd->dd_activity_waiters--;
4192 if (dd->dd_activity_waiters == 0)
4193 cv_signal(&dd->dd_activity_cv);
4194 mutex_exit(&dd->dd_activity_lock);
4195
4196 dsl_dataset_rele(ds, FTAG);
4197
4198 if (error == 0)
4199 fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
4200
4201 return (error);
4202 }
4203
4204 /*
4205 * fsname is name of dataset to rollback (to most recent snapshot)
4206 *
4207 * innvl may contain name of expected target snapshot
4208 *
4209 * outnvl: "target" -> name of most recent snapshot
4210 * }
4211 */
4212 static const zfs_ioc_key_t zfs_keys_rollback[] = {
4213 {"target", DATA_TYPE_STRING, ZK_OPTIONAL},
4214 };
4215
4216 /* ARGSUSED */
4217 static int
4218 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4219 {
4220 zfsvfs_t *zfsvfs;
4221 zvol_state_handle_t *zv;
4222 char *target = NULL;
4223 int error;
4224
4225 (void) nvlist_lookup_string(innvl, "target", &target);
4226 if (target != NULL) {
4227 const char *cp = strchr(target, '@');
4228
4229 /*
4230 * The snap name must contain an @, and the part after it must
4231 * contain only valid characters.
4232 */
4233 if (cp == NULL ||
4234 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4235 return (SET_ERROR(EINVAL));
4236 }
4237
4238 if (getzfsvfs(fsname, &zfsvfs) == 0) {
4239 dsl_dataset_t *ds;
4240
4241 ds = dmu_objset_ds(zfsvfs->z_os);
4242 error = zfs_suspend_fs(zfsvfs);
4243 if (error == 0) {
4244 int resume_err;
4245
4246 error = dsl_dataset_rollback(fsname, target, zfsvfs,
4247 outnvl);
4248 resume_err = zfs_resume_fs(zfsvfs, ds);
4249 error = error ? error : resume_err;
4250 }
4251 zfs_vfs_rele(zfsvfs);
4252 } else if ((zv = zvol_suspend(fsname)) != NULL) {
4253 error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
4254 outnvl);
4255 zvol_resume(zv);
4256 } else {
4257 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4258 }
4259 return (error);
4260 }
4261
/*
 * Callback that unmounts the snapshot "<fsname>@<arg>", where 'arg' is the
 * short snapshot name passed as a string. Always returns 0.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	const char *shortname = arg;
	char *snap = kmem_asprintf("%s@%s", fsname, shortname);

	zfs_unmount_snap(snap);
	kmem_strfree(snap);

	return (0);
}
4274
4275 /*
4276 *
4277 * snapname is the snapshot to redact.
4278 * innvl: {
4279 * "bookname" -> (string)
4280 * shortname of the redaction bookmark to generate
4281 * "snapnv" -> (nvlist, values ignored)
4282 * snapshots to redact snapname with respect to
4283 * }
4284 *
4285 * outnvl is unused
4286 */
4287
4288 /* ARGSUSED */
4289 static const zfs_ioc_key_t zfs_keys_redact[] = {
4290 {"bookname", DATA_TYPE_STRING, 0},
4291 {"snapnv", DATA_TYPE_NVLIST, 0},
4292 };
4293 static int
4294 zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
4295 {
4296 nvlist_t *redactnvl = NULL;
4297 char *redactbook = NULL;
4298
4299 if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
4300 return (SET_ERROR(EINVAL));
4301 if (fnvlist_num_pairs(redactnvl) == 0)
4302 return (SET_ERROR(ENXIO));
4303 if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
4304 return (SET_ERROR(EINVAL));
4305
4306 return (dmu_redact_snap(snapname, redactnvl, redactbook));
4307 }
4308
4309 /*
4310 * inputs:
4311 * zc_name old name of dataset
4312 * zc_value new name of dataset
4313 * zc_cookie recursive flag (only valid for snapshots)
4314 *
4315 * outputs: none
4316 */
4317 static int
4318 zfs_ioc_rename(zfs_cmd_t *zc)
4319 {
4320 objset_t *os;
4321 dmu_objset_type_t ost;
4322 boolean_t recursive = zc->zc_cookie & 1;
4323 char *at;
4324 int err;
4325
4326 /* "zfs rename" from and to ...%recv datasets should both fail */
4327 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4328 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4329 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4330 dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4331 strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4332 return (SET_ERROR(EINVAL));
4333
4334 err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4335 if (err != 0)
4336 return (err);
4337 ost = dmu_objset_type(os);
4338 dmu_objset_rele(os, FTAG);
4339
4340 at = strchr(zc->zc_name, '@');
4341 if (at != NULL) {
4342 /* snaps must be in same fs */
4343 int error;
4344
4345 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4346 return (SET_ERROR(EXDEV));
4347 *at = '\0';
4348 if (ost == DMU_OST_ZFS) {
4349 error = dmu_objset_find(zc->zc_name,
4350 recursive_unmount, at + 1,
4351 recursive ? DS_FIND_CHILDREN : 0);
4352 if (error != 0) {
4353 *at = '@';
4354 return (error);
4355 }
4356 }
4357 error = dsl_dataset_rename_snapshot(zc->zc_name,
4358 at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4359 *at = '@';
4360
4361 return (error);
4362 } else {
4363 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4364 }
4365 }
4366
4367 static int
4368 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4369 {
4370 const char *propname = nvpair_name(pair);
4371 boolean_t issnap = (strchr(dsname, '@') != NULL);
4372 zfs_prop_t prop = zfs_name_to_prop(propname);
4373 uint64_t intval;
4374 int err;
4375
4376 if (prop == ZPROP_INVAL) {
4377 if (zfs_prop_user(propname)) {
4378 if ((err = zfs_secpolicy_write_perms(dsname,
4379 ZFS_DELEG_PERM_USERPROP, cr)))
4380 return (err);
4381 return (0);
4382 }
4383
4384 if (!issnap && zfs_prop_userquota(propname)) {
4385 const char *perm = NULL;
4386 const char *uq_prefix =
4387 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4388 const char *gq_prefix =
4389 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4390 const char *uiq_prefix =
4391 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4392 const char *giq_prefix =
4393 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4394 const char *pq_prefix =
4395 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4396 const char *piq_prefix = zfs_userquota_prop_prefixes[\
4397 ZFS_PROP_PROJECTOBJQUOTA];
4398
4399 if (strncmp(propname, uq_prefix,
4400 strlen(uq_prefix)) == 0) {
4401 perm = ZFS_DELEG_PERM_USERQUOTA;
4402 } else if (strncmp(propname, uiq_prefix,
4403 strlen(uiq_prefix)) == 0) {
4404 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4405 } else if (strncmp(propname, gq_prefix,
4406 strlen(gq_prefix)) == 0) {
4407 perm = ZFS_DELEG_PERM_GROUPQUOTA;
4408 } else if (strncmp(propname, giq_prefix,
4409 strlen(giq_prefix)) == 0) {
4410 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4411 } else if (strncmp(propname, pq_prefix,
4412 strlen(pq_prefix)) == 0) {
4413 perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4414 } else if (strncmp(propname, piq_prefix,
4415 strlen(piq_prefix)) == 0) {
4416 perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4417 } else {
4418 /* {USER|GROUP|PROJECT}USED are read-only */
4419 return (SET_ERROR(EINVAL));
4420 }
4421
4422 if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
4423 return (err);
4424 return (0);
4425 }
4426
4427 return (SET_ERROR(EINVAL));
4428 }
4429
4430 if (issnap)
4431 return (SET_ERROR(EINVAL));
4432
4433 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4434 /*
4435 * dsl_prop_get_all_impl() returns properties in this
4436 * format.
4437 */
4438 nvlist_t *attrs;
4439 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4440 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4441 &pair) == 0);
4442 }
4443
4444 /*
4445 * Check that this value is valid for this pool version
4446 */
4447 switch (prop) {
4448 case ZFS_PROP_COMPRESSION:
4449 /*
4450 * If the user specified gzip compression, make sure
4451 * the SPA supports it. We ignore any errors here since
4452 * we'll catch them later.
4453 */
4454 if (nvpair_value_uint64(pair, &intval) == 0) {
4455 if (intval >= ZIO_COMPRESS_GZIP_1 &&
4456 intval <= ZIO_COMPRESS_GZIP_9 &&
4457 zfs_earlier_version(dsname,
4458 SPA_VERSION_GZIP_COMPRESSION)) {
4459 return (SET_ERROR(ENOTSUP));
4460 }
4461
4462 if (intval == ZIO_COMPRESS_ZLE &&
4463 zfs_earlier_version(dsname,
4464 SPA_VERSION_ZLE_COMPRESSION))
4465 return (SET_ERROR(ENOTSUP));
4466
4467 if (intval == ZIO_COMPRESS_LZ4) {
4468 spa_t *spa;
4469
4470 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4471 return (err);
4472
4473 if (!spa_feature_is_enabled(spa,
4474 SPA_FEATURE_LZ4_COMPRESS)) {
4475 spa_close(spa, FTAG);
4476 return (SET_ERROR(ENOTSUP));
4477 }
4478 spa_close(spa, FTAG);
4479 }
4480
4481 /*
4482 * If this is a bootable dataset then
4483 * verify that the compression algorithm
4484 * is supported for booting. We must return
4485 * something other than ENOTSUP since it
4486 * implies a downrev pool version.
4487 */
4488 if (zfs_is_bootfs(dsname) &&
4489 !BOOTFS_COMPRESS_VALID(intval)) {
4490 return (SET_ERROR(ERANGE));
4491 }
4492 }
4493 break;
4494
4495 case ZFS_PROP_COPIES:
4496 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4497 return (SET_ERROR(ENOTSUP));
4498 break;
4499
4500 case ZFS_PROP_VOLBLOCKSIZE:
4501 case ZFS_PROP_RECORDSIZE:
4502 /* Record sizes above 128k need the feature to be enabled */
4503 if (nvpair_value_uint64(pair, &intval) == 0 &&
4504 intval > SPA_OLD_MAXBLOCKSIZE) {
4505 spa_t *spa;
4506
4507 /*
4508 * We don't allow setting the property above 1MB,
4509 * unless the tunable has been changed.
4510 */
4511 if (intval > zfs_max_recordsize ||
4512 intval > SPA_MAXBLOCKSIZE)
4513 return (SET_ERROR(ERANGE));
4514
4515 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4516 return (err);
4517
4518 if (!spa_feature_is_enabled(spa,
4519 SPA_FEATURE_LARGE_BLOCKS)) {
4520 spa_close(spa, FTAG);
4521 return (SET_ERROR(ENOTSUP));
4522 }
4523 spa_close(spa, FTAG);
4524 }
4525 break;
4526
4527 case ZFS_PROP_DNODESIZE:
4528 /* Dnode sizes above 512 need the feature to be enabled */
4529 if (nvpair_value_uint64(pair, &intval) == 0 &&
4530 intval != ZFS_DNSIZE_LEGACY) {
4531 spa_t *spa;
4532
4533 /*
4534 * If this is a bootable dataset then
4535 * we don't allow large (>512B) dnodes,
4536 * because GRUB doesn't support them.
4537 */
4538 if (zfs_is_bootfs(dsname) &&
4539 intval != ZFS_DNSIZE_LEGACY) {
4540 return (SET_ERROR(EDOM));
4541 }
4542
4543 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4544 return (err);
4545
4546 if (!spa_feature_is_enabled(spa,
4547 SPA_FEATURE_LARGE_DNODE)) {
4548 spa_close(spa, FTAG);
4549 return (SET_ERROR(ENOTSUP));
4550 }
4551 spa_close(spa, FTAG);
4552 }
4553 break;
4554
4555 case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4556 /*
4557 * This property could require the allocation classes
4558 * feature to be active for setting, however we allow
4559 * it so that tests of settable properties succeed.
4560 * The CLI will issue a warning in this case.
4561 */
4562 break;
4563
4564 case ZFS_PROP_SHARESMB:
4565 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4566 return (SET_ERROR(ENOTSUP));
4567 break;
4568
4569 case ZFS_PROP_ACLINHERIT:
4570 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4571 nvpair_value_uint64(pair, &intval) == 0) {
4572 if (intval == ZFS_ACL_PASSTHROUGH_X &&
4573 zfs_earlier_version(dsname,
4574 SPA_VERSION_PASSTHROUGH_X))
4575 return (SET_ERROR(ENOTSUP));
4576 }
4577 break;
4578 case ZFS_PROP_CHECKSUM:
4579 case ZFS_PROP_DEDUP:
4580 {
4581 spa_feature_t feature;
4582 spa_t *spa;
4583 int err;
4584
4585 /* dedup feature version checks */
4586 if (prop == ZFS_PROP_DEDUP &&
4587 zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4588 return (SET_ERROR(ENOTSUP));
4589
4590 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4591 nvpair_value_uint64(pair, &intval) == 0) {
4592 /* check prop value is enabled in features */
4593 feature = zio_checksum_to_feature(
4594 intval & ZIO_CHECKSUM_MASK);
4595 if (feature == SPA_FEATURE_NONE)
4596 break;
4597
4598 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4599 return (err);
4600
4601 if (!spa_feature_is_enabled(spa, feature)) {
4602 spa_close(spa, FTAG);
4603 return (SET_ERROR(ENOTSUP));
4604 }
4605 spa_close(spa, FTAG);
4606 }
4607 break;
4608 }
4609
4610 default:
4611 break;
4612 }
4613
4614 return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4615 }
4616
4617 /*
4618 * Removes properties from the given props list that fail permission checks
4619 * needed to clear them and to restore them in case of a receive error. For each
4620 * property, make sure we have both set and inherit permissions.
4621 *
4622 * Returns the first error encountered if any permission checks fail. If the
4623 * caller provides a non-NULL errlist, it also gives the complete list of names
4624 * of all the properties that failed a permission check along with the
4625 * corresponding error numbers. The caller is responsible for freeing the
4626 * returned errlist.
4627 *
4628 * If every property checks out successfully, zero is returned and the list
4629 * pointed at by errlist is NULL.
4630 */
4631 static int
4632 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4633 {
4634 zfs_cmd_t *zc;
4635 nvpair_t *pair, *next_pair;
4636 nvlist_t *errors;
4637 int err, rv = 0;
4638
4639 if (props == NULL)
4640 return (0);
4641
4642 VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4643
4644 zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4645 (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
4646 pair = nvlist_next_nvpair(props, NULL);
4647 while (pair != NULL) {
4648 next_pair = nvlist_next_nvpair(props, pair);
4649
4650 (void) strlcpy(zc->zc_value, nvpair_name(pair),
4651 sizeof (zc->zc_value));
4652 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4653 (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4654 VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4655 VERIFY(nvlist_add_int32(errors,
4656 zc->zc_value, err) == 0);
4657 }
4658 pair = next_pair;
4659 }
4660 kmem_free(zc, sizeof (zfs_cmd_t));
4661
4662 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4663 nvlist_free(errors);
4664 errors = NULL;
4665 } else {
4666 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4667 }
4668
4669 if (errlist == NULL)
4670 nvlist_free(errors);
4671 else
4672 *errlist = errors;
4673
4674 return (rv);
4675 }
4676
4677 static boolean_t
4678 propval_equals(nvpair_t *p1, nvpair_t *p2)
4679 {
4680 if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4681 /* dsl_prop_get_all_impl() format */
4682 nvlist_t *attrs;
4683 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4684 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4685 &p1) == 0);
4686 }
4687
4688 if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4689 nvlist_t *attrs;
4690 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4691 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4692 &p2) == 0);
4693 }
4694
4695 if (nvpair_type(p1) != nvpair_type(p2))
4696 return (B_FALSE);
4697
4698 if (nvpair_type(p1) == DATA_TYPE_STRING) {
4699 char *valstr1, *valstr2;
4700
4701 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4702 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4703 return (strcmp(valstr1, valstr2) == 0);
4704 } else {
4705 uint64_t intval1, intval2;
4706
4707 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4708 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4709 return (intval1 == intval2);
4710 }
4711 }
4712
4713 /*
4714 * Remove properties from props if they are not going to change (as determined
4715 * by comparison with origprops). Remove them from origprops as well, since we
4716 * do not need to clear or restore properties that won't change.
4717 */
4718 static void
4719 props_reduce(nvlist_t *props, nvlist_t *origprops)
4720 {
4721 nvpair_t *pair, *next_pair;
4722
4723 if (origprops == NULL)
4724 return; /* all props need to be received */
4725
4726 pair = nvlist_next_nvpair(props, NULL);
4727 while (pair != NULL) {
4728 const char *propname = nvpair_name(pair);
4729 nvpair_t *match;
4730
4731 next_pair = nvlist_next_nvpair(props, pair);
4732
4733 if ((nvlist_lookup_nvpair(origprops, propname,
4734 &match) != 0) || !propval_equals(pair, match))
4735 goto next; /* need to set received value */
4736
4737 /* don't clear the existing received value */
4738 (void) nvlist_remove_nvpair(origprops, match);
4739 /* don't bother receiving the property */
4740 (void) nvlist_remove_nvpair(props, pair);
4741 next:
4742 pair = next_pair;
4743 }
4744 }
4745
4746 /*
4747 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4748 * For example, refquota cannot be set until after the receipt of a dataset,
4749 * because in replication streams, an older/earlier snapshot may exceed the
4750 * refquota. We want to receive the older/earlier snapshot, but setting
4751 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4752 * the older/earlier snapshot from being received (with EDQUOT).
4753 *
4754 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4755 *
4756 * libzfs will need to be judicious handling errors encountered by props
4757 * extracted by this function.
4758 */
4759 static nvlist_t *
4760 extract_delay_props(nvlist_t *props)
4761 {
4762 nvlist_t *delayprops;
4763 nvpair_t *nvp, *tmp;
4764 static const zfs_prop_t delayable[] = {
4765 ZFS_PROP_REFQUOTA,
4766 ZFS_PROP_KEYLOCATION,
4767 0
4768 };
4769 int i;
4770
4771 VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4772
4773 for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4774 nvp = nvlist_next_nvpair(props, nvp)) {
4775 /*
4776 * strcmp() is safe because zfs_prop_to_name() always returns
4777 * a bounded string.
4778 */
4779 for (i = 0; delayable[i] != 0; i++) {
4780 if (strcmp(zfs_prop_to_name(delayable[i]),
4781 nvpair_name(nvp)) == 0) {
4782 break;
4783 }
4784 }
4785 if (delayable[i] != 0) {
4786 tmp = nvlist_prev_nvpair(props, nvp);
4787 VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4788 VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4789 nvp = tmp;
4790 }
4791 }
4792
4793 if (nvlist_empty(delayprops)) {
4794 nvlist_free(delayprops);
4795 delayprops = NULL;
4796 }
4797 return (delayprops);
4798 }
4799
4800 static void
4801 zfs_allow_log_destroy(void *arg)
4802 {
4803 char *poolname = arg;
4804
4805 if (poolname != NULL)
4806 kmem_strfree(poolname);
4807 }
4808
#ifdef ZFS_DEBUG
/*
 * Debug-only fault injection switch. When set, zfs_ioc_recv_impl() clears it
 * and forces its result to an error so that the property restore path runs.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4812
/*
 * Shared receive implementation used by zfs_ioc_recv() and
 * zfs_ioc_recv_new().
 *
 * Reads a send stream from input_fd into tofs@tosnap:
 *   1. dmu_recv_begin() starts the receive.
 *   2. For an existing dataset, the current received properties (and, when
 *      localprops are given, the current properties) are stashed so they can
 *      be restored if the receive fails.
 *   3. recvprops and localprops are applied, except for the ones
 *      extract_delay_props() holds back until after the stream is in.
 *   4. dmu_recv_stream() and dmu_recv_end() consume and finish the stream.
 *   5. The delayed properties are set on success; on failure the stashed
 *      properties are restored.
 *
 * On return *read_bytes holds how far the file offset advanced, and
 * *errflags carries ZPROP_ERR_NOCLEAR / ZPROP_ERR_NORESTORE when properties
 * could not be cleared or restored. The return value is the receive error,
 * or, if the receive succeeded, the error from dsl_prop_set_hasrecvd().
 *
 * nvlist 'errors' is always allocated. It will contain descriptions of
 * encountered errors, if any. It's the caller's responsibility to free.
 */
static int
zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
    nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
    boolean_t resumable, int input_fd,
    dmu_replay_record_t *begin_record, uint64_t *read_bytes,
    uint64_t *errflags, nvlist_t **errors)
{
	dmu_recv_cookie_t drc;
	int error = 0;
	int props_error = 0;
	offset_t off, noff;
	nvlist_t *local_delayprops = NULL;
	nvlist_t *recv_delayprops = NULL;
	nvlist_t *origprops = NULL; /* existing properties */
	nvlist_t *origrecvd = NULL; /* existing received properties */
	boolean_t first_recvd_props = B_FALSE;
	boolean_t tofs_was_redacted;
	zfs_file_t *input_fp;

	/* Initialize every output so that early returns leave them defined. */
	*read_bytes = 0;
	*errflags = 0;
	*errors = fnvlist_alloc();
	off = 0;

	if ((error = zfs_file_get(input_fd, &input_fp)))
		return (error);

	/* noff remembers the starting offset; off advances with the stream. */
	noff = off = zfs_file_off(input_fp);
	error = dmu_recv_begin(tofs, tosnap, begin_record, force,
	    resumable, localprops, hidden_args, origin, &drc, input_fp,
	    &off);
	if (error != 0)
		goto out;
	tofs_was_redacted = dsl_get_redacted(drc.drc_ds);

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (recvprops != NULL && !drc.drc_newfs) {
		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
		    SPA_VERSION_RECVD_PROPS &&
		    !dsl_prop_get_hasrecvd(tofs))
			first_recvd_props = B_TRUE;

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties,
		 * so stash away the existing ones.
		 */
		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(recvprops, origrecvd);
			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
				(void) nvlist_merge(*errors, errlist, 0);
			nvlist_free(errlist);

			if (clear_received_props(tofs, origrecvd,
			    first_recvd_props ? NULL : recvprops) != 0)
				*errflags |= ZPROP_ERR_NOCLEAR;
		} else {
			*errflags |= ZPROP_ERR_NOCLEAR;
		}
	}

	/*
	 * Stash away existing properties so we can restore them on error unless
	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
	 * case "origrecvd" will take care of that.
	 */
	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
		objset_t *os;
		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
			if (dsl_prop_get_all(os, &origprops) != 0) {
				*errflags |= ZPROP_ERR_NOCLEAR;
			}
			dmu_objset_rele(os, FTAG);
		} else {
			*errflags |= ZPROP_ERR_NOCLEAR;
		}
	}

	if (recvprops != NULL) {
		props_error = dsl_prop_set_hasrecvd(tofs);

		if (props_error == 0) {
			/* Hold back the delayable props until after the recv. */
			recv_delayprops = extract_delay_props(recvprops);
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    recvprops, *errors);
		}
	}

	if (localprops != NULL) {
		/* Split localprops into overrides (-o) and exclusions (-x). */
		nvlist_t *oprops = fnvlist_alloc();
		nvlist_t *xprops = fnvlist_alloc();
		nvpair_t *nvp = NULL;

		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
				/* -x property */
				const char *name = nvpair_name(nvp);
				zfs_prop_t prop = zfs_name_to_prop(name);
				if (prop != ZPROP_INVAL) {
					if (!zfs_prop_inheritable(prop))
						continue;
				} else if (!zfs_prop_user(name))
					continue;
				fnvlist_add_boolean(xprops, name);
			} else {
				/* -o property=value */
				fnvlist_add_nvpair(oprops, nvp);
			}
		}

		local_delayprops = extract_delay_props(oprops);
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
		    oprops, *errors);
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
		    xprops, *errors);

		nvlist_free(oprops);
		nvlist_free(xprops);
	}

	error = dmu_recv_stream(&drc, &off);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;
		zvol_state_handle_t *zv = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			dsl_dataset_t *ds;
			int end_err;
			boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
			    begin_record->drr_u.drr_begin.
			    drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc, zfsvfs);
			/*
			 * If the dataset was not redacted, but we received a
			 * redacted stream onto it, we need to unmount the
			 * dataset. Otherwise, resume the filesystem.
			 */
			if (error == 0 && !drc.drc_newfs &&
			    stream_is_redacted && !tofs_was_redacted) {
				error = zfs_end_fs(zfsvfs, ds);
			} else if (error == 0) {
				error = zfs_resume_fs(zfsvfs, ds);
			}
			/* A suspend/resume error takes precedence. */
			error = error ? error : end_err;
			zfs_vfs_rele(zfsvfs);
		} else if ((zv = zvol_suspend(tofs)) != NULL) {
			error = dmu_recv_end(&drc, zvol_tag(zv));
			zvol_resume(zv);
		} else {
			error = dmu_recv_end(&drc, NULL);
		}

		/* Set delayed properties now, after we're done receiving. */
		if (recv_delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    recv_delayprops, *errors);
		}
		if (local_delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
			    local_delayprops, *errors);
		}
	}

	/*
	 * Merge delayed props back in with initial props, in case
	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
	 * we have to make sure clear_received_props() includes
	 * the delayed properties).
	 *
	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
	 * using ASSERT() will be just like a VERIFY.
	 */
	if (recv_delayprops != NULL) {
		ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
		nvlist_free(recv_delayprops);
	}
	if (local_delayprops != NULL) {
		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
		nvlist_free(local_delayprops);
	}
	*read_bytes = off - noff;

#ifdef ZFS_DEBUG
	/* One-shot fault injection: consume the flag and fake a failure. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif

	/*
	 * On error, restore the original props.
	 */
	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
		if (clear_received_props(tofs, recvprops, NULL) != 0) {
			/*
			 * We failed to clear the received properties.
			 * Since we may have left a $recvd value on the
			 * system, we can't clear the $hasrecvd flag.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		} else if (first_recvd_props) {
			dsl_prop_unset_hasrecvd(tofs);
		}

		if (origrecvd == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			*errflags |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origrecvd != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origrecvd, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		}
	}
	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
	    !first_recvd_props) {
		nvlist_t *setprops;
		nvlist_t *inheritprops;
		nvpair_t *nvp;

		if (origprops == NULL) {
			/* We failed to stash the original properties. */
			*errflags |= ZPROP_ERR_NORESTORE;
			goto out;
		}

		/* Restore original props */
		setprops = fnvlist_alloc();
		inheritprops = fnvlist_alloc();
		nvp = NULL;
		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
			const char *name = nvpair_name(nvp);
			const char *source;
			nvlist_t *attrs;

			if (!nvlist_exists(origprops, name)) {
				/*
				 * Property was not present or was explicitly
				 * inherited before the receive, restore this.
				 */
				fnvlist_add_boolean(inheritprops, name);
				continue;
			}
			attrs = fnvlist_lookup_nvlist(origprops, name);
			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);

			/* Skip received properties */
			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
				continue;

			if (strcmp(source, tofs) == 0) {
				/* Property was locally set */
				fnvlist_add_nvlist(setprops, name, attrs);
			} else {
				/* Property was implicitly inherited */
				fnvlist_add_boolean(inheritprops, name);
			}
		}

		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
		    NULL) != 0)
			*errflags |= ZPROP_ERR_NORESTORE;
		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
		    NULL) != 0)
			*errflags |= ZPROP_ERR_NORESTORE;

		nvlist_free(setprops);
		nvlist_free(inheritprops);
	}
out:
	zfs_file_put(input_fd);
	nvlist_free(origrecvd);
	nvlist_free(origprops);

	/* A clean receive still reports a failed dsl_prop_set_hasrecvd(). */
	if (error == 0)
		error = props_error;

	return (error);
}
5131
5132 /*
5133 * inputs:
5134 * zc_name name of containing filesystem (unused)
5135 * zc_nvlist_src{_size} nvlist of properties to apply
5136 * zc_nvlist_conf{_size} nvlist of properties to exclude
5137 * (DATA_TYPE_BOOLEAN) and override (everything else)
5138 * zc_value name of snapshot to create
5139 * zc_string name of clone origin (if DRR_FLAG_CLONE)
5140 * zc_cookie file descriptor to recv from
5141 * zc_begin_record the BEGIN record of the stream (not byteswapped)
5142 * zc_guid force flag
5143 *
5144 * outputs:
5145 * zc_cookie number of bytes read
5146 * zc_obj zprop_errflags_t
5147 * zc_nvlist_dst{_size} error for each unapplied received property
5148 */
5149 static int
5150 zfs_ioc_recv(zfs_cmd_t *zc)
5151 {
5152 dmu_replay_record_t begin_record;
5153 nvlist_t *errors = NULL;
5154 nvlist_t *recvdprops = NULL;
5155 nvlist_t *localprops = NULL;
5156 char *origin = NULL;
5157 char *tosnap;
5158 char tofs[ZFS_MAX_DATASET_NAME_LEN];
5159 int error = 0;
5160
5161 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5162 strchr(zc->zc_value, '@') == NULL ||
5163 strchr(zc->zc_value, '%'))
5164 return (SET_ERROR(EINVAL));
5165
5166 (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5167 tosnap = strchr(tofs, '@');
5168 *tosnap++ = '\0';
5169
5170 if (zc->zc_nvlist_src != 0 &&
5171 (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5172 zc->zc_iflags, &recvdprops)) != 0)
5173 return (error);
5174
5175 if (zc->zc_nvlist_conf != 0 &&
5176 (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5177 zc->zc_iflags, &localprops)) != 0)
5178 return (error);
5179
5180 if (zc->zc_string[0])
5181 origin = zc->zc_string;
5182
5183 begin_record.drr_type = DRR_BEGIN;
5184 begin_record.drr_payloadlen = 0;
5185 begin_record.drr_u.drr_begin = zc->zc_begin_record;
5186
5187 error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5188 NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
5189 &zc->zc_cookie, &zc->zc_obj, &errors);
5190 nvlist_free(recvdprops);
5191 nvlist_free(localprops);
5192
5193 /*
5194 * Now that all props, initial and delayed, are set, report the prop
5195 * errors to the caller.
5196 */
5197 if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5198 (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5199 put_nvlist(zc, errors) != 0)) {
5200 /*
5201 * Caller made zc->zc_nvlist_dst less than the minimum expected
5202 * size or supplied an invalid address.
5203 */
5204 error = SET_ERROR(EINVAL);
5205 }
5206
5207 nvlist_free(errors);
5208
5209 return (error);
5210 }
5211
5212 /*
5213 * innvl: {
5214 * "snapname" -> full name of the snapshot to create
5215 * (optional) "props" -> received properties to set (nvlist)
5216 * (optional) "localprops" -> override and exclude properties (nvlist)
5217 * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
5218 * "begin_record" -> non-byteswapped dmu_replay_record_t
5219 * "input_fd" -> file descriptor to read stream from (int32)
5220 * (optional) "force" -> force flag (value ignored)
5221 * (optional) "resumable" -> resumable flag (value ignored)
5222 * (optional) "cleanup_fd" -> unused
5223 * (optional) "action_handle" -> unused
5224 * (optional) "hidden_args" -> { "wkeydata" -> value }
5225 * }
5226 *
5227 * outnvl: {
5228 * "read_bytes" -> number of bytes read
5229 * "error_flags" -> zprop_errflags_t
5230 * "errors" -> error for each unapplied received property (nvlist)
5231 * }
5232 */
/*
 * Key table for the innvl layout described above. Each entry gives the key
 * name, the nvpair data type expected for it, and ZK_OPTIONAL for the keys
 * the comment above marks as optional (0 otherwise).
 */
static const zfs_ioc_key_t zfs_keys_recv_new[] = {
	{"snapname",		DATA_TYPE_STRING,	0},
	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
	{"input_fd",		DATA_TYPE_INT32,	0},
	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
5246
5247 static int
5248 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
5249 {
5250 dmu_replay_record_t *begin_record;
5251 uint_t begin_record_size;
5252 nvlist_t *errors = NULL;
5253 nvlist_t *recvprops = NULL;
5254 nvlist_t *localprops = NULL;
5255 nvlist_t *hidden_args = NULL;
5256 char *snapname;
5257 char *origin = NULL;
5258 char *tosnap;
5259 char tofs[ZFS_MAX_DATASET_NAME_LEN];
5260 boolean_t force;
5261 boolean_t resumable;
5262 uint64_t read_bytes = 0;
5263 uint64_t errflags = 0;
5264 int input_fd = -1;
5265 int error;
5266
5267 snapname = fnvlist_lookup_string(innvl, "snapname");
5268
5269 if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
5270 strchr(snapname, '@') == NULL ||
5271 strchr(snapname, '%'))
5272 return (SET_ERROR(EINVAL));
5273
5274 (void) strlcpy(tofs, snapname, sizeof (tofs));
5275 tosnap = strchr(tofs, '@');
5276 *tosnap++ = '\0';
5277
5278 error = nvlist_lookup_string(innvl, "origin", &origin);
5279 if (error && error != ENOENT)
5280 return (error);
5281
5282 error = nvlist_lookup_byte_array(innvl, "begin_record",
5283 (uchar_t **)&begin_record, &begin_record_size);
5284 if (error != 0 || begin_record_size != sizeof (*begin_record))
5285 return (SET_ERROR(EINVAL));
5286
5287 input_fd = fnvlist_lookup_int32(innvl, "input_fd");
5288
5289 force = nvlist_exists(innvl, "force");
5290 resumable = nvlist_exists(innvl, "resumable");
5291
5292 /* we still use "props" here for backwards compatibility */
5293 error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
5294 if (error && error != ENOENT)
5295 return (error);
5296
5297 error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
5298 if (error && error != ENOENT)
5299 return (error);
5300
5301 error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
5302 if (error && error != ENOENT)
5303 return (error);
5304
5305 error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
5306 hidden_args, force, resumable, input_fd, begin_record,
5307 &read_bytes, &errflags, &errors);
5308
5309 fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
5310 fnvlist_add_uint64(outnvl, "error_flags", errflags);
5311 fnvlist_add_nvlist(outnvl, "errors", errors);
5312
5313 nvlist_free(errors);
5314 nvlist_free(recvprops);
5315 nvlist_free(localprops);
5316
5317 return (error);
5318 }
5319
/*
 * Argument bundle passed from dump_bytes() to dump_bytes_cb(): describes one
 * write and carries its result back.
 */
typedef struct dump_bytes_io {
	zfs_file_t	*dbi_fp;	/* file to write to */
	caddr_t		dbi_buf;	/* data to write */
	int		dbi_len;	/* number of bytes in dbi_buf */
	int		dbi_err;	/* result of zfs_file_write() */
} dump_bytes_io_t;
5326
5327 static void
5328 dump_bytes_cb(void *arg)
5329 {
5330 dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
5331 zfs_file_t *fp;
5332 caddr_t buf;
5333
5334 fp = dbi->dbi_fp;
5335 buf = dbi->dbi_buf;
5336
5337 dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
5338 }
5339
5340 static int
5341 dump_bytes(objset_t *os, void *buf, int len, void *arg)
5342 {
5343 dump_bytes_io_t dbi;
5344
5345 dbi.dbi_fp = arg;
5346 dbi.dbi_buf = buf;
5347 dbi.dbi_len = len;
5348
5349 #if defined(HAVE_LARGE_STACKS)
5350 dump_bytes_cb(&dbi);
5351 #else
5352 /*
5353 * The vn_rdwr() call is performed in a taskq to ensure that there is
5354 * always enough stack space to write safely to the target filesystem.
5355 * The ZIO_TYPE_FREE threads are used because there can be a lot of
5356 * them and they are used in vdev_file.c for a similar purpose.
5357 */
5358 spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
5359 ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
5360 #endif /* HAVE_LARGE_STACKS */
5361
5362 return (dbi.dbi_err);
5363 }
5364
5365 /*
5366 * inputs:
5367 * zc_name name of snapshot to send
5368 * zc_cookie file descriptor to send stream to
5369 * zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
5370 * zc_sendobj objsetid of snapshot to send
5371 * zc_fromobj objsetid of incremental fromsnap (may be zero)
5372 * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
5373 * output size in zc_objset_type.
5374 * zc_flags lzc_send_flags
5375 *
5376 * outputs:
5377 * zc_objset_type estimated size, if zc_guid is set
5378 *
5379 * NOTE: This is no longer the preferred interface, any new functionality
5380 * should be added to zfs_ioc_send_new() instead.
5381 */
5382 static int
5383 zfs_ioc_send(zfs_cmd_t *zc)
5384 {
5385 int error;
5386 offset_t off;
5387 boolean_t estimate = (zc->zc_guid != 0);
5388 boolean_t embedok = (zc->zc_flags & 0x1);
5389 boolean_t large_block_ok = (zc->zc_flags & 0x2);
5390 boolean_t compressok = (zc->zc_flags & 0x4);
5391 boolean_t rawok = (zc->zc_flags & 0x8);
5392 boolean_t savedok = (zc->zc_flags & 0x10);
5393
5394 if (zc->zc_obj != 0) {
5395 dsl_pool_t *dp;
5396 dsl_dataset_t *tosnap;
5397
5398 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5399 if (error != 0)
5400 return (error);
5401
5402 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5403 if (error != 0) {
5404 dsl_pool_rele(dp, FTAG);
5405 return (error);
5406 }
5407
5408 if (dsl_dir_is_clone(tosnap->ds_dir))
5409 zc->zc_fromobj =
5410 dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5411 dsl_dataset_rele(tosnap, FTAG);
5412 dsl_pool_rele(dp, FTAG);
5413 }
5414
5415 if (estimate) {
5416 dsl_pool_t *dp;
5417 dsl_dataset_t *tosnap;
5418 dsl_dataset_t *fromsnap = NULL;
5419
5420 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5421 if (error != 0)
5422 return (error);
5423
5424 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
5425 FTAG, &tosnap);
5426 if (error != 0) {
5427 dsl_pool_rele(dp, FTAG);
5428 return (error);
5429 }
5430
5431 if (zc->zc_fromobj != 0) {
5432 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5433 FTAG, &fromsnap);
5434 if (error != 0) {
5435 dsl_dataset_rele(tosnap, FTAG);
5436 dsl_pool_rele(dp, FTAG);
5437 return (error);
5438 }
5439 }
5440
5441 error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
5442 compressok || rawok, savedok, &zc->zc_objset_type);
5443
5444 if (fromsnap != NULL)
5445 dsl_dataset_rele(fromsnap, FTAG);
5446 dsl_dataset_rele(tosnap, FTAG);
5447 dsl_pool_rele(dp, FTAG);
5448 } else {
5449 zfs_file_t *fp;
5450 dmu_send_outparams_t out = {0};
5451
5452 if ((error = zfs_file_get(zc->zc_cookie, &fp)))
5453 return (error);
5454
5455 off = zfs_file_off(fp);
5456 out.dso_outfunc = dump_bytes;
5457 out.dso_arg = fp;
5458 out.dso_dryrun = B_FALSE;
5459 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5460 zc->zc_fromobj, embedok, large_block_ok, compressok,
5461 rawok, savedok, zc->zc_cookie, &off, &out);
5462
5463 zfs_file_put(zc->zc_cookie);
5464 }
5465 return (error);
5466 }
5467
5468 /*
5469 * inputs:
5470 * zc_name name of snapshot on which to report progress
5471 * zc_cookie file descriptor of send stream
5472 *
5473 * outputs:
5474 * zc_cookie number of bytes written in send stream thus far
5475 * zc_objset_type logical size of data traversed by send thus far
5476 */
5477 static int
5478 zfs_ioc_send_progress(zfs_cmd_t *zc)
5479 {
5480 dsl_pool_t *dp;
5481 dsl_dataset_t *ds;
5482 dmu_sendstatus_t *dsp = NULL;
5483 int error;
5484
5485 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5486 if (error != 0)
5487 return (error);
5488
5489 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5490 if (error != 0) {
5491 dsl_pool_rele(dp, FTAG);
5492 return (error);
5493 }
5494
5495 mutex_enter(&ds->ds_sendstream_lock);
5496
5497 /*
5498 * Iterate over all the send streams currently active on this dataset.
5499 * If there's one which matches the specified file descriptor _and_ the
5500 * stream was started by the current process, return the progress of
5501 * that stream.
5502 */
5503
5504 for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5505 dsp = list_next(&ds->ds_sendstreams, dsp)) {
5506 if (dsp->dss_outfd == zc->zc_cookie &&
5507 zfs_proc_is_caller(dsp->dss_proc))
5508 break;
5509 }
5510
5511 if (dsp != NULL) {
5512 zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
5513 0, 0);
5514 /* This is the closest thing we have to atomic_read_64. */
5515 zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
5516 } else {
5517 error = SET_ERROR(ENOENT);
5518 }
5519
5520 mutex_exit(&ds->ds_sendstream_lock);
5521 dsl_dataset_rele(ds, FTAG);
5522 dsl_pool_rele(dp, FTAG);
5523 return (error);
5524 }
5525
5526 static int
5527 zfs_ioc_inject_fault(zfs_cmd_t *zc)
5528 {
5529 int id, error;
5530
5531 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5532 &zc->zc_inject_record);
5533
5534 if (error == 0)
5535 zc->zc_guid = (uint64_t)id;
5536
5537 return (error);
5538 }
5539
5540 static int
5541 zfs_ioc_clear_fault(zfs_cmd_t *zc)
5542 {
5543 return (zio_clear_fault((int)zc->zc_guid));
5544 }
5545
5546 static int
5547 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5548 {
5549 int id = (int)zc->zc_guid;
5550 int error;
5551
5552 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5553 &zc->zc_inject_record);
5554
5555 zc->zc_guid = id;
5556
5557 return (error);
5558 }
5559
5560 static int
5561 zfs_ioc_error_log(zfs_cmd_t *zc)
5562 {
5563 spa_t *spa;
5564 int error;
5565 size_t count = (size_t)zc->zc_nvlist_dst_size;
5566
5567 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5568 return (error);
5569
5570 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5571 &count);
5572 if (error == 0)
5573 zc->zc_nvlist_dst_size = count;
5574 else
5575 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5576
5577 spa_close(spa, FTAG);
5578
5579 return (error);
5580 }
5581
5582 static int
5583 zfs_ioc_clear(zfs_cmd_t *zc)
5584 {
5585 spa_t *spa;
5586 vdev_t *vd;
5587 int error;
5588
5589 /*
5590 * On zpool clear we also fix up missing slogs
5591 */
5592 mutex_enter(&spa_namespace_lock);
5593 spa = spa_lookup(zc->zc_name);
5594 if (spa == NULL) {
5595 mutex_exit(&spa_namespace_lock);
5596 return (SET_ERROR(EIO));
5597 }
5598 if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5599 /* we need to let spa_open/spa_load clear the chains */
5600 spa_set_log_state(spa, SPA_LOG_CLEAR);
5601 }
5602 spa->spa_last_open_failed = 0;
5603 mutex_exit(&spa_namespace_lock);
5604
5605 if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5606 error = spa_open(zc->zc_name, &spa, FTAG);
5607 } else {
5608 nvlist_t *policy;
5609 nvlist_t *config = NULL;
5610
5611 if (zc->zc_nvlist_src == 0)
5612 return (SET_ERROR(EINVAL));
5613
5614 if ((error = get_nvlist(zc->zc_nvlist_src,
5615 zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5616 error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5617 policy, &config);
5618 if (config != NULL) {
5619 int err;
5620
5621 if ((err = put_nvlist(zc, config)) != 0)
5622 error = err;
5623 nvlist_free(config);
5624 }
5625 nvlist_free(policy);
5626 }
5627 }
5628
5629 if (error != 0)
5630 return (error);
5631
5632 /*
5633 * If multihost is enabled, resuming I/O is unsafe as another
5634 * host may have imported the pool.
5635 */
5636 if (spa_multihost(spa) && spa_suspended(spa))
5637 return (SET_ERROR(EINVAL));
5638
5639 spa_vdev_state_enter(spa, SCL_NONE);
5640
5641 if (zc->zc_guid == 0) {
5642 vd = NULL;
5643 } else {
5644 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5645 if (vd == NULL) {
5646 error = SET_ERROR(ENODEV);
5647 (void) spa_vdev_state_exit(spa, NULL, error);
5648 spa_close(spa, FTAG);
5649 return (error);
5650 }
5651 }
5652
5653 vdev_clear(spa, vd);
5654
5655 (void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
5656 NULL : spa->spa_root_vdev, 0);
5657
5658 /*
5659 * Resume any suspended I/Os.
5660 */
5661 if (zio_resume(spa) != 0)
5662 error = SET_ERROR(EIO);
5663
5664 spa_close(spa, FTAG);
5665
5666 return (error);
5667 }
5668
5669 /*
5670 * Reopen all the vdevs associated with the pool.
5671 *
5672 * innvl: {
5673 * "scrub_restart" -> when true and scrub is running, allow to restart
5674 * scrub as the side effect of the reopen (boolean).
5675 * }
5676 *
5677 * outnvl is unused
5678 */
5679 static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
5680 {"scrub_restart", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
5681 };
5682
5683 /* ARGSUSED */
5684 static int
5685 zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
5686 {
5687 spa_t *spa;
5688 int error;
5689 boolean_t rc, scrub_restart = B_TRUE;
5690
5691 if (innvl) {
5692 error = nvlist_lookup_boolean_value(innvl,
5693 "scrub_restart", &rc);
5694 if (error == 0)
5695 scrub_restart = rc;
5696 }
5697
5698 error = spa_open(pool, &spa, FTAG);
5699 if (error != 0)
5700 return (error);
5701
5702 spa_vdev_state_enter(spa, SCL_NONE);
5703
5704 /*
5705 * If the scrub_restart flag is B_FALSE and a scrub is already
5706 * in progress then set spa_scrub_reopen flag to B_TRUE so that
5707 * we don't restart the scrub as a side effect of the reopen.
5708 * Otherwise, let vdev_open() decided if a resilver is required.
5709 */
5710
5711 spa->spa_scrub_reopen = (!scrub_restart &&
5712 dsl_scan_scrubbing(spa->spa_dsl_pool));
5713 vdev_reopen(spa->spa_root_vdev);
5714 spa->spa_scrub_reopen = B_FALSE;
5715
5716 (void) spa_vdev_state_exit(spa, NULL, 0);
5717 spa_close(spa, FTAG);
5718 return (0);
5719 }
5720
5721 /*
5722 * inputs:
5723 * zc_name name of filesystem
5724 *
5725 * outputs:
5726 * zc_string name of conflicting snapshot, if there is one
5727 */
5728 static int
5729 zfs_ioc_promote(zfs_cmd_t *zc)
5730 {
5731 dsl_pool_t *dp;
5732 dsl_dataset_t *ds, *ods;
5733 char origin[ZFS_MAX_DATASET_NAME_LEN];
5734 char *cp;
5735 int error;
5736
5737 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5738 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5739 strchr(zc->zc_name, '%'))
5740 return (SET_ERROR(EINVAL));
5741
5742 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5743 if (error != 0)
5744 return (error);
5745
5746 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5747 if (error != 0) {
5748 dsl_pool_rele(dp, FTAG);
5749 return (error);
5750 }
5751
5752 if (!dsl_dir_is_clone(ds->ds_dir)) {
5753 dsl_dataset_rele(ds, FTAG);
5754 dsl_pool_rele(dp, FTAG);
5755 return (SET_ERROR(EINVAL));
5756 }
5757
5758 error = dsl_dataset_hold_obj(dp,
5759 dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5760 if (error != 0) {
5761 dsl_dataset_rele(ds, FTAG);
5762 dsl_pool_rele(dp, FTAG);
5763 return (error);
5764 }
5765
5766 dsl_dataset_name(ods, origin);
5767 dsl_dataset_rele(ods, FTAG);
5768 dsl_dataset_rele(ds, FTAG);
5769 dsl_pool_rele(dp, FTAG);
5770
5771 /*
5772 * We don't need to unmount *all* the origin fs's snapshots, but
5773 * it's easier.
5774 */
5775 cp = strchr(origin, '@');
5776 if (cp)
5777 *cp = '\0';
5778 (void) dmu_objset_find(origin,
5779 zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5780 return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5781 }
5782
5783 /*
5784 * Retrieve a single {user|group|project}{used|quota}@... property.
5785 *
5786 * inputs:
5787 * zc_name name of filesystem
5788 * zc_objset_type zfs_userquota_prop_t
5789 * zc_value domain name (eg. "S-1-234-567-89")
5790 * zc_guid RID/UID/GID
5791 *
5792 * outputs:
5793 * zc_cookie property value
5794 */
5795 static int
5796 zfs_ioc_userspace_one(zfs_cmd_t *zc)
5797 {
5798 zfsvfs_t *zfsvfs;
5799 int error;
5800
5801 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5802 return (SET_ERROR(EINVAL));
5803
5804 error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5805 if (error != 0)
5806 return (error);
5807
5808 error = zfs_userspace_one(zfsvfs,
5809 zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5810 zfsvfs_rele(zfsvfs, FTAG);
5811
5812 return (error);
5813 }
5814
5815 /*
5816 * inputs:
5817 * zc_name name of filesystem
5818 * zc_cookie zap cursor
5819 * zc_objset_type zfs_userquota_prop_t
5820 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5821 *
5822 * outputs:
5823 * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
5824 * zc_cookie zap cursor
5825 */
5826 static int
5827 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5828 {
5829 zfsvfs_t *zfsvfs;
5830 int bufsize = zc->zc_nvlist_dst_size;
5831
5832 if (bufsize <= 0)
5833 return (SET_ERROR(ENOMEM));
5834
5835 int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5836 if (error != 0)
5837 return (error);
5838
5839 void *buf = vmem_alloc(bufsize, KM_SLEEP);
5840
5841 error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5842 buf, &zc->zc_nvlist_dst_size);
5843
5844 if (error == 0) {
5845 error = xcopyout(buf,
5846 (void *)(uintptr_t)zc->zc_nvlist_dst,
5847 zc->zc_nvlist_dst_size);
5848 }
5849 vmem_free(buf, bufsize);
5850 zfsvfs_rele(zfsvfs, FTAG);
5851
5852 return (error);
5853 }
5854
5855 /*
5856 * inputs:
5857 * zc_name name of filesystem
5858 *
5859 * outputs:
5860 * none
5861 */
5862 static int
5863 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5864 {
5865 objset_t *os;
5866 int error = 0;
5867 zfsvfs_t *zfsvfs;
5868
5869 if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5870 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5871 /*
5872 * If userused is not enabled, it may be because the
5873 * objset needs to be closed & reopened (to grow the
5874 * objset_phys_t). Suspend/resume the fs will do that.
5875 */
5876 dsl_dataset_t *ds, *newds;
5877
5878 ds = dmu_objset_ds(zfsvfs->z_os);
5879 error = zfs_suspend_fs(zfsvfs);
5880 if (error == 0) {
5881 dmu_objset_refresh_ownership(ds, &newds,
5882 B_TRUE, zfsvfs);
5883 error = zfs_resume_fs(zfsvfs, newds);
5884 }
5885 }
5886 if (error == 0)
5887 error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5888 zfs_vfs_rele(zfsvfs);
5889 } else {
5890 /* XXX kind of reading contents without owning */
5891 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
5892 if (error != 0)
5893 return (error);
5894
5895 error = dmu_objset_userspace_upgrade(os);
5896 dmu_objset_rele_flags(os, B_TRUE, FTAG);
5897 }
5898
5899 return (error);
5900 }
5901
5902 /*
5903 * inputs:
5904 * zc_name name of filesystem
5905 *
5906 * outputs:
5907 * none
5908 */
5909 static int
5910 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
5911 {
5912 objset_t *os;
5913 int error;
5914
5915 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
5916 if (error != 0)
5917 return (error);
5918
5919 if (dmu_objset_userobjspace_upgradable(os) ||
5920 dmu_objset_projectquota_upgradable(os)) {
5921 mutex_enter(&os->os_upgrade_lock);
5922 if (os->os_upgrade_id == 0) {
5923 /* clear potential error code and retry */
5924 os->os_upgrade_status = 0;
5925 mutex_exit(&os->os_upgrade_lock);
5926
5927 dmu_objset_id_quota_upgrade(os);
5928 } else {
5929 mutex_exit(&os->os_upgrade_lock);
5930 }
5931
5932 dsl_pool_rele(dmu_objset_pool(os), FTAG);
5933
5934 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
5935 error = os->os_upgrade_status;
5936 } else {
5937 dsl_pool_rele(dmu_objset_pool(os), FTAG);
5938 }
5939
5940 dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
5941
5942 return (error);
5943 }
5944
5945 static int
5946 zfs_ioc_share(zfs_cmd_t *zc)
5947 {
5948 return (SET_ERROR(ENOSYS));
5949 }
5950
5951 ace_t full_access[] = {
5952 {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5953 };
5954
5955 /*
5956 * inputs:
5957 * zc_name name of containing filesystem
5958 * zc_obj object # beyond which we want next in-use object #
5959 *
5960 * outputs:
5961 * zc_obj next in-use object #
5962 */
5963 static int
5964 zfs_ioc_next_obj(zfs_cmd_t *zc)
5965 {
5966 objset_t *os = NULL;
5967 int error;
5968
5969 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5970 if (error != 0)
5971 return (error);
5972
5973 error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
5974
5975 dmu_objset_rele(os, FTAG);
5976 return (error);
5977 }
5978
5979 /*
5980 * inputs:
5981 * zc_name name of filesystem
5982 * zc_value prefix name for snapshot
5983 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
5984 *
5985 * outputs:
5986 * zc_value short name of new snapshot
5987 */
5988 static int
5989 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5990 {
5991 char *snap_name;
5992 char *hold_name;
5993 int error;
5994 minor_t minor;
5995
5996 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5997 if (error != 0)
5998 return (error);
5999
6000 snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
6001 (u_longlong_t)ddi_get_lbolt64());
6002 hold_name = kmem_asprintf("%%%s", zc->zc_value);
6003
6004 error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
6005 hold_name);
6006 if (error == 0)
6007 (void) strlcpy(zc->zc_value, snap_name,
6008 sizeof (zc->zc_value));
6009 kmem_strfree(snap_name);
6010 kmem_strfree(hold_name);
6011 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
6012 return (error);
6013 }
6014
6015 /*
6016 * inputs:
6017 * zc_name name of "to" snapshot
6018 * zc_value name of "from" snapshot
6019 * zc_cookie file descriptor to write diff data on
6020 *
6021 * outputs:
6022 * dmu_diff_record_t's to the file descriptor
6023 */
6024 static int
6025 zfs_ioc_diff(zfs_cmd_t *zc)
6026 {
6027 zfs_file_t *fp;
6028 offset_t off;
6029 int error;
6030
6031 if ((error = zfs_file_get(zc->zc_cookie, &fp)))
6032 return (error);
6033
6034 off = zfs_file_off(fp);
6035 error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
6036
6037 zfs_file_put(zc->zc_cookie);
6038
6039 return (error);
6040 }
6041
6042 static int
6043 zfs_ioc_smb_acl(zfs_cmd_t *zc)
6044 {
6045 return (SET_ERROR(ENOTSUP));
6046 }
6047
6048 /*
6049 * innvl: {
6050 * "holds" -> { snapname -> holdname (string), ... }
6051 * (optional) "cleanup_fd" -> fd (int32)
6052 * }
6053 *
6054 * outnvl: {
6055 * snapname -> error value (int32)
6056 * ...
6057 * }
6058 */
6059 static const zfs_ioc_key_t zfs_keys_hold[] = {
6060 {"holds", DATA_TYPE_NVLIST, 0},
6061 {"cleanup_fd", DATA_TYPE_INT32, ZK_OPTIONAL},
6062 };
6063
6064 /* ARGSUSED */
6065 static int
6066 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6067 {
6068 nvpair_t *pair;
6069 nvlist_t *holds;
6070 int cleanup_fd = -1;
6071 int error;
6072 minor_t minor = 0;
6073
6074 holds = fnvlist_lookup_nvlist(args, "holds");
6075
6076 /* make sure the user didn't pass us any invalid (empty) tags */
6077 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6078 pair = nvlist_next_nvpair(holds, pair)) {
6079 char *htag;
6080
6081 error = nvpair_value_string(pair, &htag);
6082 if (error != 0)
6083 return (SET_ERROR(error));
6084
6085 if (strlen(htag) == 0)
6086 return (SET_ERROR(EINVAL));
6087 }
6088
6089 if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6090 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
6091 if (error != 0)
6092 return (SET_ERROR(error));
6093 }
6094
6095 error = dsl_dataset_user_hold(holds, minor, errlist);
6096 if (minor != 0)
6097 zfs_onexit_fd_rele(cleanup_fd);
6098 return (SET_ERROR(error));
6099 }
6100
6101 /*
6102 * innvl is not used.
6103 *
6104 * outnvl: {
6105 * holdname -> time added (uint64 seconds since epoch)
6106 * ...
6107 * }
6108 */
6109 static const zfs_ioc_key_t zfs_keys_get_holds[] = {
6110 /* no nvl keys */
6111 };
6112
6113 /* ARGSUSED */
6114 static int
6115 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
6116 {
6117 return (dsl_dataset_get_holds(snapname, outnvl));
6118 }
6119
6120 /*
6121 * innvl: {
6122 * snapname -> { holdname, ... }
6123 * ...
6124 * }
6125 *
6126 * outnvl: {
6127 * snapname -> error value (int32)
6128 * ...
6129 * }
6130 */
6131 static const zfs_ioc_key_t zfs_keys_release[] = {
6132 {"<snapname>...", DATA_TYPE_NVLIST, ZK_WILDCARDLIST},
6133 };
6134
6135 /* ARGSUSED */
6136 static int
6137 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
6138 {
6139 return (dsl_dataset_user_release(holds, errlist));
6140 }
6141
6142 /*
6143 * inputs:
6144 * zc_guid flags (ZEVENT_NONBLOCK)
6145 * zc_cleanup_fd zevent file descriptor
6146 *
6147 * outputs:
6148 * zc_nvlist_dst next nvlist event
6149 * zc_cookie dropped events since last get
6150 */
6151 static int
6152 zfs_ioc_events_next(zfs_cmd_t *zc)
6153 {
6154 zfs_zevent_t *ze;
6155 nvlist_t *event = NULL;
6156 minor_t minor;
6157 uint64_t dropped = 0;
6158 int error;
6159
6160 error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6161 if (error != 0)
6162 return (error);
6163
6164 do {
6165 error = zfs_zevent_next(ze, &event,
6166 &zc->zc_nvlist_dst_size, &dropped);
6167 if (event != NULL) {
6168 zc->zc_cookie = dropped;
6169 error = put_nvlist(zc, event);
6170 nvlist_free(event);
6171 }
6172
6173 if (zc->zc_guid & ZEVENT_NONBLOCK)
6174 break;
6175
6176 if ((error == 0) || (error != ENOENT))
6177 break;
6178
6179 error = zfs_zevent_wait(ze);
6180 if (error != 0)
6181 break;
6182 } while (1);
6183
6184 zfs_zevent_fd_rele(zc->zc_cleanup_fd);
6185
6186 return (error);
6187 }
6188
6189 /*
6190 * outputs:
6191 * zc_cookie cleared events count
6192 */
6193 static int
6194 zfs_ioc_events_clear(zfs_cmd_t *zc)
6195 {
6196 int count;
6197
6198 zfs_zevent_drain_all(&count);
6199 zc->zc_cookie = count;
6200
6201 return (0);
6202 }
6203
6204 /*
6205 * inputs:
6206 * zc_guid eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
6207 * zc_cleanup zevent file descriptor
6208 */
6209 static int
6210 zfs_ioc_events_seek(zfs_cmd_t *zc)
6211 {
6212 zfs_zevent_t *ze;
6213 minor_t minor;
6214 int error;
6215
6216 error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6217 if (error != 0)
6218 return (error);
6219
6220 error = zfs_zevent_seek(ze, zc->zc_guid);
6221 zfs_zevent_fd_rele(zc->zc_cleanup_fd);
6222
6223 return (error);
6224 }
6225
6226 /*
6227 * inputs:
6228 * zc_name name of later filesystem or snapshot
6229 * zc_value full name of old snapshot or bookmark
6230 *
6231 * outputs:
6232 * zc_cookie space in bytes
6233 * zc_objset_type compressed space in bytes
6234 * zc_perm_action uncompressed space in bytes
6235 */
6236 static int
6237 zfs_ioc_space_written(zfs_cmd_t *zc)
6238 {
6239 int error;
6240 dsl_pool_t *dp;
6241 dsl_dataset_t *new;
6242
6243 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6244 if (error != 0)
6245 return (error);
6246 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6247 if (error != 0) {
6248 dsl_pool_rele(dp, FTAG);
6249 return (error);
6250 }
6251 if (strchr(zc->zc_value, '#') != NULL) {
6252 zfs_bookmark_phys_t bmp;
6253 error = dsl_bookmark_lookup(dp, zc->zc_value,
6254 new, &bmp);
6255 if (error == 0) {
6256 error = dsl_dataset_space_written_bookmark(&bmp, new,
6257 &zc->zc_cookie,
6258 &zc->zc_objset_type, &zc->zc_perm_action);
6259 }
6260 } else {
6261 dsl_dataset_t *old;
6262 error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6263
6264 if (error == 0) {
6265 error = dsl_dataset_space_written(old, new,
6266 &zc->zc_cookie,
6267 &zc->zc_objset_type, &zc->zc_perm_action);
6268 dsl_dataset_rele(old, FTAG);
6269 }
6270 }
6271 dsl_dataset_rele(new, FTAG);
6272 dsl_pool_rele(dp, FTAG);
6273 return (error);
6274 }
6275
6276 /*
6277 * innvl: {
6278 * "firstsnap" -> snapshot name
6279 * }
6280 *
6281 * outnvl: {
6282 * "used" -> space in bytes
6283 * "compressed" -> compressed space in bytes
6284 * "uncompressed" -> uncompressed space in bytes
6285 * }
6286 */
6287 static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
6288 {"firstsnap", DATA_TYPE_STRING, 0},
6289 };
6290
6291 static int
6292 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6293 {
6294 int error;
6295 dsl_pool_t *dp;
6296 dsl_dataset_t *new, *old;
6297 char *firstsnap;
6298 uint64_t used, comp, uncomp;
6299
6300 firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
6301
6302 error = dsl_pool_hold(lastsnap, FTAG, &dp);
6303 if (error != 0)
6304 return (error);
6305
6306 error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6307 if (error == 0 && !new->ds_is_snapshot) {
6308 dsl_dataset_rele(new, FTAG);
6309 error = SET_ERROR(EINVAL);
6310 }
6311 if (error != 0) {
6312 dsl_pool_rele(dp, FTAG);
6313 return (error);
6314 }
6315 error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6316 if (error == 0 && !old->ds_is_snapshot) {
6317 dsl_dataset_rele(old, FTAG);
6318 error = SET_ERROR(EINVAL);
6319 }
6320 if (error != 0) {
6321 dsl_dataset_rele(new, FTAG);
6322 dsl_pool_rele(dp, FTAG);
6323 return (error);
6324 }
6325
6326 error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6327 dsl_dataset_rele(old, FTAG);
6328 dsl_dataset_rele(new, FTAG);
6329 dsl_pool_rele(dp, FTAG);
6330 fnvlist_add_uint64(outnvl, "used", used);
6331 fnvlist_add_uint64(outnvl, "compressed", comp);
6332 fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6333 return (error);
6334 }
6335
6336 /*
6337 * innvl: {
6338 * "fd" -> file descriptor to write stream to (int32)
6339 * (optional) "fromsnap" -> full snap name to send an incremental from
6340 * (optional) "largeblockok" -> (value ignored)
6341 * indicates that blocks > 128KB are permitted
6342 * (optional) "embedok" -> (value ignored)
6343 * presence indicates DRR_WRITE_EMBEDDED records are permitted
6344 * (optional) "compressok" -> (value ignored)
6345 * presence indicates compressed DRR_WRITE records are permitted
6346 * (optional) "rawok" -> (value ignored)
6347 * presence indicates raw encrypted records should be used.
6348 * (optional) "savedok" -> (value ignored)
6349 * presence indicates we should send a partially received snapshot
6350 * (optional) "resume_object" and "resume_offset" -> (uint64)
6351 * if present, resume send stream from specified object and offset.
6352 * (optional) "redactbook" -> (string)
6353 * if present, use this bookmark's redaction list to generate a redacted
6354 * send stream
6355 * }
6356 *
6357 * outnvl is unused
6358 */
6359 static const zfs_ioc_key_t zfs_keys_send_new[] = {
6360 {"fd", DATA_TYPE_INT32, 0},
6361 {"fromsnap", DATA_TYPE_STRING, ZK_OPTIONAL},
6362 {"largeblockok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6363 {"embedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6364 {"compressok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6365 {"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6366 {"savedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6367 {"resume_object", DATA_TYPE_UINT64, ZK_OPTIONAL},
6368 {"resume_offset", DATA_TYPE_UINT64, ZK_OPTIONAL},
6369 {"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL},
6370 };
6371
6372 /* ARGSUSED */
6373 static int
6374 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6375 {
6376 int error;
6377 offset_t off;
6378 char *fromname = NULL;
6379 int fd;
6380 zfs_file_t *fp;
6381 boolean_t largeblockok;
6382 boolean_t embedok;
6383 boolean_t compressok;
6384 boolean_t rawok;
6385 boolean_t savedok;
6386 uint64_t resumeobj = 0;
6387 uint64_t resumeoff = 0;
6388 char *redactbook = NULL;
6389
6390 fd = fnvlist_lookup_int32(innvl, "fd");
6391
6392 (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6393
6394 largeblockok = nvlist_exists(innvl, "largeblockok");
6395 embedok = nvlist_exists(innvl, "embedok");
6396 compressok = nvlist_exists(innvl, "compressok");
6397 rawok = nvlist_exists(innvl, "rawok");
6398 savedok = nvlist_exists(innvl, "savedok");
6399
6400 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6401 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6402
6403 (void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
6404
6405 if ((error = zfs_file_get(fd, &fp)))
6406 return (error);
6407
6408 off = zfs_file_off(fp);
6409
6410 dmu_send_outparams_t out = {0};
6411 out.dso_outfunc = dump_bytes;
6412 out.dso_arg = fp;
6413 out.dso_dryrun = B_FALSE;
6414 error = dmu_send(snapname, fromname, embedok, largeblockok,
6415 compressok, rawok, savedok, resumeobj, resumeoff,
6416 redactbook, fd, &off, &out);
6417
6418 zfs_file_put(fd);
6419 return (error);
6420 }
6421
6422 /* ARGSUSED */
6423 static int
6424 send_space_sum(objset_t *os, void *buf, int len, void *arg)
6425 {
6426 uint64_t *size = arg;
6427 *size += len;
6428 return (0);
6429 }
6430
6431 /*
6432 * Determine approximately how large a zfs send stream will be -- the number
6433 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6434 *
6435 * innvl: {
6436 * (optional) "from" -> full snap or bookmark name to send an incremental
6437 * from
6438 * (optional) "largeblockok" -> (value ignored)
6439 * indicates that blocks > 128KB are permitted
6440 * (optional) "embedok" -> (value ignored)
6441 * presence indicates DRR_WRITE_EMBEDDED records are permitted
6442 * (optional) "compressok" -> (value ignored)
6443 * presence indicates compressed DRR_WRITE records are permitted
6444 * (optional) "rawok" -> (value ignored)
6445 * presence indicates raw encrypted records should be used.
6446 * (optional) "fd" -> file descriptor to use as a cookie for progress
6447 * tracking (int32)
6448 * }
6449 *
6450 * outnvl: {
6451 * "space" -> bytes of space (uint64)
6452 * }
6453 */
6454 static const zfs_ioc_key_t zfs_keys_send_space[] = {
6455 {"from", DATA_TYPE_STRING, ZK_OPTIONAL},
6456 {"fromsnap", DATA_TYPE_STRING, ZK_OPTIONAL},
6457 {"largeblockok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6458 {"embedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6459 {"compressok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6460 {"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6461 {"fd", DATA_TYPE_INT32, ZK_OPTIONAL},
6462 {"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL},
6463 {"resumeobj", DATA_TYPE_UINT64, ZK_OPTIONAL},
6464 {"resumeoff", DATA_TYPE_UINT64, ZK_OPTIONAL},
6465 {"bytes", DATA_TYPE_UINT64, ZK_OPTIONAL},
6466 };
6467
6468 static int
6469 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6470 {
6471 dsl_pool_t *dp;
6472 dsl_dataset_t *tosnap;
6473 dsl_dataset_t *fromsnap = NULL;
6474 int error;
6475 char *fromname = NULL;
6476 char *redactlist_book = NULL;
6477 boolean_t largeblockok;
6478 boolean_t embedok;
6479 boolean_t compressok;
6480 boolean_t rawok;
6481 boolean_t savedok;
6482 uint64_t space = 0;
6483 boolean_t full_estimate = B_FALSE;
6484 uint64_t resumeobj = 0;
6485 uint64_t resumeoff = 0;
6486 uint64_t resume_bytes = 0;
6487 int32_t fd = -1;
6488 zfs_bookmark_phys_t zbm = {0};
6489
6490 error = dsl_pool_hold(snapname, FTAG, &dp);
6491 if (error != 0)
6492 return (error);
6493
6494 error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6495 if (error != 0) {
6496 dsl_pool_rele(dp, FTAG);
6497 return (error);
6498 }
6499 (void) nvlist_lookup_int32(innvl, "fd", &fd);
6500
6501 largeblockok = nvlist_exists(innvl, "largeblockok");
6502 embedok = nvlist_exists(innvl, "embedok");
6503 compressok = nvlist_exists(innvl, "compressok");
6504 rawok = nvlist_exists(innvl, "rawok");
6505 savedok = nvlist_exists(innvl, "savedok");
6506 boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
6507 boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
6508 &redactlist_book) == 0);
6509
6510 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6511 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6512 (void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
6513
6514 if (altbook) {
6515 full_estimate = B_TRUE;
6516 } else if (from) {
6517 if (strchr(fromname, '#')) {
6518 error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
6519
6520 /*
6521 * dsl_bookmark_lookup() will fail with EXDEV if
6522 * the from-bookmark and tosnap are at the same txg.
6523 * However, it's valid to do a send (and therefore,
6524 * a send estimate) from and to the same time point,
6525 * if the bookmark is redacted (the incremental send
6526 * can change what's redacted on the target). In
6527 * this case, dsl_bookmark_lookup() fills in zbm
6528 * but returns EXDEV. Ignore this error.
6529 */
6530 if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
6531 zbm.zbm_guid ==
6532 dsl_dataset_phys(tosnap)->ds_guid)
6533 error = 0;
6534
6535 if (error != 0) {
6536 dsl_dataset_rele(tosnap, FTAG);
6537 dsl_pool_rele(dp, FTAG);
6538 return (error);
6539 }
6540 if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
6541 ZBM_FLAG_HAS_FBN)) {
6542 full_estimate = B_TRUE;
6543 }
6544 } else if (strchr(fromname, '@')) {
6545 error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6546 if (error != 0) {
6547 dsl_dataset_rele(tosnap, FTAG);
6548 dsl_pool_rele(dp, FTAG);
6549 return (error);
6550 }
6551
6552 if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
6553 full_estimate = B_TRUE;
6554 dsl_dataset_rele(fromsnap, FTAG);
6555 }
6556 } else {
6557 /*
6558 * from is not properly formatted as a snapshot or
6559 * bookmark
6560 */
6561 dsl_dataset_rele(tosnap, FTAG);
6562 dsl_pool_rele(dp, FTAG);
6563 return (SET_ERROR(EINVAL));
6564 }
6565 }
6566
6567 if (full_estimate) {
6568 dmu_send_outparams_t out = {0};
6569 offset_t off = 0;
6570 out.dso_outfunc = send_space_sum;
6571 out.dso_arg = &space;
6572 out.dso_dryrun = B_TRUE;
6573 /*
6574 * We have to release these holds so dmu_send can take them. It
6575 * will do all the error checking we need.
6576 */
6577 dsl_dataset_rele(tosnap, FTAG);
6578 dsl_pool_rele(dp, FTAG);
6579 error = dmu_send(snapname, fromname, embedok, largeblockok,
6580 compressok, rawok, savedok, resumeobj, resumeoff,
6581 redactlist_book, fd, &off, &out);
6582 } else {
6583 error = dmu_send_estimate_fast(tosnap, fromsnap,
6584 (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
6585 compressok || rawok, savedok, &space);
6586 space -= resume_bytes;
6587 if (fromsnap != NULL)
6588 dsl_dataset_rele(fromsnap, FTAG);
6589 dsl_dataset_rele(tosnap, FTAG);
6590 dsl_pool_rele(dp, FTAG);
6591 }
6592
6593 fnvlist_add_uint64(outnvl, "space", space);
6594
6595 return (error);
6596 }
6597
6598 /*
6599 * Sync the currently open TXG to disk for the specified pool.
6600 * This is somewhat similar to 'zfs_sync()'.
6601 * For cases that do not result in error this ioctl will wait for
6602 * the currently open TXG to commit before returning back to the caller.
6603 *
6604 * innvl: {
6605 * "force" -> when true, force uberblock update even if there is no dirty data.
6606 * In addition this will cause the vdev configuration to be written
6607 * out including updating the zpool cache file. (boolean_t)
6608 * }
6609 *
6610 * onvl is unused
6611 */
6612 static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
6613 {"force", DATA_TYPE_BOOLEAN_VALUE, 0},
6614 };
6615
6616 /* ARGSUSED */
6617 static int
6618 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
6619 {
6620 int err;
6621 boolean_t force = B_FALSE;
6622 spa_t *spa;
6623
6624 if ((err = spa_open(pool, &spa, FTAG)) != 0)
6625 return (err);
6626
6627 if (innvl)
6628 force = fnvlist_lookup_boolean_value(innvl, "force");
6629
6630 if (force) {
6631 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
6632 vdev_config_dirty(spa->spa_root_vdev);
6633 spa_config_exit(spa, SCL_CONFIG, FTAG);
6634 }
6635 txg_wait_synced(spa_get_dsl(spa), 0);
6636
6637 spa_close(spa, FTAG);
6638
6639 return (err);
6640 }
6641
6642 /*
6643 * Load a user's wrapping key into the kernel.
6644 * innvl: {
6645 * "hidden_args" -> { "wkeydata" -> value }
6646 * raw uint8_t array of encryption wrapping key data (32 bytes)
6647 * (optional) "noop" -> (value ignored)
6648 * presence indicated key should only be verified, not loaded
6649 * }
6650 */
6651 static const zfs_ioc_key_t zfs_keys_load_key[] = {
6652 {"hidden_args", DATA_TYPE_NVLIST, 0},
6653 {"noop", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6654 };
6655
6656 /* ARGSUSED */
6657 static int
6658 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6659 {
6660 int ret;
6661 dsl_crypto_params_t *dcp = NULL;
6662 nvlist_t *hidden_args;
6663 boolean_t noop = nvlist_exists(innvl, "noop");
6664
6665 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6666 ret = SET_ERROR(EINVAL);
6667 goto error;
6668 }
6669
6670 hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
6671
6672 ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
6673 hidden_args, &dcp);
6674 if (ret != 0)
6675 goto error;
6676
6677 ret = spa_keystore_load_wkey(dsname, dcp, noop);
6678 if (ret != 0)
6679 goto error;
6680
6681 dsl_crypto_params_free(dcp, noop);
6682
6683 return (0);
6684
6685 error:
6686 dsl_crypto_params_free(dcp, B_TRUE);
6687 return (ret);
6688 }
6689
6690 /*
6691 * Unload a user's wrapping key from the kernel.
6692 * Both innvl and outnvl are unused.
6693 */
6694 static const zfs_ioc_key_t zfs_keys_unload_key[] = {
6695 /* no nvl keys */
6696 };
6697
6698 /* ARGSUSED */
6699 static int
6700 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6701 {
6702 int ret = 0;
6703
6704 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6705 ret = (SET_ERROR(EINVAL));
6706 goto out;
6707 }
6708
6709 ret = spa_keystore_unload_wkey(dsname);
6710 if (ret != 0)
6711 goto out;
6712
6713 out:
6714 return (ret);
6715 }
6716
6717 /*
6718 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
6719 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
6720 * here to change how the key is derived in userspace.
6721 *
6722 * innvl: {
6723 * "hidden_args" (optional) -> { "wkeydata" -> value }
6724 * raw uint8_t array of new encryption wrapping key data (32 bytes)
6725 * "props" (optional) -> { prop -> value }
6726 * }
6727 *
6728 * outnvl is unused
6729 */
6730 static const zfs_ioc_key_t zfs_keys_change_key[] = {
6731 {"crypt_cmd", DATA_TYPE_UINT64, ZK_OPTIONAL},
6732 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
6733 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
6734 };
6735
6736 /* ARGSUSED */
6737 static int
6738 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6739 {
6740 int ret;
6741 uint64_t cmd = DCP_CMD_NONE;
6742 dsl_crypto_params_t *dcp = NULL;
6743 nvlist_t *args = NULL, *hidden_args = NULL;
6744
6745 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6746 ret = (SET_ERROR(EINVAL));
6747 goto error;
6748 }
6749
6750 (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
6751 (void) nvlist_lookup_nvlist(innvl, "props", &args);
6752 (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6753
6754 ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
6755 if (ret != 0)
6756 goto error;
6757
6758 ret = spa_keystore_change_key(dsname, dcp);
6759 if (ret != 0)
6760 goto error;
6761
6762 dsl_crypto_params_free(dcp, B_FALSE);
6763
6764 return (0);
6765
6766 error:
6767 dsl_crypto_params_free(dcp, B_TRUE);
6768 return (ret);
6769 }
6770
6771 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6772
6773 static void
6774 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6775 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6776 boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6777 {
6778 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6779
6780 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6781 ASSERT3U(ioc, <, ZFS_IOC_LAST);
6782 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6783 ASSERT3P(vec->zvec_func, ==, NULL);
6784
6785 vec->zvec_legacy_func = func;
6786 vec->zvec_secpolicy = secpolicy;
6787 vec->zvec_namecheck = namecheck;
6788 vec->zvec_allow_log = log_history;
6789 vec->zvec_pool_check = pool_check;
6790 }
6791
6792 /*
6793 * See the block comment at the beginning of this file for details on
6794 * each argument to this function.
6795 */
6796 void
6797 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6798 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6799 zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6800 boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
6801 {
6802 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6803
6804 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6805 ASSERT3U(ioc, <, ZFS_IOC_LAST);
6806 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6807 ASSERT3P(vec->zvec_func, ==, NULL);
6808
6809 /* if we are logging, the name must be valid */
6810 ASSERT(!allow_log || namecheck != NO_NAME);
6811
6812 vec->zvec_name = name;
6813 vec->zvec_func = func;
6814 vec->zvec_secpolicy = secpolicy;
6815 vec->zvec_namecheck = namecheck;
6816 vec->zvec_pool_check = pool_check;
6817 vec->zvec_smush_outnvlist = smush_outnvlist;
6818 vec->zvec_allow_log = allow_log;
6819 vec->zvec_nvl_keys = nvl_keys;
6820 vec->zvec_nvl_key_count = num_keys;
6821 }
6822
6823 static void
6824 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6825 zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6826 zfs_ioc_poolcheck_t pool_check)
6827 {
6828 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6829 POOL_NAME, log_history, pool_check);
6830 }
6831
6832 void
6833 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6834 zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6835 {
6836 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6837 DATASET_NAME, B_FALSE, pool_check);
6838 }
6839
6840 static void
6841 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6842 {
6843 zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6844 POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6845 }
6846
6847 static void
6848 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6849 zfs_secpolicy_func_t *secpolicy)
6850 {
6851 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6852 NO_NAME, B_FALSE, POOL_CHECK_NONE);
6853 }
6854
6855 static void
6856 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6857 zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6858 {
6859 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6860 DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6861 }
6862
6863 static void
6864 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6865 {
6866 zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6867 zfs_secpolicy_read);
6868 }
6869
6870 static void
6871 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6872 zfs_secpolicy_func_t *secpolicy)
6873 {
6874 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6875 DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6876 }
6877
6878 static void
6879 zfs_ioctl_init(void)
6880 {
6881 zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
6882 zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
6883 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6884 zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));
6885
6886 zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
6887 zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
6888 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
6889 zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));
6890
6891 zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
6892 zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
6893 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
6894 zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));
6895
6896 zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
6897 zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
6898 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
6899 zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));
6900
6901 zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
6902 zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
6903 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
6904 zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));
6905
6906 zfs_ioctl_register("create", ZFS_IOC_CREATE,
6907 zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
6908 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6909 zfs_keys_create, ARRAY_SIZE(zfs_keys_create));
6910
6911 zfs_ioctl_register("clone", ZFS_IOC_CLONE,
6912 zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
6913 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6914 zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));
6915
6916 zfs_ioctl_register("remap", ZFS_IOC_REMAP,
6917 zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
6918 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
6919 zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));
6920
6921 zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
6922 zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
6923 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6924 zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));
6925
6926 zfs_ioctl_register("hold", ZFS_IOC_HOLD,
6927 zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
6928 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6929 zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
6930 zfs_ioctl_register("release", ZFS_IOC_RELEASE,
6931 zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
6932 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6933 zfs_keys_release, ARRAY_SIZE(zfs_keys_release));
6934
6935 zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
6936 zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
6937 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
6938 zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));
6939
6940 zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
6941 zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
6942 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
6943 zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));
6944
6945 zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
6946 zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
6947 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6948 zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));
6949
6950 zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
6951 zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
6952 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
6953 zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
6954
6955 zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
6956 zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
6957 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
6958 ARRAY_SIZE(zfs_keys_get_bookmark_props));
6959
6960 zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
6961 zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
6962 POOL_NAME,
6963 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6964 zfs_keys_destroy_bookmarks,
6965 ARRAY_SIZE(zfs_keys_destroy_bookmarks));
6966
6967 zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
6968 zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
6969 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
6970 zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
6971 zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
6972 zfs_ioc_load_key, zfs_secpolicy_load_key,
6973 DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
6974 zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
6975 zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
6976 zfs_ioc_unload_key, zfs_secpolicy_load_key,
6977 DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
6978 zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
6979 zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
6980 zfs_ioc_change_key, zfs_secpolicy_change_key,
6981 DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
6982 B_TRUE, B_TRUE, zfs_keys_change_key,
6983 ARRAY_SIZE(zfs_keys_change_key));
6984
6985 zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
6986 zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
6987 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
6988 zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
6989 zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
6990 zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
6991 B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));
6992
6993 zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
6994 zfs_ioc_channel_program, zfs_secpolicy_config,
6995 POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
6996 B_TRUE, zfs_keys_channel_program,
6997 ARRAY_SIZE(zfs_keys_channel_program));
6998
6999 zfs_ioctl_register("redact", ZFS_IOC_REDACT,
7000 zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
7001 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7002 zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
7003
7004 zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
7005 zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
7006 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7007 zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));
7008
7009 zfs_ioctl_register("zpool_discard_checkpoint",
7010 ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
7011 zfs_secpolicy_config, POOL_NAME,
7012 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7013 zfs_keys_pool_discard_checkpoint,
7014 ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
7015
7016 zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
7017 zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
7018 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7019 zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));
7020
7021 zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
7022 zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
7023 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7024 zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));
7025
7026 zfs_ioctl_register("wait", ZFS_IOC_WAIT,
7027 zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
7028 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7029 zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
7030
7031 zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
7032 zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
7033 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7034 zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
7035
7036 zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
7037 zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
7038 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
7039 zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
7040
7041 zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
7042 zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
7043 POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
7044 zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
7045
7046 /* IOCTLS that use the legacy function signature */
7047
7048 zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
7049 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
7050
7051 zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
7052 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
7053 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
7054 zfs_ioc_pool_scan);
7055 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
7056 zfs_ioc_pool_upgrade);
7057 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
7058 zfs_ioc_vdev_add);
7059 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
7060 zfs_ioc_vdev_remove);
7061 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
7062 zfs_ioc_vdev_set_state);
7063 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
7064 zfs_ioc_vdev_attach);
7065 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
7066 zfs_ioc_vdev_detach);
7067 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
7068 zfs_ioc_vdev_setpath);
7069 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
7070 zfs_ioc_vdev_setfru);
7071 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
7072 zfs_ioc_pool_set_props);
7073 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
7074 zfs_ioc_vdev_split);
7075 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
7076 zfs_ioc_pool_reguid);
7077
7078 zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
7079 zfs_ioc_pool_configs, zfs_secpolicy_none);
7080 zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
7081 zfs_ioc_pool_tryimport, zfs_secpolicy_config);
7082 zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
7083 zfs_ioc_inject_fault, zfs_secpolicy_inject);
7084 zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
7085 zfs_ioc_clear_fault, zfs_secpolicy_inject);
7086 zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
7087 zfs_ioc_inject_list_next, zfs_secpolicy_inject);
7088
7089 /*
7090 * pool destroy, and export don't log the history as part of
7091 * zfsdev_ioctl, but rather zfs_ioc_pool_export
7092 * does the logging of those commands.
7093 */
7094 zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
7095 zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
7096 zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
7097 zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
7098
7099 zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
7100 zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
7101 zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
7102 zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
7103
7104 zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
7105 zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
7106 zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
7107 zfs_ioc_dsobj_to_dsname,
7108 zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
7109 zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
7110 zfs_ioc_pool_get_history,
7111 zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
7112
7113 zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
7114 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
7115
7116 zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
7117 zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
7118
7119 zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
7120 zfs_ioc_space_written);
7121 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
7122 zfs_ioc_objset_recvd_props);
7123 zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
7124 zfs_ioc_next_obj);
7125 zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
7126 zfs_ioc_get_fsacl);
7127 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
7128 zfs_ioc_objset_stats);
7129 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
7130 zfs_ioc_objset_zplprops);
7131 zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
7132 zfs_ioc_dataset_list_next);
7133 zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
7134 zfs_ioc_snapshot_list_next);
7135 zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
7136 zfs_ioc_send_progress);
7137
7138 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
7139 zfs_ioc_diff, zfs_secpolicy_diff);
7140 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
7141 zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
7142 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
7143 zfs_ioc_obj_to_path, zfs_secpolicy_diff);
7144 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
7145 zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
7146 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
7147 zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
7148 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
7149 zfs_ioc_send, zfs_secpolicy_send);
7150
7151 zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
7152 zfs_secpolicy_none);
7153 zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
7154 zfs_secpolicy_destroy);
7155 zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
7156 zfs_secpolicy_rename);
7157 zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
7158 zfs_secpolicy_recv);
7159 zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
7160 zfs_secpolicy_promote);
7161 zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
7162 zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
7163 zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
7164 zfs_secpolicy_set_fsacl);
7165
7166 zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
7167 zfs_secpolicy_share, POOL_CHECK_NONE);
7168 zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
7169 zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
7170 zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
7171 zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
7172 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7173 zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
7174 zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
7175 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7176
7177 zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
7178 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
7179 zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
7180 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
7181 zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
7182 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
7183
7184 zfs_ioctl_init_os();
7185 }
7186
7187 /*
7188 * Verify that for non-legacy ioctls the input nvlist
7189 * pairs match against the expected input.
7190 *
7191 * Possible errors are:
7192 * ZFS_ERR_IOC_ARG_UNAVAIL An unrecognized nvpair was encountered
7193 * ZFS_ERR_IOC_ARG_REQUIRED A required nvpair is missing
7194 * ZFS_ERR_IOC_ARG_BADTYPE Invalid type for nvpair
7195 */
7196 static int
7197 zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
7198 {
7199 const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
7200 boolean_t required_keys_found = B_FALSE;
7201
7202 /*
7203 * examine each input pair
7204 */
7205 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
7206 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
7207 char *name = nvpair_name(pair);
7208 data_type_t type = nvpair_type(pair);
7209 boolean_t identified = B_FALSE;
7210
7211 /*
7212 * check pair against the documented names and type
7213 */
7214 for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7215 /* if not a wild card name, check for an exact match */
7216 if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
7217 strcmp(nvl_keys[k].zkey_name, name) != 0)
7218 continue;
7219
7220 identified = B_TRUE;
7221
7222 if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
7223 nvl_keys[k].zkey_type != type) {
7224 return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
7225 }
7226
7227 if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7228 continue;
7229
7230 required_keys_found = B_TRUE;
7231 break;
7232 }
7233
7234 /* allow an 'optional' key, everything else is invalid */
7235 if (!identified &&
7236 (strcmp(name, "optional") != 0 ||
7237 type != DATA_TYPE_NVLIST)) {
7238 return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
7239 }
7240 }
7241
7242 /* verify that all required keys were found */
7243 for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7244 if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7245 continue;
7246
7247 if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
7248 /* at least one non-optional key is expected here */
7249 if (!required_keys_found)
7250 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7251 continue;
7252 }
7253
7254 if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
7255 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7256 }
7257
7258 return (0);
7259 }
7260
7261 static int
7262 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
7263 zfs_ioc_poolcheck_t check)
7264 {
7265 spa_t *spa;
7266 int error;
7267
7268 ASSERT(type == POOL_NAME || type == DATASET_NAME ||
7269 type == ENTITY_NAME);
7270
7271 if (check & POOL_CHECK_NONE)
7272 return (0);
7273
7274 error = spa_open(name, &spa, FTAG);
7275 if (error == 0) {
7276 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
7277 error = SET_ERROR(EAGAIN);
7278 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
7279 error = SET_ERROR(EROFS);
7280 spa_close(spa, FTAG);
7281 }
7282 return (error);
7283 }
7284
7285 int
7286 zfsdev_getminor(int fd, minor_t *minorp)
7287 {
7288 zfsdev_state_t *zs, *fpd;
7289 zfs_file_t *fp;
7290 int rc;
7291
7292 ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
7293
7294 if ((rc = zfs_file_get(fd, &fp)))
7295 return (rc);
7296
7297 fpd = zfs_file_private(fp);
7298 if (fpd == NULL)
7299 return (SET_ERROR(EBADF));
7300
7301 mutex_enter(&zfsdev_state_lock);
7302
7303 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
7304
7305 if (zs->zs_minor == -1)
7306 continue;
7307
7308 if (fpd == zs) {
7309 *minorp = fpd->zs_minor;
7310 mutex_exit(&zfsdev_state_lock);
7311 return (0);
7312 }
7313 }
7314
7315 mutex_exit(&zfsdev_state_lock);
7316
7317 return (SET_ERROR(EBADF));
7318 }
7319
7320 static void *
7321 zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
7322 {
7323 zfsdev_state_t *zs;
7324
7325 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
7326 if (zs->zs_minor == minor) {
7327 smp_rmb();
7328 switch (which) {
7329 case ZST_ONEXIT:
7330 return (zs->zs_onexit);
7331 case ZST_ZEVENT:
7332 return (zs->zs_zevent);
7333 case ZST_ALL:
7334 return (zs);
7335 }
7336 }
7337 }
7338
7339 return (NULL);
7340 }
7341
7342 void *
7343 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
7344 {
7345 void *ptr;
7346
7347 ptr = zfsdev_get_state_impl(minor, which);
7348
7349 return (ptr);
7350 }
7351
7352 /*
7353 * Find a free minor number. The zfsdev_state_list is expected to
7354 * be short since it is only a list of currently open file handles.
7355 */
7356 minor_t
7357 zfsdev_minor_alloc(void)
7358 {
7359 static minor_t last_minor = 0;
7360 minor_t m;
7361
7362 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7363
7364 for (m = last_minor + 1; m != last_minor; m++) {
7365 if (m > ZFSDEV_MAX_MINOR)
7366 m = 1;
7367 if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) {
7368 last_minor = m;
7369 return (m);
7370 }
7371 }
7372
7373 return (0);
7374 }
7375
7376 long
7377 zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
7378 {
7379 int error, cmd;
7380 const zfs_ioc_vec_t *vec;
7381 char *saved_poolname = NULL;
7382 size_t saved_poolname_len = 0;
7383 nvlist_t *innvl = NULL;
7384 fstrans_cookie_t cookie;
7385
7386 cmd = vecnum;
7387 error = 0;
7388 if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
7389 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7390
7391 vec = &zfs_ioc_vec[vecnum];
7392
7393 /*
7394 * The registered ioctl list may be sparse, verify that either
7395 * a normal or legacy handler are registered.
7396 */
7397 if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
7398 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7399
7400 zc->zc_iflags = flag & FKIOCTL;
7401 if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
7402 /*
7403 * Make sure the user doesn't pass in an insane value for
7404 * zc_nvlist_src_size. We have to check, since we will end
7405 * up allocating that much memory inside of get_nvlist(). This
7406 * prevents a nefarious user from allocating tons of kernel
7407 * memory.
7408 *
7409 * Also, we return EINVAL instead of ENOMEM here. The reason
7410 * being that returning ENOMEM from an ioctl() has a special
7411 * connotation; that the user's size value is too small and
7412 * needs to be expanded to hold the nvlist. See
7413 * zcmd_expand_dst_nvlist() for details.
7414 */
7415 error = SET_ERROR(EINVAL); /* User's size too big */
7416
7417 } else if (zc->zc_nvlist_src_size != 0) {
7418 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
7419 zc->zc_iflags, &innvl);
7420 if (error != 0)
7421 goto out;
7422 }
7423
7424 /*
7425 * Ensure that all pool/dataset names are valid before we pass down to
7426 * the lower layers.
7427 */
7428 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
7429 switch (vec->zvec_namecheck) {
7430 case POOL_NAME:
7431 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
7432 error = SET_ERROR(EINVAL);
7433 else
7434 error = pool_status_check(zc->zc_name,
7435 vec->zvec_namecheck, vec->zvec_pool_check);
7436 break;
7437
7438 case DATASET_NAME:
7439 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
7440 error = SET_ERROR(EINVAL);
7441 else
7442 error = pool_status_check(zc->zc_name,
7443 vec->zvec_namecheck, vec->zvec_pool_check);
7444 break;
7445
7446 case ENTITY_NAME:
7447 if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
7448 error = SET_ERROR(EINVAL);
7449 } else {
7450 error = pool_status_check(zc->zc_name,
7451 vec->zvec_namecheck, vec->zvec_pool_check);
7452 }
7453 break;
7454
7455 case NO_NAME:
7456 break;
7457 }
7458 /*
7459 * Ensure that all input pairs are valid before we pass them down
7460 * to the lower layers.
7461 *
7462 * The vectored functions can use fnvlist_lookup_{type} for any
7463 * required pairs since zfs_check_input_nvpairs() confirmed that
7464 * they exist and are of the correct type.
7465 */
7466 if (error == 0 && vec->zvec_func != NULL) {
7467 error = zfs_check_input_nvpairs(innvl, vec);
7468 if (error != 0)
7469 goto out;
7470 }
7471
7472 if (error == 0) {
7473 cookie = spl_fstrans_mark();
7474 error = vec->zvec_secpolicy(zc, innvl, CRED());
7475 spl_fstrans_unmark(cookie);
7476 }
7477
7478 if (error != 0)
7479 goto out;
7480
7481 /* legacy ioctls can modify zc_name */
7482 /*
7483 * Can't use kmem_strdup() as we might truncate the string and
7484 * kmem_strfree() would then free with incorrect size.
7485 */
7486 saved_poolname_len = strlen(zc->zc_name) + 1;
7487 saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
7488
7489 strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
7490 saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
7491
7492 if (vec->zvec_func != NULL) {
7493 nvlist_t *outnvl;
7494 int puterror = 0;
7495 spa_t *spa;
7496 nvlist_t *lognv = NULL;
7497
7498 ASSERT(vec->zvec_legacy_func == NULL);
7499
7500 /*
7501 * Add the innvl to the lognv before calling the func,
7502 * in case the func changes the innvl.
7503 */
7504 if (vec->zvec_allow_log) {
7505 lognv = fnvlist_alloc();
7506 fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
7507 vec->zvec_name);
7508 if (!nvlist_empty(innvl)) {
7509 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
7510 innvl);
7511 }
7512 }
7513
7514 outnvl = fnvlist_alloc();
7515 cookie = spl_fstrans_mark();
7516 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
7517 spl_fstrans_unmark(cookie);
7518
7519 /*
7520 * Some commands can partially execute, modify state, and still
7521 * return an error. In these cases, attempt to record what
7522 * was modified.
7523 */
7524 if ((error == 0 ||
7525 (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
7526 vec->zvec_allow_log &&
7527 spa_open(zc->zc_name, &spa, FTAG) == 0) {
7528 if (!nvlist_empty(outnvl)) {
7529 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
7530 outnvl);
7531 }
7532 if (error != 0) {
7533 fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
7534 error);
7535 }
7536 (void) spa_history_log_nvl(spa, lognv);
7537 spa_close(spa, FTAG);
7538 }
7539 fnvlist_free(lognv);
7540
7541 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
7542 int smusherror = 0;
7543 if (vec->zvec_smush_outnvlist) {
7544 smusherror = nvlist_smush(outnvl,
7545 zc->zc_nvlist_dst_size);
7546 }
7547 if (smusherror == 0)
7548 puterror = put_nvlist(zc, outnvl);
7549 }
7550
7551 if (puterror != 0)
7552 error = puterror;
7553
7554 nvlist_free(outnvl);
7555 } else {
7556 cookie = spl_fstrans_mark();
7557 error = vec->zvec_legacy_func(zc);
7558 spl_fstrans_unmark(cookie);
7559 }
7560
7561 out:
7562 nvlist_free(innvl);
7563 if (error == 0 && vec->zvec_allow_log) {
7564 char *s = tsd_get(zfs_allow_log_key);
7565 if (s != NULL)
7566 kmem_strfree(s);
7567 (void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
7568 }
7569 if (saved_poolname != NULL)
7570 kmem_free(saved_poolname, saved_poolname_len);
7571
7572 return (error);
7573 }
7574
7575 int
7576 zfs_kmod_init(void)
7577 {
7578 int error;
7579
7580 if ((error = zvol_init()) != 0)
7581 return (error);
7582
7583 spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
7584 zfs_init();
7585
7586 zfs_ioctl_init();
7587
7588 mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
7589 zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
7590 zfsdev_state_list->zs_minor = -1;
7591
7592 if ((error = zfsdev_attach()) != 0)
7593 goto out;
7594
7595 tsd_create(&zfs_fsyncer_key, NULL);
7596 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7597 tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7598
7599 return (0);
7600 out:
7601 zfs_fini();
7602 spa_fini();
7603 zvol_fini();
7604
7605 return (error);
7606 }
7607
7608 void
7609 zfs_kmod_fini(void)
7610 {
7611 zfsdev_state_t *zs, *zsprev = NULL;
7612
7613 zfsdev_detach();
7614
7615 mutex_destroy(&zfsdev_state_lock);
7616
7617 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
7618 if (zsprev)
7619 kmem_free(zsprev, sizeof (zfsdev_state_t));
7620 zsprev = zs;
7621 }
7622 if (zsprev)
7623 kmem_free(zsprev, sizeof (zfsdev_state_t));
7624
7625 zfs_fini();
7626 spa_fini();
7627 zvol_fini();
7628
7629 tsd_destroy(&zfs_fsyncer_key);
7630 tsd_destroy(&rrw_tsd_key);
7631 tsd_destroy(&zfs_allow_log_key);
7632 }