]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/zfs_ioctl.c
Teach zpool scrub to scrub only blocks in error log
[mirror_zfs.git] / module / zfs / zfs_ioctl.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 Martin Matuska
25 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
26 * Copyright (c) 2012 Pawel Jakub Dawidek
27 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
28 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
29 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
36 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
37 * Copyright 2017 RackTop Systems.
38 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
39 * Copyright (c) 2019 Datto Inc.
40 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
41 * Copyright (c) 2019, 2021, Klara Inc.
42 * Copyright (c) 2019, Allan Jude
43 */
44
45 /*
46 * ZFS ioctls.
47 *
48 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
49 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
50 *
51 * There are two ways that we handle ioctls: the legacy way where almost
52 * all of the logic is in the ioctl callback, and the new way where most
53 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
54 *
55 * Non-legacy ioctls should be registered by calling
56 * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked
57 * from userland by lzc_ioctl().
58 *
59 * The registration arguments are as follows:
60 *
61 * const char *name
62 * The name of the ioctl. This is used for history logging. If the
63 * ioctl returns successfully (the callback returns 0), and allow_log
64 * is true, then a history log entry will be recorded with the input &
65 * output nvlists. The log entry can be printed with "zpool history -i".
66 *
67 * zfs_ioc_t ioc
68 * The ioctl request number, which userland will pass to ioctl(2).
69 * We want newer versions of libzfs and libzfs_core to run against
70 * existing zfs kernel modules (i.e. a deferred reboot after an update).
71 * Therefore the ioctl numbers cannot change from release to release.
72 *
73 * zfs_secpolicy_func_t *secpolicy
74 * This function will be called before the zfs_ioc_func_t, to
75 * determine if this operation is permitted. It should return EPERM
76 * on failure, and 0 on success. Checks include determining if the
77 * dataset is visible in this zone, and if the user has either all
78 * zfs privileges in the zone (SYS_MOUNT), or has been granted permission
79 * to do this operation on this dataset with "zfs allow".
80 *
81 * zfs_ioc_namecheck_t namecheck
82 * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
83 * name, a dataset name, or nothing. If the name is not well-formed,
84 * the ioctl will fail and the callback will not be called.
85 * Therefore, the callback can assume that the name is well-formed
86 * (e.g. is null-terminated, doesn't have more than one '@' character,
87 * doesn't have invalid characters).
88 *
89 * zfs_ioc_poolcheck_t pool_check
90 * This specifies requirements on the pool state. If the pool does
91 * not meet them (is suspended or is readonly), the ioctl will fail
92 * and the callback will not be called. If any checks are specified
93 * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
94 * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
95 * POOL_CHECK_READONLY).
96 *
97 * zfs_ioc_key_t *nvl_keys
98 * The list of expected/allowable innvl input keys. This list is used
99 * to validate the nvlist input to the ioctl.
100 *
101 * boolean_t smush_outnvlist
102 * If smush_outnvlist is true, then the output is presumed to be a
103 * list of errors, and it will be "smushed" down to fit into the
104 * caller's buffer, by removing some entries and replacing them with a
105 * single "N_MORE_ERRORS" entry indicating how many were removed. See
106 * nvlist_smush() for details. If smush_outnvlist is false, and the
107 * outnvlist does not fit into the userland-provided buffer, then the
108 * ioctl will fail with ENOMEM.
109 *
110 * zfs_ioc_func_t *func
111 * The callback function that will perform the operation.
112 *
113 * The callback should return 0 on success, or an error number on
114 * failure. If the function fails, the userland ioctl will return -1,
115 * and errno will be set to the callback's return value. The callback
116 * will be called with the following arguments:
117 *
118 * const char *name
119 * The name of the pool or dataset to operate on, from
120 * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the
121 * expected type (pool, dataset, or none).
122 *
123 * nvlist_t *innvl
124 * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or
125 * NULL if no input nvlist was provided. Changes to this nvlist are
126 * ignored. If the input nvlist could not be deserialized, the
127 * ioctl will fail and the callback will not be called.
128 *
129 * nvlist_t *outnvl
130 * The output nvlist, initially empty. The callback can fill it in,
131 * and it will be returned to userland by serializing it into
132 * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization
133 * fails (e.g. because the caller didn't supply a large enough
134 * buffer), then the overall ioctl will fail. See the
135 * 'smush_outnvlist' argument above for additional behaviors.
136 *
137 * There are two typical uses of the output nvlist:
138 * - To return state, e.g. property values. In this case,
139 * smush_outnvlist should be false. If the buffer was not large
140 * enough, the caller will reallocate a larger buffer and try
141 * the ioctl again.
142 *
143 * - To return multiple errors from an ioctl which makes on-disk
144 * changes. In this case, smush_outnvlist should be true.
145 * Ioctls which make on-disk modifications should generally not
146 * use the outnvl if they succeed, because the caller can not
147 * distinguish between the operation failing, and
148 * deserialization failing.
149 *
150 * IOCTL Interface Errors
151 *
152 * The following ioctl input errors can be returned:
153 * ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
154 * ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
155 * ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
156 * ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
157 */
158
159 #include <sys/types.h>
160 #include <sys/param.h>
161 #include <sys/errno.h>
162 #include <sys/uio_impl.h>
163 #include <sys/file.h>
164 #include <sys/kmem.h>
165 #include <sys/cmn_err.h>
166 #include <sys/stat.h>
167 #include <sys/zfs_ioctl.h>
168 #include <sys/zfs_quota.h>
169 #include <sys/zfs_vfsops.h>
170 #include <sys/zfs_znode.h>
171 #include <sys/zap.h>
172 #include <sys/spa.h>
173 #include <sys/spa_impl.h>
174 #include <sys/vdev.h>
175 #include <sys/vdev_impl.h>
176 #include <sys/dmu.h>
177 #include <sys/dsl_dir.h>
178 #include <sys/dsl_dataset.h>
179 #include <sys/dsl_prop.h>
180 #include <sys/dsl_deleg.h>
181 #include <sys/dmu_objset.h>
182 #include <sys/dmu_impl.h>
183 #include <sys/dmu_redact.h>
184 #include <sys/dmu_tx.h>
185 #include <sys/sunddi.h>
186 #include <sys/policy.h>
187 #include <sys/zone.h>
188 #include <sys/nvpair.h>
189 #include <sys/pathname.h>
190 #include <sys/fs/zfs.h>
191 #include <sys/zfs_ctldir.h>
192 #include <sys/zfs_dir.h>
193 #include <sys/zfs_onexit.h>
194 #include <sys/zvol.h>
195 #include <sys/dsl_scan.h>
196 #include <sys/fm/util.h>
197 #include <sys/dsl_crypt.h>
198 #include <sys/rrwlock.h>
199 #include <sys/zfs_file.h>
200
201 #include <sys/dmu_recv.h>
202 #include <sys/dmu_send.h>
203 #include <sys/dmu_recv.h>
204 #include <sys/dsl_destroy.h>
205 #include <sys/dsl_bookmark.h>
206 #include <sys/dsl_userhold.h>
207 #include <sys/zfeature.h>
208 #include <sys/zcp.h>
209 #include <sys/zio_checksum.h>
210 #include <sys/vdev_removal.h>
211 #include <sys/vdev_impl.h>
212 #include <sys/vdev_initialize.h>
213 #include <sys/vdev_trim.h>
214
215 #include "zfs_namecheck.h"
216 #include "zfs_prop.h"
217 #include "zfs_deleg.h"
218 #include "zfs_comutil.h"
219
220 #include <sys/lua/lua.h>
221 #include <sys/lua/lauxlib.h>
222 #include <sys/zfs_ioctl_impl.h>
223
224 kmutex_t zfsdev_state_lock;
225 static zfsdev_state_t zfsdev_state_listhead;
226
227 /*
228 * Limit maximum nvlist size. We don't want users passing in insane values
229 * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
230 * Defaults to 0=auto which is handled by platform code.
231 */
232 uint64_t zfs_max_nvlist_src_size = 0;
233
234 /*
235 * When logging the output nvlist of an ioctl in the on-disk history, limit
236 * the logged size to this many bytes. This must be less than DMU_MAX_ACCESS.
237 * This applies primarily to zfs_ioc_channel_program().
238 */
239 static uint64_t zfs_history_output_max = 1024 * 1024;
240
241 uint_t zfs_fsyncer_key;
242 uint_t zfs_allow_log_key;
243
244 /* DATA_TYPE_ANY is used when zkey_type can vary. */
245 #define DATA_TYPE_ANY DATA_TYPE_UNKNOWN
246
247 typedef struct zfs_ioc_vec {
248 zfs_ioc_legacy_func_t *zvec_legacy_func;
249 zfs_ioc_func_t *zvec_func;
250 zfs_secpolicy_func_t *zvec_secpolicy;
251 zfs_ioc_namecheck_t zvec_namecheck;
252 boolean_t zvec_allow_log;
253 zfs_ioc_poolcheck_t zvec_pool_check;
254 boolean_t zvec_smush_outnvlist;
255 const char *zvec_name;
256 const zfs_ioc_key_t *zvec_nvl_keys;
257 size_t zvec_nvl_key_count;
258 } zfs_ioc_vec_t;
259
260 /* This array is indexed by zfs_userquota_prop_t */
261 static const char *userquota_perms[] = {
262 ZFS_DELEG_PERM_USERUSED,
263 ZFS_DELEG_PERM_USERQUOTA,
264 ZFS_DELEG_PERM_GROUPUSED,
265 ZFS_DELEG_PERM_GROUPQUOTA,
266 ZFS_DELEG_PERM_USEROBJUSED,
267 ZFS_DELEG_PERM_USEROBJQUOTA,
268 ZFS_DELEG_PERM_GROUPOBJUSED,
269 ZFS_DELEG_PERM_GROUPOBJQUOTA,
270 ZFS_DELEG_PERM_PROJECTUSED,
271 ZFS_DELEG_PERM_PROJECTQUOTA,
272 ZFS_DELEG_PERM_PROJECTOBJUSED,
273 ZFS_DELEG_PERM_PROJECTOBJQUOTA,
274 };
275
276 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
277 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
278 static int zfs_check_settable(const char *name, nvpair_t *property,
279 cred_t *cr);
280 static int zfs_check_clearable(const char *dataset, nvlist_t *props,
281 nvlist_t **errors);
282 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
283 boolean_t *);
284 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
285 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
286
287 static void
288 history_str_free(char *buf)
289 {
290 kmem_free(buf, HIS_MAX_RECORD_LEN);
291 }
292
293 static char *
294 history_str_get(zfs_cmd_t *zc)
295 {
296 char *buf;
297
298 if (zc->zc_history == 0)
299 return (NULL);
300
301 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
302 if (copyinstr((void *)(uintptr_t)zc->zc_history,
303 buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
304 history_str_free(buf);
305 return (NULL);
306 }
307
308 buf[HIS_MAX_RECORD_LEN -1] = '\0';
309
310 return (buf);
311 }
312
313 /*
314 * Return non-zero if the spa version is less than requested version.
315 */
316 static int
317 zfs_earlier_version(const char *name, int version)
318 {
319 spa_t *spa;
320
321 if (spa_open(name, &spa, FTAG) == 0) {
322 if (spa_version(spa) < version) {
323 spa_close(spa, FTAG);
324 return (1);
325 }
326 spa_close(spa, FTAG);
327 }
328 return (0);
329 }
330
331 /*
332 * Return TRUE if the ZPL version is less than requested version.
333 */
334 static boolean_t
335 zpl_earlier_version(const char *name, int version)
336 {
337 objset_t *os;
338 boolean_t rc = B_TRUE;
339
340 if (dmu_objset_hold(name, FTAG, &os) == 0) {
341 uint64_t zplversion;
342
343 if (dmu_objset_type(os) != DMU_OST_ZFS) {
344 dmu_objset_rele(os, FTAG);
345 return (B_TRUE);
346 }
347 /* XXX reading from non-owned objset */
348 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
349 rc = zplversion < version;
350 dmu_objset_rele(os, FTAG);
351 }
352 return (rc);
353 }
354
355 static void
356 zfs_log_history(zfs_cmd_t *zc)
357 {
358 spa_t *spa;
359 char *buf;
360
361 if ((buf = history_str_get(zc)) == NULL)
362 return;
363
364 if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
365 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
366 (void) spa_history_log(spa, buf);
367 spa_close(spa, FTAG);
368 }
369 history_str_free(buf);
370 }
371
372 /*
373 * Policy for top-level read operations (list pools). Requires no privileges,
374 * and can be used in the local zone, as there is no associated dataset.
375 */
376 static int
377 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
378 {
379 (void) zc, (void) innvl, (void) cr;
380 return (0);
381 }
382
383 /*
384 * Policy for dataset read operations (list children, get statistics). Requires
385 * no privileges, but must be visible in the local zone.
386 */
387 static int
388 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
389 {
390 (void) innvl, (void) cr;
391 if (INGLOBALZONE(curproc) ||
392 zone_dataset_visible(zc->zc_name, NULL))
393 return (0);
394
395 return (SET_ERROR(ENOENT));
396 }
397
398 static int
399 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
400 {
401 int writable = 1;
402
403 /*
404 * The dataset must be visible by this zone -- check this first
405 * so they don't see EPERM on something they shouldn't know about.
406 */
407 if (!INGLOBALZONE(curproc) &&
408 !zone_dataset_visible(dataset, &writable))
409 return (SET_ERROR(ENOENT));
410
411 if (INGLOBALZONE(curproc)) {
412 /*
413 * If the fs is zoned, only root can access it from the
414 * global zone.
415 */
416 if (secpolicy_zfs(cr) && zoned)
417 return (SET_ERROR(EPERM));
418 } else {
419 /*
420 * If we are in a local zone, the 'zoned' property must be set.
421 */
422 if (!zoned)
423 return (SET_ERROR(EPERM));
424
425 /* must be writable by this zone */
426 if (!writable)
427 return (SET_ERROR(EPERM));
428 }
429 return (0);
430 }
431
432 static int
433 zfs_dozonecheck(const char *dataset, cred_t *cr)
434 {
435 uint64_t zoned;
436
437 if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
438 &zoned, NULL))
439 return (SET_ERROR(ENOENT));
440
441 return (zfs_dozonecheck_impl(dataset, zoned, cr));
442 }
443
444 static int
445 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
446 {
447 uint64_t zoned;
448
449 if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
450 return (SET_ERROR(ENOENT));
451
452 return (zfs_dozonecheck_impl(dataset, zoned, cr));
453 }
454
455 static int
456 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
457 const char *perm, cred_t *cr)
458 {
459 int error;
460
461 error = zfs_dozonecheck_ds(name, ds, cr);
462 if (error == 0) {
463 error = secpolicy_zfs(cr);
464 if (error != 0)
465 error = dsl_deleg_access_impl(ds, perm, cr);
466 }
467 return (error);
468 }
469
470 static int
471 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
472 {
473 int error;
474 dsl_dataset_t *ds;
475 dsl_pool_t *dp;
476
477 /*
478 * First do a quick check for root in the global zone, which
479 * is allowed to do all write_perms. This ensures that zfs_ioc_*
480 * will get to handle nonexistent datasets.
481 */
482 if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
483 return (0);
484
485 error = dsl_pool_hold(name, FTAG, &dp);
486 if (error != 0)
487 return (error);
488
489 error = dsl_dataset_hold(dp, name, FTAG, &ds);
490 if (error != 0) {
491 dsl_pool_rele(dp, FTAG);
492 return (error);
493 }
494
495 error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
496
497 dsl_dataset_rele(ds, FTAG);
498 dsl_pool_rele(dp, FTAG);
499 return (error);
500 }
501
502 /*
503 * Policy for setting the security label property.
504 *
505 * Returns 0 for success, non-zero for access and other errors.
506 */
507 static int
508 zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
509 {
510 #ifdef HAVE_MLSLABEL
511 char ds_hexsl[MAXNAMELEN];
512 bslabel_t ds_sl, new_sl;
513 boolean_t new_default = FALSE;
514 uint64_t zoned;
515 int needed_priv = -1;
516 int error;
517
518 /* First get the existing dataset label. */
519 error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
520 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
521 if (error != 0)
522 return (SET_ERROR(EPERM));
523
524 if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
525 new_default = TRUE;
526
527 /* The label must be translatable */
528 if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
529 return (SET_ERROR(EINVAL));
530
531 /*
532 * In a non-global zone, disallow attempts to set a label that
533 * doesn't match that of the zone; otherwise no other checks
534 * are needed.
535 */
536 if (!INGLOBALZONE(curproc)) {
537 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
538 return (SET_ERROR(EPERM));
539 return (0);
540 }
541
542 /*
543 * For global-zone datasets (i.e., those whose zoned property is
544 * "off", verify that the specified new label is valid for the
545 * global zone.
546 */
547 if (dsl_prop_get_integer(name,
548 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
549 return (SET_ERROR(EPERM));
550 if (!zoned) {
551 if (zfs_check_global_label(name, strval) != 0)
552 return (SET_ERROR(EPERM));
553 }
554
555 /*
556 * If the existing dataset label is nondefault, check if the
557 * dataset is mounted (label cannot be changed while mounted).
558 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
559 * mounted (or isn't a dataset, doesn't exist, ...).
560 */
561 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
562 objset_t *os;
563 static const char *setsl_tag = "setsl_tag";
564
565 /*
566 * Try to own the dataset; abort if there is any error,
567 * (e.g., already mounted, in use, or other error).
568 */
569 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
570 setsl_tag, &os);
571 if (error != 0)
572 return (SET_ERROR(EPERM));
573
574 dmu_objset_disown(os, B_TRUE, setsl_tag);
575
576 if (new_default) {
577 needed_priv = PRIV_FILE_DOWNGRADE_SL;
578 goto out_check;
579 }
580
581 if (hexstr_to_label(strval, &new_sl) != 0)
582 return (SET_ERROR(EPERM));
583
584 if (blstrictdom(&ds_sl, &new_sl))
585 needed_priv = PRIV_FILE_DOWNGRADE_SL;
586 else if (blstrictdom(&new_sl, &ds_sl))
587 needed_priv = PRIV_FILE_UPGRADE_SL;
588 } else {
589 /* dataset currently has a default label */
590 if (!new_default)
591 needed_priv = PRIV_FILE_UPGRADE_SL;
592 }
593
594 out_check:
595 if (needed_priv != -1)
596 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
597 return (0);
598 #else
599 return (SET_ERROR(ENOTSUP));
600 #endif /* HAVE_MLSLABEL */
601 }
602
603 static int
604 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
605 cred_t *cr)
606 {
607 const char *strval;
608
609 /*
610 * Check permissions for special properties.
611 */
612 switch (prop) {
613 default:
614 break;
615 case ZFS_PROP_ZONED:
616 /*
617 * Disallow setting of 'zoned' from within a local zone.
618 */
619 if (!INGLOBALZONE(curproc))
620 return (SET_ERROR(EPERM));
621 break;
622
623 case ZFS_PROP_QUOTA:
624 case ZFS_PROP_FILESYSTEM_LIMIT:
625 case ZFS_PROP_SNAPSHOT_LIMIT:
626 if (!INGLOBALZONE(curproc)) {
627 uint64_t zoned;
628 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
629 /*
630 * Unprivileged users are allowed to modify the
631 * limit on things *under* (ie. contained by)
632 * the thing they own.
633 */
634 if (dsl_prop_get_integer(dsname,
635 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
636 return (SET_ERROR(EPERM));
637 if (!zoned || strlen(dsname) <= strlen(setpoint))
638 return (SET_ERROR(EPERM));
639 }
640 break;
641
642 case ZFS_PROP_MLSLABEL:
643 if (!is_system_labeled())
644 return (SET_ERROR(EPERM));
645
646 if (nvpair_value_string(propval, &strval) == 0) {
647 int err;
648
649 err = zfs_set_slabel_policy(dsname, strval, CRED());
650 if (err != 0)
651 return (err);
652 }
653 break;
654 }
655
656 return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
657 }
658
659 static int
660 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
661 {
662 /*
663 * permission to set permissions will be evaluated later in
664 * dsl_deleg_can_allow()
665 */
666 (void) innvl;
667 return (zfs_dozonecheck(zc->zc_name, cr));
668 }
669
670 static int
671 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
672 {
673 (void) innvl;
674 return (zfs_secpolicy_write_perms(zc->zc_name,
675 ZFS_DELEG_PERM_ROLLBACK, cr));
676 }
677
678 static int
679 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
680 {
681 (void) innvl;
682 dsl_pool_t *dp;
683 dsl_dataset_t *ds;
684 const char *cp;
685 int error;
686
687 /*
688 * Generate the current snapshot name from the given objsetid, then
689 * use that name for the secpolicy/zone checks.
690 */
691 cp = strchr(zc->zc_name, '@');
692 if (cp == NULL)
693 return (SET_ERROR(EINVAL));
694 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
695 if (error != 0)
696 return (error);
697
698 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
699 if (error != 0) {
700 dsl_pool_rele(dp, FTAG);
701 return (error);
702 }
703
704 dsl_dataset_name(ds, zc->zc_name);
705
706 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
707 ZFS_DELEG_PERM_SEND, cr);
708 dsl_dataset_rele(ds, FTAG);
709 dsl_pool_rele(dp, FTAG);
710
711 return (error);
712 }
713
714 static int
715 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
716 {
717 (void) innvl;
718 return (zfs_secpolicy_write_perms(zc->zc_name,
719 ZFS_DELEG_PERM_SEND, cr));
720 }
721
722 static int
723 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
724 {
725 (void) zc, (void) innvl, (void) cr;
726 return (SET_ERROR(ENOTSUP));
727 }
728
729 static int
730 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
731 {
732 (void) zc, (void) innvl, (void) cr;
733 return (SET_ERROR(ENOTSUP));
734 }
735
/*
 * Compute the parent of 'datasetname' into 'parent' (a buffer of
 * 'parentsize' bytes) by cutting the name at its last '@' or, when there
 * is no '@', at its last '/'.
 *
 * Returns 0 on success.  Returns ENOENT when the name contains neither
 * separator; 'parent' then holds an unmodified copy of the name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *sep;

	(void) strlcpy(parent, datasetname, parentsize);

	/* A snapshot separator takes precedence over a path separator. */
	sep = strrchr(parent, '@');
	if (sep == NULL)
		sep = strrchr(parent, '/');
	if (sep == NULL)
		return (SET_ERROR(ENOENT));

	*sep = '\0';
	return (0);
}
757
758 int
759 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
760 {
761 int error;
762
763 if ((error = zfs_secpolicy_write_perms(name,
764 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
765 return (error);
766
767 return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
768 }
769
770 static int
771 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
772 {
773 (void) innvl;
774 return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
775 }
776
777 /*
778 * Destroying snapshots with delegated permissions requires
779 * descendant mount and destroy permissions.
780 */
781 static int
782 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
783 {
784 (void) zc;
785 nvlist_t *snaps;
786 nvpair_t *pair, *nextpair;
787 int error = 0;
788
789 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
790
791 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
792 pair = nextpair) {
793 nextpair = nvlist_next_nvpair(snaps, pair);
794 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
795 if (error == ENOENT) {
796 /*
797 * Ignore any snapshots that don't exist (we consider
798 * them "already destroyed"). Remove the name from the
799 * nvl here in case the snapshot is created between
800 * now and when we try to destroy it (in which case
801 * we don't want to destroy it since we haven't
802 * checked for permission).
803 */
804 fnvlist_remove_nvpair(snaps, pair);
805 error = 0;
806 }
807 if (error != 0)
808 break;
809 }
810
811 return (error);
812 }
813
814 int
815 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
816 {
817 char parentname[ZFS_MAX_DATASET_NAME_LEN];
818 int error;
819
820 if ((error = zfs_secpolicy_write_perms(from,
821 ZFS_DELEG_PERM_RENAME, cr)) != 0)
822 return (error);
823
824 if ((error = zfs_secpolicy_write_perms(from,
825 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
826 return (error);
827
828 if ((error = zfs_get_parent(to, parentname,
829 sizeof (parentname))) != 0)
830 return (error);
831
832 if ((error = zfs_secpolicy_write_perms(parentname,
833 ZFS_DELEG_PERM_CREATE, cr)) != 0)
834 return (error);
835
836 if ((error = zfs_secpolicy_write_perms(parentname,
837 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
838 return (error);
839
840 return (error);
841 }
842
843 static int
844 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
845 {
846 (void) innvl;
847 return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
848 }
849
850 static int
851 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
852 {
853 (void) innvl;
854 dsl_pool_t *dp;
855 dsl_dataset_t *clone;
856 int error;
857
858 error = zfs_secpolicy_write_perms(zc->zc_name,
859 ZFS_DELEG_PERM_PROMOTE, cr);
860 if (error != 0)
861 return (error);
862
863 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
864 if (error != 0)
865 return (error);
866
867 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
868
869 if (error == 0) {
870 char parentname[ZFS_MAX_DATASET_NAME_LEN];
871 dsl_dataset_t *origin = NULL;
872 dsl_dir_t *dd;
873 dd = clone->ds_dir;
874
875 error = dsl_dataset_hold_obj(dd->dd_pool,
876 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
877 if (error != 0) {
878 dsl_dataset_rele(clone, FTAG);
879 dsl_pool_rele(dp, FTAG);
880 return (error);
881 }
882
883 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
884 ZFS_DELEG_PERM_MOUNT, cr);
885
886 dsl_dataset_name(origin, parentname);
887 if (error == 0) {
888 error = zfs_secpolicy_write_perms_ds(parentname, origin,
889 ZFS_DELEG_PERM_PROMOTE, cr);
890 }
891 dsl_dataset_rele(clone, FTAG);
892 dsl_dataset_rele(origin, FTAG);
893 }
894 dsl_pool_rele(dp, FTAG);
895 return (error);
896 }
897
898 static int
899 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
900 {
901 (void) innvl;
902 int error;
903
904 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
905 ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
906 return (error);
907
908 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
909 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
910 return (error);
911
912 return (zfs_secpolicy_write_perms(zc->zc_name,
913 ZFS_DELEG_PERM_CREATE, cr));
914 }
915
916 int
917 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
918 {
919 return (zfs_secpolicy_write_perms(name,
920 ZFS_DELEG_PERM_SNAPSHOT, cr));
921 }
922
923 /*
924 * Check for permission to create each snapshot in the nvlist.
925 */
926 static int
927 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
928 {
929 (void) zc;
930 nvlist_t *snaps;
931 int error = 0;
932 nvpair_t *pair;
933
934 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
935
936 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
937 pair = nvlist_next_nvpair(snaps, pair)) {
938 char *name = (char *)nvpair_name(pair);
939 char *atp = strchr(name, '@');
940
941 if (atp == NULL) {
942 error = SET_ERROR(EINVAL);
943 break;
944 }
945 *atp = '\0';
946 error = zfs_secpolicy_snapshot_perms(name, cr);
947 *atp = '@';
948 if (error != 0)
949 break;
950 }
951 return (error);
952 }
953
954 /*
955 * Check for permission to create each bookmark in the nvlist.
956 */
957 static int
958 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
959 {
960 (void) zc;
961 int error = 0;
962
963 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
964 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
965 char *name = (char *)nvpair_name(pair);
966 char *hashp = strchr(name, '#');
967
968 if (hashp == NULL) {
969 error = SET_ERROR(EINVAL);
970 break;
971 }
972 *hashp = '\0';
973 error = zfs_secpolicy_write_perms(name,
974 ZFS_DELEG_PERM_BOOKMARK, cr);
975 *hashp = '#';
976 if (error != 0)
977 break;
978 }
979 return (error);
980 }
981
982 static int
983 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
984 {
985 (void) zc;
986 nvpair_t *pair, *nextpair;
987 int error = 0;
988
989 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
990 pair = nextpair) {
991 char *name = (char *)nvpair_name(pair);
992 char *hashp = strchr(name, '#');
993 nextpair = nvlist_next_nvpair(innvl, pair);
994
995 if (hashp == NULL) {
996 error = SET_ERROR(EINVAL);
997 break;
998 }
999
1000 *hashp = '\0';
1001 error = zfs_secpolicy_write_perms(name,
1002 ZFS_DELEG_PERM_DESTROY, cr);
1003 *hashp = '#';
1004 if (error == ENOENT) {
1005 /*
1006 * Ignore any filesystems that don't exist (we consider
1007 * their bookmarks "already destroyed"). Remove
1008 * the name from the nvl here in case the filesystem
1009 * is created between now and when we try to destroy
1010 * the bookmark (in which case we don't want to
1011 * destroy it since we haven't checked for permission).
1012 */
1013 fnvlist_remove_nvpair(innvl, pair);
1014 error = 0;
1015 }
1016 if (error != 0)
1017 break;
1018 }
1019
1020 return (error);
1021 }
1022
1023 static int
1024 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1025 {
1026 (void) zc, (void) innvl, (void) cr;
1027 /*
1028 * Even root must have a proper TSD so that we know what pool
1029 * to log to.
1030 */
1031 if (tsd_get(zfs_allow_log_key) == NULL)
1032 return (SET_ERROR(EPERM));
1033 return (0);
1034 }
1035
1036 static int
1037 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1038 {
1039 char parentname[ZFS_MAX_DATASET_NAME_LEN];
1040 int error;
1041 const char *origin;
1042
1043 if ((error = zfs_get_parent(zc->zc_name, parentname,
1044 sizeof (parentname))) != 0)
1045 return (error);
1046
1047 if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1048 (error = zfs_secpolicy_write_perms(origin,
1049 ZFS_DELEG_PERM_CLONE, cr)) != 0)
1050 return (error);
1051
1052 if ((error = zfs_secpolicy_write_perms(parentname,
1053 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1054 return (error);
1055
1056 return (zfs_secpolicy_write_perms(parentname,
1057 ZFS_DELEG_PERM_MOUNT, cr));
1058 }
1059
1060 /*
1061 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
1062 * SYS_CONFIG privilege, which is not available in a local zone.
1063 */
1064 int
1065 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1066 {
1067 (void) zc, (void) innvl;
1068
1069 if (secpolicy_sys_config(cr, B_FALSE) != 0)
1070 return (SET_ERROR(EPERM));
1071
1072 return (0);
1073 }
1074
1075 /*
1076 * Policy for object to name lookups.
1077 */
1078 static int
1079 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1080 {
1081 (void) innvl;
1082 int error;
1083
1084 if (secpolicy_sys_config(cr, B_FALSE) == 0)
1085 return (0);
1086
1087 error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1088 return (error);
1089 }
1090
1091 /*
1092 * Policy for fault injection. Requires all privileges.
1093 */
1094 static int
1095 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1096 {
1097 (void) zc, (void) innvl;
1098 return (secpolicy_zinject(cr));
1099 }
1100
1101 static int
1102 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1103 {
1104 (void) innvl;
1105 zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1106
1107 if (prop == ZPROP_USERPROP) {
1108 if (!zfs_prop_user(zc->zc_value))
1109 return (SET_ERROR(EINVAL));
1110 return (zfs_secpolicy_write_perms(zc->zc_name,
1111 ZFS_DELEG_PERM_USERPROP, cr));
1112 } else {
1113 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1114 NULL, cr));
1115 }
1116 }
1117
1118 static int
1119 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1120 {
1121 int err = zfs_secpolicy_read(zc, innvl, cr);
1122 if (err)
1123 return (err);
1124
1125 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1126 return (SET_ERROR(EINVAL));
1127
1128 if (zc->zc_value[0] == 0) {
1129 /*
1130 * They are asking about a posix uid/gid. If it's
1131 * themself, allow it.
1132 */
1133 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1134 zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1135 zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1136 zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1137 if (zc->zc_guid == crgetuid(cr))
1138 return (0);
1139 } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1140 zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1141 zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1142 zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1143 if (groupmember(zc->zc_guid, cr))
1144 return (0);
1145 }
1146 /* else is for project quota/used */
1147 }
1148
1149 return (zfs_secpolicy_write_perms(zc->zc_name,
1150 userquota_perms[zc->zc_objset_type], cr));
1151 }
1152
1153 static int
1154 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1155 {
1156 int err = zfs_secpolicy_read(zc, innvl, cr);
1157 if (err)
1158 return (err);
1159
1160 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1161 return (SET_ERROR(EINVAL));
1162
1163 return (zfs_secpolicy_write_perms(zc->zc_name,
1164 userquota_perms[zc->zc_objset_type], cr));
1165 }
1166
1167 static int
1168 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1169 {
1170 (void) innvl;
1171 return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1172 NULL, cr));
1173 }
1174
1175 static int
1176 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1177 {
1178 (void) zc;
1179 nvpair_t *pair;
1180 nvlist_t *holds;
1181 int error;
1182
1183 holds = fnvlist_lookup_nvlist(innvl, "holds");
1184
1185 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1186 pair = nvlist_next_nvpair(holds, pair)) {
1187 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1188 error = dmu_fsname(nvpair_name(pair), fsname);
1189 if (error != 0)
1190 return (error);
1191 error = zfs_secpolicy_write_perms(fsname,
1192 ZFS_DELEG_PERM_HOLD, cr);
1193 if (error != 0)
1194 return (error);
1195 }
1196 return (0);
1197 }
1198
1199 static int
1200 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1201 {
1202 (void) zc;
1203 nvpair_t *pair;
1204 int error;
1205
1206 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1207 pair = nvlist_next_nvpair(innvl, pair)) {
1208 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1209 error = dmu_fsname(nvpair_name(pair), fsname);
1210 if (error != 0)
1211 return (error);
1212 error = zfs_secpolicy_write_perms(fsname,
1213 ZFS_DELEG_PERM_RELEASE, cr);
1214 if (error != 0)
1215 return (error);
1216 }
1217 return (0);
1218 }
1219
1220 /*
1221 * Policy for allowing temporary snapshots to be taken or released
1222 */
1223 static int
1224 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1225 {
1226 /*
1227 * A temporary snapshot is the same as a snapshot,
1228 * hold, destroy and release all rolled into one.
1229 * Delegated diff alone is sufficient that we allow this.
1230 */
1231 int error;
1232
1233 if (zfs_secpolicy_write_perms(zc->zc_name,
1234 ZFS_DELEG_PERM_DIFF, cr) == 0)
1235 return (0);
1236
1237 error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1238
1239 if (innvl != NULL) {
1240 if (error == 0)
1241 error = zfs_secpolicy_hold(zc, innvl, cr);
1242 if (error == 0)
1243 error = zfs_secpolicy_release(zc, innvl, cr);
1244 if (error == 0)
1245 error = zfs_secpolicy_destroy(zc, innvl, cr);
1246 }
1247 return (error);
1248 }
1249
1250 static int
1251 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1252 {
1253 return (zfs_secpolicy_write_perms(zc->zc_name,
1254 ZFS_DELEG_PERM_LOAD_KEY, cr));
1255 }
1256
1257 static int
1258 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1259 {
1260 return (zfs_secpolicy_write_perms(zc->zc_name,
1261 ZFS_DELEG_PERM_CHANGE_KEY, cr));
1262 }
1263
1264 /*
1265 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1266 */
1267 static int
1268 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1269 {
1270 char *packed;
1271 int error;
1272 nvlist_t *list = NULL;
1273
1274 /*
1275 * Read in and unpack the user-supplied nvlist.
1276 */
1277 if (size == 0)
1278 return (SET_ERROR(EINVAL));
1279
1280 packed = vmem_alloc(size, KM_SLEEP);
1281
1282 if (ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag) != 0) {
1283 vmem_free(packed, size);
1284 return (SET_ERROR(EFAULT));
1285 }
1286
1287 if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1288 vmem_free(packed, size);
1289 return (error);
1290 }
1291
1292 vmem_free(packed, size);
1293
1294 *nvp = list;
1295 return (0);
1296 }
1297
1298 /*
1299 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1300 * Entries will be removed from the end of the nvlist, and one int32 entry
1301 * named "N_MORE_ERRORS" will be added indicating how many entries were
1302 * removed.
1303 */
1304 static int
1305 nvlist_smush(nvlist_t *errors, size_t max)
1306 {
1307 size_t size;
1308
1309 size = fnvlist_size(errors);
1310
1311 if (size > max) {
1312 nvpair_t *more_errors;
1313 int n = 0;
1314
1315 if (max < 1024)
1316 return (SET_ERROR(ENOMEM));
1317
1318 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1319 more_errors = nvlist_prev_nvpair(errors, NULL);
1320
1321 do {
1322 nvpair_t *pair = nvlist_prev_nvpair(errors,
1323 more_errors);
1324 fnvlist_remove_nvpair(errors, pair);
1325 n++;
1326 size = fnvlist_size(errors);
1327 } while (size > max);
1328
1329 fnvlist_remove_nvpair(errors, more_errors);
1330 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1331 ASSERT3U(fnvlist_size(errors), <=, max);
1332 }
1333
1334 return (0);
1335 }
1336
1337 static int
1338 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1339 {
1340 char *packed = NULL;
1341 int error = 0;
1342 size_t size;
1343
1344 size = fnvlist_size(nvl);
1345
1346 if (size > zc->zc_nvlist_dst_size) {
1347 error = SET_ERROR(ENOMEM);
1348 } else {
1349 packed = fnvlist_pack(nvl, &size);
1350 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1351 size, zc->zc_iflags) != 0)
1352 error = SET_ERROR(EFAULT);
1353 fnvlist_pack_free(packed, size);
1354 }
1355
1356 zc->zc_nvlist_dst_size = size;
1357 zc->zc_nvlist_dst_filled = B_TRUE;
1358 return (error);
1359 }
1360
1361 int
1362 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1363 {
1364 int error = 0;
1365 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1366 return (SET_ERROR(EINVAL));
1367 }
1368
1369 mutex_enter(&os->os_user_ptr_lock);
1370 *zfvp = dmu_objset_get_user(os);
1371 /* bump s_active only when non-zero to prevent umount race */
1372 error = zfs_vfs_ref(zfvp);
1373 mutex_exit(&os->os_user_ptr_lock);
1374 return (error);
1375 }
1376
1377 int
1378 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1379 {
1380 objset_t *os;
1381 int error;
1382
1383 error = dmu_objset_hold(dsname, FTAG, &os);
1384 if (error != 0)
1385 return (error);
1386
1387 error = getzfsvfs_impl(os, zfvp);
1388 dmu_objset_rele(os, FTAG);
1389 return (error);
1390 }
1391
1392 /*
1393 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1394 * case its z_sb will be NULL, and it will be opened as the owner.
1395 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1396 * which prevents all inode ops from running.
1397 */
1398 static int
1399 zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp,
1400 boolean_t writer)
1401 {
1402 int error = 0;
1403
1404 if (getzfsvfs(name, zfvp) != 0)
1405 error = zfsvfs_create(name, B_FALSE, zfvp);
1406 if (error == 0) {
1407 if (writer)
1408 ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
1409 else
1410 ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
1411 if ((*zfvp)->z_unmounted) {
1412 /*
1413 * XXX we could probably try again, since the unmounting
1414 * thread should be just about to disassociate the
1415 * objset from the zfsvfs.
1416 */
1417 ZFS_TEARDOWN_EXIT(*zfvp, tag);
1418 return (SET_ERROR(EBUSY));
1419 }
1420 }
1421 return (error);
1422 }
1423
1424 static void
1425 zfsvfs_rele(zfsvfs_t *zfsvfs, const void *tag)
1426 {
1427 ZFS_TEARDOWN_EXIT(zfsvfs, tag);
1428
1429 if (zfs_vfs_held(zfsvfs)) {
1430 zfs_vfs_rele(zfsvfs);
1431 } else {
1432 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1433 zfsvfs_free(zfsvfs);
1434 }
1435 }
1436
1437 static int
1438 zfs_ioc_pool_create(zfs_cmd_t *zc)
1439 {
1440 int error;
1441 nvlist_t *config, *props = NULL;
1442 nvlist_t *rootprops = NULL;
1443 nvlist_t *zplprops = NULL;
1444 dsl_crypto_params_t *dcp = NULL;
1445 const char *spa_name = zc->zc_name;
1446 boolean_t unload_wkey = B_TRUE;
1447
1448 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1449 zc->zc_iflags, &config)))
1450 return (error);
1451
1452 if (zc->zc_nvlist_src_size != 0 && (error =
1453 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1454 zc->zc_iflags, &props))) {
1455 nvlist_free(config);
1456 return (error);
1457 }
1458
1459 if (props) {
1460 nvlist_t *nvl = NULL;
1461 nvlist_t *hidden_args = NULL;
1462 uint64_t version = SPA_VERSION;
1463 const char *tname;
1464
1465 (void) nvlist_lookup_uint64(props,
1466 zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1467 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1468 error = SET_ERROR(EINVAL);
1469 goto pool_props_bad;
1470 }
1471 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1472 if (nvl) {
1473 error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1474 if (error != 0)
1475 goto pool_props_bad;
1476 (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1477 }
1478
1479 (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1480 &hidden_args);
1481 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1482 rootprops, hidden_args, &dcp);
1483 if (error != 0)
1484 goto pool_props_bad;
1485 (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1486
1487 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1488 error = zfs_fill_zplprops_root(version, rootprops,
1489 zplprops, NULL);
1490 if (error != 0)
1491 goto pool_props_bad;
1492
1493 if (nvlist_lookup_string(props,
1494 zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1495 spa_name = tname;
1496 }
1497
1498 error = spa_create(zc->zc_name, config, props, zplprops, dcp);
1499
1500 /*
1501 * Set the remaining root properties
1502 */
1503 if (!error && (error = zfs_set_prop_nvlist(spa_name,
1504 ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
1505 (void) spa_destroy(spa_name);
1506 unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
1507 }
1508
1509 pool_props_bad:
1510 nvlist_free(rootprops);
1511 nvlist_free(zplprops);
1512 nvlist_free(config);
1513 nvlist_free(props);
1514 dsl_crypto_params_free(dcp, unload_wkey && !!error);
1515
1516 return (error);
1517 }
1518
1519 static int
1520 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1521 {
1522 int error;
1523 zfs_log_history(zc);
1524 error = spa_destroy(zc->zc_name);
1525
1526 return (error);
1527 }
1528
1529 static int
1530 zfs_ioc_pool_import(zfs_cmd_t *zc)
1531 {
1532 nvlist_t *config, *props = NULL;
1533 uint64_t guid;
1534 int error;
1535
1536 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1537 zc->zc_iflags, &config)) != 0)
1538 return (error);
1539
1540 if (zc->zc_nvlist_src_size != 0 && (error =
1541 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1542 zc->zc_iflags, &props))) {
1543 nvlist_free(config);
1544 return (error);
1545 }
1546
1547 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1548 guid != zc->zc_guid)
1549 error = SET_ERROR(EINVAL);
1550 else
1551 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1552
1553 if (zc->zc_nvlist_dst != 0) {
1554 int err;
1555
1556 if ((err = put_nvlist(zc, config)) != 0)
1557 error = err;
1558 }
1559
1560 nvlist_free(config);
1561 nvlist_free(props);
1562
1563 return (error);
1564 }
1565
1566 static int
1567 zfs_ioc_pool_export(zfs_cmd_t *zc)
1568 {
1569 int error;
1570 boolean_t force = (boolean_t)zc->zc_cookie;
1571 boolean_t hardforce = (boolean_t)zc->zc_guid;
1572
1573 zfs_log_history(zc);
1574 error = spa_export(zc->zc_name, NULL, force, hardforce);
1575
1576 return (error);
1577 }
1578
1579 static int
1580 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1581 {
1582 nvlist_t *configs;
1583 int error;
1584
1585 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1586 return (SET_ERROR(EEXIST));
1587
1588 error = put_nvlist(zc, configs);
1589
1590 nvlist_free(configs);
1591
1592 return (error);
1593 }
1594
1595 /*
1596 * inputs:
1597 * zc_name name of the pool
1598 *
1599 * outputs:
1600 * zc_cookie real errno
1601 * zc_nvlist_dst config nvlist
1602 * zc_nvlist_dst_size size of config nvlist
1603 */
1604 static int
1605 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1606 {
1607 nvlist_t *config;
1608 int error;
1609 int ret = 0;
1610
1611 error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1612 sizeof (zc->zc_value));
1613
1614 if (config != NULL) {
1615 ret = put_nvlist(zc, config);
1616 nvlist_free(config);
1617
1618 /*
1619 * The config may be present even if 'error' is non-zero.
1620 * In this case we return success, and preserve the real errno
1621 * in 'zc_cookie'.
1622 */
1623 zc->zc_cookie = error;
1624 } else {
1625 ret = error;
1626 }
1627
1628 return (ret);
1629 }
1630
1631 /*
1632 * Try to import the given pool, returning pool stats as appropriate so that
1633 * user land knows which devices are available and overall pool health.
1634 */
1635 static int
1636 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1637 {
1638 nvlist_t *tryconfig, *config = NULL;
1639 int error;
1640
1641 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1642 zc->zc_iflags, &tryconfig)) != 0)
1643 return (error);
1644
1645 config = spa_tryimport(tryconfig);
1646
1647 nvlist_free(tryconfig);
1648
1649 if (config == NULL)
1650 return (SET_ERROR(EINVAL));
1651
1652 error = put_nvlist(zc, config);
1653 nvlist_free(config);
1654
1655 return (error);
1656 }
1657
1658 /*
1659 * inputs:
1660 * zc_name name of the pool
1661 * zc_cookie scan func (pool_scan_func_t)
1662 * zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
1663 */
1664 static int
1665 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1666 {
1667 spa_t *spa;
1668 int error;
1669
1670 if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1671 return (SET_ERROR(EINVAL));
1672
1673 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1674 return (error);
1675
1676 if (zc->zc_flags == POOL_SCRUB_PAUSE)
1677 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1678 else if (zc->zc_cookie == POOL_SCAN_NONE)
1679 error = spa_scan_stop(spa);
1680 else
1681 error = spa_scan(spa, zc->zc_cookie);
1682
1683 spa_close(spa, FTAG);
1684
1685 return (error);
1686 }
1687
1688 /*
1689 * inputs:
1690 * poolname name of the pool
1691 * scan_type scan func (pool_scan_func_t)
1692 * scan_command scrub pause/resume flag (pool_scrub_cmd_t)
1693 */
1694 static const zfs_ioc_key_t zfs_keys_pool_scrub[] = {
1695 {"scan_type", DATA_TYPE_UINT64, 0},
1696 {"scan_command", DATA_TYPE_UINT64, 0},
1697 };
1698
1699 static int
1700 zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
1701 {
1702 spa_t *spa;
1703 int error;
1704 uint64_t scan_type, scan_cmd;
1705
1706 if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0)
1707 return (SET_ERROR(EINVAL));
1708 if (nvlist_lookup_uint64(innvl, "scan_command", &scan_cmd) != 0)
1709 return (SET_ERROR(EINVAL));
1710
1711 if (scan_cmd >= POOL_SCRUB_FLAGS_END)
1712 return (SET_ERROR(EINVAL));
1713
1714 if ((error = spa_open(poolname, &spa, FTAG)) != 0)
1715 return (error);
1716
1717 if (scan_cmd == POOL_SCRUB_PAUSE) {
1718 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1719 } else if (scan_type == POOL_SCAN_NONE) {
1720 error = spa_scan_stop(spa);
1721 } else {
1722 error = spa_scan(spa, scan_type);
1723 }
1724
1725 spa_close(spa, FTAG);
1726 return (error);
1727 }
1728
1729 static int
1730 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1731 {
1732 spa_t *spa;
1733 int error;
1734
1735 error = spa_open(zc->zc_name, &spa, FTAG);
1736 if (error == 0) {
1737 spa_freeze(spa);
1738 spa_close(spa, FTAG);
1739 }
1740 return (error);
1741 }
1742
1743 static int
1744 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1745 {
1746 spa_t *spa;
1747 int error;
1748
1749 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1750 return (error);
1751
1752 if (zc->zc_cookie < spa_version(spa) ||
1753 !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1754 spa_close(spa, FTAG);
1755 return (SET_ERROR(EINVAL));
1756 }
1757
1758 spa_upgrade(spa, zc->zc_cookie);
1759 spa_close(spa, FTAG);
1760
1761 return (error);
1762 }
1763
1764 static int
1765 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1766 {
1767 spa_t *spa;
1768 char *hist_buf;
1769 uint64_t size;
1770 int error;
1771
1772 if ((size = zc->zc_history_len) == 0)
1773 return (SET_ERROR(EINVAL));
1774
1775 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1776 return (error);
1777
1778 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1779 spa_close(spa, FTAG);
1780 return (SET_ERROR(ENOTSUP));
1781 }
1782
1783 hist_buf = vmem_alloc(size, KM_SLEEP);
1784 if ((error = spa_history_get(spa, &zc->zc_history_offset,
1785 &zc->zc_history_len, hist_buf)) == 0) {
1786 error = ddi_copyout(hist_buf,
1787 (void *)(uintptr_t)zc->zc_history,
1788 zc->zc_history_len, zc->zc_iflags);
1789 }
1790
1791 spa_close(spa, FTAG);
1792 vmem_free(hist_buf, size);
1793 return (error);
1794 }
1795
1796 static int
1797 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1798 {
1799 spa_t *spa;
1800 int error;
1801
1802 error = spa_open(zc->zc_name, &spa, FTAG);
1803 if (error == 0) {
1804 error = spa_change_guid(spa);
1805 spa_close(spa, FTAG);
1806 }
1807 return (error);
1808 }
1809
1810 static int
1811 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1812 {
1813 return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1814 }
1815
1816 /*
1817 * inputs:
1818 * zc_name name of filesystem
1819 * zc_obj object to find
1820 *
1821 * outputs:
1822 * zc_value name of object
1823 */
1824 static int
1825 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1826 {
1827 objset_t *os;
1828 int error;
1829
1830 /* XXX reading from objset not owned */
1831 if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1832 FTAG, &os)) != 0)
1833 return (error);
1834 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1835 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1836 return (SET_ERROR(EINVAL));
1837 }
1838 error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1839 sizeof (zc->zc_value));
1840 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1841
1842 return (error);
1843 }
1844
1845 /*
1846 * inputs:
1847 * zc_name name of filesystem
1848 * zc_obj object to find
1849 *
1850 * outputs:
1851 * zc_stat stats on object
1852 * zc_value path to object
1853 */
1854 static int
1855 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1856 {
1857 objset_t *os;
1858 int error;
1859
1860 /* XXX reading from objset not owned */
1861 if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1862 FTAG, &os)) != 0)
1863 return (error);
1864 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1865 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1866 return (SET_ERROR(EINVAL));
1867 }
1868 error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1869 sizeof (zc->zc_value));
1870 dmu_objset_rele_flags(os, B_TRUE, FTAG);
1871
1872 return (error);
1873 }
1874
1875 static int
1876 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1877 {
1878 spa_t *spa;
1879 int error;
1880 nvlist_t *config;
1881
1882 error = spa_open(zc->zc_name, &spa, FTAG);
1883 if (error != 0)
1884 return (error);
1885
1886 error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1887 zc->zc_iflags, &config);
1888 if (error == 0) {
1889 error = spa_vdev_add(spa, config);
1890 nvlist_free(config);
1891 }
1892 spa_close(spa, FTAG);
1893 return (error);
1894 }
1895
1896 /*
1897 * inputs:
1898 * zc_name name of the pool
1899 * zc_guid guid of vdev to remove
1900 * zc_cookie cancel removal
1901 */
1902 static int
1903 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1904 {
1905 spa_t *spa;
1906 int error;
1907
1908 error = spa_open(zc->zc_name, &spa, FTAG);
1909 if (error != 0)
1910 return (error);
1911 if (zc->zc_cookie != 0) {
1912 error = spa_vdev_remove_cancel(spa);
1913 } else {
1914 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1915 }
1916 spa_close(spa, FTAG);
1917 return (error);
1918 }
1919
1920 static int
1921 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1922 {
1923 spa_t *spa;
1924 int error;
1925 vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1926
1927 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1928 return (error);
1929 switch (zc->zc_cookie) {
1930 case VDEV_STATE_ONLINE:
1931 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1932 break;
1933
1934 case VDEV_STATE_OFFLINE:
1935 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1936 break;
1937
1938 case VDEV_STATE_FAULTED:
1939 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1940 zc->zc_obj != VDEV_AUX_EXTERNAL &&
1941 zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
1942 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1943
1944 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1945 break;
1946
1947 case VDEV_STATE_DEGRADED:
1948 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1949 zc->zc_obj != VDEV_AUX_EXTERNAL)
1950 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1951
1952 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1953 break;
1954
1955 case VDEV_STATE_REMOVED:
1956 error = vdev_remove_wanted(spa, zc->zc_guid);
1957 break;
1958
1959 default:
1960 error = SET_ERROR(EINVAL);
1961 }
1962 zc->zc_cookie = newstate;
1963 spa_close(spa, FTAG);
1964 return (error);
1965 }
1966
1967 static int
1968 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1969 {
1970 spa_t *spa;
1971 nvlist_t *config;
1972 int replacing = zc->zc_cookie;
1973 int rebuild = zc->zc_simple;
1974 int error;
1975
1976 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1977 return (error);
1978
1979 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1980 zc->zc_iflags, &config)) == 0) {
1981 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
1982 rebuild);
1983 nvlist_free(config);
1984 }
1985
1986 spa_close(spa, FTAG);
1987 return (error);
1988 }
1989
1990 static int
1991 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1992 {
1993 spa_t *spa;
1994 int error;
1995
1996 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1997 return (error);
1998
1999 error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2000
2001 spa_close(spa, FTAG);
2002 return (error);
2003 }
2004
2005 static int
2006 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2007 {
2008 spa_t *spa;
2009 nvlist_t *config, *props = NULL;
2010 int error;
2011 boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2012
2013 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2014 return (error);
2015
2016 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2017 zc->zc_iflags, &config))) {
2018 spa_close(spa, FTAG);
2019 return (error);
2020 }
2021
2022 if (zc->zc_nvlist_src_size != 0 && (error =
2023 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2024 zc->zc_iflags, &props))) {
2025 spa_close(spa, FTAG);
2026 nvlist_free(config);
2027 return (error);
2028 }
2029
2030 error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2031
2032 spa_close(spa, FTAG);
2033
2034 nvlist_free(config);
2035 nvlist_free(props);
2036
2037 return (error);
2038 }
2039
2040 static int
2041 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2042 {
2043 spa_t *spa;
2044 const char *path = zc->zc_value;
2045 uint64_t guid = zc->zc_guid;
2046 int error;
2047
2048 error = spa_open(zc->zc_name, &spa, FTAG);
2049 if (error != 0)
2050 return (error);
2051
2052 error = spa_vdev_setpath(spa, guid, path);
2053 spa_close(spa, FTAG);
2054 return (error);
2055 }
2056
2057 static int
2058 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2059 {
2060 spa_t *spa;
2061 const char *fru = zc->zc_value;
2062 uint64_t guid = zc->zc_guid;
2063 int error;
2064
2065 error = spa_open(zc->zc_name, &spa, FTAG);
2066 if (error != 0)
2067 return (error);
2068
2069 error = spa_vdev_setfru(spa, guid, fru);
2070 spa_close(spa, FTAG);
2071 return (error);
2072 }
2073
2074 static int
2075 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2076 {
2077 int error = 0;
2078 nvlist_t *nv;
2079
2080 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2081
2082 if (!zc->zc_simple && zc->zc_nvlist_dst != 0 &&
2083 (error = dsl_prop_get_all(os, &nv)) == 0) {
2084 dmu_objset_stats(os, nv);
2085 /*
2086 * NB: zvol_get_stats() will read the objset contents,
2087 * which we aren't supposed to do with a
2088 * DS_MODE_USER hold, because it could be
2089 * inconsistent. So this is a bit of a workaround...
2090 * XXX reading without owning
2091 */
2092 if (!zc->zc_objset_stats.dds_inconsistent &&
2093 dmu_objset_type(os) == DMU_OST_ZVOL) {
2094 error = zvol_get_stats(os, nv);
2095 if (error == EIO) {
2096 nvlist_free(nv);
2097 return (error);
2098 }
2099 VERIFY0(error);
2100 }
2101 if (error == 0)
2102 error = put_nvlist(zc, nv);
2103 nvlist_free(nv);
2104 }
2105
2106 return (error);
2107 }
2108
2109 /*
2110 * inputs:
2111 * zc_name name of filesystem
2112 * zc_nvlist_dst_size size of buffer for property nvlist
2113 *
2114 * outputs:
2115 * zc_objset_stats stats
2116 * zc_nvlist_dst property nvlist
2117 * zc_nvlist_dst_size size of property nvlist
2118 */
2119 static int
2120 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2121 {
2122 objset_t *os;
2123 int error;
2124
2125 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2126 if (error == 0) {
2127 error = zfs_ioc_objset_stats_impl(zc, os);
2128 dmu_objset_rele(os, FTAG);
2129 }
2130
2131 return (error);
2132 }
2133
2134 /*
2135 * inputs:
2136 * zc_name name of filesystem
2137 * zc_nvlist_dst_size size of buffer for property nvlist
2138 *
2139 * outputs:
2140 * zc_nvlist_dst received property nvlist
2141 * zc_nvlist_dst_size size of received property nvlist
2142 *
2143 * Gets received properties (distinct from local properties on or after
2144 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2145 * local property values.
2146 */
2147 static int
2148 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2149 {
2150 int error = 0;
2151 nvlist_t *nv;
2152
2153 /*
2154 * Without this check, we would return local property values if the
2155 * caller has not already received properties on or after
2156 * SPA_VERSION_RECVD_PROPS.
2157 */
2158 if (!dsl_prop_get_hasrecvd(zc->zc_name))
2159 return (SET_ERROR(ENOTSUP));
2160
2161 if (zc->zc_nvlist_dst != 0 &&
2162 (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2163 error = put_nvlist(zc, nv);
2164 nvlist_free(nv);
2165 }
2166
2167 return (error);
2168 }
2169
2170 static int
2171 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2172 {
2173 uint64_t value;
2174 int error;
2175
2176 /*
2177 * zfs_get_zplprop() will either find a value or give us
2178 * the default value (if there is one).
2179 */
2180 if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2181 return (error);
2182 VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2183 return (0);
2184 }
2185
2186 /*
2187 * inputs:
2188 * zc_name name of filesystem
2189 * zc_nvlist_dst_size size of buffer for zpl property nvlist
2190 *
2191 * outputs:
2192 * zc_nvlist_dst zpl property nvlist
2193 * zc_nvlist_dst_size size of zpl property nvlist
2194 */
2195 static int
2196 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2197 {
2198 objset_t *os;
2199 int err;
2200
2201 /* XXX reading without owning */
2202 if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2203 return (err);
2204
2205 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2206
2207 /*
2208 * NB: nvl_add_zplprop() will read the objset contents,
2209 * which we aren't supposed to do with a DS_MODE_USER
2210 * hold, because it could be inconsistent.
2211 */
2212 if (zc->zc_nvlist_dst != 0 &&
2213 !zc->zc_objset_stats.dds_inconsistent &&
2214 dmu_objset_type(os) == DMU_OST_ZFS) {
2215 nvlist_t *nv;
2216
2217 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2218 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2219 (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2220 (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2221 (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2222 err = put_nvlist(zc, nv);
2223 nvlist_free(nv);
2224 } else {
2225 err = SET_ERROR(ENOENT);
2226 }
2227 dmu_objset_rele(os, FTAG);
2228 return (err);
2229 }
2230
2231 /*
2232 * inputs:
2233 * zc_name name of filesystem
2234 * zc_cookie zap cursor
2235 * zc_nvlist_dst_size size of buffer for property nvlist
2236 *
2237 * outputs:
2238 * zc_name name of next filesystem
2239 * zc_cookie zap cursor
2240 * zc_objset_stats stats
2241 * zc_nvlist_dst property nvlist
2242 * zc_nvlist_dst_size size of property nvlist
2243 */
2244 static int
2245 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2246 {
2247 objset_t *os;
2248 int error;
2249 char *p;
2250 size_t orig_len = strlen(zc->zc_name);
2251
2252 top:
2253 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2254 if (error == ENOENT)
2255 error = SET_ERROR(ESRCH);
2256 return (error);
2257 }
2258
2259 p = strrchr(zc->zc_name, '/');
2260 if (p == NULL || p[1] != '\0')
2261 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2262 p = zc->zc_name + strlen(zc->zc_name);
2263
2264 do {
2265 error = dmu_dir_list_next(os,
2266 sizeof (zc->zc_name) - (p - zc->zc_name), p,
2267 NULL, &zc->zc_cookie);
2268 if (error == ENOENT)
2269 error = SET_ERROR(ESRCH);
2270 } while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
2271 dmu_objset_rele(os, FTAG);
2272
2273 /*
2274 * If it's an internal dataset (ie. with a '$' in its name),
2275 * don't try to get stats for it, otherwise we'll return ENOENT.
2276 */
2277 if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2278 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2279 if (error == ENOENT) {
2280 /* We lost a race with destroy, get the next one. */
2281 zc->zc_name[orig_len] = '\0';
2282 goto top;
2283 }
2284 }
2285 return (error);
2286 }
2287
2288 /*
2289 * inputs:
2290 * zc_name name of filesystem
2291 * zc_cookie zap cursor
2292 * zc_nvlist_src iteration range nvlist
2293 * zc_nvlist_src_size size of iteration range nvlist
2294 *
2295 * outputs:
2296 * zc_name name of next snapshot
2297 * zc_objset_stats stats
2298 * zc_nvlist_dst property nvlist
2299 * zc_nvlist_dst_size size of property nvlist
2300 */
2301 static int
2302 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2303 {
2304 int error;
2305 objset_t *os, *ossnap;
2306 dsl_dataset_t *ds;
2307 uint64_t min_txg = 0, max_txg = 0;
2308
2309 if (zc->zc_nvlist_src_size != 0) {
2310 nvlist_t *props = NULL;
2311 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2312 zc->zc_iflags, &props);
2313 if (error != 0)
2314 return (error);
2315 (void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
2316 &min_txg);
2317 (void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
2318 &max_txg);
2319 nvlist_free(props);
2320 }
2321
2322 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2323 if (error != 0) {
2324 return (error == ENOENT ? SET_ERROR(ESRCH) : error);
2325 }
2326
2327 /*
2328 * A dataset name of maximum length cannot have any snapshots,
2329 * so exit immediately.
2330 */
2331 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2332 ZFS_MAX_DATASET_NAME_LEN) {
2333 dmu_objset_rele(os, FTAG);
2334 return (SET_ERROR(ESRCH));
2335 }
2336
2337 while (error == 0) {
2338 if (issig(JUSTLOOKING) && issig(FORREAL)) {
2339 error = SET_ERROR(EINTR);
2340 break;
2341 }
2342
2343 error = dmu_snapshot_list_next(os,
2344 sizeof (zc->zc_name) - strlen(zc->zc_name),
2345 zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
2346 &zc->zc_cookie, NULL);
2347 if (error == ENOENT) {
2348 error = SET_ERROR(ESRCH);
2349 break;
2350 } else if (error != 0) {
2351 break;
2352 }
2353
2354 error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
2355 FTAG, &ds);
2356 if (error != 0)
2357 break;
2358
2359 if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
2360 (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
2361 dsl_dataset_rele(ds, FTAG);
2362 /* undo snapshot name append */
2363 *(strchr(zc->zc_name, '@') + 1) = '\0';
2364 /* skip snapshot */
2365 continue;
2366 }
2367
2368 if (zc->zc_simple) {
2369 dsl_dataset_fast_stat(ds, &zc->zc_objset_stats);
2370 dsl_dataset_rele(ds, FTAG);
2371 break;
2372 }
2373
2374 if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
2375 dsl_dataset_rele(ds, FTAG);
2376 break;
2377 }
2378 if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
2379 dsl_dataset_rele(ds, FTAG);
2380 break;
2381 }
2382 dsl_dataset_rele(ds, FTAG);
2383 break;
2384 }
2385
2386 dmu_objset_rele(os, FTAG);
2387 /* if we failed, undo the @ that we tacked on to zc_name */
2388 if (error != 0)
2389 *strchr(zc->zc_name, '@') = '\0';
2390 return (error);
2391 }
2392
2393 static int
2394 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2395 {
2396 const char *propname = nvpair_name(pair);
2397 uint64_t *valary;
2398 unsigned int vallen;
2399 const char *dash, *domain;
2400 zfs_userquota_prop_t type;
2401 uint64_t rid;
2402 uint64_t quota;
2403 zfsvfs_t *zfsvfs;
2404 int err;
2405
2406 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2407 nvlist_t *attrs;
2408 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2409 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2410 &pair) != 0)
2411 return (SET_ERROR(EINVAL));
2412 }
2413
2414 /*
2415 * A correctly constructed propname is encoded as
2416 * userquota@<rid>-<domain>.
2417 */
2418 if ((dash = strchr(propname, '-')) == NULL ||
2419 nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2420 vallen != 3)
2421 return (SET_ERROR(EINVAL));
2422
2423 domain = dash + 1;
2424 type = valary[0];
2425 rid = valary[1];
2426 quota = valary[2];
2427
2428 err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2429 if (err == 0) {
2430 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2431 zfsvfs_rele(zfsvfs, FTAG);
2432 }
2433
2434 return (err);
2435 }
2436
2437 /*
2438 * If the named property is one that has a special function to set its value,
2439 * return 0 on success and a positive error code on failure; otherwise if it is
2440 * not one of the special properties handled by this function, return -1.
2441 *
2442 * XXX: It would be better for callers of the property interface if we handled
2443 * these special cases in dsl_prop.c (in the dsl layer).
2444 */
2445 static int
2446 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2447 nvpair_t *pair)
2448 {
2449 const char *propname = nvpair_name(pair);
2450 zfs_prop_t prop = zfs_name_to_prop(propname);
2451 uint64_t intval = 0;
2452 const char *strval = NULL;
2453 int err = -1;
2454
2455 if (prop == ZPROP_USERPROP) {
2456 if (zfs_prop_userquota(propname))
2457 return (zfs_prop_set_userquota(dsname, pair));
2458 return (-1);
2459 }
2460
2461 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2462 nvlist_t *attrs;
2463 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2464 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2465 &pair) == 0);
2466 }
2467
2468 /* all special properties are numeric except for keylocation */
2469 if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2470 strval = fnvpair_value_string(pair);
2471 } else {
2472 intval = fnvpair_value_uint64(pair);
2473 }
2474
2475 switch (prop) {
2476 case ZFS_PROP_QUOTA:
2477 err = dsl_dir_set_quota(dsname, source, intval);
2478 break;
2479 case ZFS_PROP_REFQUOTA:
2480 err = dsl_dataset_set_refquota(dsname, source, intval);
2481 break;
2482 case ZFS_PROP_FILESYSTEM_LIMIT:
2483 case ZFS_PROP_SNAPSHOT_LIMIT:
2484 if (intval == UINT64_MAX) {
2485 /* clearing the limit, just do it */
2486 err = 0;
2487 } else {
2488 err = dsl_dir_activate_fs_ss_limit(dsname);
2489 }
2490 /*
2491 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2492 * default path to set the value in the nvlist.
2493 */
2494 if (err == 0)
2495 err = -1;
2496 break;
2497 case ZFS_PROP_KEYLOCATION:
2498 err = dsl_crypto_can_set_keylocation(dsname, strval);
2499
2500 /*
2501 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2502 * default path to set the value in the nvlist.
2503 */
2504 if (err == 0)
2505 err = -1;
2506 break;
2507 case ZFS_PROP_RESERVATION:
2508 err = dsl_dir_set_reservation(dsname, source, intval);
2509 break;
2510 case ZFS_PROP_REFRESERVATION:
2511 err = dsl_dataset_set_refreservation(dsname, source, intval);
2512 break;
2513 case ZFS_PROP_COMPRESSION:
2514 err = dsl_dataset_set_compression(dsname, source, intval);
2515 /*
2516 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2517 * default path to set the value in the nvlist.
2518 */
2519 if (err == 0)
2520 err = -1;
2521 break;
2522 case ZFS_PROP_VOLSIZE:
2523 err = zvol_set_volsize(dsname, intval);
2524 break;
2525 case ZFS_PROP_SNAPDEV:
2526 err = zvol_set_snapdev(dsname, source, intval);
2527 break;
2528 case ZFS_PROP_VOLMODE:
2529 err = zvol_set_volmode(dsname, source, intval);
2530 break;
2531 case ZFS_PROP_VERSION:
2532 {
2533 zfsvfs_t *zfsvfs;
2534
2535 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2536 break;
2537
2538 err = zfs_set_version(zfsvfs, intval);
2539 zfsvfs_rele(zfsvfs, FTAG);
2540
2541 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2542 zfs_cmd_t *zc;
2543
2544 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2545 (void) strlcpy(zc->zc_name, dsname,
2546 sizeof (zc->zc_name));
2547 (void) zfs_ioc_userspace_upgrade(zc);
2548 (void) zfs_ioc_id_quota_upgrade(zc);
2549 kmem_free(zc, sizeof (zfs_cmd_t));
2550 }
2551 break;
2552 }
2553 default:
2554 err = -1;
2555 }
2556
2557 return (err);
2558 }
2559
2560 static boolean_t
2561 zfs_is_namespace_prop(zfs_prop_t prop)
2562 {
2563 switch (prop) {
2564
2565 case ZFS_PROP_ATIME:
2566 case ZFS_PROP_RELATIME:
2567 case ZFS_PROP_DEVICES:
2568 case ZFS_PROP_EXEC:
2569 case ZFS_PROP_SETUID:
2570 case ZFS_PROP_READONLY:
2571 case ZFS_PROP_XATTR:
2572 case ZFS_PROP_NBMAND:
2573 return (B_TRUE);
2574
2575 default:
2576 return (B_FALSE);
2577 }
2578 }
2579
2580 /*
2581 * This function is best effort. If it fails to set any of the given properties,
2582 * it continues to set as many as it can and returns the last error
2583 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2584 * with the list of names of all the properties that failed along with the
2585 * corresponding error numbers.
2586 *
2587 * If every property is set successfully, zero is returned and errlist is not
2588 * modified.
2589 */
2590 int
2591 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2592 nvlist_t *errlist)
2593 {
2594 nvpair_t *pair;
2595 nvpair_t *propval;
2596 int rv = 0;
2597 int err;
2598 uint64_t intval;
2599 const char *strval;
2600 boolean_t should_update_mount_cache = B_FALSE;
2601
2602 nvlist_t *genericnvl = fnvlist_alloc();
2603 nvlist_t *retrynvl = fnvlist_alloc();
2604 retry:
2605 pair = NULL;
2606 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2607 const char *propname = nvpair_name(pair);
2608 zfs_prop_t prop = zfs_name_to_prop(propname);
2609 err = 0;
2610
2611 /* decode the property value */
2612 propval = pair;
2613 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2614 nvlist_t *attrs;
2615 attrs = fnvpair_value_nvlist(pair);
2616 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2617 &propval) != 0)
2618 err = SET_ERROR(EINVAL);
2619 }
2620
2621 /* Validate value type */
2622 if (err == 0 && source == ZPROP_SRC_INHERITED) {
2623 /* inherited properties are expected to be booleans */
2624 if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
2625 err = SET_ERROR(EINVAL);
2626 } else if (err == 0 && prop == ZPROP_USERPROP) {
2627 if (zfs_prop_user(propname)) {
2628 if (nvpair_type(propval) != DATA_TYPE_STRING)
2629 err = SET_ERROR(EINVAL);
2630 } else if (zfs_prop_userquota(propname)) {
2631 if (nvpair_type(propval) !=
2632 DATA_TYPE_UINT64_ARRAY)
2633 err = SET_ERROR(EINVAL);
2634 } else {
2635 err = SET_ERROR(EINVAL);
2636 }
2637 } else if (err == 0) {
2638 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2639 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2640 err = SET_ERROR(EINVAL);
2641 } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2642 const char *unused;
2643
2644 intval = fnvpair_value_uint64(propval);
2645
2646 switch (zfs_prop_get_type(prop)) {
2647 case PROP_TYPE_NUMBER:
2648 break;
2649 case PROP_TYPE_STRING:
2650 err = SET_ERROR(EINVAL);
2651 break;
2652 case PROP_TYPE_INDEX:
2653 if (zfs_prop_index_to_string(prop,
2654 intval, &unused) != 0)
2655 err =
2656 SET_ERROR(ZFS_ERR_BADPROP);
2657 break;
2658 default:
2659 cmn_err(CE_PANIC,
2660 "unknown property type");
2661 }
2662 } else {
2663 err = SET_ERROR(EINVAL);
2664 }
2665 }
2666
2667 /* Validate permissions */
2668 if (err == 0)
2669 err = zfs_check_settable(dsname, pair, CRED());
2670
2671 if (err == 0) {
2672 if (source == ZPROP_SRC_INHERITED)
2673 err = -1; /* does not need special handling */
2674 else
2675 err = zfs_prop_set_special(dsname, source,
2676 pair);
2677 if (err == -1) {
2678 /*
2679 * For better performance we build up a list of
2680 * properties to set in a single transaction.
2681 */
2682 err = nvlist_add_nvpair(genericnvl, pair);
2683 } else if (err != 0 && nvl != retrynvl) {
2684 /*
2685 * This may be a spurious error caused by
2686 * receiving quota and reservation out of order.
2687 * Try again in a second pass.
2688 */
2689 err = nvlist_add_nvpair(retrynvl, pair);
2690 }
2691 }
2692
2693 if (err != 0) {
2694 if (errlist != NULL)
2695 fnvlist_add_int32(errlist, propname, err);
2696 rv = err;
2697 }
2698
2699 if (zfs_is_namespace_prop(prop))
2700 should_update_mount_cache = B_TRUE;
2701 }
2702
2703 if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2704 nvl = retrynvl;
2705 goto retry;
2706 }
2707
2708 if (nvlist_empty(genericnvl))
2709 goto out;
2710
2711 /*
2712 * Try to set them all in one batch.
2713 */
2714 err = dsl_props_set(dsname, source, genericnvl);
2715 if (err == 0)
2716 goto out;
2717
2718 /*
2719 * If batching fails, we still want to set as many properties as we
2720 * can, so try setting them individually.
2721 */
2722 pair = NULL;
2723 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2724 const char *propname = nvpair_name(pair);
2725
2726 propval = pair;
2727 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2728 nvlist_t *attrs;
2729 attrs = fnvpair_value_nvlist(pair);
2730 propval = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
2731 }
2732
2733 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2734 strval = fnvpair_value_string(propval);
2735 err = dsl_prop_set_string(dsname, propname,
2736 source, strval);
2737 } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
2738 err = dsl_prop_inherit(dsname, propname, source);
2739 } else {
2740 intval = fnvpair_value_uint64(propval);
2741 err = dsl_prop_set_int(dsname, propname, source,
2742 intval);
2743 }
2744
2745 if (err != 0) {
2746 if (errlist != NULL) {
2747 fnvlist_add_int32(errlist, propname, err);
2748 }
2749 rv = err;
2750 }
2751 }
2752
2753 out:
2754 if (should_update_mount_cache)
2755 zfs_ioctl_update_mount_cache(dsname);
2756
2757 nvlist_free(genericnvl);
2758 nvlist_free(retrynvl);
2759
2760 return (rv);
2761 }
2762
2763 /*
2764 * Check that all the properties are valid user properties.
2765 */
2766 static int
2767 zfs_check_userprops(nvlist_t *nvl)
2768 {
2769 nvpair_t *pair = NULL;
2770
2771 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2772 const char *propname = nvpair_name(pair);
2773
2774 if (!zfs_prop_user(propname) ||
2775 nvpair_type(pair) != DATA_TYPE_STRING)
2776 return (SET_ERROR(EINVAL));
2777
2778 if (strlen(propname) >= ZAP_MAXNAMELEN)
2779 return (SET_ERROR(ENAMETOOLONG));
2780
2781 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2782 return (SET_ERROR(E2BIG));
2783 }
2784 return (0);
2785 }
2786
2787 static void
2788 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2789 {
2790 nvpair_t *pair;
2791
2792 VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2793
2794 pair = NULL;
2795 while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2796 if (nvlist_exists(skipped, nvpair_name(pair)))
2797 continue;
2798
2799 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2800 }
2801 }
2802
2803 static int
2804 clear_received_props(const char *dsname, nvlist_t *props,
2805 nvlist_t *skipped)
2806 {
2807 int err = 0;
2808 nvlist_t *cleared_props = NULL;
2809 props_skip(props, skipped, &cleared_props);
2810 if (!nvlist_empty(cleared_props)) {
2811 /*
2812 * Acts on local properties until the dataset has received
2813 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2814 */
2815 zprop_source_t flags = (ZPROP_SRC_NONE |
2816 (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2817 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2818 }
2819 nvlist_free(cleared_props);
2820 return (err);
2821 }
2822
2823 /*
2824 * inputs:
2825 * zc_name name of filesystem
2826 * zc_value name of property to set
2827 * zc_nvlist_src{_size} nvlist of properties to apply
2828 * zc_cookie received properties flag
2829 *
2830 * outputs:
2831 * zc_nvlist_dst{_size} error for each unapplied received property
2832 */
2833 static int
2834 zfs_ioc_set_prop(zfs_cmd_t *zc)
2835 {
2836 nvlist_t *nvl;
2837 boolean_t received = zc->zc_cookie;
2838 zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2839 ZPROP_SRC_LOCAL);
2840 nvlist_t *errors;
2841 int error;
2842
2843 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2844 zc->zc_iflags, &nvl)) != 0)
2845 return (error);
2846
2847 if (received) {
2848 nvlist_t *origprops;
2849
2850 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2851 (void) clear_received_props(zc->zc_name,
2852 origprops, nvl);
2853 nvlist_free(origprops);
2854 }
2855
2856 error = dsl_prop_set_hasrecvd(zc->zc_name);
2857 }
2858
2859 errors = fnvlist_alloc();
2860 if (error == 0)
2861 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2862
2863 if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2864 (void) put_nvlist(zc, errors);
2865 }
2866
2867 nvlist_free(errors);
2868 nvlist_free(nvl);
2869 return (error);
2870 }
2871
2872 /*
2873 * inputs:
2874 * zc_name name of filesystem
2875 * zc_value name of property to inherit
2876 * zc_cookie revert to received value if TRUE
2877 *
2878 * outputs: none
2879 */
2880 static int
2881 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2882 {
2883 const char *propname = zc->zc_value;
2884 zfs_prop_t prop = zfs_name_to_prop(propname);
2885 boolean_t received = zc->zc_cookie;
2886 zprop_source_t source = (received
2887 ? ZPROP_SRC_NONE /* revert to received value, if any */
2888 : ZPROP_SRC_INHERITED); /* explicitly inherit */
2889 nvlist_t *dummy;
2890 nvpair_t *pair;
2891 zprop_type_t type;
2892 int err;
2893
2894 if (!received) {
2895 /*
2896 * Only check this in the non-received case. We want to allow
2897 * 'inherit -S' to revert non-inheritable properties like quota
2898 * and reservation to the received or default values even though
2899 * they are not considered inheritable.
2900 */
2901 if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
2902 return (SET_ERROR(EINVAL));
2903 }
2904
2905 if (prop == ZPROP_USERPROP) {
2906 if (!zfs_prop_user(propname))
2907 return (SET_ERROR(EINVAL));
2908
2909 type = PROP_TYPE_STRING;
2910 } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
2911 return (SET_ERROR(EINVAL));
2912 } else {
2913 type = zfs_prop_get_type(prop);
2914 }
2915
2916 /*
2917 * zfs_prop_set_special() expects properties in the form of an
2918 * nvpair with type info.
2919 */
2920 dummy = fnvlist_alloc();
2921
2922 switch (type) {
2923 case PROP_TYPE_STRING:
2924 VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2925 break;
2926 case PROP_TYPE_NUMBER:
2927 case PROP_TYPE_INDEX:
2928 VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2929 break;
2930 default:
2931 err = SET_ERROR(EINVAL);
2932 goto errout;
2933 }
2934
2935 pair = nvlist_next_nvpair(dummy, NULL);
2936 if (pair == NULL) {
2937 err = SET_ERROR(EINVAL);
2938 } else {
2939 err = zfs_prop_set_special(zc->zc_name, source, pair);
2940 if (err == -1) /* property is not "special", needs handling */
2941 err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
2942 source);
2943 }
2944
2945 errout:
2946 nvlist_free(dummy);
2947 return (err);
2948 }
2949
2950 static int
2951 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2952 {
2953 nvlist_t *props;
2954 spa_t *spa;
2955 int error;
2956 nvpair_t *pair;
2957
2958 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2959 zc->zc_iflags, &props)))
2960 return (error);
2961
2962 /*
2963 * If the only property is the configfile, then just do a spa_lookup()
2964 * to handle the faulted case.
2965 */
2966 pair = nvlist_next_nvpair(props, NULL);
2967 if (pair != NULL && strcmp(nvpair_name(pair),
2968 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2969 nvlist_next_nvpair(props, pair) == NULL) {
2970 mutex_enter(&spa_namespace_lock);
2971 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2972 spa_configfile_set(spa, props, B_FALSE);
2973 spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
2974 }
2975 mutex_exit(&spa_namespace_lock);
2976 if (spa != NULL) {
2977 nvlist_free(props);
2978 return (0);
2979 }
2980 }
2981
2982 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2983 nvlist_free(props);
2984 return (error);
2985 }
2986
2987 error = spa_prop_set(spa, props);
2988
2989 nvlist_free(props);
2990 spa_close(spa, FTAG);
2991
2992 return (error);
2993 }
2994
2995 static int
2996 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2997 {
2998 spa_t *spa;
2999 int error;
3000 nvlist_t *nvp = NULL;
3001
3002 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3003 /*
3004 * If the pool is faulted, there may be properties we can still
3005 * get (such as altroot and cachefile), so attempt to get them
3006 * anyway.
3007 */
3008 mutex_enter(&spa_namespace_lock);
3009 if ((spa = spa_lookup(zc->zc_name)) != NULL)
3010 error = spa_prop_get(spa, &nvp);
3011 mutex_exit(&spa_namespace_lock);
3012 } else {
3013 error = spa_prop_get(spa, &nvp);
3014 spa_close(spa, FTAG);
3015 }
3016
3017 if (error == 0 && zc->zc_nvlist_dst != 0)
3018 error = put_nvlist(zc, nvp);
3019 else
3020 error = SET_ERROR(EFAULT);
3021
3022 nvlist_free(nvp);
3023 return (error);
3024 }
3025
3026 /*
3027 * innvl: {
3028 * "vdevprops_set_vdev" -> guid
3029 * "vdevprops_set_props" -> { prop -> value }
3030 * }
3031 *
3032 * outnvl: propname -> error code (int32)
3033 */
3034 static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = {
3035 {ZPOOL_VDEV_PROPS_SET_VDEV, DATA_TYPE_UINT64, 0},
3036 {ZPOOL_VDEV_PROPS_SET_PROPS, DATA_TYPE_NVLIST, 0}
3037 };
3038
3039 static int
3040 zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3041 {
3042 spa_t *spa;
3043 int error;
3044 vdev_t *vd;
3045 uint64_t vdev_guid;
3046
3047 /* Early validation */
3048 if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
3049 &vdev_guid) != 0)
3050 return (SET_ERROR(EINVAL));
3051
3052 if (outnvl == NULL)
3053 return (SET_ERROR(EINVAL));
3054
3055 if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3056 return (error);
3057
3058 ASSERT(spa_writeable(spa));
3059
3060 if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3061 spa_close(spa, FTAG);
3062 return (SET_ERROR(ENOENT));
3063 }
3064
3065 error = vdev_prop_set(vd, innvl, outnvl);
3066
3067 spa_close(spa, FTAG);
3068
3069 return (error);
3070 }
3071
3072 /*
3073 * innvl: {
3074 * "vdevprops_get_vdev" -> guid
3075 * (optional) "vdevprops_get_props" -> { propname -> propid }
3076 * }
3077 *
3078 * outnvl: propname -> value
3079 */
3080 static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = {
3081 {ZPOOL_VDEV_PROPS_GET_VDEV, DATA_TYPE_UINT64, 0},
3082 {ZPOOL_VDEV_PROPS_GET_PROPS, DATA_TYPE_NVLIST, ZK_OPTIONAL}
3083 };
3084
3085 static int
3086 zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3087 {
3088 spa_t *spa;
3089 int error;
3090 vdev_t *vd;
3091 uint64_t vdev_guid;
3092
3093 /* Early validation */
3094 if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
3095 &vdev_guid) != 0)
3096 return (SET_ERROR(EINVAL));
3097
3098 if (outnvl == NULL)
3099 return (SET_ERROR(EINVAL));
3100
3101 if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3102 return (error);
3103
3104 if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3105 spa_close(spa, FTAG);
3106 return (SET_ERROR(ENOENT));
3107 }
3108
3109 error = vdev_prop_get(vd, innvl, outnvl);
3110
3111 spa_close(spa, FTAG);
3112
3113 return (error);
3114 }
3115
3116 /*
3117 * inputs:
3118 * zc_name name of filesystem
3119 * zc_nvlist_src{_size} nvlist of delegated permissions
3120 * zc_perm_action allow/unallow flag
3121 *
3122 * outputs: none
3123 */
3124 static int
3125 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3126 {
3127 int error;
3128 nvlist_t *fsaclnv = NULL;
3129
3130 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3131 zc->zc_iflags, &fsaclnv)) != 0)
3132 return (error);
3133
3134 /*
3135 * Verify nvlist is constructed correctly
3136 */
3137 if (zfs_deleg_verify_nvlist(fsaclnv) != 0) {
3138 nvlist_free(fsaclnv);
3139 return (SET_ERROR(EINVAL));
3140 }
3141
3142 /*
3143 * If we don't have PRIV_SYS_MOUNT, then validate
3144 * that user is allowed to hand out each permission in
3145 * the nvlist(s)
3146 */
3147
3148 error = secpolicy_zfs(CRED());
3149 if (error != 0) {
3150 if (zc->zc_perm_action == B_FALSE) {
3151 error = dsl_deleg_can_allow(zc->zc_name,
3152 fsaclnv, CRED());
3153 } else {
3154 error = dsl_deleg_can_unallow(zc->zc_name,
3155 fsaclnv, CRED());
3156 }
3157 }
3158
3159 if (error == 0)
3160 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3161
3162 nvlist_free(fsaclnv);
3163 return (error);
3164 }
3165
3166 /*
3167 * inputs:
3168 * zc_name name of filesystem
3169 *
3170 * outputs:
3171 * zc_nvlist_src{_size} nvlist of delegated permissions
3172 */
3173 static int
3174 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3175 {
3176 nvlist_t *nvp;
3177 int error;
3178
3179 if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3180 error = put_nvlist(zc, nvp);
3181 nvlist_free(nvp);
3182 }
3183
3184 return (error);
3185 }
3186
3187 static void
3188 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3189 {
3190 zfs_creat_t *zct = arg;
3191
3192 zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3193 }
3194
3195 #define ZFS_PROP_UNDEFINED ((uint64_t)-1)
3196
3197 /*
3198 * inputs:
3199 * os parent objset pointer (NULL if root fs)
3200 * fuids_ok fuids allowed in this version of the spa?
3201 * sa_ok SAs allowed in this version of the spa?
3202 * createprops list of properties requested by creator
3203 *
3204 * outputs:
3205 * zplprops values for the zplprops we attach to the master node object
3206 * is_ci true if requested file system will be purely case-insensitive
3207 *
3208 * Determine the settings for utf8only, normalization and
3209 * casesensitivity. Specific values may have been requested by the
3210 * creator and/or we can inherit values from the parent dataset. If
3211 * the file system is of too early a vintage, a creator can not
3212 * request settings for these properties, even if the requested
3213 * setting is the default value. We don't actually want to create dsl
3214 * properties for these, so remove them from the source nvlist after
3215 * processing.
3216 */
3217 static int
3218 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3219 boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3220 nvlist_t *zplprops, boolean_t *is_ci)
3221 {
3222 uint64_t sense = ZFS_PROP_UNDEFINED;
3223 uint64_t norm = ZFS_PROP_UNDEFINED;
3224 uint64_t u8 = ZFS_PROP_UNDEFINED;
3225 int error;
3226
3227 ASSERT(zplprops != NULL);
3228
3229 /* parent dataset must be a filesystem */
3230 if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3231 return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3232
3233 /*
3234 * Pull out creator prop choices, if any.
3235 */
3236 if (createprops) {
3237 (void) nvlist_lookup_uint64(createprops,
3238 zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3239 (void) nvlist_lookup_uint64(createprops,
3240 zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3241 (void) nvlist_remove_all(createprops,
3242 zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3243 (void) nvlist_lookup_uint64(createprops,
3244 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3245 (void) nvlist_remove_all(createprops,
3246 zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3247 (void) nvlist_lookup_uint64(createprops,
3248 zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3249 (void) nvlist_remove_all(createprops,
3250 zfs_prop_to_name(ZFS_PROP_CASE));
3251 }
3252
3253 /*
3254 * If the zpl version requested is whacky or the file system
3255 * or pool is version is too "young" to support normalization
3256 * and the creator tried to set a value for one of the props,
3257 * error out.
3258 */
3259 if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3260 (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3261 (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3262 (zplver < ZPL_VERSION_NORMALIZATION &&
3263 (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3264 sense != ZFS_PROP_UNDEFINED)))
3265 return (SET_ERROR(ENOTSUP));
3266
3267 /*
3268 * Put the version in the zplprops
3269 */
3270 VERIFY(nvlist_add_uint64(zplprops,
3271 zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3272
3273 if (norm == ZFS_PROP_UNDEFINED &&
3274 (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3275 return (error);
3276 VERIFY(nvlist_add_uint64(zplprops,
3277 zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3278
3279 /*
3280 * If we're normalizing, names must always be valid UTF-8 strings.
3281 */
3282 if (norm)
3283 u8 = 1;
3284 if (u8 == ZFS_PROP_UNDEFINED &&
3285 (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3286 return (error);
3287 VERIFY(nvlist_add_uint64(zplprops,
3288 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3289
3290 if (sense == ZFS_PROP_UNDEFINED &&
3291 (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3292 return (error);
3293 VERIFY(nvlist_add_uint64(zplprops,
3294 zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3295
3296 if (is_ci)
3297 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3298
3299 return (0);
3300 }
3301
3302 static int
3303 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3304 nvlist_t *zplprops, boolean_t *is_ci)
3305 {
3306 boolean_t fuids_ok, sa_ok;
3307 uint64_t zplver = ZPL_VERSION;
3308 objset_t *os = NULL;
3309 char parentname[ZFS_MAX_DATASET_NAME_LEN];
3310 spa_t *spa;
3311 uint64_t spa_vers;
3312 int error;
3313
3314 zfs_get_parent(dataset, parentname, sizeof (parentname));
3315
3316 if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3317 return (error);
3318
3319 spa_vers = spa_version(spa);
3320 spa_close(spa, FTAG);
3321
3322 zplver = zfs_zpl_version_map(spa_vers);
3323 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3324 sa_ok = (zplver >= ZPL_VERSION_SA);
3325
3326 /*
3327 * Open parent object set so we can inherit zplprop values.
3328 */
3329 if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3330 return (error);
3331
3332 error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3333 zplprops, is_ci);
3334 dmu_objset_rele(os, FTAG);
3335 return (error);
3336 }
3337
3338 static int
3339 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3340 nvlist_t *zplprops, boolean_t *is_ci)
3341 {
3342 boolean_t fuids_ok;
3343 boolean_t sa_ok;
3344 uint64_t zplver = ZPL_VERSION;
3345 int error;
3346
3347 zplver = zfs_zpl_version_map(spa_vers);
3348 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3349 sa_ok = (zplver >= ZPL_VERSION_SA);
3350
3351 error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3352 createprops, zplprops, is_ci);
3353 return (error);
3354 }
3355
3356 /*
3357 * innvl: {
3358 * "type" -> dmu_objset_type_t (int32)
3359 * (optional) "props" -> { prop -> value }
3360 * (optional) "hidden_args" -> { "wkeydata" -> value }
3361 * raw uint8_t array of encryption wrapping key data (32 bytes)
3362 * }
3363 *
3364 * outnvl: propname -> error code (int32)
3365 */
3366
3367 static const zfs_ioc_key_t zfs_keys_create[] = {
3368 {"type", DATA_TYPE_INT32, 0},
3369 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3370 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3371 };
3372
3373 static int
3374 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3375 {
3376 int error = 0;
3377 zfs_creat_t zct = { 0 };
3378 nvlist_t *nvprops = NULL;
3379 nvlist_t *hidden_args = NULL;
3380 void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3381 dmu_objset_type_t type;
3382 boolean_t is_insensitive = B_FALSE;
3383 dsl_crypto_params_t *dcp = NULL;
3384
3385 type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3386 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3387 (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3388
3389 switch (type) {
3390 case DMU_OST_ZFS:
3391 cbfunc = zfs_create_cb;
3392 break;
3393
3394 case DMU_OST_ZVOL:
3395 cbfunc = zvol_create_cb;
3396 break;
3397
3398 default:
3399 cbfunc = NULL;
3400 break;
3401 }
3402 if (strchr(fsname, '@') ||
3403 strchr(fsname, '%'))
3404 return (SET_ERROR(EINVAL));
3405
3406 zct.zct_props = nvprops;
3407
3408 if (cbfunc == NULL)
3409 return (SET_ERROR(EINVAL));
3410
3411 if (type == DMU_OST_ZVOL) {
3412 uint64_t volsize, volblocksize;
3413
3414 if (nvprops == NULL)
3415 return (SET_ERROR(EINVAL));
3416 if (nvlist_lookup_uint64(nvprops,
3417 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3418 return (SET_ERROR(EINVAL));
3419
3420 if ((error = nvlist_lookup_uint64(nvprops,
3421 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3422 &volblocksize)) != 0 && error != ENOENT)
3423 return (SET_ERROR(EINVAL));
3424
3425 if (error != 0)
3426 volblocksize = zfs_prop_default_numeric(
3427 ZFS_PROP_VOLBLOCKSIZE);
3428
3429 if ((error = zvol_check_volblocksize(fsname,
3430 volblocksize)) != 0 ||
3431 (error = zvol_check_volsize(volsize,
3432 volblocksize)) != 0)
3433 return (error);
3434 } else if (type == DMU_OST_ZFS) {
3435 int error;
3436
3437 /*
3438 * We have to have normalization and
3439 * case-folding flags correct when we do the
3440 * file system creation, so go figure them out
3441 * now.
3442 */
3443 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3444 NV_UNIQUE_NAME, KM_SLEEP) == 0);
3445 error = zfs_fill_zplprops(fsname, nvprops,
3446 zct.zct_zplprops, &is_insensitive);
3447 if (error != 0) {
3448 nvlist_free(zct.zct_zplprops);
3449 return (error);
3450 }
3451 }
3452
3453 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3454 hidden_args, &dcp);
3455 if (error != 0) {
3456 nvlist_free(zct.zct_zplprops);
3457 return (error);
3458 }
3459
3460 error = dmu_objset_create(fsname, type,
3461 is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3462
3463 nvlist_free(zct.zct_zplprops);
3464 dsl_crypto_params_free(dcp, !!error);
3465
3466 /*
3467 * It would be nice to do this atomically.
3468 */
3469 if (error == 0) {
3470 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3471 nvprops, outnvl);
3472 if (error != 0) {
3473 spa_t *spa;
3474 int error2;
3475
3476 /*
3477 * Volumes will return EBUSY and cannot be destroyed
3478 * until all asynchronous minor handling (e.g. from
3479 * setting the volmode property) has completed. Wait for
3480 * the spa_zvol_taskq to drain then retry.
3481 */
3482 error2 = dsl_destroy_head(fsname);
3483 while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3484 error2 = spa_open(fsname, &spa, FTAG);
3485 if (error2 == 0) {
3486 taskq_wait(spa->spa_zvol_taskq);
3487 spa_close(spa, FTAG);
3488 }
3489 error2 = dsl_destroy_head(fsname);
3490 }
3491 }
3492 }
3493 return (error);
3494 }
3495
3496 /*
3497 * innvl: {
3498 * "origin" -> name of origin snapshot
3499 * (optional) "props" -> { prop -> value }
3500 * (optional) "hidden_args" -> { "wkeydata" -> value }
3501 * raw uint8_t array of encryption wrapping key data (32 bytes)
3502 * }
3503 *
3504 * outputs:
3505 * outnvl: propname -> error code (int32)
3506 */
3507 static const zfs_ioc_key_t zfs_keys_clone[] = {
3508 {"origin", DATA_TYPE_STRING, 0},
3509 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3510 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3511 };
3512
3513 static int
3514 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3515 {
3516 int error = 0;
3517 nvlist_t *nvprops = NULL;
3518 const char *origin_name;
3519
3520 origin_name = fnvlist_lookup_string(innvl, "origin");
3521 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3522
3523 if (strchr(fsname, '@') ||
3524 strchr(fsname, '%'))
3525 return (SET_ERROR(EINVAL));
3526
3527 if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3528 return (SET_ERROR(EINVAL));
3529
3530 error = dmu_objset_clone(fsname, origin_name);
3531
3532 /*
3533 * It would be nice to do this atomically.
3534 */
3535 if (error == 0) {
3536 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3537 nvprops, outnvl);
3538 if (error != 0)
3539 (void) dsl_destroy_head(fsname);
3540 }
3541 return (error);
3542 }
3543
3544 static const zfs_ioc_key_t zfs_keys_remap[] = {
3545 /* no nvl keys */
3546 };
3547
3548 static int
3549 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3550 {
3551 /* This IOCTL is no longer supported. */
3552 (void) fsname, (void) innvl, (void) outnvl;
3553 return (0);
3554 }
3555
3556 /*
3557 * innvl: {
3558 * "snaps" -> { snapshot1, snapshot2 }
3559 * (optional) "props" -> { prop -> value (string) }
3560 * }
3561 *
3562 * outnvl: snapshot -> error code (int32)
3563 */
3564 static const zfs_ioc_key_t zfs_keys_snapshot[] = {
3565 {"snaps", DATA_TYPE_NVLIST, 0},
3566 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3567 };
3568
3569 static int
3570 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3571 {
3572 nvlist_t *snaps;
3573 nvlist_t *props = NULL;
3574 int error, poollen;
3575 nvpair_t *pair;
3576
3577 (void) nvlist_lookup_nvlist(innvl, "props", &props);
3578 if (!nvlist_empty(props) &&
3579 zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3580 return (SET_ERROR(ENOTSUP));
3581 if ((error = zfs_check_userprops(props)) != 0)
3582 return (error);
3583
3584 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3585 poollen = strlen(poolname);
3586 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3587 pair = nvlist_next_nvpair(snaps, pair)) {
3588 const char *name = nvpair_name(pair);
3589 char *cp = strchr(name, '@');
3590
3591 /*
3592 * The snap name must contain an @, and the part after it must
3593 * contain only valid characters.
3594 */
3595 if (cp == NULL ||
3596 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3597 return (SET_ERROR(EINVAL));
3598
3599 /*
3600 * The snap must be in the specified pool.
3601 */
3602 if (strncmp(name, poolname, poollen) != 0 ||
3603 (name[poollen] != '/' && name[poollen] != '@'))
3604 return (SET_ERROR(EXDEV));
3605
3606 /*
3607 * Check for permission to set the properties on the fs.
3608 */
3609 if (!nvlist_empty(props)) {
3610 *cp = '\0';
3611 error = zfs_secpolicy_write_perms(name,
3612 ZFS_DELEG_PERM_USERPROP, CRED());
3613 *cp = '@';
3614 if (error != 0)
3615 return (error);
3616 }
3617
3618 /* This must be the only snap of this fs. */
3619 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3620 pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3621 if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3622 == 0) {
3623 return (SET_ERROR(EXDEV));
3624 }
3625 }
3626 }
3627
3628 error = dsl_dataset_snapshot(snaps, props, outnvl);
3629
3630 return (error);
3631 }
3632
3633 /*
3634 * innvl: "message" -> string
3635 */
3636 static const zfs_ioc_key_t zfs_keys_log_history[] = {
3637 {"message", DATA_TYPE_STRING, 0},
3638 };
3639
3640 static int
3641 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3642 {
3643 (void) unused, (void) outnvl;
3644 const char *message;
3645 char *poolname;
3646 spa_t *spa;
3647 int error;
3648
3649 /*
3650 * The poolname in the ioctl is not set, we get it from the TSD,
3651 * which was set at the end of the last successful ioctl that allows
3652 * logging. The secpolicy func already checked that it is set.
3653 * Only one log ioctl is allowed after each successful ioctl, so
3654 * we clear the TSD here.
3655 */
3656 poolname = tsd_get(zfs_allow_log_key);
3657 if (poolname == NULL)
3658 return (SET_ERROR(EINVAL));
3659 (void) tsd_set(zfs_allow_log_key, NULL);
3660 error = spa_open(poolname, &spa, FTAG);
3661 kmem_strfree(poolname);
3662 if (error != 0)
3663 return (error);
3664
3665 message = fnvlist_lookup_string(innvl, "message");
3666
3667 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3668 spa_close(spa, FTAG);
3669 return (SET_ERROR(ENOTSUP));
3670 }
3671
3672 error = spa_history_log(spa, message);
3673 spa_close(spa, FTAG);
3674 return (error);
3675 }
3676
3677 /*
3678 * This ioctl is used to set the bootenv configuration on the current
3679 * pool. This configuration is stored in the second padding area of the label,
3680 * and it is used by the bootloader(s) to store the bootloader and/or system
3681 * specific data.
3682 * The data is stored as nvlist data stream, and is protected by
3683 * an embedded checksum.
3684 * The version can have two possible values:
3685 * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
3686 * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
3687 */
3688 static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
3689 {"version", DATA_TYPE_UINT64, 0},
3690 {"<keys>", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
3691 };
3692
3693 static int
3694 zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3695 {
3696 int error;
3697 spa_t *spa;
3698
3699 if ((error = spa_open(name, &spa, FTAG)) != 0)
3700 return (error);
3701 spa_vdev_state_enter(spa, SCL_ALL);
3702 error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
3703 (void) spa_vdev_state_exit(spa, NULL, 0);
3704 spa_close(spa, FTAG);
3705 return (error);
3706 }
3707
3708 static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
3709 /* no nvl keys */
3710 };
3711
3712 static int
3713 zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
3714 {
3715 spa_t *spa;
3716 int error;
3717
3718 if ((error = spa_open(name, &spa, FTAG)) != 0)
3719 return (error);
3720 spa_vdev_state_enter(spa, SCL_ALL);
3721 error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
3722 (void) spa_vdev_state_exit(spa, NULL, 0);
3723 spa_close(spa, FTAG);
3724 return (error);
3725 }
3726
3727 /*
3728 * The dp_config_rwlock must not be held when calling this, because the
3729 * unmount may need to write out data.
3730 *
3731 * This function is best-effort. Callers must deal gracefully if it
3732 * remains mounted (or is remounted after this call).
3733 *
3734 * Returns 0 if the argument is not a snapshot, or it is not currently a
3735 * filesystem, or we were able to unmount it. Returns error code otherwise.
3736 */
3737 void
3738 zfs_unmount_snap(const char *snapname)
3739 {
3740 if (strchr(snapname, '@') == NULL)
3741 return;
3742
3743 (void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
3744 }
3745
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); arg is unused and the
 * return value is always 0.
 */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	(void) arg;

	zfs_unmount_snap(snapname);

	return (0);
}
3753
3754 /*
3755 * When a clone is destroyed, its origin may also need to be destroyed,
3756 * in which case it must be unmounted. This routine will do that unmount
3757 * if necessary.
3758 */
3759 void
3760 zfs_destroy_unmount_origin(const char *fsname)
3761 {
3762 int error;
3763 objset_t *os;
3764 dsl_dataset_t *ds;
3765
3766 error = dmu_objset_hold(fsname, FTAG, &os);
3767 if (error != 0)
3768 return;
3769 ds = dmu_objset_ds(os);
3770 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3771 char originname[ZFS_MAX_DATASET_NAME_LEN];
3772 dsl_dataset_name(ds->ds_prev, originname);
3773 dmu_objset_rele(os, FTAG);
3774 zfs_unmount_snap(originname);
3775 } else {
3776 dmu_objset_rele(os, FTAG);
3777 }
3778 }
3779
3780 /*
3781 * innvl: {
3782 * "snaps" -> { snapshot1, snapshot2 }
3783 * (optional boolean) "defer"
3784 * }
3785 *
3786 * outnvl: snapshot -> error code (int32)
3787 */
3788 static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
3789 {"snaps", DATA_TYPE_NVLIST, 0},
3790 {"defer", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
3791 };
3792
3793 static int
3794 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3795 {
3796 int poollen;
3797 nvlist_t *snaps;
3798 nvpair_t *pair;
3799 boolean_t defer;
3800 spa_t *spa;
3801
3802 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3803 defer = nvlist_exists(innvl, "defer");
3804
3805 poollen = strlen(poolname);
3806 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3807 pair = nvlist_next_nvpair(snaps, pair)) {
3808 const char *name = nvpair_name(pair);
3809
3810 /*
3811 * The snap must be in the specified pool to prevent the
3812 * invalid removal of zvol minors below.
3813 */
3814 if (strncmp(name, poolname, poollen) != 0 ||
3815 (name[poollen] != '/' && name[poollen] != '@'))
3816 return (SET_ERROR(EXDEV));
3817
3818 zfs_unmount_snap(nvpair_name(pair));
3819 if (spa_open(name, &spa, FTAG) == 0) {
3820 zvol_remove_minors(spa, name, B_TRUE);
3821 spa_close(spa, FTAG);
3822 }
3823 }
3824
3825 return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3826 }
3827
3828 /*
3829 * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
3830 * All bookmarks and snapshots must be in the same pool.
3831 * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
3832 *
3833 * innvl: {
3834 * new_bookmark1 -> existing_snapshot,
3835 * new_bookmark2 -> existing_bookmark,
3836 * }
3837 *
3838 * outnvl: bookmark -> error code (int32)
3839 *
3840 */
3841 static const zfs_ioc_key_t zfs_keys_bookmark[] = {
3842 {"<bookmark>...", DATA_TYPE_STRING, ZK_WILDCARDLIST},
3843 };
3844
3845 static int
3846 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3847 {
3848 (void) poolname;
3849 return (dsl_bookmark_create(innvl, outnvl));
3850 }
3851
3852 /*
3853 * innvl: {
3854 * property 1, property 2, ...
3855 * }
3856 *
3857 * outnvl: {
3858 * bookmark name 1 -> { property 1, property 2, ... },
3859 * bookmark name 2 -> { property 1, property 2, ... }
3860 * }
3861 *
3862 */
3863 static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
3864 {"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
3865 };
3866
3867 static int
3868 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3869 {
3870 return (dsl_get_bookmarks(fsname, innvl, outnvl));
3871 }
3872
3873 /*
3874 * innvl is not used.
3875 *
3876 * outnvl: {
3877 * property 1, property 2, ...
3878 * }
3879 *
3880 */
3881 static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
3882 /* no nvl keys */
3883 };
3884
3885 static int
3886 zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
3887 nvlist_t *outnvl)
3888 {
3889 (void) innvl;
3890 char fsname[ZFS_MAX_DATASET_NAME_LEN];
3891 char *bmname;
3892
3893 bmname = strchr(bookmark, '#');
3894 if (bmname == NULL)
3895 return (SET_ERROR(EINVAL));
3896 bmname++;
3897
3898 (void) strlcpy(fsname, bookmark, sizeof (fsname));
3899 *(strchr(fsname, '#')) = '\0';
3900
3901 return (dsl_get_bookmark_props(fsname, bmname, outnvl));
3902 }
3903
3904 /*
3905 * innvl: {
3906 * bookmark name 1, bookmark name 2
3907 * }
3908 *
3909 * outnvl: bookmark -> error code (int32)
3910 *
3911 */
3912 static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
3913 {"<bookmark>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST},
3914 };
3915
3916 static int
3917 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3918 nvlist_t *outnvl)
3919 {
3920 int error, poollen;
3921
3922 poollen = strlen(poolname);
3923 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3924 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3925 const char *name = nvpair_name(pair);
3926 const char *cp = strchr(name, '#');
3927
3928 /*
3929 * The bookmark name must contain an #, and the part after it
3930 * must contain only valid characters.
3931 */
3932 if (cp == NULL ||
3933 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3934 return (SET_ERROR(EINVAL));
3935
3936 /*
3937 * The bookmark must be in the specified pool.
3938 */
3939 if (strncmp(name, poolname, poollen) != 0 ||
3940 (name[poollen] != '/' && name[poollen] != '#'))
3941 return (SET_ERROR(EXDEV));
3942 }
3943
3944 error = dsl_bookmark_destroy(innvl, outnvl);
3945 return (error);
3946 }
3947
3948 static const zfs_ioc_key_t zfs_keys_channel_program[] = {
3949 {"program", DATA_TYPE_STRING, 0},
3950 {"arg", DATA_TYPE_ANY, 0},
3951 {"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
3952 {"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
3953 {"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
3954 };
3955
3956 static int
3957 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3958 nvlist_t *outnvl)
3959 {
3960 const char *program;
3961 uint64_t instrlimit, memlimit;
3962 boolean_t sync_flag;
3963 nvpair_t *nvarg = NULL;
3964
3965 program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
3966 if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3967 sync_flag = B_TRUE;
3968 }
3969 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3970 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3971 }
3972 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3973 memlimit = ZCP_DEFAULT_MEMLIMIT;
3974 }
3975 nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
3976
3977 if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3978 return (SET_ERROR(EINVAL));
3979 if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3980 return (SET_ERROR(EINVAL));
3981
3982 return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3983 nvarg, outnvl));
3984 }
3985
3986 /*
3987 * innvl: unused
3988 * outnvl: empty
3989 */
3990 static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
3991 /* no nvl keys */
3992 };
3993
3994 static int
3995 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3996 {
3997 (void) innvl, (void) outnvl;
3998 return (spa_checkpoint(poolname));
3999 }
4000
4001 /*
4002 * innvl: unused
4003 * outnvl: empty
4004 */
4005 static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
4006 /* no nvl keys */
4007 };
4008
4009 static int
4010 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
4011 nvlist_t *outnvl)
4012 {
4013 (void) innvl, (void) outnvl;
4014 return (spa_checkpoint_discard(poolname));
4015 }
4016
4017 /*
4018 * inputs:
4019 * zc_name name of dataset to destroy
4020 * zc_defer_destroy mark for deferred destroy
4021 *
4022 * outputs: none
4023 */
4024 static int
4025 zfs_ioc_destroy(zfs_cmd_t *zc)
4026 {
4027 objset_t *os;
4028 dmu_objset_type_t ost;
4029 int err;
4030
4031 err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4032 if (err != 0)
4033 return (err);
4034 ost = dmu_objset_type(os);
4035 dmu_objset_rele(os, FTAG);
4036
4037 if (ost == DMU_OST_ZFS)
4038 zfs_unmount_snap(zc->zc_name);
4039
4040 if (strchr(zc->zc_name, '@')) {
4041 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
4042 } else {
4043 err = dsl_destroy_head(zc->zc_name);
4044 if (err == EEXIST) {
4045 /*
4046 * It is possible that the given DS may have
4047 * hidden child (%recv) datasets - "leftovers"
4048 * resulting from the previously interrupted
4049 * 'zfs receive'.
4050 *
4051 * 6 extra bytes for /%recv
4052 */
4053 char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
4054
4055 if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
4056 zc->zc_name, recv_clone_name) >=
4057 sizeof (namebuf))
4058 return (SET_ERROR(EINVAL));
4059
4060 /*
4061 * Try to remove the hidden child (%recv) and after
4062 * that try to remove the target dataset.
4063 * If the hidden child (%recv) does not exist
4064 * the original error (EEXIST) will be returned
4065 */
4066 err = dsl_destroy_head(namebuf);
4067 if (err == 0)
4068 err = dsl_destroy_head(zc->zc_name);
4069 else if (err == ENOENT)
4070 err = SET_ERROR(EEXIST);
4071 }
4072 }
4073
4074 return (err);
4075 }
4076
4077 /*
4078 * innvl: {
4079 * "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
4080 * "initialize_vdevs": { -> guids to initialize (nvlist)
4081 * "vdev_path_1": vdev_guid_1, (uint64),
4082 * "vdev_path_2": vdev_guid_2, (uint64),
4083 * ...
4084 * },
4085 * }
4086 *
4087 * outnvl: {
4088 * "initialize_vdevs": { -> initialization errors (nvlist)
4089 * "vdev_path_1": errno, see function body for possible errnos (uint64)
4090 * "vdev_path_2": errno, ... (uint64)
4091 * ...
4092 * }
4093 * }
4094 *
4095 * EINVAL is returned for an unknown commands or if any of the provided vdev
4096 * guids have be specified with a type other than uint64.
4097 */
4098 static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
4099 {ZPOOL_INITIALIZE_COMMAND, DATA_TYPE_UINT64, 0},
4100 {ZPOOL_INITIALIZE_VDEVS, DATA_TYPE_NVLIST, 0}
4101 };
4102
4103 static int
4104 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4105 {
4106 uint64_t cmd_type;
4107 if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
4108 &cmd_type) != 0) {
4109 return (SET_ERROR(EINVAL));
4110 }
4111
4112 if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
4113 cmd_type == POOL_INITIALIZE_START ||
4114 cmd_type == POOL_INITIALIZE_SUSPEND ||
4115 cmd_type == POOL_INITIALIZE_UNINIT)) {
4116 return (SET_ERROR(EINVAL));
4117 }
4118
4119 nvlist_t *vdev_guids;
4120 if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
4121 &vdev_guids) != 0) {
4122 return (SET_ERROR(EINVAL));
4123 }
4124
4125 for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4126 pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4127 uint64_t vdev_guid;
4128 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4129 return (SET_ERROR(EINVAL));
4130 }
4131 }
4132
4133 spa_t *spa;
4134 int error = spa_open(poolname, &spa, FTAG);
4135 if (error != 0)
4136 return (error);
4137
4138 nvlist_t *vdev_errlist = fnvlist_alloc();
4139 int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
4140 vdev_errlist);
4141
4142 if (fnvlist_size(vdev_errlist) > 0) {
4143 fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
4144 vdev_errlist);
4145 }
4146 fnvlist_free(vdev_errlist);
4147
4148 spa_close(spa, FTAG);
4149 return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4150 }
4151
4152 /*
4153 * innvl: {
4154 * "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
4155 * "trim_vdevs": { -> guids to TRIM (nvlist)
4156 * "vdev_path_1": vdev_guid_1, (uint64),
4157 * "vdev_path_2": vdev_guid_2, (uint64),
4158 * ...
4159 * },
4160 * "trim_rate" -> Target TRIM rate in bytes/sec.
4161 * "trim_secure" -> Set to request a secure TRIM.
4162 * }
4163 *
4164 * outnvl: {
4165 * "trim_vdevs": { -> TRIM errors (nvlist)
4166 * "vdev_path_1": errno, see function body for possible errnos (uint64)
4167 * "vdev_path_2": errno, ... (uint64)
4168 * ...
4169 * }
4170 * }
4171 *
4172 * EINVAL is returned for an unknown commands or if any of the provided vdev
4173 * guids have be specified with a type other than uint64.
4174 */
4175 static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
4176 {ZPOOL_TRIM_COMMAND, DATA_TYPE_UINT64, 0},
4177 {ZPOOL_TRIM_VDEVS, DATA_TYPE_NVLIST, 0},
4178 {ZPOOL_TRIM_RATE, DATA_TYPE_UINT64, ZK_OPTIONAL},
4179 {ZPOOL_TRIM_SECURE, DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
4180 };
4181
4182 static int
4183 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4184 {
4185 uint64_t cmd_type;
4186 if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4187 return (SET_ERROR(EINVAL));
4188
4189 if (!(cmd_type == POOL_TRIM_CANCEL ||
4190 cmd_type == POOL_TRIM_START ||
4191 cmd_type == POOL_TRIM_SUSPEND)) {
4192 return (SET_ERROR(EINVAL));
4193 }
4194
4195 nvlist_t *vdev_guids;
4196 if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4197 return (SET_ERROR(EINVAL));
4198
4199 for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4200 pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4201 uint64_t vdev_guid;
4202 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4203 return (SET_ERROR(EINVAL));
4204 }
4205 }
4206
4207 /* Optional, defaults to maximum rate when not provided */
4208 uint64_t rate;
4209 if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4210 rate = 0;
4211
4212 /* Optional, defaults to standard TRIM when not provided */
4213 boolean_t secure;
4214 if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4215 &secure) != 0) {
4216 secure = B_FALSE;
4217 }
4218
4219 spa_t *spa;
4220 int error = spa_open(poolname, &spa, FTAG);
4221 if (error != 0)
4222 return (error);
4223
4224 nvlist_t *vdev_errlist = fnvlist_alloc();
4225 int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4226 rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4227
4228 if (fnvlist_size(vdev_errlist) > 0)
4229 fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4230
4231 fnvlist_free(vdev_errlist);
4232
4233 spa_close(spa, FTAG);
4234 return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4235 }
4236
4237 /*
4238 * This ioctl waits for activity of a particular type to complete. If there is
4239 * no activity of that type in progress, it returns immediately, and the
4240 * returned value "waited" is false. If there is activity in progress, and no
4241 * tag is passed in, the ioctl blocks until all activity of that type is
4242 * complete, and then returns with "waited" set to true.
4243 *
4244 * If a tag is provided, it identifies a particular instance of an activity to
4245 * wait for. Currently, this is only valid for use with 'initialize', because
4246 * that is the only activity for which there can be multiple instances running
4247 * concurrently. In the case of 'initialize', the tag corresponds to the guid of
4248 * the vdev on which to wait.
4249 *
4250 * If a thread waiting in the ioctl receives a signal, the call will return
4251 * immediately, and the return value will be EINTR.
4252 *
4253 * innvl: {
4254 * "wait_activity" -> int32_t
4255 * (optional) "wait_tag" -> uint64_t
4256 * }
4257 *
4258 * outnvl: "waited" -> boolean_t
4259 */
4260 static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
4261 {ZPOOL_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
4262 {ZPOOL_WAIT_TAG, DATA_TYPE_UINT64, ZK_OPTIONAL},
4263 };
4264
4265 static int
4266 zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4267 {
4268 int32_t activity;
4269 uint64_t tag;
4270 boolean_t waited;
4271 int error;
4272
4273 if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
4274 return (EINVAL);
4275
4276 if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
4277 error = spa_wait_tag(name, activity, tag, &waited);
4278 else
4279 error = spa_wait(name, activity, &waited);
4280
4281 if (error == 0)
4282 fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
4283
4284 return (error);
4285 }
4286
4287 /*
4288 * This ioctl waits for activity of a particular type to complete. If there is
4289 * no activity of that type in progress, it returns immediately, and the
4290 * returned value "waited" is false. If there is activity in progress, and no
4291 * tag is passed in, the ioctl blocks until all activity of that type is
4292 * complete, and then returns with "waited" set to true.
4293 *
4294 * If a thread waiting in the ioctl receives a signal, the call will return
4295 * immediately, and the return value will be EINTR.
4296 *
4297 * innvl: {
4298 * "wait_activity" -> int32_t
4299 * }
4300 *
4301 * outnvl: "waited" -> boolean_t
4302 */
4303 static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
4304 {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
4305 };
4306
4307 static int
4308 zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4309 {
4310 int32_t activity;
4311 boolean_t waited = B_FALSE;
4312 int error;
4313 dsl_pool_t *dp;
4314 dsl_dir_t *dd;
4315 dsl_dataset_t *ds;
4316
4317 if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
4318 return (SET_ERROR(EINVAL));
4319
4320 if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
4321 return (SET_ERROR(EINVAL));
4322
4323 if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
4324 return (error);
4325
4326 if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
4327 dsl_pool_rele(dp, FTAG);
4328 return (error);
4329 }
4330
4331 dd = ds->ds_dir;
4332 mutex_enter(&dd->dd_activity_lock);
4333 dd->dd_activity_waiters++;
4334
4335 /*
4336 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
4337 * aren't evicted while we're waiting. Normally this is prevented by
4338 * holding the pool, but we can't do that while we're waiting since
4339 * that would prevent TXGs from syncing out. Some of the functionality
4340 * of long-holds (e.g. preventing deletion) is unnecessary for this
4341 * case, since we would cancel the waiters before proceeding with a
4342 * deletion. An alternative mechanism for keeping the dataset around
4343 * could be developed but this is simpler.
4344 */
4345 dsl_dataset_long_hold(ds, FTAG);
4346 dsl_pool_rele(dp, FTAG);
4347
4348 error = dsl_dir_wait(dd, ds, activity, &waited);
4349
4350 dsl_dataset_long_rele(ds, FTAG);
4351 dd->dd_activity_waiters--;
4352 if (dd->dd_activity_waiters == 0)
4353 cv_signal(&dd->dd_activity_cv);
4354 mutex_exit(&dd->dd_activity_lock);
4355
4356 dsl_dataset_rele(ds, FTAG);
4357
4358 if (error == 0)
4359 fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
4360
4361 return (error);
4362 }
4363
4364 /*
4365 * fsname is name of dataset to rollback (to most recent snapshot)
4366 *
4367 * innvl may contain name of expected target snapshot
4368 *
4369 * outnvl: "target" -> name of most recent snapshot
4370 * }
4371 */
4372 static const zfs_ioc_key_t zfs_keys_rollback[] = {
4373 {"target", DATA_TYPE_STRING, ZK_OPTIONAL},
4374 };
4375
4376 static int
4377 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4378 {
4379 zfsvfs_t *zfsvfs;
4380 zvol_state_handle_t *zv;
4381 const char *target = NULL;
4382 int error;
4383
4384 (void) nvlist_lookup_string(innvl, "target", &target);
4385 if (target != NULL) {
4386 const char *cp = strchr(target, '@');
4387
4388 /*
4389 * The snap name must contain an @, and the part after it must
4390 * contain only valid characters.
4391 */
4392 if (cp == NULL ||
4393 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4394 return (SET_ERROR(EINVAL));
4395 }
4396
4397 if (getzfsvfs(fsname, &zfsvfs) == 0) {
4398 dsl_dataset_t *ds;
4399
4400 ds = dmu_objset_ds(zfsvfs->z_os);
4401 error = zfs_suspend_fs(zfsvfs);
4402 if (error == 0) {
4403 int resume_err;
4404
4405 error = dsl_dataset_rollback(fsname, target, zfsvfs,
4406 outnvl);
4407 resume_err = zfs_resume_fs(zfsvfs, ds);
4408 error = error ? error : resume_err;
4409 }
4410 zfs_vfs_rele(zfsvfs);
4411 } else if ((zv = zvol_suspend(fsname)) != NULL) {
4412 error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
4413 outnvl);
4414 zvol_resume(zv);
4415 } else {
4416 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4417 }
4418 return (error);
4419 }
4420
/*
 * Callback that unmounts the snapshot "<fsname>@<arg>", where arg is the
 * short snapshot name.  Always returns 0.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	const char *shortname = arg;
	char *snap = kmem_asprintf("%s@%s", fsname, shortname);

	zfs_unmount_snap(snap);
	kmem_strfree(snap);

	return (0);
}
4433
4434 /*
4435 *
4436 * snapname is the snapshot to redact.
4437 * innvl: {
4438 * "bookname" -> (string)
4439 * shortname of the redaction bookmark to generate
4440 * "snapnv" -> (nvlist, values ignored)
4441 * snapshots to redact snapname with respect to
4442 * }
4443 *
4444 * outnvl is unused
4445 */
4446
4447 static const zfs_ioc_key_t zfs_keys_redact[] = {
4448 {"bookname", DATA_TYPE_STRING, 0},
4449 {"snapnv", DATA_TYPE_NVLIST, 0},
4450 };
4451
4452 static int
4453 zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
4454 {
4455 (void) outnvl;
4456 nvlist_t *redactnvl = NULL;
4457 const char *redactbook = NULL;
4458
4459 if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
4460 return (SET_ERROR(EINVAL));
4461 if (fnvlist_num_pairs(redactnvl) == 0)
4462 return (SET_ERROR(ENXIO));
4463 if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
4464 return (SET_ERROR(EINVAL));
4465
4466 return (dmu_redact_snap(snapname, redactnvl, redactbook));
4467 }
4468
4469 /*
4470 * inputs:
4471 * zc_name old name of dataset
4472 * zc_value new name of dataset
4473 * zc_cookie recursive flag (only valid for snapshots)
4474 *
4475 * outputs: none
4476 */
4477 static int
4478 zfs_ioc_rename(zfs_cmd_t *zc)
4479 {
4480 objset_t *os;
4481 dmu_objset_type_t ost;
4482 boolean_t recursive = zc->zc_cookie & 1;
4483 boolean_t nounmount = !!(zc->zc_cookie & 2);
4484 char *at;
4485 int err;
4486
4487 /* "zfs rename" from and to ...%recv datasets should both fail */
4488 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4489 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4490 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4491 dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4492 strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4493 return (SET_ERROR(EINVAL));
4494
4495 err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4496 if (err != 0)
4497 return (err);
4498 ost = dmu_objset_type(os);
4499 dmu_objset_rele(os, FTAG);
4500
4501 at = strchr(zc->zc_name, '@');
4502 if (at != NULL) {
4503 /* snaps must be in same fs */
4504 int error;
4505
4506 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4507 return (SET_ERROR(EXDEV));
4508 *at = '\0';
4509 if (ost == DMU_OST_ZFS && !nounmount) {
4510 error = dmu_objset_find(zc->zc_name,
4511 recursive_unmount, at + 1,
4512 recursive ? DS_FIND_CHILDREN : 0);
4513 if (error != 0) {
4514 *at = '@';
4515 return (error);
4516 }
4517 }
4518 error = dsl_dataset_rename_snapshot(zc->zc_name,
4519 at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4520 *at = '@';
4521
4522 return (error);
4523 } else {
4524 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4525 }
4526 }
4527
4528 static int
4529 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4530 {
4531 const char *propname = nvpair_name(pair);
4532 boolean_t issnap = (strchr(dsname, '@') != NULL);
4533 zfs_prop_t prop = zfs_name_to_prop(propname);
4534 uint64_t intval, compval;
4535 int err;
4536
4537 if (prop == ZPROP_USERPROP) {
4538 if (zfs_prop_user(propname)) {
4539 if ((err = zfs_secpolicy_write_perms(dsname,
4540 ZFS_DELEG_PERM_USERPROP, cr)))
4541 return (err);
4542 return (0);
4543 }
4544
4545 if (!issnap && zfs_prop_userquota(propname)) {
4546 const char *perm = NULL;
4547 const char *uq_prefix =
4548 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4549 const char *gq_prefix =
4550 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4551 const char *uiq_prefix =
4552 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4553 const char *giq_prefix =
4554 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4555 const char *pq_prefix =
4556 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4557 const char *piq_prefix = zfs_userquota_prop_prefixes[\
4558 ZFS_PROP_PROJECTOBJQUOTA];
4559
4560 if (strncmp(propname, uq_prefix,
4561 strlen(uq_prefix)) == 0) {
4562 perm = ZFS_DELEG_PERM_USERQUOTA;
4563 } else if (strncmp(propname, uiq_prefix,
4564 strlen(uiq_prefix)) == 0) {
4565 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4566 } else if (strncmp(propname, gq_prefix,
4567 strlen(gq_prefix)) == 0) {
4568 perm = ZFS_DELEG_PERM_GROUPQUOTA;
4569 } else if (strncmp(propname, giq_prefix,
4570 strlen(giq_prefix)) == 0) {
4571 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4572 } else if (strncmp(propname, pq_prefix,
4573 strlen(pq_prefix)) == 0) {
4574 perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4575 } else if (strncmp(propname, piq_prefix,
4576 strlen(piq_prefix)) == 0) {
4577 perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4578 } else {
4579 /* {USER|GROUP|PROJECT}USED are read-only */
4580 return (SET_ERROR(EINVAL));
4581 }
4582
4583 if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
4584 return (err);
4585 return (0);
4586 }
4587
4588 return (SET_ERROR(EINVAL));
4589 }
4590
4591 if (issnap)
4592 return (SET_ERROR(EINVAL));
4593
4594 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4595 /*
4596 * dsl_prop_get_all_impl() returns properties in this
4597 * format.
4598 */
4599 nvlist_t *attrs;
4600 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4601 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4602 &pair) == 0);
4603 }
4604
4605 /*
4606 * Check that this value is valid for this pool version
4607 */
4608 switch (prop) {
4609 case ZFS_PROP_COMPRESSION:
4610 /*
4611 * If the user specified gzip compression, make sure
4612 * the SPA supports it. We ignore any errors here since
4613 * we'll catch them later.
4614 */
4615 if (nvpair_value_uint64(pair, &intval) == 0) {
4616 compval = ZIO_COMPRESS_ALGO(intval);
4617 if (compval >= ZIO_COMPRESS_GZIP_1 &&
4618 compval <= ZIO_COMPRESS_GZIP_9 &&
4619 zfs_earlier_version(dsname,
4620 SPA_VERSION_GZIP_COMPRESSION)) {
4621 return (SET_ERROR(ENOTSUP));
4622 }
4623
4624 if (compval == ZIO_COMPRESS_ZLE &&
4625 zfs_earlier_version(dsname,
4626 SPA_VERSION_ZLE_COMPRESSION))
4627 return (SET_ERROR(ENOTSUP));
4628
4629 if (compval == ZIO_COMPRESS_LZ4) {
4630 spa_t *spa;
4631
4632 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4633 return (err);
4634
4635 if (!spa_feature_is_enabled(spa,
4636 SPA_FEATURE_LZ4_COMPRESS)) {
4637 spa_close(spa, FTAG);
4638 return (SET_ERROR(ENOTSUP));
4639 }
4640 spa_close(spa, FTAG);
4641 }
4642
4643 if (compval == ZIO_COMPRESS_ZSTD) {
4644 spa_t *spa;
4645
4646 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4647 return (err);
4648
4649 if (!spa_feature_is_enabled(spa,
4650 SPA_FEATURE_ZSTD_COMPRESS)) {
4651 spa_close(spa, FTAG);
4652 return (SET_ERROR(ENOTSUP));
4653 }
4654 spa_close(spa, FTAG);
4655 }
4656 }
4657 break;
4658
4659 case ZFS_PROP_COPIES:
4660 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4661 return (SET_ERROR(ENOTSUP));
4662 break;
4663
4664 case ZFS_PROP_VOLBLOCKSIZE:
4665 case ZFS_PROP_RECORDSIZE:
4666 /* Record sizes above 128k need the feature to be enabled */
4667 if (nvpair_value_uint64(pair, &intval) == 0 &&
4668 intval > SPA_OLD_MAXBLOCKSIZE) {
4669 spa_t *spa;
4670
4671 /*
4672 * We don't allow setting the property above 1MB,
4673 * unless the tunable has been changed.
4674 */
4675 if (intval > zfs_max_recordsize ||
4676 intval > SPA_MAXBLOCKSIZE)
4677 return (SET_ERROR(ERANGE));
4678
4679 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4680 return (err);
4681
4682 if (!spa_feature_is_enabled(spa,
4683 SPA_FEATURE_LARGE_BLOCKS)) {
4684 spa_close(spa, FTAG);
4685 return (SET_ERROR(ENOTSUP));
4686 }
4687 spa_close(spa, FTAG);
4688 }
4689 break;
4690
4691 case ZFS_PROP_DNODESIZE:
4692 /* Dnode sizes above 512 need the feature to be enabled */
4693 if (nvpair_value_uint64(pair, &intval) == 0 &&
4694 intval != ZFS_DNSIZE_LEGACY) {
4695 spa_t *spa;
4696
4697 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4698 return (err);
4699
4700 if (!spa_feature_is_enabled(spa,
4701 SPA_FEATURE_LARGE_DNODE)) {
4702 spa_close(spa, FTAG);
4703 return (SET_ERROR(ENOTSUP));
4704 }
4705 spa_close(spa, FTAG);
4706 }
4707 break;
4708
4709 case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4710 /*
4711 * This property could require the allocation classes
4712 * feature to be active for setting, however we allow
4713 * it so that tests of settable properties succeed.
4714 * The CLI will issue a warning in this case.
4715 */
4716 break;
4717
4718 case ZFS_PROP_SHARESMB:
4719 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4720 return (SET_ERROR(ENOTSUP));
4721 break;
4722
4723 case ZFS_PROP_ACLINHERIT:
4724 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4725 nvpair_value_uint64(pair, &intval) == 0) {
4726 if (intval == ZFS_ACL_PASSTHROUGH_X &&
4727 zfs_earlier_version(dsname,
4728 SPA_VERSION_PASSTHROUGH_X))
4729 return (SET_ERROR(ENOTSUP));
4730 }
4731 break;
4732 case ZFS_PROP_CHECKSUM:
4733 case ZFS_PROP_DEDUP:
4734 {
4735 spa_feature_t feature;
4736 spa_t *spa;
4737 int err;
4738
4739 /* dedup feature version checks */
4740 if (prop == ZFS_PROP_DEDUP &&
4741 zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4742 return (SET_ERROR(ENOTSUP));
4743
4744 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4745 nvpair_value_uint64(pair, &intval) == 0) {
4746 /* check prop value is enabled in features */
4747 feature = zio_checksum_to_feature(
4748 intval & ZIO_CHECKSUM_MASK);
4749 if (feature == SPA_FEATURE_NONE)
4750 break;
4751
4752 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4753 return (err);
4754
4755 if (!spa_feature_is_enabled(spa, feature)) {
4756 spa_close(spa, FTAG);
4757 return (SET_ERROR(ENOTSUP));
4758 }
4759 spa_close(spa, FTAG);
4760 }
4761 break;
4762 }
4763
4764 default:
4765 break;
4766 }
4767
4768 return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4769 }
4770
4771 /*
4772 * Removes properties from the given props list that fail permission checks
4773 * needed to clear them and to restore them in case of a receive error. For each
4774 * property, make sure we have both set and inherit permissions.
4775 *
4776 * Returns the first error encountered if any permission checks fail. If the
4777 * caller provides a non-NULL errlist, it also gives the complete list of names
4778 * of all the properties that failed a permission check along with the
4779 * corresponding error numbers. The caller is responsible for freeing the
4780 * returned errlist.
4781 *
4782 * If every property checks out successfully, zero is returned and the list
4783 * pointed at by errlist is NULL.
4784 */
4785 static int
4786 zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
4787 {
4788 zfs_cmd_t *zc;
4789 nvpair_t *pair, *next_pair;
4790 nvlist_t *errors;
4791 int err, rv = 0;
4792
4793 if (props == NULL)
4794 return (0);
4795
4796 VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4797
4798 zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4799 (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
4800 pair = nvlist_next_nvpair(props, NULL);
4801 while (pair != NULL) {
4802 next_pair = nvlist_next_nvpair(props, pair);
4803
4804 (void) strlcpy(zc->zc_value, nvpair_name(pair),
4805 sizeof (zc->zc_value));
4806 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4807 (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4808 VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4809 VERIFY(nvlist_add_int32(errors,
4810 zc->zc_value, err) == 0);
4811 }
4812 pair = next_pair;
4813 }
4814 kmem_free(zc, sizeof (zfs_cmd_t));
4815
4816 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4817 nvlist_free(errors);
4818 errors = NULL;
4819 } else {
4820 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4821 }
4822
4823 if (errlist == NULL)
4824 nvlist_free(errors);
4825 else
4826 *errlist = errors;
4827
4828 return (rv);
4829 }
4830
4831 static boolean_t
4832 propval_equals(nvpair_t *p1, nvpair_t *p2)
4833 {
4834 if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4835 /* dsl_prop_get_all_impl() format */
4836 nvlist_t *attrs;
4837 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4838 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4839 &p1) == 0);
4840 }
4841
4842 if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4843 nvlist_t *attrs;
4844 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4845 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4846 &p2) == 0);
4847 }
4848
4849 if (nvpair_type(p1) != nvpair_type(p2))
4850 return (B_FALSE);
4851
4852 if (nvpair_type(p1) == DATA_TYPE_STRING) {
4853 const char *valstr1, *valstr2;
4854
4855 VERIFY(nvpair_value_string(p1, &valstr1) == 0);
4856 VERIFY(nvpair_value_string(p2, &valstr2) == 0);
4857 return (strcmp(valstr1, valstr2) == 0);
4858 } else {
4859 uint64_t intval1, intval2;
4860
4861 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4862 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4863 return (intval1 == intval2);
4864 }
4865 }
4866
4867 /*
4868 * Remove properties from props if they are not going to change (as determined
4869 * by comparison with origprops). Remove them from origprops as well, since we
4870 * do not need to clear or restore properties that won't change.
4871 */
4872 static void
4873 props_reduce(nvlist_t *props, nvlist_t *origprops)
4874 {
4875 nvpair_t *pair, *next_pair;
4876
4877 if (origprops == NULL)
4878 return; /* all props need to be received */
4879
4880 pair = nvlist_next_nvpair(props, NULL);
4881 while (pair != NULL) {
4882 const char *propname = nvpair_name(pair);
4883 nvpair_t *match;
4884
4885 next_pair = nvlist_next_nvpair(props, pair);
4886
4887 if ((nvlist_lookup_nvpair(origprops, propname,
4888 &match) != 0) || !propval_equals(pair, match))
4889 goto next; /* need to set received value */
4890
4891 /* don't clear the existing received value */
4892 (void) nvlist_remove_nvpair(origprops, match);
4893 /* don't bother receiving the property */
4894 (void) nvlist_remove_nvpair(props, pair);
4895 next:
4896 pair = next_pair;
4897 }
4898 }
4899
4900 /*
4901 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4902 * For example, refquota cannot be set until after the receipt of a dataset,
4903 * because in replication streams, an older/earlier snapshot may exceed the
4904 * refquota. We want to receive the older/earlier snapshot, but setting
4905 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4906 * the older/earlier snapshot from being received (with EDQUOT).
4907 *
4908 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4909 *
4910 * libzfs will need to be judicious handling errors encountered by props
4911 * extracted by this function.
4912 */
4913 static nvlist_t *
4914 extract_delay_props(nvlist_t *props)
4915 {
4916 nvlist_t *delayprops;
4917 nvpair_t *nvp, *tmp;
4918 static const zfs_prop_t delayable[] = {
4919 ZFS_PROP_REFQUOTA,
4920 ZFS_PROP_KEYLOCATION,
4921 /*
4922 * Setting ZFS_PROP_SHARESMB requires the objset type to be
4923 * known, which is not possible prior to receipt of raw sends.
4924 */
4925 ZFS_PROP_SHARESMB,
4926 0
4927 };
4928 int i;
4929
4930 VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4931
4932 for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4933 nvp = nvlist_next_nvpair(props, nvp)) {
4934 /*
4935 * strcmp() is safe because zfs_prop_to_name() always returns
4936 * a bounded string.
4937 */
4938 for (i = 0; delayable[i] != 0; i++) {
4939 if (strcmp(zfs_prop_to_name(delayable[i]),
4940 nvpair_name(nvp)) == 0) {
4941 break;
4942 }
4943 }
4944 if (delayable[i] != 0) {
4945 tmp = nvlist_prev_nvpair(props, nvp);
4946 VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4947 VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4948 nvp = tmp;
4949 }
4950 }
4951
4952 if (nvlist_empty(delayprops)) {
4953 nvlist_free(delayprops);
4954 delayprops = NULL;
4955 }
4956 return (delayprops);
4957 }
4958
4959 static void
4960 zfs_allow_log_destroy(void *arg)
4961 {
4962 char *poolname = arg;
4963
4964 if (poolname != NULL)
4965 kmem_strfree(poolname);
4966 }
4967
4968 #ifdef ZFS_DEBUG
/*
 * Debug-only fault injection flag. When it is set, zfs_ioc_recv_impl()
 * clears it and forces its own result to an error once the stream has been
 * processed, which drives the on-error property restore code that follows.
 */
4969 static boolean_t zfs_ioc_recv_inject_err;
4970 #endif
4971
4972 /*
 * Common implementation behind zfs_ioc_recv() and zfs_ioc_recv_new(): start
 * the receive with dmu_recv_begin(), apply the received and local properties,
 * consume the stream with dmu_recv_stream(), finish with dmu_recv_end(), set
 * the delayed properties, and put the previous properties back if the
 * receive failed.
 *
 * tofs, tosnap   target filesystem name and snapshot name
 * origin         clone origin name, or NULL
 * recvprops      received properties to set, or NULL; entries may be
 *                removed from this list while it is processed
 * localprops     local overrides, or NULL: DATA_TYPE_BOOLEAN entries are
 *                "-x" exclusions, everything else is "-o property=value"
 * hidden_args, force, heal, resumable
 *                passed through to dmu_recv_begin()
 * input_fd       file descriptor the stream is read from
 * begin_record   BEGIN record of the stream
 * read_bytes     out: how far the file offset advanced (off - noff)
 * errflags       out: ZPROP_ERR_NOCLEAR / ZPROP_ERR_NORESTORE bits
 * errors         out: see below
 *
 * Returns EBADF if input_fd cannot be resolved, otherwise the receive error,
 * or the dsl_prop_set_hasrecvd() error if the receive itself succeeded.
 *
4973 * nvlist 'errors' is always allocated. It will contain descriptions of
4974 * encountered errors, if any. It's the caller's responsibility to free.
4975 */
4976 static int
4977 zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
4978 nvlist_t *recvprops, nvlist_t *localprops, nvlist_t *hidden_args,
4979 boolean_t force, boolean_t heal, boolean_t resumable, int input_fd,
4980 dmu_replay_record_t *begin_record, uint64_t *read_bytes,
4981 uint64_t *errflags, nvlist_t **errors)
4982 {
4983 dmu_recv_cookie_t drc;
4984 int error = 0;
4985 int props_error = 0;
4986 offset_t off, noff;
4987 nvlist_t *local_delayprops = NULL;
4988 nvlist_t *recv_delayprops = NULL;
4989 nvlist_t *inherited_delayprops = NULL;
4990 nvlist_t *origprops = NULL; /* existing properties */
4991 nvlist_t *origrecvd = NULL; /* existing received properties */
4992 boolean_t first_recvd_props = B_FALSE;
4993 boolean_t tofs_was_redacted;
4994 zfs_file_t *input_fp;
4995
4996 *read_bytes = 0;
4997 *errflags = 0;
4998 *errors = fnvlist_alloc();
4999 off = 0;
5000
5001 if ((input_fp = zfs_file_get(input_fd)) == NULL)
5002 return (SET_ERROR(EBADF));
5003
/* Remember the starting offset so the bytes consumed can be reported. */
5004 noff = off = zfs_file_off(input_fp);
5005 error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal,
5006 resumable, localprops, hidden_args, origin, &drc, input_fp,
5007 &off);
5008 if (error != 0)
5009 goto out;
5010 tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
5011
5012 /*
5013 * Set properties before we receive the stream so that they are applied
5014 * to the new data. Note that we must call dmu_recv_stream() if
5015 * dmu_recv_begin() succeeds.
5016 */
5017 if (recvprops != NULL && !drc.drc_newfs) {
5018 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
5019 SPA_VERSION_RECVD_PROPS &&
5020 !dsl_prop_get_hasrecvd(tofs))
5021 first_recvd_props = B_TRUE;
5022
5023 /*
5024 * If new received properties are supplied, they are to
5025 * completely replace the existing received properties,
5026 * so stash away the existing ones.
5027 */
5028 if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
5029 nvlist_t *errlist = NULL;
5030 /*
5031 * Don't bother writing a property if its value won't
5032 * change (and avoid the unnecessary security checks).
5033 *
5034 * The first receive after SPA_VERSION_RECVD_PROPS is a
5035 * special case where we blow away all local properties
5036 * regardless.
5037 */
5038 if (!first_recvd_props)
5039 props_reduce(recvprops, origrecvd);
5040 if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
5041 (void) nvlist_merge(*errors, errlist, 0);
5042 nvlist_free(errlist);
5043
5044 if (clear_received_props(tofs, origrecvd,
5045 first_recvd_props ? NULL : recvprops) != 0)
5046 *errflags |= ZPROP_ERR_NOCLEAR;
5047 } else {
5048 *errflags |= ZPROP_ERR_NOCLEAR;
5049 }
5050 }
5051
5052 /*
5053 * Stash away existing properties so we can restore them on error unless
5054 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
5055 * case "origrecvd" will take care of that.
5056 */
5057 if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
5058 objset_t *os;
5059 if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
5060 if (dsl_prop_get_all(os, &origprops) != 0) {
5061 *errflags |= ZPROP_ERR_NOCLEAR;
5062 }
5063 dmu_objset_rele(os, FTAG);
5064 } else {
5065 *errflags |= ZPROP_ERR_NOCLEAR;
5066 }
5067 }
5068
/*
 * Apply the received properties now, except for the delayable ones, which
 * are split off and set only after the receive has completed.
 */
5069 if (recvprops != NULL) {
5070 props_error = dsl_prop_set_hasrecvd(tofs);
5071
5072 if (props_error == 0) {
5073 recv_delayprops = extract_delay_props(recvprops);
5074 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5075 recvprops, *errors);
5076 }
5077 }
5078
/*
 * Split localprops into "-o" overrides (oprops) and "-x" exclusions
 * (xprops), then apply each set with its delayable members held back.
 */
5079 if (localprops != NULL) {
5080 nvlist_t *oprops = fnvlist_alloc();
5081 nvlist_t *xprops = fnvlist_alloc();
5082 nvpair_t *nvp = NULL;
5083
5084 while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5085 if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
5086 /* -x property */
5087 const char *name = nvpair_name(nvp);
5088 zfs_prop_t prop = zfs_name_to_prop(name);
5089 if (prop != ZPROP_USERPROP) {
5090 if (!zfs_prop_inheritable(prop))
5091 continue;
5092 } else if (!zfs_prop_user(name))
5093 continue;
5094 fnvlist_add_boolean(xprops, name);
5095 } else {
5096 /* -o property=value */
5097 fnvlist_add_nvpair(oprops, nvp);
5098 }
5099 }
5100
5101 local_delayprops = extract_delay_props(oprops);
5102 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5103 oprops, *errors);
5104 inherited_delayprops = extract_delay_props(xprops);
5105 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5106 xprops, *errors);
5107
5108 nvlist_free(oprops);
5109 nvlist_free(xprops);
5110 }
5111
5112 error = dmu_recv_stream(&drc, &off);
5113
5114 if (error == 0) {
5115 zfsvfs_t *zfsvfs = NULL;
5116 zvol_state_handle_t *zv = NULL;
5117
5118 if (getzfsvfs(tofs, &zfsvfs) == 0) {
5119 /* online recv */
5120 dsl_dataset_t *ds;
5121 int end_err;
5122 boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
5123 begin_record->drr_u.drr_begin.
5124 drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
5125
5126 ds = dmu_objset_ds(zfsvfs->z_os);
5127 error = zfs_suspend_fs(zfsvfs);
5128 /*
5129 * If the suspend fails, then the recv_end will
5130 * likely also fail, and clean up after itself.
5131 */
5132 end_err = dmu_recv_end(&drc, zfsvfs);
5133 /*
5134 * If the dataset was not redacted, but we received a
5135 * redacted stream onto it, we need to unmount the
5136 * dataset. Otherwise, resume the filesystem.
5137 */
5138 if (error == 0 && !drc.drc_newfs &&
5139 stream_is_redacted && !tofs_was_redacted) {
5140 error = zfs_end_fs(zfsvfs, ds);
5141 } else if (error == 0) {
5142 error = zfs_resume_fs(zfsvfs, ds);
5143 }
/* A suspend/end/resume error takes precedence over end_err. */
5144 error = error ? error : end_err;
5145 zfs_vfs_rele(zfsvfs);
5146 } else if ((zv = zvol_suspend(tofs)) != NULL) {
5147 error = dmu_recv_end(&drc, zvol_tag(zv));
5148 zvol_resume(zv);
5149 } else {
5150 error = dmu_recv_end(&drc, NULL);
5151 }
5152
5153 /* Set delayed properties now, after we're done receiving. */
5154 if (recv_delayprops != NULL && error == 0) {
5155 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5156 recv_delayprops, *errors);
5157 }
5158 if (local_delayprops != NULL && error == 0) {
5159 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5160 local_delayprops, *errors);
5161 }
5162 if (inherited_delayprops != NULL && error == 0) {
5163 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5164 inherited_delayprops, *errors);
5165 }
5166 }
5167
5168 /*
5169 * Merge delayed props back in with initial props, in case
5170 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
5171 * we have to make sure clear_received_props() includes
5172 * the delayed properties).
5173 *
5174 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
5175 * using ASSERT() will be just like a VERIFY.
5176 */
5177 if (recv_delayprops != NULL) {
5178 ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
5179 nvlist_free(recv_delayprops);
5180 }
5181 if (local_delayprops != NULL) {
5182 ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
5183 nvlist_free(local_delayprops);
5184 }
5185 if (inherited_delayprops != NULL) {
5186 ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
5187 nvlist_free(inherited_delayprops);
5188 }
5189 *read_bytes = off - noff;
5190
5191 #ifdef ZFS_DEBUG
5192 if (zfs_ioc_recv_inject_err) {
5193 zfs_ioc_recv_inject_err = B_FALSE;
5194 error = 1;
5195 }
5196 #endif
5197
5198 /*
5199 * On error, restore the original props.
5200 */
5201 if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
5202 if (clear_received_props(tofs, recvprops, NULL) != 0) {
5203 /*
5204 * We failed to clear the received properties.
5205 * Since we may have left a $recvd value on the
5206 * system, we can't clear the $hasrecvd flag.
5207 */
5208 *errflags |= ZPROP_ERR_NORESTORE;
5209 } else if (first_recvd_props) {
5210 dsl_prop_unset_hasrecvd(tofs);
5211 }
5212
5213 if (origrecvd == NULL && !drc.drc_newfs) {
5214 /* We failed to stash the original properties. */
5215 *errflags |= ZPROP_ERR_NORESTORE;
5216 }
5217
5218 /*
5219 * dsl_props_set() will not convert RECEIVED to LOCAL on or
5220 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
5221 * explicitly if we're restoring local properties cleared in the
5222 * first new-style receive.
5223 */
5224 if (origrecvd != NULL &&
5225 zfs_set_prop_nvlist(tofs, (first_recvd_props ?
5226 ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
5227 origrecvd, NULL) != 0) {
5228 /*
5229 * We stashed the original properties but failed to
5230 * restore them.
5231 */
5232 *errflags |= ZPROP_ERR_NORESTORE;
5233 }
5234 }
5235 if (error != 0 && localprops != NULL && !drc.drc_newfs &&
5236 !first_recvd_props) {
5237 nvlist_t *setprops;
5238 nvlist_t *inheritprops;
5239 nvpair_t *nvp;
5240
5241 if (origprops == NULL) {
5242 /* We failed to stash the original properties. */
5243 *errflags |= ZPROP_ERR_NORESTORE;
5244 goto out;
5245 }
5246
5247 /* Restore original props */
5248 setprops = fnvlist_alloc();
5249 inheritprops = fnvlist_alloc();
5250 nvp = NULL;
5251 while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5252 const char *name = nvpair_name(nvp);
5253 const char *source;
5254 nvlist_t *attrs;
5255
5256 if (!nvlist_exists(origprops, name)) {
5257 /*
5258 * Property was not present or was explicitly
5259 * inherited before the receive, restore this.
5260 */
5261 fnvlist_add_boolean(inheritprops, name);
5262 continue;
5263 }
5264 attrs = fnvlist_lookup_nvlist(origprops, name);
5265 source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
5266
5267 /* Skip received properties */
5268 if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
5269 continue;
5270
5271 if (strcmp(source, tofs) == 0) {
5272 /* Property was locally set */
5273 fnvlist_add_nvlist(setprops, name, attrs);
5274 } else {
5275 /* Property was implicitly inherited */
5276 fnvlist_add_boolean(inheritprops, name);
5277 }
5278 }
5279
5280 if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
5281 NULL) != 0)
5282 *errflags |= ZPROP_ERR_NORESTORE;
5283 if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
5284 NULL) != 0)
5285 *errflags |= ZPROP_ERR_NORESTORE;
5286
5287 nvlist_free(setprops);
5288 nvlist_free(inheritprops);
5289 }
5290 out:
5291 zfs_file_put(input_fp);
5292 nvlist_free(origrecvd);
5293 nvlist_free(origprops);
5294
/* A dsl_prop_set_hasrecvd() failure is reported only if nothing else failed. */
5295 if (error == 0)
5296 error = props_error;
5297
5298 return (error);
5299 }
5300
5301 /*
5302 * inputs:
5303 * zc_name name of containing filesystem (unused)
5304 * zc_nvlist_src{_size} nvlist of properties to apply
5305 * zc_nvlist_conf{_size} nvlist of properties to exclude
5306 * (DATA_TYPE_BOOLEAN) and override (everything else)
5307 * zc_value name of snapshot to create
5308 * zc_string name of clone origin (if DRR_FLAG_CLONE)
5309 * zc_cookie file descriptor to recv from
5310 * zc_begin_record the BEGIN record of the stream (not byteswapped)
5311 * zc_guid force flag
5312 *
5313 * outputs:
5314 * zc_cookie number of bytes read
5315 * zc_obj zprop_errflags_t
5316 * zc_nvlist_dst{_size} error for each unapplied received property
5317 */
5318 static int
5319 zfs_ioc_recv(zfs_cmd_t *zc)
5320 {
5321 dmu_replay_record_t begin_record;
5322 nvlist_t *errors = NULL;
5323 nvlist_t *recvdprops = NULL;
5324 nvlist_t *localprops = NULL;
5325 const char *origin = NULL;
5326 char *tosnap;
5327 char tofs[ZFS_MAX_DATASET_NAME_LEN];
5328 int error = 0;
5329
5330 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5331 strchr(zc->zc_value, '@') == NULL ||
5332 strchr(zc->zc_value, '%'))
5333 return (SET_ERROR(EINVAL));
5334
5335 (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5336 tosnap = strchr(tofs, '@');
5337 *tosnap++ = '\0';
5338
5339 if (zc->zc_nvlist_src != 0 &&
5340 (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5341 zc->zc_iflags, &recvdprops)) != 0)
5342 return (error);
5343
5344 if (zc->zc_nvlist_conf != 0 &&
5345 (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5346 zc->zc_iflags, &localprops)) != 0)
5347 return (error);
5348
5349 if (zc->zc_string[0])
5350 origin = zc->zc_string;
5351
5352 begin_record.drr_type = DRR_BEGIN;
5353 begin_record.drr_payloadlen = 0;
5354 begin_record.drr_u.drr_begin = zc->zc_begin_record;
5355
5356 error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5357 NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record,
5358 &zc->zc_cookie, &zc->zc_obj, &errors);
5359 nvlist_free(recvdprops);
5360 nvlist_free(localprops);
5361
5362 /*
5363 * Now that all props, initial and delayed, are set, report the prop
5364 * errors to the caller.
5365 */
5366 if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5367 (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5368 put_nvlist(zc, errors) != 0)) {
5369 /*
5370 * Caller made zc->zc_nvlist_dst less than the minimum expected
5371 * size or supplied an invalid address.
5372 */
5373 error = SET_ERROR(EINVAL);
5374 }
5375
5376 nvlist_free(errors);
5377
5378 return (error);
5379 }
5380
5381 /*
 * Input and output nvlist layout of the nvlist-based receive ioctl handled
 * by zfs_ioc_recv_new(), followed by the key table describing the accepted
 * input keys: each entry gives the key name, its nvpair data type, and
 * flags, where ZK_OPTIONAL marks the keys listed as "(optional)" below.
 *
5382 * innvl: {
5383 * "snapname" -> full name of the snapshot to create
5384 * (optional) "props" -> received properties to set (nvlist)
5385 * (optional) "localprops" -> override and exclude properties (nvlist)
5386 * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
5387 * "begin_record" -> non-byteswapped dmu_replay_record_t
5388 * "input_fd" -> file descriptor to read stream from (int32)
5389 * (optional) "force" -> force flag (value ignored)
5390 * (optional) "heal" -> use send stream to heal data corruption
5391 * (optional) "resumable" -> resumable flag (value ignored)
5392 * (optional) "cleanup_fd" -> unused
5393 * (optional) "action_handle" -> unused
5394 * (optional) "hidden_args" -> { "wkeydata" -> value }
5395 * }
5396 *
5397 * outnvl: {
5398 * "read_bytes" -> number of bytes read
5399 * "error_flags" -> zprop_errflags_t
5400 * "errors" -> error for each unapplied received property (nvlist)
5401 * }
5402 */
5403 static const zfs_ioc_key_t zfs_keys_recv_new[] = {
5404 {"snapname", DATA_TYPE_STRING, 0},
5405 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
5406 {"localprops", DATA_TYPE_NVLIST, ZK_OPTIONAL},
5407 {"origin", DATA_TYPE_STRING, ZK_OPTIONAL},
5408 {"begin_record", DATA_TYPE_BYTE_ARRAY, 0},
5409 {"input_fd", DATA_TYPE_INT32, 0},
5410 {"force", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
5411 {"heal", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
5412 {"resumable", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
5413 {"cleanup_fd", DATA_TYPE_INT32, ZK_OPTIONAL},
5414 {"action_handle", DATA_TYPE_UINT64, ZK_OPTIONAL},
5415 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
5416 };
5417
5418 static int
5419 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
5420 {
5421 dmu_replay_record_t *begin_record;
5422 uint_t begin_record_size;
5423 nvlist_t *errors = NULL;
5424 nvlist_t *recvprops = NULL;
5425 nvlist_t *localprops = NULL;
5426 nvlist_t *hidden_args = NULL;
5427 const char *snapname;
5428 const char *origin = NULL;
5429 char *tosnap;
5430 char tofs[ZFS_MAX_DATASET_NAME_LEN];
5431 boolean_t force;
5432 boolean_t heal;
5433 boolean_t resumable;
5434 uint64_t read_bytes = 0;
5435 uint64_t errflags = 0;
5436 int input_fd = -1;
5437 int error;
5438
5439 snapname = fnvlist_lookup_string(innvl, "snapname");
5440
5441 if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
5442 strchr(snapname, '@') == NULL ||
5443 strchr(snapname, '%'))
5444 return (SET_ERROR(EINVAL));
5445
5446 (void) strlcpy(tofs, snapname, sizeof (tofs));
5447 tosnap = strchr(tofs, '@');
5448 *tosnap++ = '\0';
5449
5450 error = nvlist_lookup_string(innvl, "origin", &origin);
5451 if (error && error != ENOENT)
5452 return (error);
5453
5454 error = nvlist_lookup_byte_array(innvl, "begin_record",
5455 (uchar_t **)&begin_record, &begin_record_size);
5456 if (error != 0 || begin_record_size != sizeof (*begin_record))
5457 return (SET_ERROR(EINVAL));
5458
5459 input_fd = fnvlist_lookup_int32(innvl, "input_fd");
5460
5461 force = nvlist_exists(innvl, "force");
5462 heal = nvlist_exists(innvl, "heal");
5463 resumable = nvlist_exists(innvl, "resumable");
5464
5465 /* we still use "props" here for backwards compatibility */
5466 error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
5467 if (error && error != ENOENT)
5468 return (error);
5469
5470 error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
5471 if (error && error != ENOENT)
5472 return (error);
5473
5474 error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
5475 if (error && error != ENOENT)
5476 return (error);
5477
5478 error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
5479 hidden_args, force, heal, resumable, input_fd, begin_record,
5480 &read_bytes, &errflags, &errors);
5481
5482 fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
5483 fnvlist_add_uint64(outnvl, "error_flags", errflags);
5484 fnvlist_add_nvlist(outnvl, "errors", errors);
5485
5486 nvlist_free(errors);
5487 nvlist_free(recvprops);
5488 nvlist_free(localprops);
5489
5490 return (error);
5491 }
5492
/*
 * Argument bundle handed from dump_bytes() to dump_bytes_cb(): describes one
 * write to the output file and carries back its result.
 */
5493 typedef struct dump_bytes_io {
5494 zfs_file_t *dbi_fp; /* file the buffer is written to */
5495 caddr_t dbi_buf; /* data to write */
5496 int dbi_len; /* number of bytes to write from dbi_buf */
5497 int dbi_err; /* out: return value of zfs_file_write() */
5498 } dump_bytes_io_t;
5499
5500 static void
5501 dump_bytes_cb(void *arg)
5502 {
5503 dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
5504 zfs_file_t *fp;
5505 caddr_t buf;
5506
5507 fp = dbi->dbi_fp;
5508 buf = dbi->dbi_buf;
5509
5510 dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
5511 }
5512
5513 static int
5514 dump_bytes(objset_t *os, void *buf, int len, void *arg)
5515 {
5516 dump_bytes_io_t dbi;
5517
5518 dbi.dbi_fp = arg;
5519 dbi.dbi_buf = buf;
5520 dbi.dbi_len = len;
5521
5522 #if defined(HAVE_LARGE_STACKS)
5523 dump_bytes_cb(&dbi);
5524 #else
5525 /*
5526 * The vn_rdwr() call is performed in a taskq to ensure that there is
5527 * always enough stack space to write safely to the target filesystem.
5528 * The ZIO_TYPE_FREE threads are used because there can be a lot of
5529 * them and they are used in vdev_file.c for a similar purpose.
5530 */
5531 spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
5532 ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
5533 #endif /* HAVE_LARGE_STACKS */
5534
5535 return (dbi.dbi_err);
5536 }
5537
5538 /*
5539 * inputs:
5540 * zc_name name of snapshot to send
5541 * zc_cookie file descriptor to send stream to
5542 * zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
5543 * zc_sendobj objsetid of snapshot to send
5544 * zc_fromobj objsetid of incremental fromsnap (may be zero)
5545 * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
5546 * output size in zc_objset_type.
5547 * zc_flags lzc_send_flags
5548 *
5549 * outputs:
5550 * zc_objset_type estimated size, if zc_guid is set
5551 *
5552 * NOTE: This is no longer the preferred interface, any new functionality
5553 * should be added to zfs_ioc_send_new() instead.
5554 */
5555 static int
5556 zfs_ioc_send(zfs_cmd_t *zc)
5557 {
5558 int error;
5559 offset_t off;
5560 boolean_t estimate = (zc->zc_guid != 0);
5561 boolean_t embedok = (zc->zc_flags & 0x1);
5562 boolean_t large_block_ok = (zc->zc_flags & 0x2);
5563 boolean_t compressok = (zc->zc_flags & 0x4);
5564 boolean_t rawok = (zc->zc_flags & 0x8);
5565 boolean_t savedok = (zc->zc_flags & 0x10);
5566
5567 if (zc->zc_obj != 0) {
5568 dsl_pool_t *dp;
5569 dsl_dataset_t *tosnap;
5570
5571 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5572 if (error != 0)
5573 return (error);
5574
5575 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5576 if (error != 0) {
5577 dsl_pool_rele(dp, FTAG);
5578 return (error);
5579 }
5580
5581 if (dsl_dir_is_clone(tosnap->ds_dir))
5582 zc->zc_fromobj =
5583 dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5584 dsl_dataset_rele(tosnap, FTAG);
5585 dsl_pool_rele(dp, FTAG);
5586 }
5587
5588 if (estimate) {
5589 dsl_pool_t *dp;
5590 dsl_dataset_t *tosnap;
5591 dsl_dataset_t *fromsnap = NULL;
5592
5593 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5594 if (error != 0)
5595 return (error);
5596
5597 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
5598 FTAG, &tosnap);
5599 if (error != 0) {
5600 dsl_pool_rele(dp, FTAG);
5601 return (error);
5602 }
5603
5604 if (zc->zc_fromobj != 0) {
5605 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5606 FTAG, &fromsnap);
5607 if (error != 0) {
5608 dsl_dataset_rele(tosnap, FTAG);
5609 dsl_pool_rele(dp, FTAG);
5610 return (error);
5611 }
5612 }
5613
5614 error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
5615 compressok || rawok, savedok, &zc->zc_objset_type);
5616
5617 if (fromsnap != NULL)
5618 dsl_dataset_rele(fromsnap, FTAG);
5619 dsl_dataset_rele(tosnap, FTAG);
5620 dsl_pool_rele(dp, FTAG);
5621 } else {
5622 zfs_file_t *fp;
5623 dmu_send_outparams_t out = {0};
5624
5625 if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
5626 return (SET_ERROR(EBADF));
5627
5628 off = zfs_file_off(fp);
5629 out.dso_outfunc = dump_bytes;
5630 out.dso_arg = fp;
5631 out.dso_dryrun = B_FALSE;
5632 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5633 zc->zc_fromobj, embedok, large_block_ok, compressok,
5634 rawok, savedok, zc->zc_cookie, &off, &out);
5635
5636 zfs_file_put(fp);
5637 }
5638 return (error);
5639 }
5640
5641 /*
5642 * inputs:
5643 * zc_name name of snapshot on which to report progress
5644 * zc_cookie file descriptor of send stream
5645 *
5646 * outputs:
5647 * zc_cookie number of bytes written in send stream thus far
5648 * zc_objset_type logical size of data traversed by send thus far
5649 */
5650 static int
5651 zfs_ioc_send_progress(zfs_cmd_t *zc)
5652 {
5653 dsl_pool_t *dp;
5654 dsl_dataset_t *ds;
5655 dmu_sendstatus_t *dsp = NULL;
5656 int error;
5657
5658 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5659 if (error != 0)
5660 return (error);
5661
5662 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5663 if (error != 0) {
5664 dsl_pool_rele(dp, FTAG);
5665 return (error);
5666 }
5667
5668 mutex_enter(&ds->ds_sendstream_lock);
5669
5670 /*
5671 * Iterate over all the send streams currently active on this dataset.
5672 * If there's one which matches the specified file descriptor _and_ the
5673 * stream was started by the current process, return the progress of
5674 * that stream.
5675 */
5676
5677 for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5678 dsp = list_next(&ds->ds_sendstreams, dsp)) {
5679 if (dsp->dss_outfd == zc->zc_cookie &&
5680 zfs_proc_is_caller(dsp->dss_proc))
5681 break;
5682 }
5683
5684 if (dsp != NULL) {
5685 zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
5686 0, 0);
5687 /* This is the closest thing we have to atomic_read_64. */
5688 zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
5689 } else {
5690 error = SET_ERROR(ENOENT);
5691 }
5692
5693 mutex_exit(&ds->ds_sendstream_lock);
5694 dsl_dataset_rele(ds, FTAG);
5695 dsl_pool_rele(dp, FTAG);
5696 return (error);
5697 }
5698
5699 static int
5700 zfs_ioc_inject_fault(zfs_cmd_t *zc)
5701 {
5702 int id, error;
5703
5704 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5705 &zc->zc_inject_record);
5706
5707 if (error == 0)
5708 zc->zc_guid = (uint64_t)id;
5709
5710 return (error);
5711 }
5712
5713 static int
5714 zfs_ioc_clear_fault(zfs_cmd_t *zc)
5715 {
5716 return (zio_clear_fault((int)zc->zc_guid));
5717 }
5718
5719 static int
5720 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5721 {
5722 int id = (int)zc->zc_guid;
5723 int error;
5724
5725 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5726 &zc->zc_inject_record);
5727
5728 zc->zc_guid = id;
5729
5730 return (error);
5731 }
5732
5733 static int
5734 zfs_ioc_error_log(zfs_cmd_t *zc)
5735 {
5736 spa_t *spa;
5737 int error;
5738
5739 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5740 return (error);
5741
5742 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5743 &zc->zc_nvlist_dst_size);
5744
5745 spa_close(spa, FTAG);
5746
5747 return (error);
5748 }
5749
5750 static int
5751 zfs_ioc_clear(zfs_cmd_t *zc)
5752 {
5753 spa_t *spa;
5754 vdev_t *vd;
5755 int error;
5756
5757 /*
5758 * On zpool clear we also fix up missing slogs
5759 */
5760 mutex_enter(&spa_namespace_lock);
5761 spa = spa_lookup(zc->zc_name);
5762 if (spa == NULL) {
5763 mutex_exit(&spa_namespace_lock);
5764 return (SET_ERROR(EIO));
5765 }
5766 if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5767 /* we need to let spa_open/spa_load clear the chains */
5768 spa_set_log_state(spa, SPA_LOG_CLEAR);
5769 }
5770 spa->spa_last_open_failed = 0;
5771 mutex_exit(&spa_namespace_lock);
5772
5773 if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5774 error = spa_open(zc->zc_name, &spa, FTAG);
5775 } else {
5776 nvlist_t *policy;
5777 nvlist_t *config = NULL;
5778
5779 if (zc->zc_nvlist_src == 0)
5780 return (SET_ERROR(EINVAL));
5781
5782 if ((error = get_nvlist(zc->zc_nvlist_src,
5783 zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5784 error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5785 policy, &config);
5786 if (config != NULL) {
5787 int err;
5788
5789 if ((err = put_nvlist(zc, config)) != 0)
5790 error = err;
5791 nvlist_free(config);
5792 }
5793 nvlist_free(policy);
5794 }
5795 }
5796
5797 if (error != 0)
5798 return (error);
5799
5800 /*
5801 * If multihost is enabled, resuming I/O is unsafe as another
5802 * host may have imported the pool.
5803 */
5804 if (spa_multihost(spa) && spa_suspended(spa))
5805 return (SET_ERROR(EINVAL));
5806
5807 spa_vdev_state_enter(spa, SCL_NONE);
5808
5809 if (zc->zc_guid == 0) {
5810 vd = NULL;
5811 } else {
5812 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5813 if (vd == NULL) {
5814 error = SET_ERROR(ENODEV);
5815 (void) spa_vdev_state_exit(spa, NULL, error);
5816 spa_close(spa, FTAG);
5817 return (error);
5818 }
5819 }
5820
5821 vdev_clear(spa, vd);
5822
5823 (void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
5824 NULL : spa->spa_root_vdev, 0);
5825
5826 /*
5827 * Resume any suspended I/Os.
5828 */
5829 if (zio_resume(spa) != 0)
5830 error = SET_ERROR(EIO);
5831
5832 spa_close(spa, FTAG);
5833
5834 return (error);
5835 }
5836
5837 /*
5838 * Reopen all the vdevs associated with the pool.
5839 *
5840 * innvl: {
5841 * "scrub_restart" -> when true and scrub is running, allow to restart
5842 * scrub as the side effect of the reopen (boolean).
5843 * }
5844 *
5845 * outnvl is unused
5846 */
5847 static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
5848 {"scrub_restart", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
5849 };
5850
5851 static int
5852 zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
5853 {
5854 (void) outnvl;
5855 spa_t *spa;
5856 int error;
5857 boolean_t rc, scrub_restart = B_TRUE;
5858
5859 if (innvl) {
5860 error = nvlist_lookup_boolean_value(innvl,
5861 "scrub_restart", &rc);
5862 if (error == 0)
5863 scrub_restart = rc;
5864 }
5865
5866 error = spa_open(pool, &spa, FTAG);
5867 if (error != 0)
5868 return (error);
5869
5870 spa_vdev_state_enter(spa, SCL_NONE);
5871
5872 /*
5873 * If the scrub_restart flag is B_FALSE and a scrub is already
5874 * in progress then set spa_scrub_reopen flag to B_TRUE so that
5875 * we don't restart the scrub as a side effect of the reopen.
5876 * Otherwise, let vdev_open() decided if a resilver is required.
5877 */
5878
5879 spa->spa_scrub_reopen = (!scrub_restart &&
5880 dsl_scan_scrubbing(spa->spa_dsl_pool));
5881 vdev_reopen(spa->spa_root_vdev);
5882 spa->spa_scrub_reopen = B_FALSE;
5883
5884 (void) spa_vdev_state_exit(spa, NULL, 0);
5885 spa_close(spa, FTAG);
5886 return (0);
5887 }
5888
5889 /*
5890 * inputs:
5891 * zc_name name of filesystem
5892 *
5893 * outputs:
5894 * zc_string name of conflicting snapshot, if there is one
5895 */
5896 static int
5897 zfs_ioc_promote(zfs_cmd_t *zc)
5898 {
5899 dsl_pool_t *dp;
5900 dsl_dataset_t *ds, *ods;
5901 char origin[ZFS_MAX_DATASET_NAME_LEN];
5902 char *cp;
5903 int error;
5904
5905 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5906 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5907 strchr(zc->zc_name, '%'))
5908 return (SET_ERROR(EINVAL));
5909
5910 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5911 if (error != 0)
5912 return (error);
5913
5914 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5915 if (error != 0) {
5916 dsl_pool_rele(dp, FTAG);
5917 return (error);
5918 }
5919
5920 if (!dsl_dir_is_clone(ds->ds_dir)) {
5921 dsl_dataset_rele(ds, FTAG);
5922 dsl_pool_rele(dp, FTAG);
5923 return (SET_ERROR(EINVAL));
5924 }
5925
5926 error = dsl_dataset_hold_obj(dp,
5927 dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5928 if (error != 0) {
5929 dsl_dataset_rele(ds, FTAG);
5930 dsl_pool_rele(dp, FTAG);
5931 return (error);
5932 }
5933
5934 dsl_dataset_name(ods, origin);
5935 dsl_dataset_rele(ods, FTAG);
5936 dsl_dataset_rele(ds, FTAG);
5937 dsl_pool_rele(dp, FTAG);
5938
5939 /*
5940 * We don't need to unmount *all* the origin fs's snapshots, but
5941 * it's easier.
5942 */
5943 cp = strchr(origin, '@');
5944 if (cp)
5945 *cp = '\0';
5946 (void) dmu_objset_find(origin,
5947 zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5948 return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5949 }
5950
5951 /*
5952 * Retrieve a single {user|group|project}{used|quota}@... property.
5953 *
5954 * inputs:
5955 * zc_name name of filesystem
5956 * zc_objset_type zfs_userquota_prop_t
5957 * zc_value domain name (eg. "S-1-234-567-89")
5958 * zc_guid RID/UID/GID
5959 *
5960 * outputs:
5961 * zc_cookie property value
5962 */
5963 static int
5964 zfs_ioc_userspace_one(zfs_cmd_t *zc)
5965 {
5966 zfsvfs_t *zfsvfs;
5967 int error;
5968
5969 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5970 return (SET_ERROR(EINVAL));
5971
5972 error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5973 if (error != 0)
5974 return (error);
5975
5976 error = zfs_userspace_one(zfsvfs,
5977 zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5978 zfsvfs_rele(zfsvfs, FTAG);
5979
5980 return (error);
5981 }
5982
5983 /*
5984 * inputs:
5985 * zc_name name of filesystem
5986 * zc_cookie zap cursor
5987 * zc_objset_type zfs_userquota_prop_t
5988 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5989 *
5990 * outputs:
5991 * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
5992 * zc_cookie zap cursor
5993 */
5994 static int
5995 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5996 {
5997 zfsvfs_t *zfsvfs;
5998 int bufsize = zc->zc_nvlist_dst_size;
5999
6000 if (bufsize <= 0)
6001 return (SET_ERROR(ENOMEM));
6002
6003 int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6004 if (error != 0)
6005 return (error);
6006
6007 void *buf = vmem_alloc(bufsize, KM_SLEEP);
6008
6009 error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
6010 buf, &zc->zc_nvlist_dst_size);
6011
6012 if (error == 0) {
6013 error = xcopyout(buf,
6014 (void *)(uintptr_t)zc->zc_nvlist_dst,
6015 zc->zc_nvlist_dst_size);
6016 }
6017 vmem_free(buf, bufsize);
6018 zfsvfs_rele(zfsvfs, FTAG);
6019
6020 return (error);
6021 }
6022
6023 /*
6024 * inputs:
6025 * zc_name name of filesystem
6026 *
6027 * outputs:
6028 * none
6029 */
6030 static int
6031 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
6032 {
6033 int error = 0;
6034 zfsvfs_t *zfsvfs;
6035
6036 if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
6037 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
6038 /*
6039 * If userused is not enabled, it may be because the
6040 * objset needs to be closed & reopened (to grow the
6041 * objset_phys_t). Suspend/resume the fs will do that.
6042 */
6043 dsl_dataset_t *ds, *newds;
6044
6045 ds = dmu_objset_ds(zfsvfs->z_os);
6046 error = zfs_suspend_fs(zfsvfs);
6047 if (error == 0) {
6048 dmu_objset_refresh_ownership(ds, &newds,
6049 B_TRUE, zfsvfs);
6050 error = zfs_resume_fs(zfsvfs, newds);
6051 }
6052 }
6053 if (error == 0) {
6054 mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
6055 if (zfsvfs->z_os->os_upgrade_id == 0) {
6056 /* clear potential error code and retry */
6057 zfsvfs->z_os->os_upgrade_status = 0;
6058 mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
6059
6060 dsl_pool_config_enter(
6061 dmu_objset_pool(zfsvfs->z_os), FTAG);
6062 dmu_objset_userspace_upgrade(zfsvfs->z_os);
6063 dsl_pool_config_exit(
6064 dmu_objset_pool(zfsvfs->z_os), FTAG);
6065 } else {
6066 mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
6067 }
6068
6069 taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
6070 zfsvfs->z_os->os_upgrade_id);
6071 error = zfsvfs->z_os->os_upgrade_status;
6072 }
6073 zfs_vfs_rele(zfsvfs);
6074 } else {
6075 objset_t *os;
6076
6077 /* XXX kind of reading contents without owning */
6078 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6079 if (error != 0)
6080 return (error);
6081
6082 mutex_enter(&os->os_upgrade_lock);
6083 if (os->os_upgrade_id == 0) {
6084 /* clear potential error code and retry */
6085 os->os_upgrade_status = 0;
6086 mutex_exit(&os->os_upgrade_lock);
6087
6088 dmu_objset_userspace_upgrade(os);
6089 } else {
6090 mutex_exit(&os->os_upgrade_lock);
6091 }
6092
6093 dsl_pool_rele(dmu_objset_pool(os), FTAG);
6094
6095 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6096 error = os->os_upgrade_status;
6097
6098 dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
6099 FTAG);
6100 }
6101 return (error);
6102 }
6103
6104 /*
6105 * inputs:
6106 * zc_name name of filesystem
6107 *
6108 * outputs:
6109 * none
6110 */
6111 static int
6112 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
6113 {
6114 objset_t *os;
6115 int error;
6116
6117 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6118 if (error != 0)
6119 return (error);
6120
6121 if (dmu_objset_userobjspace_upgradable(os) ||
6122 dmu_objset_projectquota_upgradable(os)) {
6123 mutex_enter(&os->os_upgrade_lock);
6124 if (os->os_upgrade_id == 0) {
6125 /* clear potential error code and retry */
6126 os->os_upgrade_status = 0;
6127 mutex_exit(&os->os_upgrade_lock);
6128
6129 dmu_objset_id_quota_upgrade(os);
6130 } else {
6131 mutex_exit(&os->os_upgrade_lock);
6132 }
6133
6134 dsl_pool_rele(dmu_objset_pool(os), FTAG);
6135
6136 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6137 error = os->os_upgrade_status;
6138 } else {
6139 dsl_pool_rele(dmu_objset_pool(os), FTAG);
6140 }
6141
6142 dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
6143
6144 return (error);
6145 }
6146
6147 static int
6148 zfs_ioc_share(zfs_cmd_t *zc)
6149 {
6150 return (SET_ERROR(ENOSYS));
6151 }
6152
6153 /*
6154 * inputs:
6155 * zc_name name of containing filesystem
6156 * zc_obj object # beyond which we want next in-use object #
6157 *
6158 * outputs:
6159 * zc_obj next in-use object #
6160 */
6161 static int
6162 zfs_ioc_next_obj(zfs_cmd_t *zc)
6163 {
6164 objset_t *os = NULL;
6165 int error;
6166
6167 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
6168 if (error != 0)
6169 return (error);
6170
6171 error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
6172
6173 dmu_objset_rele(os, FTAG);
6174 return (error);
6175 }
6176
6177 /*
6178 * inputs:
6179 * zc_name name of filesystem
6180 * zc_value prefix name for snapshot
6181 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
6182 *
6183 * outputs:
6184 * zc_value short name of new snapshot
6185 */
6186 static int
6187 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
6188 {
6189 char *snap_name;
6190 char *hold_name;
6191 minor_t minor;
6192
6193 zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
6194 if (fp == NULL)
6195 return (SET_ERROR(EBADF));
6196
6197 snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
6198 (u_longlong_t)ddi_get_lbolt64());
6199 hold_name = kmem_asprintf("%%%s", zc->zc_value);
6200
6201 int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
6202 hold_name);
6203 if (error == 0)
6204 (void) strlcpy(zc->zc_value, snap_name,
6205 sizeof (zc->zc_value));
6206 kmem_strfree(snap_name);
6207 kmem_strfree(hold_name);
6208 zfs_onexit_fd_rele(fp);
6209 return (error);
6210 }
6211
6212 /*
6213 * inputs:
6214 * zc_name name of "to" snapshot
6215 * zc_value name of "from" snapshot
6216 * zc_cookie file descriptor to write diff data on
6217 *
6218 * outputs:
6219 * dmu_diff_record_t's to the file descriptor
6220 */
6221 static int
6222 zfs_ioc_diff(zfs_cmd_t *zc)
6223 {
6224 zfs_file_t *fp;
6225 offset_t off;
6226 int error;
6227
6228 if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
6229 return (SET_ERROR(EBADF));
6230
6231 off = zfs_file_off(fp);
6232 error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
6233
6234 zfs_file_put(fp);
6235
6236 return (error);
6237 }
6238
6239 static int
6240 zfs_ioc_smb_acl(zfs_cmd_t *zc)
6241 {
6242 return (SET_ERROR(ENOTSUP));
6243 }
6244
6245 /*
6246 * innvl: {
6247 * "holds" -> { snapname -> holdname (string), ... }
6248 * (optional) "cleanup_fd" -> fd (int32)
6249 * }
6250 *
6251 * outnvl: {
6252 * snapname -> error value (int32)
6253 * ...
6254 * }
6255 */
6256 static const zfs_ioc_key_t zfs_keys_hold[] = {
6257 {"holds", DATA_TYPE_NVLIST, 0},
6258 {"cleanup_fd", DATA_TYPE_INT32, ZK_OPTIONAL},
6259 };
6260
6261 static int
6262 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6263 {
6264 (void) pool;
6265 nvpair_t *pair;
6266 nvlist_t *holds;
6267 int cleanup_fd = -1;
6268 int error;
6269 minor_t minor = 0;
6270 zfs_file_t *fp = NULL;
6271
6272 holds = fnvlist_lookup_nvlist(args, "holds");
6273
6274 /* make sure the user didn't pass us any invalid (empty) tags */
6275 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6276 pair = nvlist_next_nvpair(holds, pair)) {
6277 const char *htag;
6278
6279 error = nvpair_value_string(pair, &htag);
6280 if (error != 0)
6281 return (SET_ERROR(error));
6282
6283 if (strlen(htag) == 0)
6284 return (SET_ERROR(EINVAL));
6285 }
6286
6287 if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6288 fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
6289 if (fp == NULL)
6290 return (SET_ERROR(EBADF));
6291 }
6292
6293 error = dsl_dataset_user_hold(holds, minor, errlist);
6294 if (fp != NULL) {
6295 ASSERT3U(minor, !=, 0);
6296 zfs_onexit_fd_rele(fp);
6297 }
6298 return (SET_ERROR(error));
6299 }
6300
6301 /*
6302 * innvl is not used.
6303 *
6304 * outnvl: {
6305 * holdname -> time added (uint64 seconds since epoch)
6306 * ...
6307 * }
6308 */
6309 static const zfs_ioc_key_t zfs_keys_get_holds[] = {
6310 /* no nvl keys */
6311 };
6312
6313 static int
6314 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
6315 {
6316 (void) args;
6317 return (dsl_dataset_get_holds(snapname, outnvl));
6318 }
6319
6320 /*
6321 * innvl: {
6322 * snapname -> { holdname, ... }
6323 * ...
6324 * }
6325 *
6326 * outnvl: {
6327 * snapname -> error value (int32)
6328 * ...
6329 * }
6330 */
6331 static const zfs_ioc_key_t zfs_keys_release[] = {
6332 {"<snapname>...", DATA_TYPE_NVLIST, ZK_WILDCARDLIST},
6333 };
6334
6335 static int
6336 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
6337 {
6338 (void) pool;
6339 return (dsl_dataset_user_release(holds, errlist));
6340 }
6341
6342 /*
6343 * inputs:
6344 * zc_guid flags (ZEVENT_NONBLOCK)
6345 * zc_cleanup_fd zevent file descriptor
6346 *
6347 * outputs:
6348 * zc_nvlist_dst next nvlist event
6349 * zc_cookie dropped events since last get
6350 */
6351 static int
6352 zfs_ioc_events_next(zfs_cmd_t *zc)
6353 {
6354 zfs_zevent_t *ze;
6355 nvlist_t *event = NULL;
6356 minor_t minor;
6357 uint64_t dropped = 0;
6358 int error;
6359
6360 zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6361 if (fp == NULL)
6362 return (SET_ERROR(EBADF));
6363
6364 do {
6365 error = zfs_zevent_next(ze, &event,
6366 &zc->zc_nvlist_dst_size, &dropped);
6367 if (event != NULL) {
6368 zc->zc_cookie = dropped;
6369 error = put_nvlist(zc, event);
6370 nvlist_free(event);
6371 }
6372
6373 if (zc->zc_guid & ZEVENT_NONBLOCK)
6374 break;
6375
6376 if ((error == 0) || (error != ENOENT))
6377 break;
6378
6379 error = zfs_zevent_wait(ze);
6380 if (error != 0)
6381 break;
6382 } while (1);
6383
6384 zfs_zevent_fd_rele(fp);
6385
6386 return (error);
6387 }
6388
6389 /*
6390 * outputs:
6391 * zc_cookie cleared events count
6392 */
6393 static int
6394 zfs_ioc_events_clear(zfs_cmd_t *zc)
6395 {
6396 uint_t count;
6397
6398 zfs_zevent_drain_all(&count);
6399 zc->zc_cookie = count;
6400
6401 return (0);
6402 }
6403
6404 /*
6405 * inputs:
6406 * zc_guid eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
6407 * zc_cleanup zevent file descriptor
6408 */
6409 static int
6410 zfs_ioc_events_seek(zfs_cmd_t *zc)
6411 {
6412 zfs_zevent_t *ze;
6413 minor_t minor;
6414 int error;
6415
6416 zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
6417 if (fp == NULL)
6418 return (SET_ERROR(EBADF));
6419
6420 error = zfs_zevent_seek(ze, zc->zc_guid);
6421 zfs_zevent_fd_rele(fp);
6422
6423 return (error);
6424 }
6425
6426 /*
6427 * inputs:
6428 * zc_name name of later filesystem or snapshot
6429 * zc_value full name of old snapshot or bookmark
6430 *
6431 * outputs:
6432 * zc_cookie space in bytes
6433 * zc_objset_type compressed space in bytes
6434 * zc_perm_action uncompressed space in bytes
6435 */
6436 static int
6437 zfs_ioc_space_written(zfs_cmd_t *zc)
6438 {
6439 int error;
6440 dsl_pool_t *dp;
6441 dsl_dataset_t *new;
6442
6443 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6444 if (error != 0)
6445 return (error);
6446 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6447 if (error != 0) {
6448 dsl_pool_rele(dp, FTAG);
6449 return (error);
6450 }
6451 if (strchr(zc->zc_value, '#') != NULL) {
6452 zfs_bookmark_phys_t bmp;
6453 error = dsl_bookmark_lookup(dp, zc->zc_value,
6454 new, &bmp);
6455 if (error == 0) {
6456 error = dsl_dataset_space_written_bookmark(&bmp, new,
6457 &zc->zc_cookie,
6458 &zc->zc_objset_type, &zc->zc_perm_action);
6459 }
6460 } else {
6461 dsl_dataset_t *old;
6462 error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6463
6464 if (error == 0) {
6465 error = dsl_dataset_space_written(old, new,
6466 &zc->zc_cookie,
6467 &zc->zc_objset_type, &zc->zc_perm_action);
6468 dsl_dataset_rele(old, FTAG);
6469 }
6470 }
6471 dsl_dataset_rele(new, FTAG);
6472 dsl_pool_rele(dp, FTAG);
6473 return (error);
6474 }
6475
6476 /*
6477 * innvl: {
6478 * "firstsnap" -> snapshot name
6479 * }
6480 *
6481 * outnvl: {
6482 * "used" -> space in bytes
6483 * "compressed" -> compressed space in bytes
6484 * "uncompressed" -> uncompressed space in bytes
6485 * }
6486 */
6487 static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
6488 {"firstsnap", DATA_TYPE_STRING, 0},
6489 };
6490
6491 static int
6492 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6493 {
6494 int error;
6495 dsl_pool_t *dp;
6496 dsl_dataset_t *new, *old;
6497 const char *firstsnap;
6498 uint64_t used, comp, uncomp;
6499
6500 firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
6501
6502 error = dsl_pool_hold(lastsnap, FTAG, &dp);
6503 if (error != 0)
6504 return (error);
6505
6506 error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6507 if (error == 0 && !new->ds_is_snapshot) {
6508 dsl_dataset_rele(new, FTAG);
6509 error = SET_ERROR(EINVAL);
6510 }
6511 if (error != 0) {
6512 dsl_pool_rele(dp, FTAG);
6513 return (error);
6514 }
6515 error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6516 if (error == 0 && !old->ds_is_snapshot) {
6517 dsl_dataset_rele(old, FTAG);
6518 error = SET_ERROR(EINVAL);
6519 }
6520 if (error != 0) {
6521 dsl_dataset_rele(new, FTAG);
6522 dsl_pool_rele(dp, FTAG);
6523 return (error);
6524 }
6525
6526 error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6527 dsl_dataset_rele(old, FTAG);
6528 dsl_dataset_rele(new, FTAG);
6529 dsl_pool_rele(dp, FTAG);
6530 fnvlist_add_uint64(outnvl, "used", used);
6531 fnvlist_add_uint64(outnvl, "compressed", comp);
6532 fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6533 return (error);
6534 }
6535
6536 /*
6537 * innvl: {
6538 * "fd" -> file descriptor to write stream to (int32)
6539 * (optional) "fromsnap" -> full snap name to send an incremental from
6540 * (optional) "largeblockok" -> (value ignored)
6541 * indicates that blocks > 128KB are permitted
6542 * (optional) "embedok" -> (value ignored)
6543 * presence indicates DRR_WRITE_EMBEDDED records are permitted
6544 * (optional) "compressok" -> (value ignored)
6545 * presence indicates compressed DRR_WRITE records are permitted
6546 * (optional) "rawok" -> (value ignored)
6547 * presence indicates raw encrypted records should be used.
6548 * (optional) "savedok" -> (value ignored)
6549 * presence indicates we should send a partially received snapshot
6550 * (optional) "resume_object" and "resume_offset" -> (uint64)
6551 * if present, resume send stream from specified object and offset.
6552 * (optional) "redactbook" -> (string)
6553 * if present, use this bookmark's redaction list to generate a redacted
6554 * send stream
6555 * }
6556 *
6557 * outnvl is unused
6558 */
6559 static const zfs_ioc_key_t zfs_keys_send_new[] = {
6560 {"fd", DATA_TYPE_INT32, 0},
6561 {"fromsnap", DATA_TYPE_STRING, ZK_OPTIONAL},
6562 {"largeblockok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6563 {"embedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6564 {"compressok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6565 {"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6566 {"savedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6567 {"resume_object", DATA_TYPE_UINT64, ZK_OPTIONAL},
6568 {"resume_offset", DATA_TYPE_UINT64, ZK_OPTIONAL},
6569 {"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL},
6570 };
6571
6572 static int
6573 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6574 {
6575 (void) outnvl;
6576 int error;
6577 offset_t off;
6578 const char *fromname = NULL;
6579 int fd;
6580 zfs_file_t *fp;
6581 boolean_t largeblockok;
6582 boolean_t embedok;
6583 boolean_t compressok;
6584 boolean_t rawok;
6585 boolean_t savedok;
6586 uint64_t resumeobj = 0;
6587 uint64_t resumeoff = 0;
6588 const char *redactbook = NULL;
6589
6590 fd = fnvlist_lookup_int32(innvl, "fd");
6591
6592 (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6593
6594 largeblockok = nvlist_exists(innvl, "largeblockok");
6595 embedok = nvlist_exists(innvl, "embedok");
6596 compressok = nvlist_exists(innvl, "compressok");
6597 rawok = nvlist_exists(innvl, "rawok");
6598 savedok = nvlist_exists(innvl, "savedok");
6599
6600 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6601 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6602
6603 (void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
6604
6605 if ((fp = zfs_file_get(fd)) == NULL)
6606 return (SET_ERROR(EBADF));
6607
6608 off = zfs_file_off(fp);
6609
6610 dmu_send_outparams_t out = {0};
6611 out.dso_outfunc = dump_bytes;
6612 out.dso_arg = fp;
6613 out.dso_dryrun = B_FALSE;
6614 error = dmu_send(snapname, fromname, embedok, largeblockok,
6615 compressok, rawok, savedok, resumeobj, resumeoff,
6616 redactbook, fd, &off, &out);
6617
6618 zfs_file_put(fp);
6619 return (error);
6620 }
6621
6622 static int
6623 send_space_sum(objset_t *os, void *buf, int len, void *arg)
6624 {
6625 (void) os, (void) buf;
6626 uint64_t *size = arg;
6627
6628 *size += len;
6629 return (0);
6630 }
6631
6632 /*
6633 * Determine approximately how large a zfs send stream will be -- the number
6634 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6635 *
6636 * innvl: {
6637 * (optional) "from" -> full snap or bookmark name to send an incremental
6638 * from
6639 * (optional) "largeblockok" -> (value ignored)
6640 * indicates that blocks > 128KB are permitted
6641 * (optional) "embedok" -> (value ignored)
6642 * presence indicates DRR_WRITE_EMBEDDED records are permitted
6643 * (optional) "compressok" -> (value ignored)
6644 * presence indicates compressed DRR_WRITE records are permitted
6645 * (optional) "rawok" -> (value ignored)
6646 * presence indicates raw encrypted records should be used.
6647 * (optional) "resume_object" and "resume_offset" -> (uint64)
6648 * if present, resume send stream from specified object and offset.
6649 * (optional) "fd" -> file descriptor to use as a cookie for progress
6650 * tracking (int32)
6651 * }
6652 *
6653 * outnvl: {
6654 * "space" -> bytes of space (uint64)
6655 * }
6656 */
6657 static const zfs_ioc_key_t zfs_keys_send_space[] = {
6658 {"from", DATA_TYPE_STRING, ZK_OPTIONAL},
6659 {"fromsnap", DATA_TYPE_STRING, ZK_OPTIONAL},
6660 {"largeblockok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6661 {"embedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6662 {"compressok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6663 {"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6664 {"fd", DATA_TYPE_INT32, ZK_OPTIONAL},
6665 {"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL},
6666 {"resume_object", DATA_TYPE_UINT64, ZK_OPTIONAL},
6667 {"resume_offset", DATA_TYPE_UINT64, ZK_OPTIONAL},
6668 {"bytes", DATA_TYPE_UINT64, ZK_OPTIONAL},
6669 };
6670
6671 static int
6672 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6673 {
6674 dsl_pool_t *dp;
6675 dsl_dataset_t *tosnap;
6676 dsl_dataset_t *fromsnap = NULL;
6677 int error;
6678 const char *fromname = NULL;
6679 const char *redactlist_book = NULL;
6680 boolean_t largeblockok;
6681 boolean_t embedok;
6682 boolean_t compressok;
6683 boolean_t rawok;
6684 boolean_t savedok;
6685 uint64_t space = 0;
6686 boolean_t full_estimate = B_FALSE;
6687 uint64_t resumeobj = 0;
6688 uint64_t resumeoff = 0;
6689 uint64_t resume_bytes = 0;
6690 int32_t fd = -1;
6691 zfs_bookmark_phys_t zbm = {0};
6692
6693 error = dsl_pool_hold(snapname, FTAG, &dp);
6694 if (error != 0)
6695 return (error);
6696
6697 error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6698 if (error != 0) {
6699 dsl_pool_rele(dp, FTAG);
6700 return (error);
6701 }
6702 (void) nvlist_lookup_int32(innvl, "fd", &fd);
6703
6704 largeblockok = nvlist_exists(innvl, "largeblockok");
6705 embedok = nvlist_exists(innvl, "embedok");
6706 compressok = nvlist_exists(innvl, "compressok");
6707 rawok = nvlist_exists(innvl, "rawok");
6708 savedok = nvlist_exists(innvl, "savedok");
6709 boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
6710 boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
6711 &redactlist_book) == 0);
6712
6713 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6714 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6715 (void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
6716
6717 if (altbook) {
6718 full_estimate = B_TRUE;
6719 } else if (from) {
6720 if (strchr(fromname, '#')) {
6721 error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
6722
6723 /*
6724 * dsl_bookmark_lookup() will fail with EXDEV if
6725 * the from-bookmark and tosnap are at the same txg.
6726 * However, it's valid to do a send (and therefore,
6727 * a send estimate) from and to the same time point,
6728 * if the bookmark is redacted (the incremental send
6729 * can change what's redacted on the target). In
6730 * this case, dsl_bookmark_lookup() fills in zbm
6731 * but returns EXDEV. Ignore this error.
6732 */
6733 if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
6734 zbm.zbm_guid ==
6735 dsl_dataset_phys(tosnap)->ds_guid)
6736 error = 0;
6737
6738 if (error != 0) {
6739 dsl_dataset_rele(tosnap, FTAG);
6740 dsl_pool_rele(dp, FTAG);
6741 return (error);
6742 }
6743 if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
6744 ZBM_FLAG_HAS_FBN)) {
6745 full_estimate = B_TRUE;
6746 }
6747 } else if (strchr(fromname, '@')) {
6748 error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6749 if (error != 0) {
6750 dsl_dataset_rele(tosnap, FTAG);
6751 dsl_pool_rele(dp, FTAG);
6752 return (error);
6753 }
6754
6755 if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
6756 full_estimate = B_TRUE;
6757 dsl_dataset_rele(fromsnap, FTAG);
6758 }
6759 } else {
6760 /*
6761 * from is not properly formatted as a snapshot or
6762 * bookmark
6763 */
6764 dsl_dataset_rele(tosnap, FTAG);
6765 dsl_pool_rele(dp, FTAG);
6766 return (SET_ERROR(EINVAL));
6767 }
6768 }
6769
6770 if (full_estimate) {
6771 dmu_send_outparams_t out = {0};
6772 offset_t off = 0;
6773 out.dso_outfunc = send_space_sum;
6774 out.dso_arg = &space;
6775 out.dso_dryrun = B_TRUE;
6776 /*
6777 * We have to release these holds so dmu_send can take them. It
6778 * will do all the error checking we need.
6779 */
6780 dsl_dataset_rele(tosnap, FTAG);
6781 dsl_pool_rele(dp, FTAG);
6782 error = dmu_send(snapname, fromname, embedok, largeblockok,
6783 compressok, rawok, savedok, resumeobj, resumeoff,
6784 redactlist_book, fd, &off, &out);
6785 } else {
6786 error = dmu_send_estimate_fast(tosnap, fromsnap,
6787 (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
6788 compressok || rawok, savedok, &space);
6789 space -= resume_bytes;
6790 if (fromsnap != NULL)
6791 dsl_dataset_rele(fromsnap, FTAG);
6792 dsl_dataset_rele(tosnap, FTAG);
6793 dsl_pool_rele(dp, FTAG);
6794 }
6795
6796 fnvlist_add_uint64(outnvl, "space", space);
6797
6798 return (error);
6799 }
6800
6801 /*
6802 * Sync the currently open TXG to disk for the specified pool.
6803 * This is somewhat similar to 'zfs_sync()'.
6804 * For cases that do not result in error this ioctl will wait for
6805 * the currently open TXG to commit before returning back to the caller.
6806 *
6807 * innvl: {
6808 * "force" -> when true, force uberblock update even if there is no dirty data.
6809 * In addition this will cause the vdev configuration to be written
6810 * out including updating the zpool cache file. (boolean_t)
6811 * }
6812 *
6813 * onvl is unused
6814 */
6815 static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
6816 {"force", DATA_TYPE_BOOLEAN_VALUE, 0},
6817 };
6818
6819 static int
6820 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
6821 {
6822 (void) onvl;
6823 int err;
6824 boolean_t rc, force = B_FALSE;
6825 spa_t *spa;
6826
6827 if ((err = spa_open(pool, &spa, FTAG)) != 0)
6828 return (err);
6829
6830 if (innvl) {
6831 err = nvlist_lookup_boolean_value(innvl, "force", &rc);
6832 if (err == 0)
6833 force = rc;
6834 }
6835
6836 if (force) {
6837 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
6838 vdev_config_dirty(spa->spa_root_vdev);
6839 spa_config_exit(spa, SCL_CONFIG, FTAG);
6840 }
6841 txg_wait_synced(spa_get_dsl(spa), 0);
6842
6843 spa_close(spa, FTAG);
6844
6845 return (0);
6846 }
6847
6848 /*
6849 * Load a user's wrapping key into the kernel.
6850 * innvl: {
6851 * "hidden_args" -> { "wkeydata" -> value }
6852 * raw uint8_t array of encryption wrapping key data (32 bytes)
6853 * (optional) "noop" -> (value ignored)
6854 * presence indicated key should only be verified, not loaded
6855 * }
6856 */
6857 static const zfs_ioc_key_t zfs_keys_load_key[] = {
6858 {"hidden_args", DATA_TYPE_NVLIST, 0},
6859 {"noop", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
6860 };
6861
6862 static int
6863 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6864 {
6865 (void) outnvl;
6866 int ret;
6867 dsl_crypto_params_t *dcp = NULL;
6868 nvlist_t *hidden_args;
6869 boolean_t noop = nvlist_exists(innvl, "noop");
6870
6871 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6872 ret = SET_ERROR(EINVAL);
6873 goto error;
6874 }
6875
6876 hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
6877
6878 ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
6879 hidden_args, &dcp);
6880 if (ret != 0)
6881 goto error;
6882
6883 ret = spa_keystore_load_wkey(dsname, dcp, noop);
6884 if (ret != 0)
6885 goto error;
6886
6887 dsl_crypto_params_free(dcp, noop);
6888
6889 return (0);
6890
6891 error:
6892 dsl_crypto_params_free(dcp, B_TRUE);
6893 return (ret);
6894 }
6895
6896 /*
6897 * Unload a user's wrapping key from the kernel.
6898 * Both innvl and outnvl are unused.
6899 */
6900 static const zfs_ioc_key_t zfs_keys_unload_key[] = {
6901 /* no nvl keys */
6902 };
6903
6904 static int
6905 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6906 {
6907 (void) innvl, (void) outnvl;
6908 int ret = 0;
6909
6910 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6911 ret = (SET_ERROR(EINVAL));
6912 goto out;
6913 }
6914
6915 ret = spa_keystore_unload_wkey(dsname);
6916 if (ret != 0)
6917 goto out;
6918
6919 out:
6920 return (ret);
6921 }
6922
6923 /*
6924 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
6925 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
6926 * here to change how the key is derived in userspace.
6927 *
6928 * innvl: {
6929 * "hidden_args" (optional) -> { "wkeydata" -> value }
6930 * raw uint8_t array of new encryption wrapping key data (32 bytes)
6931 * "props" (optional) -> { prop -> value }
6932 * }
6933 *
6934 * outnvl is unused
6935 */
6936 static const zfs_ioc_key_t zfs_keys_change_key[] = {
6937 {"crypt_cmd", DATA_TYPE_UINT64, ZK_OPTIONAL},
6938 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
6939 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
6940 };
6941
6942 static int
6943 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6944 {
6945 (void) outnvl;
6946 int ret;
6947 uint64_t cmd = DCP_CMD_NONE;
6948 dsl_crypto_params_t *dcp = NULL;
6949 nvlist_t *args = NULL, *hidden_args = NULL;
6950
6951 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6952 ret = (SET_ERROR(EINVAL));
6953 goto error;
6954 }
6955
6956 (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
6957 (void) nvlist_lookup_nvlist(innvl, "props", &args);
6958 (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6959
6960 ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
6961 if (ret != 0)
6962 goto error;
6963
6964 ret = spa_keystore_change_key(dsname, dcp);
6965 if (ret != 0)
6966 goto error;
6967
6968 dsl_crypto_params_free(dcp, B_FALSE);
6969
6970 return (0);
6971
6972 error:
6973 dsl_crypto_params_free(dcp, B_TRUE);
6974 return (ret);
6975 }
6976
/*
 * Table of registered ioctl handlers.  The entry for ioctl number 'ioc'
 * lives at index (ioc - ZFS_IOC_FIRST).  The table may be sparse: a slot
 * that was never registered has neither zvec_func nor zvec_legacy_func set.
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6978
6979 static void
6980 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6981 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6982 boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6983 {
6984 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6985
6986 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6987 ASSERT3U(ioc, <, ZFS_IOC_LAST);
6988 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6989 ASSERT3P(vec->zvec_func, ==, NULL);
6990
6991 vec->zvec_legacy_func = func;
6992 vec->zvec_secpolicy = secpolicy;
6993 vec->zvec_namecheck = namecheck;
6994 vec->zvec_allow_log = log_history;
6995 vec->zvec_pool_check = pool_check;
6996 }
6997
6998 /*
6999 * See the block comment at the beginning of this file for details on
7000 * each argument to this function.
7001 */
7002 void
7003 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
7004 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7005 zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
7006 boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
7007 {
7008 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7009
7010 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7011 ASSERT3U(ioc, <, ZFS_IOC_LAST);
7012 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
7013 ASSERT3P(vec->zvec_func, ==, NULL);
7014
7015 /* if we are logging, the name must be valid */
7016 ASSERT(!allow_log || namecheck != NO_NAME);
7017
7018 vec->zvec_name = name;
7019 vec->zvec_func = func;
7020 vec->zvec_secpolicy = secpolicy;
7021 vec->zvec_namecheck = namecheck;
7022 vec->zvec_pool_check = pool_check;
7023 vec->zvec_smush_outnvlist = smush_outnvlist;
7024 vec->zvec_allow_log = allow_log;
7025 vec->zvec_nvl_keys = nvl_keys;
7026 vec->zvec_nvl_key_count = num_keys;
7027 }
7028
7029 static void
7030 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7031 zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
7032 zfs_ioc_poolcheck_t pool_check)
7033 {
7034 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7035 POOL_NAME, log_history, pool_check);
7036 }
7037
7038 void
7039 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7040 zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
7041 {
7042 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7043 DATASET_NAME, B_FALSE, pool_check);
7044 }
7045
7046 static void
7047 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
7048 {
7049 zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
7050 POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7051 }
7052
7053 static void
7054 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7055 zfs_secpolicy_func_t *secpolicy)
7056 {
7057 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7058 NO_NAME, B_FALSE, POOL_CHECK_NONE);
7059 }
7060
7061 static void
7062 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
7063 zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
7064 {
7065 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7066 DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
7067 }
7068
7069 static void
7070 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
7071 {
7072 zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
7073 zfs_secpolicy_read);
7074 }
7075
7076 static void
7077 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7078 zfs_secpolicy_func_t *secpolicy)
7079 {
7080 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7081 DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7082 }
7083
/*
 * Fill in zfs_ioc_vec with the ioctl handlers registered from this file,
 * then call zfs_ioctl_init_os().
 *
 * The first group of calls uses zfs_ioctl_register(); in each of those
 * calls the two booleans that follow the pool-check flags are
 * smush_outnvlist and allow_log, in that order, and the last two
 * arguments are the table of accepted input nvlist keys and its length.
 * The second group registers handlers that use the legacy function
 * signature, either directly through zfs_ioctl_register_legacy() or
 * through one of the zfs_ioctl_register_*() wrappers, each of which fixes
 * some of the name-check, logging and pool-check arguments.
 *
 * The registration functions assert that a slot is registered only once.
 */
static void
zfs_ioctl_init(void)
{
	/* IOCTLS registered with a name and an input nvlist key table */

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_create, ARRAY_SIZE(zfs_keys_create));

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));

	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_release, ARRAY_SIZE(zfs_keys_release));

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));

	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
	    ARRAY_SIZE(zfs_keys_get_bookmark_props));

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_bookmarks,
	    ARRAY_SIZE(zfs_keys_destroy_bookmarks));

	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
	    zfs_ioc_recv_new, zfs_secpolicy_recv, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
	/* Wrapping-key management */
	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
	    zfs_ioc_load_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
	    zfs_ioc_change_key, zfs_secpolicy_change_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
	    B_TRUE, B_TRUE, zfs_keys_change_key,
	    ARRAY_SIZE(zfs_keys_change_key));

	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
	zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
	    B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE, zfs_keys_channel_program,
	    ARRAY_SIZE(zfs_keys_channel_program));

	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));

	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));

	zfs_ioctl_register("zpool_discard_checkpoint",
	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
	    zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_discard_checkpoint,
	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));

	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));

	zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
	    zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));

	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));

	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));

	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));

	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));

	zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS,
	    zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props,
	    ARRAY_SIZE(zfs_keys_vdev_get_props));

	zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS,
	    zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props));

	zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB,
	    zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_NONE, B_TRUE, B_TRUE,
	    zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	/*
	 * Pool-modifying ioctls: zfs_secpolicy_config, logged, and checked
	 * against suspended and read-only pools.
	 */
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* No name check, no logging, no pool checks. */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);

	/*
	 * Dataset read ioctls: zfs_secpolicy_read, not logged, checked
	 * against suspended pools only.
	 */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* Dataset read ioctls with their own security policy. */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/*
	 * Dataset-modifying ioctls: logged, and checked against suspended
	 * and read-only pools.
	 */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* Dataset ioctls that are never logged. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

	/* Event ioctls: no name check, no logging, no pool checks. */
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_init_os();
}
7407
7408 /*
7409 * Verify that for non-legacy ioctls the input nvlist
7410 * pairs match against the expected input.
7411 *
7412 * Possible errors are:
7413 * ZFS_ERR_IOC_ARG_UNAVAIL An unrecognized nvpair was encountered
7414 * ZFS_ERR_IOC_ARG_REQUIRED A required nvpair is missing
7415 * ZFS_ERR_IOC_ARG_BADTYPE Invalid type for nvpair
7416 */
7417 static int
7418 zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
7419 {
7420 const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
7421 boolean_t required_keys_found = B_FALSE;
7422
7423 /*
7424 * examine each input pair
7425 */
7426 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
7427 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
7428 const char *name = nvpair_name(pair);
7429 data_type_t type = nvpair_type(pair);
7430 boolean_t identified = B_FALSE;
7431
7432 /*
7433 * check pair against the documented names and type
7434 */
7435 for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7436 /* if not a wild card name, check for an exact match */
7437 if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
7438 strcmp(nvl_keys[k].zkey_name, name) != 0)
7439 continue;
7440
7441 identified = B_TRUE;
7442
7443 if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
7444 nvl_keys[k].zkey_type != type) {
7445 return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
7446 }
7447
7448 if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7449 continue;
7450
7451 required_keys_found = B_TRUE;
7452 break;
7453 }
7454
7455 /* allow an 'optional' key, everything else is invalid */
7456 if (!identified &&
7457 (strcmp(name, "optional") != 0 ||
7458 type != DATA_TYPE_NVLIST)) {
7459 return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
7460 }
7461 }
7462
7463 /* verify that all required keys were found */
7464 for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
7465 if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
7466 continue;
7467
7468 if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
7469 /* at least one non-optional key is expected here */
7470 if (!required_keys_found)
7471 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7472 continue;
7473 }
7474
7475 if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
7476 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
7477 }
7478
7479 return (0);
7480 }
7481
7482 static int
7483 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
7484 zfs_ioc_poolcheck_t check)
7485 {
7486 spa_t *spa;
7487 int error;
7488
7489 ASSERT(type == POOL_NAME || type == DATASET_NAME ||
7490 type == ENTITY_NAME);
7491
7492 if (check & POOL_CHECK_NONE)
7493 return (0);
7494
7495 error = spa_open(name, &spa, FTAG);
7496 if (error == 0) {
7497 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
7498 error = SET_ERROR(EAGAIN);
7499 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
7500 error = SET_ERROR(EROFS);
7501 spa_close(spa, FTAG);
7502 }
7503 return (error);
7504 }
7505
7506 int
7507 zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
7508 {
7509 zfsdev_state_t *zs, *fpd;
7510
7511 ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
7512
7513 fpd = zfs_file_private(fp);
7514 if (fpd == NULL)
7515 return (SET_ERROR(EBADF));
7516
7517 mutex_enter(&zfsdev_state_lock);
7518
7519 for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
7520
7521 if (zs->zs_minor == -1)
7522 continue;
7523
7524 if (fpd == zs) {
7525 *minorp = fpd->zs_minor;
7526 mutex_exit(&zfsdev_state_lock);
7527 return (0);
7528 }
7529 }
7530
7531 mutex_exit(&zfsdev_state_lock);
7532
7533 return (SET_ERROR(EBADF));
7534 }
7535
7536 void *
7537 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
7538 {
7539 zfsdev_state_t *zs;
7540
7541 for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
7542 if (zs->zs_minor == minor) {
7543 membar_consumer();
7544 switch (which) {
7545 case ZST_ONEXIT:
7546 return (zs->zs_onexit);
7547 case ZST_ZEVENT:
7548 return (zs->zs_zevent);
7549 case ZST_ALL:
7550 return (zs);
7551 }
7552 }
7553 }
7554
7555 return (NULL);
7556 }
7557
7558 /*
7559 * Find a free minor number. The zfsdev_state_list is expected to
7560 * be short since it is only a list of currently open file handles.
7561 */
7562 static minor_t
7563 zfsdev_minor_alloc(void)
7564 {
7565 static minor_t last_minor = 0;
7566 minor_t m;
7567
7568 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7569
7570 for (m = last_minor + 1; m != last_minor; m++) {
7571 if (m > ZFSDEV_MAX_MINOR)
7572 m = 1;
7573 if (zfsdev_get_state(m, ZST_ALL) == NULL) {
7574 last_minor = m;
7575 return (m);
7576 }
7577 }
7578
7579 return (0);
7580 }
7581
7582 int
7583 zfsdev_state_init(void *priv)
7584 {
7585 zfsdev_state_t *zs, *zsprev = NULL;
7586 minor_t minor;
7587 boolean_t newzs = B_FALSE;
7588
7589 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7590
7591 minor = zfsdev_minor_alloc();
7592 if (minor == 0)
7593 return (SET_ERROR(ENXIO));
7594
7595 for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
7596 if (zs->zs_minor == -1)
7597 break;
7598 zsprev = zs;
7599 }
7600
7601 if (!zs) {
7602 zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
7603 newzs = B_TRUE;
7604 }
7605
7606 zfsdev_private_set_state(priv, zs);
7607
7608 zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
7609 zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
7610
7611 /*
7612 * In order to provide for lock-free concurrent read access
7613 * to the minor list in zfsdev_get_state(), new entries
7614 * must be completely written before linking them into the
7615 * list whereas existing entries are already linked; the last
7616 * operation must be updating zs_minor (from -1 to the new
7617 * value).
7618 */
7619 if (newzs) {
7620 zs->zs_minor = minor;
7621 membar_producer();
7622 zsprev->zs_next = zs;
7623 } else {
7624 membar_producer();
7625 zs->zs_minor = minor;
7626 }
7627
7628 return (0);
7629 }
7630
7631 void
7632 zfsdev_state_destroy(void *priv)
7633 {
7634 zfsdev_state_t *zs = zfsdev_private_get_state(priv);
7635
7636 ASSERT(zs != NULL);
7637 ASSERT3S(zs->zs_minor, >, 0);
7638
7639 /*
7640 * The last reference to this zfsdev file descriptor is being dropped.
7641 * We don't have to worry about lookup grabbing this state object, and
7642 * zfsdev_state_init() will not try to reuse this object until it is
7643 * invalidated by setting zs_minor to -1. Invalidation must be done
7644 * last, with a memory barrier to ensure ordering. This lets us avoid
7645 * taking the global zfsdev state lock around destruction.
7646 */
7647 zfs_onexit_destroy(zs->zs_onexit);
7648 zfs_zevent_destroy(zs->zs_zevent);
7649 zs->zs_onexit = NULL;
7650 zs->zs_zevent = NULL;
7651 membar_producer();
7652 zs->zs_minor = -1;
7653 }
7654
7655 long
7656 zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
7657 {
7658 int error, cmd;
7659 const zfs_ioc_vec_t *vec;
7660 char *saved_poolname = NULL;
7661 uint64_t max_nvlist_src_size;
7662 size_t saved_poolname_len = 0;
7663 nvlist_t *innvl = NULL;
7664 fstrans_cookie_t cookie;
7665 hrtime_t start_time = gethrtime();
7666
7667 cmd = vecnum;
7668 error = 0;
7669 if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
7670 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7671
7672 vec = &zfs_ioc_vec[vecnum];
7673
7674 /*
7675 * The registered ioctl list may be sparse, verify that either
7676 * a normal or legacy handler are registered.
7677 */
7678 if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
7679 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
7680
7681 zc->zc_iflags = flag & FKIOCTL;
7682 max_nvlist_src_size = zfs_max_nvlist_src_size_os();
7683 if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
7684 /*
7685 * Make sure the user doesn't pass in an insane value for
7686 * zc_nvlist_src_size. We have to check, since we will end
7687 * up allocating that much memory inside of get_nvlist(). This
7688 * prevents a nefarious user from allocating tons of kernel
7689 * memory.
7690 *
7691 * Also, we return EINVAL instead of ENOMEM here. The reason
7692 * being that returning ENOMEM from an ioctl() has a special
7693 * connotation; that the user's size value is too small and
7694 * needs to be expanded to hold the nvlist. See
7695 * zcmd_expand_dst_nvlist() for details.
7696 */
7697 error = SET_ERROR(EINVAL); /* User's size too big */
7698
7699 } else if (zc->zc_nvlist_src_size != 0) {
7700 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
7701 zc->zc_iflags, &innvl);
7702 if (error != 0)
7703 goto out;
7704 }
7705
7706 /*
7707 * Ensure that all pool/dataset names are valid before we pass down to
7708 * the lower layers.
7709 */
7710 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
7711 switch (vec->zvec_namecheck) {
7712 case POOL_NAME:
7713 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
7714 error = SET_ERROR(EINVAL);
7715 else
7716 error = pool_status_check(zc->zc_name,
7717 vec->zvec_namecheck, vec->zvec_pool_check);
7718 break;
7719
7720 case DATASET_NAME:
7721 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
7722 error = SET_ERROR(EINVAL);
7723 else
7724 error = pool_status_check(zc->zc_name,
7725 vec->zvec_namecheck, vec->zvec_pool_check);
7726 break;
7727
7728 case ENTITY_NAME:
7729 if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
7730 error = SET_ERROR(EINVAL);
7731 } else {
7732 error = pool_status_check(zc->zc_name,
7733 vec->zvec_namecheck, vec->zvec_pool_check);
7734 }
7735 break;
7736
7737 case NO_NAME:
7738 break;
7739 }
7740 /*
7741 * Ensure that all input pairs are valid before we pass them down
7742 * to the lower layers.
7743 *
7744 * The vectored functions can use fnvlist_lookup_{type} for any
7745 * required pairs since zfs_check_input_nvpairs() confirmed that
7746 * they exist and are of the correct type.
7747 */
7748 if (error == 0 && vec->zvec_func != NULL) {
7749 error = zfs_check_input_nvpairs(innvl, vec);
7750 if (error != 0)
7751 goto out;
7752 }
7753
7754 if (error == 0) {
7755 cookie = spl_fstrans_mark();
7756 error = vec->zvec_secpolicy(zc, innvl, CRED());
7757 spl_fstrans_unmark(cookie);
7758 }
7759
7760 if (error != 0)
7761 goto out;
7762
7763 /* legacy ioctls can modify zc_name */
7764 /*
7765 * Can't use kmem_strdup() as we might truncate the string and
7766 * kmem_strfree() would then free with incorrect size.
7767 */
7768 saved_poolname_len = strlen(zc->zc_name) + 1;
7769 saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
7770
7771 strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
7772 saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
7773
7774 if (vec->zvec_func != NULL) {
7775 nvlist_t *outnvl;
7776 int puterror = 0;
7777 spa_t *spa;
7778 nvlist_t *lognv = NULL;
7779
7780 ASSERT(vec->zvec_legacy_func == NULL);
7781
7782 /*
7783 * Add the innvl to the lognv before calling the func,
7784 * in case the func changes the innvl.
7785 */
7786 if (vec->zvec_allow_log) {
7787 lognv = fnvlist_alloc();
7788 fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
7789 vec->zvec_name);
7790 if (!nvlist_empty(innvl)) {
7791 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
7792 innvl);
7793 }
7794 }
7795
7796 outnvl = fnvlist_alloc();
7797 cookie = spl_fstrans_mark();
7798 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
7799 spl_fstrans_unmark(cookie);
7800
7801 /*
7802 * Some commands can partially execute, modify state, and still
7803 * return an error. In these cases, attempt to record what
7804 * was modified.
7805 */
7806 if ((error == 0 ||
7807 (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
7808 vec->zvec_allow_log &&
7809 spa_open(zc->zc_name, &spa, FTAG) == 0) {
7810 if (!nvlist_empty(outnvl)) {
7811 size_t out_size = fnvlist_size(outnvl);
7812 if (out_size > zfs_history_output_max) {
7813 fnvlist_add_int64(lognv,
7814 ZPOOL_HIST_OUTPUT_SIZE, out_size);
7815 } else {
7816 fnvlist_add_nvlist(lognv,
7817 ZPOOL_HIST_OUTPUT_NVL, outnvl);
7818 }
7819 }
7820 if (error != 0) {
7821 fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
7822 error);
7823 }
7824 fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
7825 gethrtime() - start_time);
7826 (void) spa_history_log_nvl(spa, lognv);
7827 spa_close(spa, FTAG);
7828 }
7829 fnvlist_free(lognv);
7830
7831 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
7832 int smusherror = 0;
7833 if (vec->zvec_smush_outnvlist) {
7834 smusherror = nvlist_smush(outnvl,
7835 zc->zc_nvlist_dst_size);
7836 }
7837 if (smusherror == 0)
7838 puterror = put_nvlist(zc, outnvl);
7839 }
7840
7841 if (puterror != 0)
7842 error = puterror;
7843
7844 nvlist_free(outnvl);
7845 } else {
7846 cookie = spl_fstrans_mark();
7847 error = vec->zvec_legacy_func(zc);
7848 spl_fstrans_unmark(cookie);
7849 }
7850
7851 out:
7852 nvlist_free(innvl);
7853 if (error == 0 && vec->zvec_allow_log) {
7854 char *s = tsd_get(zfs_allow_log_key);
7855 if (s != NULL)
7856 kmem_strfree(s);
7857 (void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
7858 }
7859 if (saved_poolname != NULL)
7860 kmem_free(saved_poolname, saved_poolname_len);
7861
7862 return (error);
7863 }
7864
7865 int
7866 zfs_kmod_init(void)
7867 {
7868 int error;
7869
7870 if ((error = zvol_init()) != 0)
7871 return (error);
7872
7873 spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
7874 zfs_init();
7875
7876 zfs_ioctl_init();
7877
7878 mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
7879 zfsdev_state_listhead.zs_minor = -1;
7880
7881 if ((error = zfsdev_attach()) != 0)
7882 goto out;
7883
7884 tsd_create(&zfs_fsyncer_key, NULL);
7885 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7886 tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7887
7888 return (0);
7889 out:
7890 zfs_fini();
7891 spa_fini();
7892 zvol_fini();
7893
7894 return (error);
7895 }
7896
7897 void
7898 zfs_kmod_fini(void)
7899 {
7900 zfsdev_state_t *zs, *zsnext = NULL;
7901
7902 zfsdev_detach();
7903
7904 mutex_destroy(&zfsdev_state_lock);
7905
7906 for (zs = &zfsdev_state_listhead; zs != NULL; zs = zsnext) {
7907 zsnext = zs->zs_next;
7908 if (zs->zs_onexit)
7909 zfs_onexit_destroy(zs->zs_onexit);
7910 if (zs->zs_zevent)
7911 zfs_zevent_destroy(zs->zs_zevent);
7912 if (zs != &zfsdev_state_listhead)
7913 kmem_free(zs, sizeof (zfsdev_state_t));
7914 }
7915
7916 zfs_ereport_taskq_fini(); /* run before zfs_fini() on Linux */
7917 zfs_fini();
7918 spa_fini();
7919 zvol_fini();
7920
7921 tsd_destroy(&zfs_fsyncer_key);
7922 tsd_destroy(&rrw_tsd_key);
7923 tsd_destroy(&zfs_allow_log_key);
7924 }
7925
7926 ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
7927 "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
7928
7929 ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
7930 "Maximum size in bytes of ZFS ioctl output that will be logged");