]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/zfs_ioctl.c
e3166a40e979ac40c759262fd0ba352d796bd519
[mirror_zfs.git] / module / zfs / zfs_ioctl.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 Martin Matuska
25 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
26 * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
27 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
29 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
34 */
35
36 /*
37 * ZFS ioctls.
38 *
39 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
40 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
41 *
42 * There are two ways that we handle ioctls: the legacy way where almost
43 * all of the logic is in the ioctl callback, and the new way where most
44 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
45 *
46 * Non-legacy ioctls should be registered by calling
47 * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked
48 * from userland by lzc_ioctl().
49 *
50 * The registration arguments are as follows:
51 *
52 * const char *name
53 * The name of the ioctl. This is used for history logging. If the
54 * ioctl returns successfully (the callback returns 0), and allow_log
55 * is true, then a history log entry will be recorded with the input &
56 * output nvlists. The log entry can be printed with "zpool history -i".
57 *
58 * zfs_ioc_t ioc
59 * The ioctl request number, which userland will pass to ioctl(2).
60 * The ioctl numbers can change from release to release, because
61 * the caller (libzfs) must be matched to the kernel.
62 *
63 * zfs_secpolicy_func_t *secpolicy
64 * This function will be called before the zfs_ioc_func_t, to
65 * determine if this operation is permitted. It should return EPERM
66 * on failure, and 0 on success. Checks include determining if the
67 * dataset is visible in this zone, and if the user has either all
68 * zfs privileges in the zone (SYS_MOUNT), or has been granted permission
69 * to do this operation on this dataset with "zfs allow".
70 *
71 * zfs_ioc_namecheck_t namecheck
72 * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
73 * name, a dataset name, or nothing. If the name is not well-formed,
74 * the ioctl will fail and the callback will not be called.
75 * Therefore, the callback can assume that the name is well-formed
76 * (e.g. is null-terminated, doesn't have more than one '@' character,
77 * doesn't have invalid characters).
78 *
79 * zfs_ioc_poolcheck_t pool_check
80 * This specifies requirements on the pool state. If the pool does
81 * not meet them (is suspended or is readonly), the ioctl will fail
82 * and the callback will not be called. If any checks are specified
83 * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
84 * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
85 * POOL_CHECK_READONLY).
86 *
87 * boolean_t smush_outnvlist
88 * If smush_outnvlist is true, then the output is presumed to be a
89 * list of errors, and it will be "smushed" down to fit into the
90 * caller's buffer, by removing some entries and replacing them with a
91 * single "N_MORE_ERRORS" entry indicating how many were removed. See
92 * nvlist_smush() for details. If smush_outnvlist is false, and the
93 * outnvlist does not fit into the userland-provided buffer, then the
94 * ioctl will fail with ENOMEM.
95 *
96 * zfs_ioc_func_t *func
97 * The callback function that will perform the operation.
98 *
99 * The callback should return 0 on success, or an error number on
100 * failure. If the function fails, the userland ioctl will return -1,
101 * and errno will be set to the callback's return value. The callback
102 * will be called with the following arguments:
103 *
104 * const char *name
105 * The name of the pool or dataset to operate on, from
106 * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the
107 * expected type (pool, dataset, or none).
108 *
109 * nvlist_t *innvl
110 * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or
111 * NULL if no input nvlist was provided. Changes to this nvlist are
112 * ignored. If the input nvlist could not be deserialized, the
113 * ioctl will fail and the callback will not be called.
114 *
115 * nvlist_t *outnvl
116 * The output nvlist, initially empty. The callback can fill it in,
117 * and it will be returned to userland by serializing it into
118 * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization
119 * fails (e.g. because the caller didn't supply a large enough
120 * buffer), then the overall ioctl will fail. See the
121 * 'smush_outnvlist' argument above for additional behaviors.
122 *
123 * There are two typical uses of the output nvlist:
124 * - To return state, e.g. property values. In this case,
125 * smush_outnvlist should be false. If the buffer was not large
126 * enough, the caller will reallocate a larger buffer and try
127 * the ioctl again.
128 *
129 * - To return multiple errors from an ioctl which makes on-disk
130 * changes. In this case, smush_outnvlist should be true.
131 * Ioctls which make on-disk modifications should generally not
132 * use the outnvl if they succeed, because the caller can not
133 * distinguish between the operation failing, and
134 * deserialization failing.
135 */
136
137 #include <sys/types.h>
138 #include <sys/param.h>
139 #include <sys/errno.h>
140 #include <sys/uio.h>
141 #include <sys/buf.h>
142 #include <sys/modctl.h>
143 #include <sys/open.h>
144 #include <sys/file.h>
145 #include <sys/kmem.h>
146 #include <sys/conf.h>
147 #include <sys/cmn_err.h>
148 #include <sys/stat.h>
149 #include <sys/zfs_ioctl.h>
150 #include <sys/zfs_vfsops.h>
151 #include <sys/zfs_znode.h>
152 #include <sys/zap.h>
153 #include <sys/spa.h>
154 #include <sys/spa_impl.h>
155 #include <sys/vdev.h>
156 #include <sys/priv_impl.h>
157 #include <sys/dmu.h>
158 #include <sys/dsl_dir.h>
159 #include <sys/dsl_dataset.h>
160 #include <sys/dsl_prop.h>
161 #include <sys/dsl_deleg.h>
162 #include <sys/dmu_objset.h>
163 #include <sys/dmu_impl.h>
164 #include <sys/dmu_tx.h>
165 #include <sys/ddi.h>
166 #include <sys/sunddi.h>
167 #include <sys/sunldi.h>
168 #include <sys/policy.h>
169 #include <sys/zone.h>
170 #include <sys/nvpair.h>
171 #include <sys/pathname.h>
172 #include <sys/mount.h>
173 #include <sys/sdt.h>
174 #include <sys/fs/zfs.h>
175 #include <sys/zfs_ctldir.h>
176 #include <sys/zfs_dir.h>
177 #include <sys/zfs_onexit.h>
178 #include <sys/zvol.h>
179 #include <sys/dsl_scan.h>
180 #include <sharefs/share.h>
181 #include <sys/fm/util.h>
182
183 #include <sys/dmu_send.h>
184 #include <sys/dsl_destroy.h>
185 #include <sys/dsl_bookmark.h>
186 #include <sys/dsl_userhold.h>
187 #include <sys/zfeature.h>
188 #include <sys/zio_checksum.h>
189
190 #include <linux/miscdevice.h>
191 #include <linux/slab.h>
192
193 #include "zfs_namecheck.h"
194 #include "zfs_prop.h"
195 #include "zfs_deleg.h"
196 #include "zfs_comutil.h"
197
198 /*
199 * Limit maximum nvlist size. We don't want users passing in insane values
200 * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
201 */
202 #define MAX_NVLIST_SRC_SIZE KMALLOC_MAX_SIZE
203
204 kmutex_t zfsdev_state_lock;
205 zfsdev_state_t *zfsdev_state_list;
206
207 extern void zfs_init(void);
208 extern void zfs_fini(void);
209
210 uint_t zfs_fsyncer_key;
211 extern uint_t rrw_tsd_key;
212 static uint_t zfs_allow_log_key;
213
214 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
215 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
216 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
217
/*
 * What kind of name an ioctl expects in zfs_cmd_t:zc_name.
 */
typedef enum {
	NO_NAME,	/* no name is expected */
	POOL_NAME,	/* a pool name is expected */
	DATASET_NAME	/* a dataset name is expected */
} zfs_ioc_namecheck_t;
223
/*
 * Requirements on the pool state for an ioctl.  Each value is a distinct
 * bit, so checks can be or-ed together
 * (e.g. POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY).
 */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,	/* no pool state requirement */
	POOL_CHECK_SUSPENDED	= 1 << 1,	/* fail if pool is suspended */
	POOL_CHECK_READONLY	= 1 << 2,	/* fail if pool is readonly */
} zfs_ioc_poolcheck_t;
229
/*
 * Description of one ioctl: its callback, its security policy and the
 * checks/behaviours described in the registration notes at the top of
 * this file.
 */
typedef struct zfs_ioc_vec {
	/* legacy-style callback, which receives the raw zfs_cmd_t */
	zfs_ioc_legacy_func_t	*zvec_legacy_func;
	/* new-style callback, which receives (name, innvl, outnvl) */
	zfs_ioc_func_t		*zvec_func;
	/* permission check called before the callback */
	zfs_secpolicy_func_t	*zvec_secpolicy;
	/* what to expect in zfs_cmd_t:zc_name */
	zfs_ioc_namecheck_t	zvec_namecheck;
	/* whether a successful call may be recorded in the history log */
	boolean_t		zvec_allow_log;
	/* required pool state (zfs_ioc_poolcheck_t bits) */
	zfs_ioc_poolcheck_t	zvec_pool_check;
	/* whether an oversized output nvlist is "smushed" to fit */
	boolean_t		zvec_smush_outnvlist;
	/* ioctl name, used for history logging */
	const char		*zvec_name;
} zfs_ioc_vec_t;
240
/*
 * Delegated-permission name for each user/group quota property.
 * This array is indexed by zfs_userquota_prop_t, so the entries must stay
 * in the same order as that enum.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
	ZFS_DELEG_PERM_USEROBJUSED,
	ZFS_DELEG_PERM_USEROBJQUOTA,
	ZFS_DELEG_PERM_GROUPOBJUSED,
	ZFS_DELEG_PERM_GROUPOBJQUOTA,
};
252
253 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
254 static int zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc);
255 static int zfs_check_settable(const char *name, nvpair_t *property,
256 cred_t *cr);
257 static int zfs_check_clearable(char *dataset, nvlist_t *props,
258 nvlist_t **errors);
259 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
260 boolean_t *);
261 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
262 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
263
264 static void
265 history_str_free(char *buf)
266 {
267 kmem_free(buf, HIS_MAX_RECORD_LEN);
268 }
269
270 static char *
271 history_str_get(zfs_cmd_t *zc)
272 {
273 char *buf;
274
275 if (zc->zc_history == 0)
276 return (NULL);
277
278 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
279 if (copyinstr((void *)(uintptr_t)zc->zc_history,
280 buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
281 history_str_free(buf);
282 return (NULL);
283 }
284
285 buf[HIS_MAX_RECORD_LEN -1] = '\0';
286
287 return (buf);
288 }
289
290 /*
291 * Check to see if the named dataset is currently defined as bootable
292 */
293 static boolean_t
294 zfs_is_bootfs(const char *name)
295 {
296 objset_t *os;
297
298 if (dmu_objset_hold(name, FTAG, &os) == 0) {
299 boolean_t ret;
300 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
301 dmu_objset_rele(os, FTAG);
302 return (ret);
303 }
304 return (B_FALSE);
305 }
306
307 /*
308 * Return non-zero if the spa version is less than requested version.
309 */
310 static int
311 zfs_earlier_version(const char *name, int version)
312 {
313 spa_t *spa;
314
315 if (spa_open(name, &spa, FTAG) == 0) {
316 if (spa_version(spa) < version) {
317 spa_close(spa, FTAG);
318 return (1);
319 }
320 spa_close(spa, FTAG);
321 }
322 return (0);
323 }
324
325 /*
326 * Return TRUE if the ZPL version is less than requested version.
327 */
328 static boolean_t
329 zpl_earlier_version(const char *name, int version)
330 {
331 objset_t *os;
332 boolean_t rc = B_TRUE;
333
334 if (dmu_objset_hold(name, FTAG, &os) == 0) {
335 uint64_t zplversion;
336
337 if (dmu_objset_type(os) != DMU_OST_ZFS) {
338 dmu_objset_rele(os, FTAG);
339 return (B_TRUE);
340 }
341 /* XXX reading from non-owned objset */
342 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
343 rc = zplversion < version;
344 dmu_objset_rele(os, FTAG);
345 }
346 return (rc);
347 }
348
349 static void
350 zfs_log_history(zfs_cmd_t *zc)
351 {
352 spa_t *spa;
353 char *buf;
354
355 if ((buf = history_str_get(zc)) == NULL)
356 return;
357
358 if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
359 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
360 (void) spa_history_log(spa, buf);
361 spa_close(spa, FTAG);
362 }
363 history_str_free(buf);
364 }
365
366 /*
367 * Policy for top-level read operations (list pools). Requires no privileges,
368 * and can be used in the local zone, as there is no associated dataset.
369 */
370 /* ARGSUSED */
371 static int
372 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
373 {
374 return (0);
375 }
376
377 /*
378 * Policy for dataset read operations (list children, get statistics). Requires
379 * no privileges, but must be visible in the local zone.
380 */
381 /* ARGSUSED */
382 static int
383 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
384 {
385 if (INGLOBALZONE(curproc) ||
386 zone_dataset_visible(zc->zc_name, NULL))
387 return (0);
388
389 return (SET_ERROR(ENOENT));
390 }
391
392 static int
393 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
394 {
395 int writable = 1;
396
397 /*
398 * The dataset must be visible by this zone -- check this first
399 * so they don't see EPERM on something they shouldn't know about.
400 */
401 if (!INGLOBALZONE(curproc) &&
402 !zone_dataset_visible(dataset, &writable))
403 return (SET_ERROR(ENOENT));
404
405 if (INGLOBALZONE(curproc)) {
406 /*
407 * If the fs is zoned, only root can access it from the
408 * global zone.
409 */
410 if (secpolicy_zfs(cr) && zoned)
411 return (SET_ERROR(EPERM));
412 } else {
413 /*
414 * If we are in a local zone, the 'zoned' property must be set.
415 */
416 if (!zoned)
417 return (SET_ERROR(EPERM));
418
419 /* must be writable by this zone */
420 if (!writable)
421 return (SET_ERROR(EPERM));
422 }
423 return (0);
424 }
425
426 static int
427 zfs_dozonecheck(const char *dataset, cred_t *cr)
428 {
429 uint64_t zoned;
430
431 if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
432 return (SET_ERROR(ENOENT));
433
434 return (zfs_dozonecheck_impl(dataset, zoned, cr));
435 }
436
437 static int
438 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
439 {
440 uint64_t zoned;
441
442 if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
443 return (SET_ERROR(ENOENT));
444
445 return (zfs_dozonecheck_impl(dataset, zoned, cr));
446 }
447
448 static int
449 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
450 const char *perm, cred_t *cr)
451 {
452 int error;
453
454 error = zfs_dozonecheck_ds(name, ds, cr);
455 if (error == 0) {
456 error = secpolicy_zfs(cr);
457 if (error != 0)
458 error = dsl_deleg_access_impl(ds, perm, cr);
459 }
460 return (error);
461 }
462
463 static int
464 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
465 {
466 int error;
467 dsl_dataset_t *ds;
468 dsl_pool_t *dp;
469
470 error = dsl_pool_hold(name, FTAG, &dp);
471 if (error != 0)
472 return (error);
473
474 error = dsl_dataset_hold(dp, name, FTAG, &ds);
475 if (error != 0) {
476 dsl_pool_rele(dp, FTAG);
477 return (error);
478 }
479
480 error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
481
482 dsl_dataset_rele(ds, FTAG);
483 dsl_pool_rele(dp, FTAG);
484 return (error);
485 }
486
487 /*
488 * Policy for setting the security label property.
489 *
490 * Returns 0 for success, non-zero for access and other errors.
491 */
492 static int
493 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
494 {
495 #ifdef HAVE_MLSLABEL
496 char ds_hexsl[MAXNAMELEN];
497 bslabel_t ds_sl, new_sl;
498 boolean_t new_default = FALSE;
499 uint64_t zoned;
500 int needed_priv = -1;
501 int error;
502
503 /* First get the existing dataset label. */
504 error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
505 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
506 if (error != 0)
507 return (SET_ERROR(EPERM));
508
509 if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
510 new_default = TRUE;
511
512 /* The label must be translatable */
513 if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
514 return (SET_ERROR(EINVAL));
515
516 /*
517 * In a non-global zone, disallow attempts to set a label that
518 * doesn't match that of the zone; otherwise no other checks
519 * are needed.
520 */
521 if (!INGLOBALZONE(curproc)) {
522 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
523 return (SET_ERROR(EPERM));
524 return (0);
525 }
526
527 /*
528 * For global-zone datasets (i.e., those whose zoned property is
529 * "off", verify that the specified new label is valid for the
530 * global zone.
531 */
532 if (dsl_prop_get_integer(name,
533 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
534 return (SET_ERROR(EPERM));
535 if (!zoned) {
536 if (zfs_check_global_label(name, strval) != 0)
537 return (SET_ERROR(EPERM));
538 }
539
540 /*
541 * If the existing dataset label is nondefault, check if the
542 * dataset is mounted (label cannot be changed while mounted).
543 * Get the zfs_sb_t; if there isn't one, then the dataset isn't
544 * mounted (or isn't a dataset, doesn't exist, ...).
545 */
546 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
547 objset_t *os;
548 static char *setsl_tag = "setsl_tag";
549
550 /*
551 * Try to own the dataset; abort if there is any error,
552 * (e.g., already mounted, in use, or other error).
553 */
554 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
555 setsl_tag, &os);
556 if (error != 0)
557 return (SET_ERROR(EPERM));
558
559 dmu_objset_disown(os, setsl_tag);
560
561 if (new_default) {
562 needed_priv = PRIV_FILE_DOWNGRADE_SL;
563 goto out_check;
564 }
565
566 if (hexstr_to_label(strval, &new_sl) != 0)
567 return (SET_ERROR(EPERM));
568
569 if (blstrictdom(&ds_sl, &new_sl))
570 needed_priv = PRIV_FILE_DOWNGRADE_SL;
571 else if (blstrictdom(&new_sl, &ds_sl))
572 needed_priv = PRIV_FILE_UPGRADE_SL;
573 } else {
574 /* dataset currently has a default label */
575 if (!new_default)
576 needed_priv = PRIV_FILE_UPGRADE_SL;
577 }
578
579 out_check:
580 if (needed_priv != -1)
581 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
582 return (0);
583 #else
584 return (ENOTSUP);
585 #endif /* HAVE_MLSLABEL */
586 }
587
588 static int
589 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
590 cred_t *cr)
591 {
592 char *strval;
593
594 /*
595 * Check permissions for special properties.
596 */
597 switch (prop) {
598 default:
599 break;
600 case ZFS_PROP_ZONED:
601 /*
602 * Disallow setting of 'zoned' from within a local zone.
603 */
604 if (!INGLOBALZONE(curproc))
605 return (SET_ERROR(EPERM));
606 break;
607
608 case ZFS_PROP_QUOTA:
609 case ZFS_PROP_FILESYSTEM_LIMIT:
610 case ZFS_PROP_SNAPSHOT_LIMIT:
611 if (!INGLOBALZONE(curproc)) {
612 uint64_t zoned;
613 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
614 /*
615 * Unprivileged users are allowed to modify the
616 * limit on things *under* (ie. contained by)
617 * the thing they own.
618 */
619 if (dsl_prop_get_integer(dsname, "zoned", &zoned,
620 setpoint))
621 return (SET_ERROR(EPERM));
622 if (!zoned || strlen(dsname) <= strlen(setpoint))
623 return (SET_ERROR(EPERM));
624 }
625 break;
626
627 case ZFS_PROP_MLSLABEL:
628 if (!is_system_labeled())
629 return (SET_ERROR(EPERM));
630
631 if (nvpair_value_string(propval, &strval) == 0) {
632 int err;
633
634 err = zfs_set_slabel_policy(dsname, strval, CRED());
635 if (err != 0)
636 return (err);
637 }
638 break;
639 }
640
641 return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
642 }
643
644 /* ARGSUSED */
645 static int
646 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
647 {
648 int error;
649
650 error = zfs_dozonecheck(zc->zc_name, cr);
651 if (error != 0)
652 return (error);
653
654 /*
655 * permission to set permissions will be evaluated later in
656 * dsl_deleg_can_allow()
657 */
658 return (0);
659 }
660
661 /* ARGSUSED */
662 static int
663 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
664 {
665 return (zfs_secpolicy_write_perms(zc->zc_name,
666 ZFS_DELEG_PERM_ROLLBACK, cr));
667 }
668
669 /* ARGSUSED */
670 static int
671 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
672 {
673 dsl_pool_t *dp;
674 dsl_dataset_t *ds;
675 char *cp;
676 int error;
677
678 /*
679 * Generate the current snapshot name from the given objsetid, then
680 * use that name for the secpolicy/zone checks.
681 */
682 cp = strchr(zc->zc_name, '@');
683 if (cp == NULL)
684 return (SET_ERROR(EINVAL));
685 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
686 if (error != 0)
687 return (error);
688
689 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
690 if (error != 0) {
691 dsl_pool_rele(dp, FTAG);
692 return (error);
693 }
694
695 dsl_dataset_name(ds, zc->zc_name);
696
697 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
698 ZFS_DELEG_PERM_SEND, cr);
699 dsl_dataset_rele(ds, FTAG);
700 dsl_pool_rele(dp, FTAG);
701
702 return (error);
703 }
704
705 /* ARGSUSED */
706 static int
707 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
708 {
709 return (zfs_secpolicy_write_perms(zc->zc_name,
710 ZFS_DELEG_PERM_SEND, cr));
711 }
712
#ifdef HAVE_SMB_SHARE
/*
 * Delegated-permission check for sharing.  The path in zc->zc_value must
 * resolve to a vnode on a ZFS filesystem whose resource name equals
 * zc->zc_name (EPERM otherwise); ZFS_DELEG_PERM_SHARE is then checked on
 * that dataset.
 */
/* ARGSUSED */
static int
zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	vnode_t *vp;
	int mismatch;
	int error;

	error = lookupname(zc->zc_value, UIO_SYSSPACE,
	    NO_FOLLOW, NULL, &vp);
	if (error != 0)
		return (error);

	/* Now make sure mntpnt and dataset are ZFS */
	mismatch = (vp->v_vfsp->vfs_fstype != zfsfstype ||
	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
	    zc->zc_name) != 0));

	/* The vnode is no longer needed whichever way the check went. */
	VN_RELE(vp);

	if (mismatch)
		return (SET_ERROR(EPERM));

	return (dsl_deleg_access(zc->zc_name,
	    ZFS_DELEG_PERM_SHARE, cr));
}
#endif /* HAVE_SMB_SHARE */
739
740 int
741 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
742 {
743 #ifdef HAVE_SMB_SHARE
744 if (!INGLOBALZONE(curproc))
745 return (SET_ERROR(EPERM));
746
747 if (secpolicy_nfs(cr) == 0) {
748 return (0);
749 } else {
750 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
751 }
752 #else
753 return (SET_ERROR(ENOTSUP));
754 #endif /* HAVE_SMB_SHARE */
755 }
756
757 int
758 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
759 {
760 #ifdef HAVE_SMB_SHARE
761 if (!INGLOBALZONE(curproc))
762 return (SET_ERROR(EPERM));
763
764 if (secpolicy_smb(cr) == 0) {
765 return (0);
766 } else {
767 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
768 }
769 #else
770 return (SET_ERROR(ENOTSUP));
771 #endif /* HAVE_SMB_SHARE */
772 }
773
/*
 * Compute the parent of 'datasetname' by removing the trailing "@bla"
 * (snapshot) or "/bla" (child) component.
 *
 * datasetname - name to derive the parent from
 * parent      - output buffer receiving the parent name
 * parentsize  - size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT if the name has neither '@' nor '/', and
 * EINVAL if 'parentsize' is not positive.  A name that does not fit is
 * truncated to parentsize - 1 bytes before the parent is derived.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() does not terminate the destination when the source is
	 * at least as long as the buffer, and the strrchr() calls below
	 * would then run off the end of 'parent'.  Copy one byte less and
	 * terminate explicitly.
	 */
	(void) strncpy(parent, datasetname, parentsize - 1);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
795
796 int
797 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
798 {
799 int error;
800
801 if ((error = zfs_secpolicy_write_perms(name,
802 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
803 return (error);
804
805 return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
806 }
807
808 /* ARGSUSED */
809 static int
810 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
811 {
812 return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
813 }
814
815 /*
816 * Destroying snapshots with delegated permissions requires
817 * descendant mount and destroy permissions.
818 */
819 /* ARGSUSED */
820 static int
821 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
822 {
823 nvlist_t *snaps;
824 nvpair_t *pair, *nextpair;
825 int error = 0;
826
827 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
828 return (SET_ERROR(EINVAL));
829 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
830 pair = nextpair) {
831 nextpair = nvlist_next_nvpair(snaps, pair);
832 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
833 if (error == ENOENT) {
834 /*
835 * Ignore any snapshots that don't exist (we consider
836 * them "already destroyed"). Remove the name from the
837 * nvl here in case the snapshot is created between
838 * now and when we try to destroy it (in which case
839 * we don't want to destroy it since we haven't
840 * checked for permission).
841 */
842 fnvlist_remove_nvpair(snaps, pair);
843 error = 0;
844 }
845 if (error != 0)
846 break;
847 }
848
849 return (error);
850 }
851
852 int
853 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
854 {
855 char parentname[ZFS_MAX_DATASET_NAME_LEN];
856 int error;
857
858 if ((error = zfs_secpolicy_write_perms(from,
859 ZFS_DELEG_PERM_RENAME, cr)) != 0)
860 return (error);
861
862 if ((error = zfs_secpolicy_write_perms(from,
863 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
864 return (error);
865
866 if ((error = zfs_get_parent(to, parentname,
867 sizeof (parentname))) != 0)
868 return (error);
869
870 if ((error = zfs_secpolicy_write_perms(parentname,
871 ZFS_DELEG_PERM_CREATE, cr)) != 0)
872 return (error);
873
874 if ((error = zfs_secpolicy_write_perms(parentname,
875 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
876 return (error);
877
878 return (error);
879 }
880
881 /* ARGSUSED */
882 static int
883 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
884 {
885 return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
886 }
887
888 /* ARGSUSED */
889 static int
890 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
891 {
892 dsl_pool_t *dp;
893 dsl_dataset_t *clone;
894 int error;
895
896 error = zfs_secpolicy_write_perms(zc->zc_name,
897 ZFS_DELEG_PERM_PROMOTE, cr);
898 if (error != 0)
899 return (error);
900
901 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
902 if (error != 0)
903 return (error);
904
905 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
906
907 if (error == 0) {
908 char parentname[ZFS_MAX_DATASET_NAME_LEN];
909 dsl_dataset_t *origin = NULL;
910 dsl_dir_t *dd;
911 dd = clone->ds_dir;
912
913 error = dsl_dataset_hold_obj(dd->dd_pool,
914 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
915 if (error != 0) {
916 dsl_dataset_rele(clone, FTAG);
917 dsl_pool_rele(dp, FTAG);
918 return (error);
919 }
920
921 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
922 ZFS_DELEG_PERM_MOUNT, cr);
923
924 dsl_dataset_name(origin, parentname);
925 if (error == 0) {
926 error = zfs_secpolicy_write_perms_ds(parentname, origin,
927 ZFS_DELEG_PERM_PROMOTE, cr);
928 }
929 dsl_dataset_rele(clone, FTAG);
930 dsl_dataset_rele(origin, FTAG);
931 }
932 dsl_pool_rele(dp, FTAG);
933 return (error);
934 }
935
936 /* ARGSUSED */
937 static int
938 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
939 {
940 int error;
941
942 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
943 ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
944 return (error);
945
946 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
947 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
948 return (error);
949
950 return (zfs_secpolicy_write_perms(zc->zc_name,
951 ZFS_DELEG_PERM_CREATE, cr));
952 }
953
954 /* ARGSUSED */
955 static int
956 zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
957 {
958 return (zfs_secpolicy_recv(zc, innvl, cr));
959 }
960
961 int
962 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
963 {
964 return (zfs_secpolicy_write_perms(name,
965 ZFS_DELEG_PERM_SNAPSHOT, cr));
966 }
967
968 /*
969 * Check for permission to create each snapshot in the nvlist.
970 */
971 /* ARGSUSED */
972 static int
973 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
974 {
975 nvlist_t *snaps;
976 int error = 0;
977 nvpair_t *pair;
978
979 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
980 return (SET_ERROR(EINVAL));
981 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
982 pair = nvlist_next_nvpair(snaps, pair)) {
983 char *name = nvpair_name(pair);
984 char *atp = strchr(name, '@');
985
986 if (atp == NULL) {
987 error = SET_ERROR(EINVAL);
988 break;
989 }
990 *atp = '\0';
991 error = zfs_secpolicy_snapshot_perms(name, cr);
992 *atp = '@';
993 if (error != 0)
994 break;
995 }
996 return (error);
997 }
998
999 /*
1000 * Check for permission to create each snapshot in the nvlist.
1001 */
1002 /* ARGSUSED */
1003 static int
1004 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1005 {
1006 int error = 0;
1007 nvpair_t *pair;
1008
1009 for (pair = nvlist_next_nvpair(innvl, NULL);
1010 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1011 char *name = nvpair_name(pair);
1012 char *hashp = strchr(name, '#');
1013
1014 if (hashp == NULL) {
1015 error = SET_ERROR(EINVAL);
1016 break;
1017 }
1018 *hashp = '\0';
1019 error = zfs_secpolicy_write_perms(name,
1020 ZFS_DELEG_PERM_BOOKMARK, cr);
1021 *hashp = '#';
1022 if (error != 0)
1023 break;
1024 }
1025 return (error);
1026 }
1027
1028 /* ARGSUSED */
1029 static int
1030 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1031 {
1032 nvpair_t *pair, *nextpair;
1033 int error = 0;
1034
1035 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1036 pair = nextpair) {
1037 char *name = nvpair_name(pair);
1038 char *hashp = strchr(name, '#');
1039 nextpair = nvlist_next_nvpair(innvl, pair);
1040
1041 if (hashp == NULL) {
1042 error = SET_ERROR(EINVAL);
1043 break;
1044 }
1045
1046 *hashp = '\0';
1047 error = zfs_secpolicy_write_perms(name,
1048 ZFS_DELEG_PERM_DESTROY, cr);
1049 *hashp = '#';
1050 if (error == ENOENT) {
1051 /*
1052 * Ignore any filesystems that don't exist (we consider
1053 * their bookmarks "already destroyed"). Remove
1054 * the name from the nvl here in case the filesystem
1055 * is created between now and when we try to destroy
1056 * the bookmark (in which case we don't want to
1057 * destroy it since we haven't checked for permission).
1058 */
1059 fnvlist_remove_nvpair(innvl, pair);
1060 error = 0;
1061 }
1062 if (error != 0)
1063 break;
1064 }
1065
1066 return (error);
1067 }
1068
1069 /* ARGSUSED */
1070 static int
1071 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1072 {
1073 /*
1074 * Even root must have a proper TSD so that we know what pool
1075 * to log to.
1076 */
1077 if (tsd_get(zfs_allow_log_key) == NULL)
1078 return (SET_ERROR(EPERM));
1079 return (0);
1080 }
1081
1082 static int
1083 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1084 {
1085 char parentname[ZFS_MAX_DATASET_NAME_LEN];
1086 int error;
1087 char *origin;
1088
1089 if ((error = zfs_get_parent(zc->zc_name, parentname,
1090 sizeof (parentname))) != 0)
1091 return (error);
1092
1093 if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1094 (error = zfs_secpolicy_write_perms(origin,
1095 ZFS_DELEG_PERM_CLONE, cr)) != 0)
1096 return (error);
1097
1098 if ((error = zfs_secpolicy_write_perms(parentname,
1099 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1100 return (error);
1101
1102 return (zfs_secpolicy_write_perms(parentname,
1103 ZFS_DELEG_PERM_MOUNT, cr));
1104 }
1105
1106 /*
1107 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
1108 * SYS_CONFIG privilege, which is not available in a local zone.
1109 */
1110 /* ARGSUSED */
1111 static int
1112 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1113 {
1114 if (secpolicy_sys_config(cr, B_FALSE) != 0)
1115 return (SET_ERROR(EPERM));
1116
1117 return (0);
1118 }
1119
1120 /*
1121 * Policy for object to name lookups.
1122 */
1123 /* ARGSUSED */
1124 static int
1125 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1126 {
1127 int error;
1128
1129 if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1130 return (0);
1131
1132 error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1133 return (error);
1134 }
1135
1136 /*
1137 * Policy for fault injection. Requires all privileges.
1138 */
1139 /* ARGSUSED */
1140 static int
1141 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1142 {
1143 return (secpolicy_zinject(cr));
1144 }
1145
1146 /* ARGSUSED */
1147 static int
1148 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1149 {
1150 zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1151
1152 if (prop == ZPROP_INVAL) {
1153 if (!zfs_prop_user(zc->zc_value))
1154 return (SET_ERROR(EINVAL));
1155 return (zfs_secpolicy_write_perms(zc->zc_name,
1156 ZFS_DELEG_PERM_USERPROP, cr));
1157 } else {
1158 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1159 NULL, cr));
1160 }
1161 }
1162
1163 static int
1164 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1165 {
1166 int err = zfs_secpolicy_read(zc, innvl, cr);
1167 if (err)
1168 return (err);
1169
1170 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1171 return (SET_ERROR(EINVAL));
1172
1173 if (zc->zc_value[0] == 0) {
1174 /*
1175 * They are asking about a posix uid/gid. If it's
1176 * themself, allow it.
1177 */
1178 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1179 zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1180 zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1181 zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1182 if (zc->zc_guid == crgetuid(cr))
1183 return (0);
1184 } else {
1185 if (groupmember(zc->zc_guid, cr))
1186 return (0);
1187 }
1188 }
1189
1190 return (zfs_secpolicy_write_perms(zc->zc_name,
1191 userquota_perms[zc->zc_objset_type], cr));
1192 }
1193
1194 static int
1195 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1196 {
1197 int err = zfs_secpolicy_read(zc, innvl, cr);
1198 if (err)
1199 return (err);
1200
1201 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1202 return (SET_ERROR(EINVAL));
1203
1204 return (zfs_secpolicy_write_perms(zc->zc_name,
1205 userquota_perms[zc->zc_objset_type], cr));
1206 }
1207
1208 /* ARGSUSED */
1209 static int
1210 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1211 {
1212 return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1213 NULL, cr));
1214 }
1215
1216 /* ARGSUSED */
1217 static int
1218 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1219 {
1220 nvpair_t *pair;
1221 nvlist_t *holds;
1222 int error;
1223
1224 error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1225 if (error != 0)
1226 return (SET_ERROR(EINVAL));
1227
1228 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1229 pair = nvlist_next_nvpair(holds, pair)) {
1230 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1231 error = dmu_fsname(nvpair_name(pair), fsname);
1232 if (error != 0)
1233 return (error);
1234 error = zfs_secpolicy_write_perms(fsname,
1235 ZFS_DELEG_PERM_HOLD, cr);
1236 if (error != 0)
1237 return (error);
1238 }
1239 return (0);
1240 }
1241
1242 /* ARGSUSED */
1243 static int
1244 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1245 {
1246 nvpair_t *pair;
1247 int error;
1248
1249 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1250 pair = nvlist_next_nvpair(innvl, pair)) {
1251 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1252 error = dmu_fsname(nvpair_name(pair), fsname);
1253 if (error != 0)
1254 return (error);
1255 error = zfs_secpolicy_write_perms(fsname,
1256 ZFS_DELEG_PERM_RELEASE, cr);
1257 if (error != 0)
1258 return (error);
1259 }
1260 return (0);
1261 }
1262
1263 /*
1264 * Policy for allowing temporary snapshots to be taken or released
1265 */
1266 static int
1267 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1268 {
1269 /*
1270 * A temporary snapshot is the same as a snapshot,
1271 * hold, destroy and release all rolled into one.
1272 * Delegated diff alone is sufficient that we allow this.
1273 */
1274 int error;
1275
1276 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1277 ZFS_DELEG_PERM_DIFF, cr)) == 0)
1278 return (0);
1279
1280 error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1281 if (error == 0)
1282 error = zfs_secpolicy_hold(zc, innvl, cr);
1283 if (error == 0)
1284 error = zfs_secpolicy_release(zc, innvl, cr);
1285 if (error == 0)
1286 error = zfs_secpolicy_destroy(zc, innvl, cr);
1287 return (error);
1288 }
1289
1290 /*
1291 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1292 */
1293 static int
1294 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1295 {
1296 char *packed;
1297 int error;
1298 nvlist_t *list = NULL;
1299
1300 /*
1301 * Read in and unpack the user-supplied nvlist.
1302 */
1303 if (size == 0)
1304 return (SET_ERROR(EINVAL));
1305
1306 packed = vmem_alloc(size, KM_SLEEP);
1307
1308 if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1309 iflag)) != 0) {
1310 vmem_free(packed, size);
1311 return (SET_ERROR(EFAULT));
1312 }
1313
1314 if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1315 vmem_free(packed, size);
1316 return (error);
1317 }
1318
1319 vmem_free(packed, size);
1320
1321 *nvp = list;
1322 return (0);
1323 }
1324
1325 /*
1326 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1327 * Entries will be removed from the end of the nvlist, and one int32 entry
1328 * named "N_MORE_ERRORS" will be added indicating how many entries were
1329 * removed.
1330 */
1331 static int
1332 nvlist_smush(nvlist_t *errors, size_t max)
1333 {
1334 size_t size;
1335
1336 size = fnvlist_size(errors);
1337
1338 if (size > max) {
1339 nvpair_t *more_errors;
1340 int n = 0;
1341
1342 if (max < 1024)
1343 return (SET_ERROR(ENOMEM));
1344
1345 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1346 more_errors = nvlist_prev_nvpair(errors, NULL);
1347
1348 do {
1349 nvpair_t *pair = nvlist_prev_nvpair(errors,
1350 more_errors);
1351 fnvlist_remove_nvpair(errors, pair);
1352 n++;
1353 size = fnvlist_size(errors);
1354 } while (size > max);
1355
1356 fnvlist_remove_nvpair(errors, more_errors);
1357 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1358 ASSERT3U(fnvlist_size(errors), <=, max);
1359 }
1360
1361 return (0);
1362 }
1363
1364 static int
1365 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1366 {
1367 char *packed = NULL;
1368 int error = 0;
1369 size_t size;
1370
1371 size = fnvlist_size(nvl);
1372
1373 if (size > zc->zc_nvlist_dst_size) {
1374 error = SET_ERROR(ENOMEM);
1375 } else {
1376 packed = fnvlist_pack(nvl, &size);
1377 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1378 size, zc->zc_iflags) != 0)
1379 error = SET_ERROR(EFAULT);
1380 fnvlist_pack_free(packed, size);
1381 }
1382
1383 zc->zc_nvlist_dst_size = size;
1384 zc->zc_nvlist_dst_filled = B_TRUE;
1385 return (error);
1386 }
1387
1388 static int
1389 get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
1390 {
1391 objset_t *os;
1392 int error;
1393
1394 error = dmu_objset_hold(dsname, FTAG, &os);
1395 if (error != 0)
1396 return (error);
1397 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1398 dmu_objset_rele(os, FTAG);
1399 return (SET_ERROR(EINVAL));
1400 }
1401
1402 mutex_enter(&os->os_user_ptr_lock);
1403 *zsbp = dmu_objset_get_user(os);
1404 /* bump s_active only when non-zero to prevent umount race */
1405 if (*zsbp == NULL || (*zsbp)->z_sb == NULL ||
1406 !atomic_inc_not_zero(&((*zsbp)->z_sb->s_active))) {
1407 error = SET_ERROR(ESRCH);
1408 }
1409 mutex_exit(&os->os_user_ptr_lock);
1410 dmu_objset_rele(os, FTAG);
1411 return (error);
1412 }
1413
1414 /*
1415 * Find a zfs_sb_t for a mounted filesystem, or create our own, in which
1416 * case its z_sb will be NULL, and it will be opened as the owner.
1417 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1418 * which prevents all inode ops from running.
1419 */
1420 static int
1421 zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
1422 {
1423 int error = 0;
1424
1425 if (get_zfs_sb(name, zsbp) != 0)
1426 error = zfs_sb_create(name, NULL, zsbp);
1427 if (error == 0) {
1428 rrm_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER :
1429 RW_READER, tag);
1430 if ((*zsbp)->z_unmounted) {
1431 /*
1432 * XXX we could probably try again, since the unmounting
1433 * thread should be just about to disassociate the
1434 * objset from the zsb.
1435 */
1436 rrm_exit(&(*zsbp)->z_teardown_lock, tag);
1437 return (SET_ERROR(EBUSY));
1438 }
1439 }
1440 return (error);
1441 }
1442
1443 static void
1444 zfs_sb_rele(zfs_sb_t *zsb, void *tag)
1445 {
1446 rrm_exit(&zsb->z_teardown_lock, tag);
1447
1448 if (zsb->z_sb) {
1449 deactivate_super(zsb->z_sb);
1450 } else {
1451 dmu_objset_disown(zsb->z_os, zsb);
1452 zfs_sb_free(zsb);
1453 }
1454 }
1455
1456 static int
1457 zfs_ioc_pool_create(zfs_cmd_t *zc)
1458 {
1459 int error;
1460 nvlist_t *config, *props = NULL;
1461 nvlist_t *rootprops = NULL;
1462 nvlist_t *zplprops = NULL;
1463
1464 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1465 zc->zc_iflags, &config)))
1466 return (error);
1467
1468 if (zc->zc_nvlist_src_size != 0 && (error =
1469 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1470 zc->zc_iflags, &props))) {
1471 nvlist_free(config);
1472 return (error);
1473 }
1474
1475 if (props) {
1476 nvlist_t *nvl = NULL;
1477 uint64_t version = SPA_VERSION;
1478
1479 (void) nvlist_lookup_uint64(props,
1480 zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1481 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1482 error = SET_ERROR(EINVAL);
1483 goto pool_props_bad;
1484 }
1485 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1486 if (nvl) {
1487 error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1488 if (error != 0) {
1489 nvlist_free(config);
1490 nvlist_free(props);
1491 return (error);
1492 }
1493 (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1494 }
1495 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1496 error = zfs_fill_zplprops_root(version, rootprops,
1497 zplprops, NULL);
1498 if (error != 0)
1499 goto pool_props_bad;
1500 }
1501
1502 error = spa_create(zc->zc_name, config, props, zplprops);
1503
1504 /*
1505 * Set the remaining root properties
1506 */
1507 if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1508 ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1509 (void) spa_destroy(zc->zc_name);
1510
1511 pool_props_bad:
1512 nvlist_free(rootprops);
1513 nvlist_free(zplprops);
1514 nvlist_free(config);
1515 nvlist_free(props);
1516
1517 return (error);
1518 }
1519
1520 static int
1521 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1522 {
1523 int error;
1524 zfs_log_history(zc);
1525 error = spa_destroy(zc->zc_name);
1526
1527 return (error);
1528 }
1529
1530 static int
1531 zfs_ioc_pool_import(zfs_cmd_t *zc)
1532 {
1533 nvlist_t *config, *props = NULL;
1534 uint64_t guid;
1535 int error;
1536
1537 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1538 zc->zc_iflags, &config)) != 0)
1539 return (error);
1540
1541 if (zc->zc_nvlist_src_size != 0 && (error =
1542 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1543 zc->zc_iflags, &props))) {
1544 nvlist_free(config);
1545 return (error);
1546 }
1547
1548 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1549 guid != zc->zc_guid)
1550 error = SET_ERROR(EINVAL);
1551 else
1552 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1553
1554 if (zc->zc_nvlist_dst != 0) {
1555 int err;
1556
1557 if ((err = put_nvlist(zc, config)) != 0)
1558 error = err;
1559 }
1560
1561 nvlist_free(config);
1562 nvlist_free(props);
1563
1564 return (error);
1565 }
1566
1567 static int
1568 zfs_ioc_pool_export(zfs_cmd_t *zc)
1569 {
1570 int error;
1571 boolean_t force = (boolean_t)zc->zc_cookie;
1572 boolean_t hardforce = (boolean_t)zc->zc_guid;
1573
1574 zfs_log_history(zc);
1575 error = spa_export(zc->zc_name, NULL, force, hardforce);
1576
1577 return (error);
1578 }
1579
1580 static int
1581 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1582 {
1583 nvlist_t *configs;
1584 int error;
1585
1586 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1587 return (SET_ERROR(EEXIST));
1588
1589 error = put_nvlist(zc, configs);
1590
1591 nvlist_free(configs);
1592
1593 return (error);
1594 }
1595
1596 /*
1597 * inputs:
1598 * zc_name name of the pool
1599 *
1600 * outputs:
1601 * zc_cookie real errno
1602 * zc_nvlist_dst config nvlist
1603 * zc_nvlist_dst_size size of config nvlist
1604 */
1605 static int
1606 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1607 {
1608 nvlist_t *config;
1609 int error;
1610 int ret = 0;
1611
1612 error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1613 sizeof (zc->zc_value));
1614
1615 if (config != NULL) {
1616 ret = put_nvlist(zc, config);
1617 nvlist_free(config);
1618
1619 /*
1620 * The config may be present even if 'error' is non-zero.
1621 * In this case we return success, and preserve the real errno
1622 * in 'zc_cookie'.
1623 */
1624 zc->zc_cookie = error;
1625 } else {
1626 ret = error;
1627 }
1628
1629 return (ret);
1630 }
1631
1632 /*
1633 * Try to import the given pool, returning pool stats as appropriate so that
1634 * user land knows which devices are available and overall pool health.
1635 */
1636 static int
1637 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1638 {
1639 nvlist_t *tryconfig, *config;
1640 int error;
1641
1642 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1643 zc->zc_iflags, &tryconfig)) != 0)
1644 return (error);
1645
1646 config = spa_tryimport(tryconfig);
1647
1648 nvlist_free(tryconfig);
1649
1650 if (config == NULL)
1651 return (SET_ERROR(EINVAL));
1652
1653 error = put_nvlist(zc, config);
1654 nvlist_free(config);
1655
1656 return (error);
1657 }
1658
1659 /*
1660 * inputs:
1661 * zc_name name of the pool
1662 * zc_cookie scan func (pool_scan_func_t)
1663 */
1664 static int
1665 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1666 {
1667 spa_t *spa;
1668 int error;
1669
1670 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1671 return (error);
1672
1673 if (zc->zc_cookie == POOL_SCAN_NONE)
1674 error = spa_scan_stop(spa);
1675 else
1676 error = spa_scan(spa, zc->zc_cookie);
1677
1678 spa_close(spa, FTAG);
1679
1680 return (error);
1681 }
1682
1683 static int
1684 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1685 {
1686 spa_t *spa;
1687 int error;
1688
1689 error = spa_open(zc->zc_name, &spa, FTAG);
1690 if (error == 0) {
1691 spa_freeze(spa);
1692 spa_close(spa, FTAG);
1693 }
1694 return (error);
1695 }
1696
1697 static int
1698 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1699 {
1700 spa_t *spa;
1701 int error;
1702
1703 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1704 return (error);
1705
1706 if (zc->zc_cookie < spa_version(spa) ||
1707 !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1708 spa_close(spa, FTAG);
1709 return (SET_ERROR(EINVAL));
1710 }
1711
1712 spa_upgrade(spa, zc->zc_cookie);
1713 spa_close(spa, FTAG);
1714
1715 return (error);
1716 }
1717
1718 static int
1719 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1720 {
1721 spa_t *spa;
1722 char *hist_buf;
1723 uint64_t size;
1724 int error;
1725
1726 if ((size = zc->zc_history_len) == 0)
1727 return (SET_ERROR(EINVAL));
1728
1729 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1730 return (error);
1731
1732 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1733 spa_close(spa, FTAG);
1734 return (SET_ERROR(ENOTSUP));
1735 }
1736
1737 hist_buf = vmem_alloc(size, KM_SLEEP);
1738 if ((error = spa_history_get(spa, &zc->zc_history_offset,
1739 &zc->zc_history_len, hist_buf)) == 0) {
1740 error = ddi_copyout(hist_buf,
1741 (void *)(uintptr_t)zc->zc_history,
1742 zc->zc_history_len, zc->zc_iflags);
1743 }
1744
1745 spa_close(spa, FTAG);
1746 vmem_free(hist_buf, size);
1747 return (error);
1748 }
1749
1750 static int
1751 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1752 {
1753 spa_t *spa;
1754 int error;
1755
1756 error = spa_open(zc->zc_name, &spa, FTAG);
1757 if (error == 0) {
1758 error = spa_change_guid(spa);
1759 spa_close(spa, FTAG);
1760 }
1761 return (error);
1762 }
1763
1764 static int
1765 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1766 {
1767 return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1768 }
1769
1770 /*
1771 * inputs:
1772 * zc_name name of filesystem
1773 * zc_obj object to find
1774 *
1775 * outputs:
1776 * zc_value name of object
1777 */
1778 static int
1779 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1780 {
1781 objset_t *os;
1782 int error;
1783
1784 /* XXX reading from objset not owned */
1785 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1786 return (error);
1787 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1788 dmu_objset_rele(os, FTAG);
1789 return (SET_ERROR(EINVAL));
1790 }
1791 error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1792 sizeof (zc->zc_value));
1793 dmu_objset_rele(os, FTAG);
1794
1795 return (error);
1796 }
1797
1798 /*
1799 * inputs:
1800 * zc_name name of filesystem
1801 * zc_obj object to find
1802 *
1803 * outputs:
1804 * zc_stat stats on object
1805 * zc_value path to object
1806 */
1807 static int
1808 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1809 {
1810 objset_t *os;
1811 int error;
1812
1813 /* XXX reading from objset not owned */
1814 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1815 return (error);
1816 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1817 dmu_objset_rele(os, FTAG);
1818 return (SET_ERROR(EINVAL));
1819 }
1820 error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1821 sizeof (zc->zc_value));
1822 dmu_objset_rele(os, FTAG);
1823
1824 return (error);
1825 }
1826
1827 static int
1828 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1829 {
1830 spa_t *spa;
1831 int error;
1832 nvlist_t *config;
1833
1834 error = spa_open(zc->zc_name, &spa, FTAG);
1835 if (error != 0)
1836 return (error);
1837
1838 error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1839 zc->zc_iflags, &config);
1840 if (error == 0) {
1841 error = spa_vdev_add(spa, config);
1842 nvlist_free(config);
1843 }
1844 spa_close(spa, FTAG);
1845 return (error);
1846 }
1847
1848 /*
1849 * inputs:
1850 * zc_name name of the pool
1851 * zc_nvlist_conf nvlist of devices to remove
1852 * zc_cookie to stop the remove?
1853 */
1854 static int
1855 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1856 {
1857 spa_t *spa;
1858 int error;
1859
1860 error = spa_open(zc->zc_name, &spa, FTAG);
1861 if (error != 0)
1862 return (error);
1863 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1864 spa_close(spa, FTAG);
1865 return (error);
1866 }
1867
1868 static int
1869 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1870 {
1871 spa_t *spa;
1872 int error;
1873 vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1874
1875 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1876 return (error);
1877 switch (zc->zc_cookie) {
1878 case VDEV_STATE_ONLINE:
1879 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1880 break;
1881
1882 case VDEV_STATE_OFFLINE:
1883 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1884 break;
1885
1886 case VDEV_STATE_FAULTED:
1887 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1888 zc->zc_obj != VDEV_AUX_EXTERNAL)
1889 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1890
1891 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1892 break;
1893
1894 case VDEV_STATE_DEGRADED:
1895 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1896 zc->zc_obj != VDEV_AUX_EXTERNAL)
1897 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1898
1899 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1900 break;
1901
1902 default:
1903 error = SET_ERROR(EINVAL);
1904 }
1905 zc->zc_cookie = newstate;
1906 spa_close(spa, FTAG);
1907 return (error);
1908 }
1909
1910 static int
1911 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1912 {
1913 spa_t *spa;
1914 int replacing = zc->zc_cookie;
1915 nvlist_t *config;
1916 int error;
1917
1918 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1919 return (error);
1920
1921 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1922 zc->zc_iflags, &config)) == 0) {
1923 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1924 nvlist_free(config);
1925 }
1926
1927 spa_close(spa, FTAG);
1928 return (error);
1929 }
1930
1931 static int
1932 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1933 {
1934 spa_t *spa;
1935 int error;
1936
1937 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1938 return (error);
1939
1940 error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1941
1942 spa_close(spa, FTAG);
1943 return (error);
1944 }
1945
1946 static int
1947 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1948 {
1949 spa_t *spa;
1950 nvlist_t *config, *props = NULL;
1951 int error;
1952 boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1953
1954 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1955 return (error);
1956
1957 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1958 zc->zc_iflags, &config))) {
1959 spa_close(spa, FTAG);
1960 return (error);
1961 }
1962
1963 if (zc->zc_nvlist_src_size != 0 && (error =
1964 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1965 zc->zc_iflags, &props))) {
1966 spa_close(spa, FTAG);
1967 nvlist_free(config);
1968 return (error);
1969 }
1970
1971 error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1972
1973 spa_close(spa, FTAG);
1974
1975 nvlist_free(config);
1976 nvlist_free(props);
1977
1978 return (error);
1979 }
1980
1981 static int
1982 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1983 {
1984 spa_t *spa;
1985 char *path = zc->zc_value;
1986 uint64_t guid = zc->zc_guid;
1987 int error;
1988
1989 error = spa_open(zc->zc_name, &spa, FTAG);
1990 if (error != 0)
1991 return (error);
1992
1993 error = spa_vdev_setpath(spa, guid, path);
1994 spa_close(spa, FTAG);
1995 return (error);
1996 }
1997
1998 static int
1999 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2000 {
2001 spa_t *spa;
2002 char *fru = zc->zc_value;
2003 uint64_t guid = zc->zc_guid;
2004 int error;
2005
2006 error = spa_open(zc->zc_name, &spa, FTAG);
2007 if (error != 0)
2008 return (error);
2009
2010 error = spa_vdev_setfru(spa, guid, fru);
2011 spa_close(spa, FTAG);
2012 return (error);
2013 }
2014
2015 static int
2016 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2017 {
2018 int error = 0;
2019 nvlist_t *nv;
2020
2021 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2022
2023 if (zc->zc_nvlist_dst != 0 &&
2024 (error = dsl_prop_get_all(os, &nv)) == 0) {
2025 dmu_objset_stats(os, nv);
2026 /*
2027 * NB: zvol_get_stats() will read the objset contents,
2028 * which we aren't supposed to do with a
2029 * DS_MODE_USER hold, because it could be
2030 * inconsistent. So this is a bit of a workaround...
2031 * XXX reading with out owning
2032 */
2033 if (!zc->zc_objset_stats.dds_inconsistent &&
2034 dmu_objset_type(os) == DMU_OST_ZVOL) {
2035 error = zvol_get_stats(os, nv);
2036 if (error == EIO)
2037 return (error);
2038 VERIFY0(error);
2039 }
2040 if (error == 0)
2041 error = put_nvlist(zc, nv);
2042 nvlist_free(nv);
2043 }
2044
2045 return (error);
2046 }
2047
2048 /*
2049 * inputs:
2050 * zc_name name of filesystem
2051 * zc_nvlist_dst_size size of buffer for property nvlist
2052 *
2053 * outputs:
2054 * zc_objset_stats stats
2055 * zc_nvlist_dst property nvlist
2056 * zc_nvlist_dst_size size of property nvlist
2057 */
2058 static int
2059 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2060 {
2061 objset_t *os;
2062 int error;
2063
2064 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2065 if (error == 0) {
2066 error = zfs_ioc_objset_stats_impl(zc, os);
2067 dmu_objset_rele(os, FTAG);
2068 }
2069
2070 return (error);
2071 }
2072
2073 /*
2074 * inputs:
2075 * zc_name name of filesystem
2076 * zc_nvlist_dst_size size of buffer for property nvlist
2077 *
2078 * outputs:
2079 * zc_nvlist_dst received property nvlist
2080 * zc_nvlist_dst_size size of received property nvlist
2081 *
2082 * Gets received properties (distinct from local properties on or after
2083 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2084 * local property values.
2085 */
2086 static int
2087 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2088 {
2089 int error = 0;
2090 nvlist_t *nv;
2091
2092 /*
2093 * Without this check, we would return local property values if the
2094 * caller has not already received properties on or after
2095 * SPA_VERSION_RECVD_PROPS.
2096 */
2097 if (!dsl_prop_get_hasrecvd(zc->zc_name))
2098 return (SET_ERROR(ENOTSUP));
2099
2100 if (zc->zc_nvlist_dst != 0 &&
2101 (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2102 error = put_nvlist(zc, nv);
2103 nvlist_free(nv);
2104 }
2105
2106 return (error);
2107 }
2108
2109 static int
2110 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2111 {
2112 uint64_t value;
2113 int error;
2114
2115 /*
2116 * zfs_get_zplprop() will either find a value or give us
2117 * the default value (if there is one).
2118 */
2119 if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2120 return (error);
2121 VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2122 return (0);
2123 }
2124
2125 /*
2126 * inputs:
2127 * zc_name name of filesystem
2128 * zc_nvlist_dst_size size of buffer for zpl property nvlist
2129 *
2130 * outputs:
2131 * zc_nvlist_dst zpl property nvlist
2132 * zc_nvlist_dst_size size of zpl property nvlist
2133 */
2134 static int
2135 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2136 {
2137 objset_t *os;
2138 int err;
2139
2140 /* XXX reading without owning */
2141 if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2142 return (err);
2143
2144 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2145
2146 /*
2147 * NB: nvl_add_zplprop() will read the objset contents,
2148 * which we aren't supposed to do with a DS_MODE_USER
2149 * hold, because it could be inconsistent.
2150 */
2151 if (zc->zc_nvlist_dst != 0 &&
2152 !zc->zc_objset_stats.dds_inconsistent &&
2153 dmu_objset_type(os) == DMU_OST_ZFS) {
2154 nvlist_t *nv;
2155
2156 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2157 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2158 (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2159 (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2160 (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2161 err = put_nvlist(zc, nv);
2162 nvlist_free(nv);
2163 } else {
2164 err = SET_ERROR(ENOENT);
2165 }
2166 dmu_objset_rele(os, FTAG);
2167 return (err);
2168 }
2169
2170 boolean_t
2171 dataset_name_hidden(const char *name)
2172 {
2173 /*
2174 * Skip over datasets that are not visible in this zone,
2175 * internal datasets (which have a $ in their name), and
2176 * temporary datasets (which have a % in their name).
2177 */
2178 if (strchr(name, '$') != NULL)
2179 return (B_TRUE);
2180 if (strchr(name, '%') != NULL)
2181 return (B_TRUE);
2182 if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2183 return (B_TRUE);
2184 return (B_FALSE);
2185 }
2186
2187 /*
2188 * inputs:
2189 * zc_name name of filesystem
2190 * zc_cookie zap cursor
2191 * zc_nvlist_dst_size size of buffer for property nvlist
2192 *
2193 * outputs:
2194 * zc_name name of next filesystem
2195 * zc_cookie zap cursor
2196 * zc_objset_stats stats
2197 * zc_nvlist_dst property nvlist
2198 * zc_nvlist_dst_size size of property nvlist
2199 */
2200 static int
2201 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2202 {
2203 objset_t *os;
2204 int error;
2205 char *p;
2206 size_t orig_len = strlen(zc->zc_name);
2207
2208 top:
2209 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2210 if (error == ENOENT)
2211 error = SET_ERROR(ESRCH);
2212 return (error);
2213 }
2214
2215 p = strrchr(zc->zc_name, '/');
2216 if (p == NULL || p[1] != '\0')
2217 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2218 p = zc->zc_name + strlen(zc->zc_name);
2219
2220 do {
2221 error = dmu_dir_list_next(os,
2222 sizeof (zc->zc_name) - (p - zc->zc_name), p,
2223 NULL, &zc->zc_cookie);
2224 if (error == ENOENT)
2225 error = SET_ERROR(ESRCH);
2226 } while (error == 0 && dataset_name_hidden(zc->zc_name));
2227 dmu_objset_rele(os, FTAG);
2228
2229 /*
2230 * If it's an internal dataset (ie. with a '$' in its name),
2231 * don't try to get stats for it, otherwise we'll return ENOENT.
2232 */
2233 if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2234 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2235 if (error == ENOENT) {
2236 /* We lost a race with destroy, get the next one. */
2237 zc->zc_name[orig_len] = '\0';
2238 goto top;
2239 }
2240 }
2241 return (error);
2242 }
2243
2244 /*
2245 * inputs:
2246 * zc_name name of filesystem
2247 * zc_cookie zap cursor
2248 * zc_nvlist_dst_size size of buffer for property nvlist
2249 *
2250 * outputs:
2251 * zc_name name of next snapshot
2252 * zc_objset_stats stats
2253 * zc_nvlist_dst property nvlist
2254 * zc_nvlist_dst_size size of property nvlist
2255 */
2256 static int
2257 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2258 {
2259 objset_t *os;
2260 int error;
2261
2262 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2263 if (error != 0) {
2264 return (error == ENOENT ? ESRCH : error);
2265 }
2266
2267 /*
2268 * A dataset name of maximum length cannot have any snapshots,
2269 * so exit immediately.
2270 */
2271 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2272 ZFS_MAX_DATASET_NAME_LEN) {
2273 dmu_objset_rele(os, FTAG);
2274 return (SET_ERROR(ESRCH));
2275 }
2276
2277 error = dmu_snapshot_list_next(os,
2278 sizeof (zc->zc_name) - strlen(zc->zc_name),
2279 zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2280 NULL);
2281
2282 if (error == 0 && !zc->zc_simple) {
2283 dsl_dataset_t *ds;
2284 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2285
2286 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2287 if (error == 0) {
2288 objset_t *ossnap;
2289
2290 error = dmu_objset_from_ds(ds, &ossnap);
2291 if (error == 0)
2292 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2293 dsl_dataset_rele(ds, FTAG);
2294 }
2295 } else if (error == ENOENT) {
2296 error = SET_ERROR(ESRCH);
2297 }
2298
2299 dmu_objset_rele(os, FTAG);
2300 /* if we failed, undo the @ that we tacked on to zc_name */
2301 if (error != 0)
2302 *strchr(zc->zc_name, '@') = '\0';
2303 return (error);
2304 }
2305
2306 static int
2307 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2308 {
2309 const char *propname = nvpair_name(pair);
2310 uint64_t *valary;
2311 unsigned int vallen;
2312 const char *domain;
2313 char *dash;
2314 zfs_userquota_prop_t type;
2315 uint64_t rid;
2316 uint64_t quota;
2317 zfs_sb_t *zsb;
2318 int err;
2319
2320 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2321 nvlist_t *attrs;
2322 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2323 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2324 &pair) != 0)
2325 return (SET_ERROR(EINVAL));
2326 }
2327
2328 /*
2329 * A correctly constructed propname is encoded as
2330 * userquota@<rid>-<domain>.
2331 */
2332 if ((dash = strchr(propname, '-')) == NULL ||
2333 nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2334 vallen != 3)
2335 return (SET_ERROR(EINVAL));
2336
2337 domain = dash + 1;
2338 type = valary[0];
2339 rid = valary[1];
2340 quota = valary[2];
2341
2342 err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE);
2343 if (err == 0) {
2344 err = zfs_set_userquota(zsb, type, domain, rid, quota);
2345 zfs_sb_rele(zsb, FTAG);
2346 }
2347
2348 return (err);
2349 }
2350
2351 /*
2352 * If the named property is one that has a special function to set its value,
2353 * return 0 on success and a positive error code on failure; otherwise if it is
2354 * not one of the special properties handled by this function, return -1.
2355 *
2356 * XXX: It would be better for callers of the property interface if we handled
2357 * these special cases in dsl_prop.c (in the dsl layer).
2358 */
2359 static int
2360 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2361 nvpair_t *pair)
2362 {
2363 const char *propname = nvpair_name(pair);
2364 zfs_prop_t prop = zfs_name_to_prop(propname);
2365 uint64_t intval;
2366 int err = -1;
2367
2368 if (prop == ZPROP_INVAL) {
2369 if (zfs_prop_userquota(propname))
2370 return (zfs_prop_set_userquota(dsname, pair));
2371 return (-1);
2372 }
2373
2374 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2375 nvlist_t *attrs;
2376 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2377 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2378 &pair) == 0);
2379 }
2380
2381 if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2382 return (-1);
2383
2384 VERIFY(0 == nvpair_value_uint64(pair, &intval));
2385
2386 switch (prop) {
2387 case ZFS_PROP_QUOTA:
2388 err = dsl_dir_set_quota(dsname, source, intval);
2389 break;
2390 case ZFS_PROP_REFQUOTA:
2391 err = dsl_dataset_set_refquota(dsname, source, intval);
2392 break;
2393 case ZFS_PROP_FILESYSTEM_LIMIT:
2394 case ZFS_PROP_SNAPSHOT_LIMIT:
2395 if (intval == UINT64_MAX) {
2396 /* clearing the limit, just do it */
2397 err = 0;
2398 } else {
2399 err = dsl_dir_activate_fs_ss_limit(dsname);
2400 }
2401 /*
2402 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2403 * default path to set the value in the nvlist.
2404 */
2405 if (err == 0)
2406 err = -1;
2407 break;
2408 case ZFS_PROP_RESERVATION:
2409 err = dsl_dir_set_reservation(dsname, source, intval);
2410 break;
2411 case ZFS_PROP_REFRESERVATION:
2412 err = dsl_dataset_set_refreservation(dsname, source, intval);
2413 break;
2414 case ZFS_PROP_VOLSIZE:
2415 err = zvol_set_volsize(dsname, intval);
2416 break;
2417 case ZFS_PROP_SNAPDEV:
2418 err = zvol_set_snapdev(dsname, source, intval);
2419 break;
2420 case ZFS_PROP_VERSION:
2421 {
2422 zfs_sb_t *zsb;
2423
2424 if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0)
2425 break;
2426
2427 err = zfs_set_version(zsb, intval);
2428 zfs_sb_rele(zsb, FTAG);
2429
2430 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2431 zfs_cmd_t *zc;
2432
2433 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2434 (void) strcpy(zc->zc_name, dsname);
2435 (void) zfs_ioc_userspace_upgrade(zc);
2436 (void) zfs_ioc_userobjspace_upgrade(zc);
2437 kmem_free(zc, sizeof (zfs_cmd_t));
2438 }
2439 break;
2440 }
2441 default:
2442 err = -1;
2443 }
2444
2445 return (err);
2446 }
2447
2448 /*
2449 * This function is best effort. If it fails to set any of the given properties,
2450 * it continues to set as many as it can and returns the last error
2451 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2452 * with the list of names of all the properties that failed along with the
2453 * corresponding error numbers.
2454 *
2455 * If every property is set successfully, zero is returned and errlist is not
2456 * modified.
2457 */
2458 int
2459 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2460 nvlist_t *errlist)
2461 {
2462 nvpair_t *pair;
2463 nvpair_t *propval;
2464 int rv = 0;
2465 uint64_t intval;
2466 char *strval;
2467
2468 nvlist_t *genericnvl = fnvlist_alloc();
2469 nvlist_t *retrynvl = fnvlist_alloc();
2470 retry:
2471 pair = NULL;
2472 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2473 const char *propname = nvpair_name(pair);
2474 zfs_prop_t prop = zfs_name_to_prop(propname);
2475 int err = 0;
2476
2477 /* decode the property value */
2478 propval = pair;
2479 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2480 nvlist_t *attrs;
2481 attrs = fnvpair_value_nvlist(pair);
2482 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2483 &propval) != 0)
2484 err = SET_ERROR(EINVAL);
2485 }
2486
2487 /* Validate value type */
2488 if (err == 0 && prop == ZPROP_INVAL) {
2489 if (zfs_prop_user(propname)) {
2490 if (nvpair_type(propval) != DATA_TYPE_STRING)
2491 err = SET_ERROR(EINVAL);
2492 } else if (zfs_prop_userquota(propname)) {
2493 if (nvpair_type(propval) !=
2494 DATA_TYPE_UINT64_ARRAY)
2495 err = SET_ERROR(EINVAL);
2496 } else {
2497 err = SET_ERROR(EINVAL);
2498 }
2499 } else if (err == 0) {
2500 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2501 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2502 err = SET_ERROR(EINVAL);
2503 } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2504 const char *unused;
2505
2506 intval = fnvpair_value_uint64(propval);
2507
2508 switch (zfs_prop_get_type(prop)) {
2509 case PROP_TYPE_NUMBER:
2510 break;
2511 case PROP_TYPE_STRING:
2512 err = SET_ERROR(EINVAL);
2513 break;
2514 case PROP_TYPE_INDEX:
2515 if (zfs_prop_index_to_string(prop,
2516 intval, &unused) != 0)
2517 err = SET_ERROR(EINVAL);
2518 break;
2519 default:
2520 cmn_err(CE_PANIC,
2521 "unknown property type");
2522 }
2523 } else {
2524 err = SET_ERROR(EINVAL);
2525 }
2526 }
2527
2528 /* Validate permissions */
2529 if (err == 0)
2530 err = zfs_check_settable(dsname, pair, CRED());
2531
2532 if (err == 0) {
2533 err = zfs_prop_set_special(dsname, source, pair);
2534 if (err == -1) {
2535 /*
2536 * For better performance we build up a list of
2537 * properties to set in a single transaction.
2538 */
2539 err = nvlist_add_nvpair(genericnvl, pair);
2540 } else if (err != 0 && nvl != retrynvl) {
2541 /*
2542 * This may be a spurious error caused by
2543 * receiving quota and reservation out of order.
2544 * Try again in a second pass.
2545 */
2546 err = nvlist_add_nvpair(retrynvl, pair);
2547 }
2548 }
2549
2550 if (err != 0) {
2551 if (errlist != NULL)
2552 fnvlist_add_int32(errlist, propname, err);
2553 rv = err;
2554 }
2555 }
2556
2557 if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2558 nvl = retrynvl;
2559 goto retry;
2560 }
2561
2562 if (!nvlist_empty(genericnvl) &&
2563 dsl_props_set(dsname, source, genericnvl) != 0) {
2564 /*
2565 * If this fails, we still want to set as many properties as we
2566 * can, so try setting them individually.
2567 */
2568 pair = NULL;
2569 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2570 const char *propname = nvpair_name(pair);
2571 int err = 0;
2572
2573 propval = pair;
2574 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2575 nvlist_t *attrs;
2576 attrs = fnvpair_value_nvlist(pair);
2577 propval = fnvlist_lookup_nvpair(attrs,
2578 ZPROP_VALUE);
2579 }
2580
2581 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2582 strval = fnvpair_value_string(propval);
2583 err = dsl_prop_set_string(dsname, propname,
2584 source, strval);
2585 } else {
2586 intval = fnvpair_value_uint64(propval);
2587 err = dsl_prop_set_int(dsname, propname, source,
2588 intval);
2589 }
2590
2591 if (err != 0) {
2592 if (errlist != NULL) {
2593 fnvlist_add_int32(errlist, propname,
2594 err);
2595 }
2596 rv = err;
2597 }
2598 }
2599 }
2600 nvlist_free(genericnvl);
2601 nvlist_free(retrynvl);
2602
2603 return (rv);
2604 }
2605
2606 /*
2607 * Check that all the properties are valid user properties.
2608 */
2609 static int
2610 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2611 {
2612 nvpair_t *pair = NULL;
2613 int error = 0;
2614
2615 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2616 const char *propname = nvpair_name(pair);
2617
2618 if (!zfs_prop_user(propname) ||
2619 nvpair_type(pair) != DATA_TYPE_STRING)
2620 return (SET_ERROR(EINVAL));
2621
2622 if ((error = zfs_secpolicy_write_perms(fsname,
2623 ZFS_DELEG_PERM_USERPROP, CRED())))
2624 return (error);
2625
2626 if (strlen(propname) >= ZAP_MAXNAMELEN)
2627 return (SET_ERROR(ENAMETOOLONG));
2628
2629 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2630 return (SET_ERROR(E2BIG));
2631 }
2632 return (0);
2633 }
2634
2635 static void
2636 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2637 {
2638 nvpair_t *pair;
2639
2640 VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2641
2642 pair = NULL;
2643 while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2644 if (nvlist_exists(skipped, nvpair_name(pair)))
2645 continue;
2646
2647 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2648 }
2649 }
2650
2651 static int
2652 clear_received_props(const char *dsname, nvlist_t *props,
2653 nvlist_t *skipped)
2654 {
2655 int err = 0;
2656 nvlist_t *cleared_props = NULL;
2657 props_skip(props, skipped, &cleared_props);
2658 if (!nvlist_empty(cleared_props)) {
2659 /*
2660 * Acts on local properties until the dataset has received
2661 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2662 */
2663 zprop_source_t flags = (ZPROP_SRC_NONE |
2664 (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2665 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2666 }
2667 nvlist_free(cleared_props);
2668 return (err);
2669 }
2670
2671 /*
2672 * inputs:
2673 * zc_name name of filesystem
2674 * zc_value name of property to set
2675 * zc_nvlist_src{_size} nvlist of properties to apply
2676 * zc_cookie received properties flag
2677 *
2678 * outputs:
2679 * zc_nvlist_dst{_size} error for each unapplied received property
2680 */
2681 static int
2682 zfs_ioc_set_prop(zfs_cmd_t *zc)
2683 {
2684 nvlist_t *nvl;
2685 boolean_t received = zc->zc_cookie;
2686 zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2687 ZPROP_SRC_LOCAL);
2688 nvlist_t *errors;
2689 int error;
2690
2691 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2692 zc->zc_iflags, &nvl)) != 0)
2693 return (error);
2694
2695 if (received) {
2696 nvlist_t *origprops;
2697
2698 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2699 (void) clear_received_props(zc->zc_name,
2700 origprops, nvl);
2701 nvlist_free(origprops);
2702 }
2703
2704 error = dsl_prop_set_hasrecvd(zc->zc_name);
2705 }
2706
2707 errors = fnvlist_alloc();
2708 if (error == 0)
2709 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2710
2711 if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2712 (void) put_nvlist(zc, errors);
2713 }
2714
2715 nvlist_free(errors);
2716 nvlist_free(nvl);
2717 return (error);
2718 }
2719
2720 /*
2721 * inputs:
2722 * zc_name name of filesystem
2723 * zc_value name of property to inherit
2724 * zc_cookie revert to received value if TRUE
2725 *
2726 * outputs: none
2727 */
2728 static int
2729 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2730 {
2731 const char *propname = zc->zc_value;
2732 zfs_prop_t prop = zfs_name_to_prop(propname);
2733 boolean_t received = zc->zc_cookie;
2734 zprop_source_t source = (received
2735 ? ZPROP_SRC_NONE /* revert to received value, if any */
2736 : ZPROP_SRC_INHERITED); /* explicitly inherit */
2737
2738 if (received) {
2739 nvlist_t *dummy;
2740 nvpair_t *pair;
2741 zprop_type_t type;
2742 int err;
2743
2744 /*
2745 * zfs_prop_set_special() expects properties in the form of an
2746 * nvpair with type info.
2747 */
2748 if (prop == ZPROP_INVAL) {
2749 if (!zfs_prop_user(propname))
2750 return (SET_ERROR(EINVAL));
2751
2752 type = PROP_TYPE_STRING;
2753 } else if (prop == ZFS_PROP_VOLSIZE ||
2754 prop == ZFS_PROP_VERSION) {
2755 return (SET_ERROR(EINVAL));
2756 } else {
2757 type = zfs_prop_get_type(prop);
2758 }
2759
2760 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2761
2762 switch (type) {
2763 case PROP_TYPE_STRING:
2764 VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2765 break;
2766 case PROP_TYPE_NUMBER:
2767 case PROP_TYPE_INDEX:
2768 VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2769 break;
2770 default:
2771 nvlist_free(dummy);
2772 return (SET_ERROR(EINVAL));
2773 }
2774
2775 pair = nvlist_next_nvpair(dummy, NULL);
2776 if (pair == NULL) {
2777 nvlist_free(dummy);
2778 return (SET_ERROR(EINVAL));
2779 }
2780 err = zfs_prop_set_special(zc->zc_name, source, pair);
2781 nvlist_free(dummy);
2782 if (err != -1)
2783 return (err); /* special property already handled */
2784 } else {
2785 /*
2786 * Only check this in the non-received case. We want to allow
2787 * 'inherit -S' to revert non-inheritable properties like quota
2788 * and reservation to the received or default values even though
2789 * they are not considered inheritable.
2790 */
2791 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2792 return (SET_ERROR(EINVAL));
2793 }
2794
2795 /* property name has been validated by zfs_secpolicy_inherit_prop() */
2796 return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2797 }
2798
2799 static int
2800 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2801 {
2802 nvlist_t *props;
2803 spa_t *spa;
2804 int error;
2805 nvpair_t *pair;
2806
2807 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2808 zc->zc_iflags, &props)))
2809 return (error);
2810
2811 /*
2812 * If the only property is the configfile, then just do a spa_lookup()
2813 * to handle the faulted case.
2814 */
2815 pair = nvlist_next_nvpair(props, NULL);
2816 if (pair != NULL && strcmp(nvpair_name(pair),
2817 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2818 nvlist_next_nvpair(props, pair) == NULL) {
2819 mutex_enter(&spa_namespace_lock);
2820 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2821 spa_configfile_set(spa, props, B_FALSE);
2822 spa_config_sync(spa, B_FALSE, B_TRUE);
2823 }
2824 mutex_exit(&spa_namespace_lock);
2825 if (spa != NULL) {
2826 nvlist_free(props);
2827 return (0);
2828 }
2829 }
2830
2831 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2832 nvlist_free(props);
2833 return (error);
2834 }
2835
2836 error = spa_prop_set(spa, props);
2837
2838 nvlist_free(props);
2839 spa_close(spa, FTAG);
2840
2841 return (error);
2842 }
2843
2844 static int
2845 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2846 {
2847 spa_t *spa;
2848 int error;
2849 nvlist_t *nvp = NULL;
2850
2851 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2852 /*
2853 * If the pool is faulted, there may be properties we can still
2854 * get (such as altroot and cachefile), so attempt to get them
2855 * anyway.
2856 */
2857 mutex_enter(&spa_namespace_lock);
2858 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2859 error = spa_prop_get(spa, &nvp);
2860 mutex_exit(&spa_namespace_lock);
2861 } else {
2862 error = spa_prop_get(spa, &nvp);
2863 spa_close(spa, FTAG);
2864 }
2865
2866 if (error == 0 && zc->zc_nvlist_dst != 0)
2867 error = put_nvlist(zc, nvp);
2868 else
2869 error = SET_ERROR(EFAULT);
2870
2871 nvlist_free(nvp);
2872 return (error);
2873 }
2874
2875 /*
2876 * inputs:
2877 * zc_name name of filesystem
2878 * zc_nvlist_src{_size} nvlist of delegated permissions
2879 * zc_perm_action allow/unallow flag
2880 *
2881 * outputs: none
2882 */
2883 static int
2884 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2885 {
2886 int error;
2887 nvlist_t *fsaclnv = NULL;
2888
2889 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2890 zc->zc_iflags, &fsaclnv)) != 0)
2891 return (error);
2892
2893 /*
2894 * Verify nvlist is constructed correctly
2895 */
2896 if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2897 nvlist_free(fsaclnv);
2898 return (SET_ERROR(EINVAL));
2899 }
2900
2901 /*
2902 * If we don't have PRIV_SYS_MOUNT, then validate
2903 * that user is allowed to hand out each permission in
2904 * the nvlist(s)
2905 */
2906
2907 error = secpolicy_zfs(CRED());
2908 if (error != 0) {
2909 if (zc->zc_perm_action == B_FALSE) {
2910 error = dsl_deleg_can_allow(zc->zc_name,
2911 fsaclnv, CRED());
2912 } else {
2913 error = dsl_deleg_can_unallow(zc->zc_name,
2914 fsaclnv, CRED());
2915 }
2916 }
2917
2918 if (error == 0)
2919 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2920
2921 nvlist_free(fsaclnv);
2922 return (error);
2923 }
2924
2925 /*
2926 * inputs:
2927 * zc_name name of filesystem
2928 *
2929 * outputs:
2930 * zc_nvlist_src{_size} nvlist of delegated permissions
2931 */
2932 static int
2933 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2934 {
2935 nvlist_t *nvp;
2936 int error;
2937
2938 if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2939 error = put_nvlist(zc, nvp);
2940 nvlist_free(nvp);
2941 }
2942
2943 return (error);
2944 }
2945
2946 /* ARGSUSED */
2947 static void
2948 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2949 {
2950 zfs_creat_t *zct = arg;
2951
2952 zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2953 }
2954
2955 #define ZFS_PROP_UNDEFINED ((uint64_t)-1)
2956
2957 /*
2958 * inputs:
2959 * os parent objset pointer (NULL if root fs)
2960 * fuids_ok fuids allowed in this version of the spa?
2961 * sa_ok SAs allowed in this version of the spa?
2962 * createprops list of properties requested by creator
2963 *
2964 * outputs:
2965 * zplprops values for the zplprops we attach to the master node object
2966 * is_ci true if requested file system will be purely case-insensitive
2967 *
2968 * Determine the settings for utf8only, normalization and
2969 * casesensitivity. Specific values may have been requested by the
2970 * creator and/or we can inherit values from the parent dataset. If
2971 * the file system is of too early a vintage, a creator can not
2972 * request settings for these properties, even if the requested
2973 * setting is the default value. We don't actually want to create dsl
2974 * properties for these, so remove them from the source nvlist after
2975 * processing.
2976 */
2977 static int
2978 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2979 boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2980 nvlist_t *zplprops, boolean_t *is_ci)
2981 {
2982 uint64_t sense = ZFS_PROP_UNDEFINED;
2983 uint64_t norm = ZFS_PROP_UNDEFINED;
2984 uint64_t u8 = ZFS_PROP_UNDEFINED;
2985 int error;
2986
2987 ASSERT(zplprops != NULL);
2988
2989 /*
2990 * Pull out creator prop choices, if any.
2991 */
2992 if (createprops) {
2993 (void) nvlist_lookup_uint64(createprops,
2994 zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2995 (void) nvlist_lookup_uint64(createprops,
2996 zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2997 (void) nvlist_remove_all(createprops,
2998 zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2999 (void) nvlist_lookup_uint64(createprops,
3000 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3001 (void) nvlist_remove_all(createprops,
3002 zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3003 (void) nvlist_lookup_uint64(createprops,
3004 zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3005 (void) nvlist_remove_all(createprops,
3006 zfs_prop_to_name(ZFS_PROP_CASE));
3007 }
3008
3009 /*
3010 * If the zpl version requested is whacky or the file system
3011 * or pool is version is too "young" to support normalization
3012 * and the creator tried to set a value for one of the props,
3013 * error out.
3014 */
3015 if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3016 (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3017 (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3018 (zplver < ZPL_VERSION_NORMALIZATION &&
3019 (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3020 sense != ZFS_PROP_UNDEFINED)))
3021 return (SET_ERROR(ENOTSUP));
3022
3023 /*
3024 * Put the version in the zplprops
3025 */
3026 VERIFY(nvlist_add_uint64(zplprops,
3027 zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3028
3029 if (norm == ZFS_PROP_UNDEFINED &&
3030 (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3031 return (error);
3032 VERIFY(nvlist_add_uint64(zplprops,
3033 zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3034
3035 /*
3036 * If we're normalizing, names must always be valid UTF-8 strings.
3037 */
3038 if (norm)
3039 u8 = 1;
3040 if (u8 == ZFS_PROP_UNDEFINED &&
3041 (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3042 return (error);
3043 VERIFY(nvlist_add_uint64(zplprops,
3044 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3045
3046 if (sense == ZFS_PROP_UNDEFINED &&
3047 (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3048 return (error);
3049 VERIFY(nvlist_add_uint64(zplprops,
3050 zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3051
3052 if (is_ci)
3053 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3054
3055 return (0);
3056 }
3057
3058 static int
3059 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3060 nvlist_t *zplprops, boolean_t *is_ci)
3061 {
3062 boolean_t fuids_ok, sa_ok;
3063 uint64_t zplver = ZPL_VERSION;
3064 objset_t *os = NULL;
3065 char parentname[ZFS_MAX_DATASET_NAME_LEN];
3066 char *cp;
3067 spa_t *spa;
3068 uint64_t spa_vers;
3069 int error;
3070
3071 (void) strlcpy(parentname, dataset, sizeof (parentname));
3072 cp = strrchr(parentname, '/');
3073 ASSERT(cp != NULL);
3074 cp[0] = '\0';
3075
3076 if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3077 return (error);
3078
3079 spa_vers = spa_version(spa);
3080 spa_close(spa, FTAG);
3081
3082 zplver = zfs_zpl_version_map(spa_vers);
3083 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3084 sa_ok = (zplver >= ZPL_VERSION_SA);
3085
3086 /*
3087 * Open parent object set so we can inherit zplprop values.
3088 */
3089 if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3090 return (error);
3091
3092 error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3093 zplprops, is_ci);
3094 dmu_objset_rele(os, FTAG);
3095 return (error);
3096 }
3097
3098 static int
3099 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3100 nvlist_t *zplprops, boolean_t *is_ci)
3101 {
3102 boolean_t fuids_ok;
3103 boolean_t sa_ok;
3104 uint64_t zplver = ZPL_VERSION;
3105 int error;
3106
3107 zplver = zfs_zpl_version_map(spa_vers);
3108 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3109 sa_ok = (zplver >= ZPL_VERSION_SA);
3110
3111 error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3112 createprops, zplprops, is_ci);
3113 return (error);
3114 }
3115
3116 /*
3117 * innvl: {
3118 * "type" -> dmu_objset_type_t (int32)
3119 * (optional) "props" -> { prop -> value }
3120 * }
3121 *
3122 * outnvl: propname -> error code (int32)
3123 */
3124 static int
3125 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3126 {
3127 int error = 0;
3128 zfs_creat_t zct = { 0 };
3129 nvlist_t *nvprops = NULL;
3130 void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3131 int32_t type32;
3132 dmu_objset_type_t type;
3133 boolean_t is_insensitive = B_FALSE;
3134
3135 if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3136 return (SET_ERROR(EINVAL));
3137 type = type32;
3138 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3139
3140 switch (type) {
3141 case DMU_OST_ZFS:
3142 cbfunc = zfs_create_cb;
3143 break;
3144
3145 case DMU_OST_ZVOL:
3146 cbfunc = zvol_create_cb;
3147 break;
3148
3149 default:
3150 cbfunc = NULL;
3151 break;
3152 }
3153 if (strchr(fsname, '@') ||
3154 strchr(fsname, '%'))
3155 return (SET_ERROR(EINVAL));
3156
3157 zct.zct_props = nvprops;
3158
3159 if (cbfunc == NULL)
3160 return (SET_ERROR(EINVAL));
3161
3162 if (type == DMU_OST_ZVOL) {
3163 uint64_t volsize, volblocksize;
3164
3165 if (nvprops == NULL)
3166 return (SET_ERROR(EINVAL));
3167 if (nvlist_lookup_uint64(nvprops,
3168 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3169 return (SET_ERROR(EINVAL));
3170
3171 if ((error = nvlist_lookup_uint64(nvprops,
3172 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3173 &volblocksize)) != 0 && error != ENOENT)
3174 return (SET_ERROR(EINVAL));
3175
3176 if (error != 0)
3177 volblocksize = zfs_prop_default_numeric(
3178 ZFS_PROP_VOLBLOCKSIZE);
3179
3180 if ((error = zvol_check_volblocksize(fsname,
3181 volblocksize)) != 0 ||
3182 (error = zvol_check_volsize(volsize,
3183 volblocksize)) != 0)
3184 return (error);
3185 } else if (type == DMU_OST_ZFS) {
3186 int error;
3187
3188 /*
3189 * We have to have normalization and
3190 * case-folding flags correct when we do the
3191 * file system creation, so go figure them out
3192 * now.
3193 */
3194 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3195 NV_UNIQUE_NAME, KM_SLEEP) == 0);
3196 error = zfs_fill_zplprops(fsname, nvprops,
3197 zct.zct_zplprops, &is_insensitive);
3198 if (error != 0) {
3199 nvlist_free(zct.zct_zplprops);
3200 return (error);
3201 }
3202 }
3203
3204 error = dmu_objset_create(fsname, type,
3205 is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3206 nvlist_free(zct.zct_zplprops);
3207
3208 /*
3209 * It would be nice to do this atomically.
3210 */
3211 if (error == 0) {
3212 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3213 nvprops, outnvl);
3214 if (error != 0) {
3215 spa_t *spa;
3216 int error2;
3217
3218 /*
3219 * Volumes will return EBUSY and cannot be destroyed
3220 * until all asynchronous minor handling has completed.
3221 * Wait for the spa_zvol_taskq to drain then retry.
3222 */
3223 error2 = dsl_destroy_head(fsname);
3224 while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3225 error2 = spa_open(fsname, &spa, FTAG);
3226 if (error2 == 0) {
3227 taskq_wait(spa->spa_zvol_taskq);
3228 spa_close(spa, FTAG);
3229 }
3230 error2 = dsl_destroy_head(fsname);
3231 }
3232 }
3233 }
3234 return (error);
3235 }
3236
3237 /*
3238 * innvl: {
3239 * "origin" -> name of origin snapshot
3240 * (optional) "props" -> { prop -> value }
3241 * }
3242 *
3243 * outputs:
3244 * outnvl: propname -> error code (int32)
3245 */
3246 static int
3247 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3248 {
3249 int error = 0;
3250 nvlist_t *nvprops = NULL;
3251 char *origin_name;
3252
3253 if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3254 return (SET_ERROR(EINVAL));
3255 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3256
3257 if (strchr(fsname, '@') ||
3258 strchr(fsname, '%'))
3259 return (SET_ERROR(EINVAL));
3260
3261 if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3262 return (SET_ERROR(EINVAL));
3263 error = dmu_objset_clone(fsname, origin_name);
3264 if (error != 0)
3265 return (error);
3266
3267 /*
3268 * It would be nice to do this atomically.
3269 */
3270 if (error == 0) {
3271 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3272 nvprops, outnvl);
3273 if (error != 0)
3274 (void) dsl_destroy_head(fsname);
3275 }
3276 return (error);
3277 }
3278
3279 /*
3280 * innvl: {
3281 * "snaps" -> { snapshot1, snapshot2 }
3282 * (optional) "props" -> { prop -> value (string) }
3283 * }
3284 *
3285 * outnvl: snapshot -> error code (int32)
3286 */
3287 static int
3288 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3289 {
3290 nvlist_t *snaps;
3291 nvlist_t *props = NULL;
3292 int error, poollen;
3293 nvpair_t *pair, *pair2;
3294
3295 (void) nvlist_lookup_nvlist(innvl, "props", &props);
3296 if ((error = zfs_check_userprops(poolname, props)) != 0)
3297 return (error);
3298
3299 if (!nvlist_empty(props) &&
3300 zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3301 return (SET_ERROR(ENOTSUP));
3302
3303 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3304 return (SET_ERROR(EINVAL));
3305 poollen = strlen(poolname);
3306 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3307 pair = nvlist_next_nvpair(snaps, pair)) {
3308 const char *name = nvpair_name(pair);
3309 const char *cp = strchr(name, '@');
3310
3311 /*
3312 * The snap name must contain an @, and the part after it must
3313 * contain only valid characters.
3314 */
3315 if (cp == NULL ||
3316 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3317 return (SET_ERROR(EINVAL));
3318
3319 /*
3320 * The snap must be in the specified pool.
3321 */
3322 if (strncmp(name, poolname, poollen) != 0 ||
3323 (name[poollen] != '/' && name[poollen] != '@'))
3324 return (SET_ERROR(EXDEV));
3325
3326 /* This must be the only snap of this fs. */
3327 for (pair2 = nvlist_next_nvpair(snaps, pair);
3328 pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3329 if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3330 == 0) {
3331 return (SET_ERROR(EXDEV));
3332 }
3333 }
3334 }
3335
3336 error = dsl_dataset_snapshot(snaps, props, outnvl);
3337
3338 return (error);
3339 }
3340
3341 /*
3342 * innvl: "message" -> string
3343 */
3344 /* ARGSUSED */
3345 static int
3346 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3347 {
3348 char *message;
3349 spa_t *spa;
3350 int error;
3351 char *poolname;
3352
3353 /*
3354 * The poolname in the ioctl is not set, we get it from the TSD,
3355 * which was set at the end of the last successful ioctl that allows
3356 * logging. The secpolicy func already checked that it is set.
3357 * Only one log ioctl is allowed after each successful ioctl, so
3358 * we clear the TSD here.
3359 */
3360 poolname = tsd_get(zfs_allow_log_key);
3361 if (poolname == NULL)
3362 return (SET_ERROR(EINVAL));
3363 (void) tsd_set(zfs_allow_log_key, NULL);
3364 error = spa_open(poolname, &spa, FTAG);
3365 strfree(poolname);
3366 if (error != 0)
3367 return (error);
3368
3369 if (nvlist_lookup_string(innvl, "message", &message) != 0) {
3370 spa_close(spa, FTAG);
3371 return (SET_ERROR(EINVAL));
3372 }
3373
3374 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3375 spa_close(spa, FTAG);
3376 return (SET_ERROR(ENOTSUP));
3377 }
3378
3379 error = spa_history_log(spa, message);
3380 spa_close(spa, FTAG);
3381 return (error);
3382 }
3383
3384 /*
3385 * The dp_config_rwlock must not be held when calling this, because the
3386 * unmount may need to write out data.
3387 *
3388 * This function is best-effort. Callers must deal gracefully if it
3389 * remains mounted (or is remounted after this call).
3390 *
3391 * Returns 0 if the argument is not a snapshot, or it is not currently a
3392 * filesystem, or we were able to unmount it. Returns error code otherwise.
3393 */
3394 int
3395 zfs_unmount_snap(const char *snapname)
3396 {
3397 int err;
3398
3399 if (strchr(snapname, '@') == NULL)
3400 return (0);
3401
3402 err = zfsctl_snapshot_unmount((char *)snapname, MNT_FORCE);
3403 if (err != 0 && err != ENOENT)
3404 return (SET_ERROR(err));
3405
3406 return (0);
3407 }
3408
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); arg is ignored.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int err = zfs_unmount_snap(snapname);

	return (err);
}
3415
3416 /*
3417 * When a clone is destroyed, its origin may also need to be destroyed,
3418 * in which case it must be unmounted. This routine will do that unmount
3419 * if necessary.
3420 */
3421 void
3422 zfs_destroy_unmount_origin(const char *fsname)
3423 {
3424 int error;
3425 objset_t *os;
3426 dsl_dataset_t *ds;
3427
3428 error = dmu_objset_hold(fsname, FTAG, &os);
3429 if (error != 0)
3430 return;
3431 ds = dmu_objset_ds(os);
3432 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3433 char originname[ZFS_MAX_DATASET_NAME_LEN];
3434 dsl_dataset_name(ds->ds_prev, originname);
3435 dmu_objset_rele(os, FTAG);
3436 (void) zfs_unmount_snap(originname);
3437 } else {
3438 dmu_objset_rele(os, FTAG);
3439 }
3440 }
3441
3442 /*
3443 * innvl: {
3444 * "snaps" -> { snapshot1, snapshot2 }
3445 * (optional boolean) "defer"
3446 * }
3447 *
3448 * outnvl: snapshot -> error code (int32)
3449 */
3450 /* ARGSUSED */
3451 static int
3452 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3453 {
3454 nvlist_t *snaps;
3455 nvpair_t *pair;
3456 boolean_t defer;
3457
3458 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3459 return (SET_ERROR(EINVAL));
3460 defer = nvlist_exists(innvl, "defer");
3461
3462 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3463 pair = nvlist_next_nvpair(snaps, pair)) {
3464 (void) zfs_unmount_snap(nvpair_name(pair));
3465 }
3466
3467 return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3468 }
3469
3470 /*
3471 * Create bookmarks. Bookmark names are of the form <fs>#<bmark>.
3472 * All bookmarks must be in the same pool.
3473 *
3474 * innvl: {
3475 * bookmark1 -> snapshot1, bookmark2 -> snapshot2
3476 * }
3477 *
3478 * outnvl: bookmark -> error code (int32)
3479 *
3480 */
3481 /* ARGSUSED */
3482 static int
3483 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3484 {
3485 nvpair_t *pair, *pair2;
3486
3487 for (pair = nvlist_next_nvpair(innvl, NULL);
3488 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3489 char *snap_name;
3490
3491 /*
3492 * Verify the snapshot argument.
3493 */
3494 if (nvpair_value_string(pair, &snap_name) != 0)
3495 return (SET_ERROR(EINVAL));
3496
3497
3498 /* Verify that the keys (bookmarks) are unique */
3499 for (pair2 = nvlist_next_nvpair(innvl, pair);
3500 pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3501 if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3502 return (SET_ERROR(EINVAL));
3503 }
3504 }
3505
3506 return (dsl_bookmark_create(innvl, outnvl));
3507 }
3508
3509 /*
3510 * innvl: {
3511 * property 1, property 2, ...
3512 * }
3513 *
3514 * outnvl: {
3515 * bookmark name 1 -> { property 1, property 2, ... },
3516 * bookmark name 2 -> { property 1, property 2, ... }
3517 * }
3518 *
3519 */
3520 static int
3521 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3522 {
3523 return (dsl_get_bookmarks(fsname, innvl, outnvl));
3524 }
3525
3526 /*
3527 * innvl: {
3528 * bookmark name 1, bookmark name 2
3529 * }
3530 *
3531 * outnvl: bookmark -> error code (int32)
3532 *
3533 */
3534 static int
3535 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3536 nvlist_t *outnvl)
3537 {
3538 int error, poollen;
3539 nvpair_t *pair;
3540
3541 poollen = strlen(poolname);
3542 for (pair = nvlist_next_nvpair(innvl, NULL);
3543 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3544 const char *name = nvpair_name(pair);
3545 const char *cp = strchr(name, '#');
3546
3547 /*
3548 * The bookmark name must contain an #, and the part after it
3549 * must contain only valid characters.
3550 */
3551 if (cp == NULL ||
3552 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3553 return (SET_ERROR(EINVAL));
3554
3555 /*
3556 * The bookmark must be in the specified pool.
3557 */
3558 if (strncmp(name, poolname, poollen) != 0 ||
3559 (name[poollen] != '/' && name[poollen] != '#'))
3560 return (SET_ERROR(EXDEV));
3561 }
3562
3563 error = dsl_bookmark_destroy(innvl, outnvl);
3564 return (error);
3565 }
3566
3567 /*
3568 * inputs:
3569 * zc_name name of dataset to destroy
3570 * zc_objset_type type of objset
3571 * zc_defer_destroy mark for deferred destroy
3572 *
3573 * outputs: none
3574 */
3575 static int
3576 zfs_ioc_destroy(zfs_cmd_t *zc)
3577 {
3578 int err;
3579
3580 if (zc->zc_objset_type == DMU_OST_ZFS) {
3581 err = zfs_unmount_snap(zc->zc_name);
3582 if (err != 0)
3583 return (err);
3584 }
3585
3586 if (strchr(zc->zc_name, '@')) {
3587 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3588 } else {
3589 err = dsl_destroy_head(zc->zc_name);
3590 if (err == EEXIST) {
3591 /*
3592 * It is possible that the given DS may have
3593 * hidden child (%recv) datasets - "leftovers"
3594 * resulting from the previously interrupted
3595 * 'zfs receive'.
3596 *
3597 * 6 extra bytes for /%recv
3598 */
3599 char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3600
3601 (void) snprintf(namebuf, sizeof (namebuf),
3602 "%s/%s", zc->zc_name, recv_clone_name);
3603
3604 /*
3605 * Try to remove the hidden child (%recv) and after
3606 * that try to remove the target dataset.
3607 * If the hidden child (%recv) does not exist
3608 * the original error (EEXIST) will be returned
3609 */
3610 err = dsl_destroy_head(namebuf);
3611 if (err == 0)
3612 err = dsl_destroy_head(zc->zc_name);
3613 else if (err == ENOENT)
3614 err = EEXIST;
3615 }
3616 }
3617
3618 return (err);
3619 }
3620
3621 /*
3622 * fsname is name of dataset to rollback (to most recent snapshot)
3623 *
3624 * innvl is not used.
3625 *
3626 * outnvl: "target" -> name of most recent snapshot
3627 * }
3628 */
3629 /* ARGSUSED */
3630 static int
3631 zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
3632 {
3633 zfs_sb_t *zsb;
3634 int error;
3635
3636 if (get_zfs_sb(fsname, &zsb) == 0) {
3637 error = zfs_suspend_fs(zsb);
3638 if (error == 0) {
3639 int resume_err;
3640
3641 error = dsl_dataset_rollback(fsname, zsb, outnvl);
3642 resume_err = zfs_resume_fs(zsb, fsname);
3643 error = error ? error : resume_err;
3644 }
3645 deactivate_super(zsb->z_sb);
3646 } else {
3647 error = dsl_dataset_rollback(fsname, NULL, outnvl);
3648 }
3649 return (error);
3650 }
3651
/*
 * dmu_objset_find() callback used by zfs_ioc_rename(): unmount the snapshot
 * "<fsname>@<arg>", where 'arg' is the snapshot name as a C string.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	const char *snap = arg;
	char *snap_path = kmem_asprintf("%s@%s", fsname, snap);
	int rc = zfs_unmount_snap(snap_path);

	strfree(snap_path);

	return (rc);
}
3665
3666 /*
3667 * inputs:
3668 * zc_name old name of dataset
3669 * zc_value new name of dataset
3670 * zc_cookie recursive flag (only valid for snapshots)
3671 *
3672 * outputs: none
3673 */
3674 static int
3675 zfs_ioc_rename(zfs_cmd_t *zc)
3676 {
3677 boolean_t recursive = zc->zc_cookie & 1;
3678 char *at;
3679
3680 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3681 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3682 strchr(zc->zc_value, '%'))
3683 return (SET_ERROR(EINVAL));
3684
3685 at = strchr(zc->zc_name, '@');
3686 if (at != NULL) {
3687 /* snaps must be in same fs */
3688 int error;
3689
3690 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3691 return (SET_ERROR(EXDEV));
3692 *at = '\0';
3693 if (zc->zc_objset_type == DMU_OST_ZFS) {
3694 error = dmu_objset_find(zc->zc_name,
3695 recursive_unmount, at + 1,
3696 recursive ? DS_FIND_CHILDREN : 0);
3697 if (error != 0) {
3698 *at = '@';
3699 return (error);
3700 }
3701 }
3702 error = dsl_dataset_rename_snapshot(zc->zc_name,
3703 at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3704 *at = '@';
3705
3706 return (error);
3707 } else {
3708 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3709 }
3710 }
3711
3712 static int
3713 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3714 {
3715 const char *propname = nvpair_name(pair);
3716 boolean_t issnap = (strchr(dsname, '@') != NULL);
3717 zfs_prop_t prop = zfs_name_to_prop(propname);
3718 uint64_t intval;
3719 int err;
3720
3721 if (prop == ZPROP_INVAL) {
3722 if (zfs_prop_user(propname)) {
3723 if ((err = zfs_secpolicy_write_perms(dsname,
3724 ZFS_DELEG_PERM_USERPROP, cr)))
3725 return (err);
3726 return (0);
3727 }
3728
3729 if (!issnap && zfs_prop_userquota(propname)) {
3730 const char *perm = NULL;
3731 const char *uq_prefix =
3732 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3733 const char *gq_prefix =
3734 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3735 const char *uiq_prefix =
3736 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
3737 const char *giq_prefix =
3738 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
3739
3740 if (strncmp(propname, uq_prefix,
3741 strlen(uq_prefix)) == 0) {
3742 perm = ZFS_DELEG_PERM_USERQUOTA;
3743 } else if (strncmp(propname, uiq_prefix,
3744 strlen(uiq_prefix)) == 0) {
3745 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
3746 } else if (strncmp(propname, gq_prefix,
3747 strlen(gq_prefix)) == 0) {
3748 perm = ZFS_DELEG_PERM_GROUPQUOTA;
3749 } else if (strncmp(propname, giq_prefix,
3750 strlen(giq_prefix)) == 0) {
3751 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
3752 } else {
3753 /* USERUSED and GROUPUSED are read-only */
3754 return (SET_ERROR(EINVAL));
3755 }
3756
3757 if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
3758 return (err);
3759 return (0);
3760 }
3761
3762 return (SET_ERROR(EINVAL));
3763 }
3764
3765 if (issnap)
3766 return (SET_ERROR(EINVAL));
3767
3768 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3769 /*
3770 * dsl_prop_get_all_impl() returns properties in this
3771 * format.
3772 */
3773 nvlist_t *attrs;
3774 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3775 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3776 &pair) == 0);
3777 }
3778
3779 /*
3780 * Check that this value is valid for this pool version
3781 */
3782 switch (prop) {
3783 case ZFS_PROP_COMPRESSION:
3784 /*
3785 * If the user specified gzip compression, make sure
3786 * the SPA supports it. We ignore any errors here since
3787 * we'll catch them later.
3788 */
3789 if (nvpair_value_uint64(pair, &intval) == 0) {
3790 if (intval >= ZIO_COMPRESS_GZIP_1 &&
3791 intval <= ZIO_COMPRESS_GZIP_9 &&
3792 zfs_earlier_version(dsname,
3793 SPA_VERSION_GZIP_COMPRESSION)) {
3794 return (SET_ERROR(ENOTSUP));
3795 }
3796
3797 if (intval == ZIO_COMPRESS_ZLE &&
3798 zfs_earlier_version(dsname,
3799 SPA_VERSION_ZLE_COMPRESSION))
3800 return (SET_ERROR(ENOTSUP));
3801
3802 if (intval == ZIO_COMPRESS_LZ4) {
3803 spa_t *spa;
3804
3805 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3806 return (err);
3807
3808 if (!spa_feature_is_enabled(spa,
3809 SPA_FEATURE_LZ4_COMPRESS)) {
3810 spa_close(spa, FTAG);
3811 return (SET_ERROR(ENOTSUP));
3812 }
3813 spa_close(spa, FTAG);
3814 }
3815
3816 /*
3817 * If this is a bootable dataset then
3818 * verify that the compression algorithm
3819 * is supported for booting. We must return
3820 * something other than ENOTSUP since it
3821 * implies a downrev pool version.
3822 */
3823 if (zfs_is_bootfs(dsname) &&
3824 !BOOTFS_COMPRESS_VALID(intval)) {
3825 return (SET_ERROR(ERANGE));
3826 }
3827 }
3828 break;
3829
3830 case ZFS_PROP_COPIES:
3831 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3832 return (SET_ERROR(ENOTSUP));
3833 break;
3834
3835 case ZFS_PROP_VOLBLOCKSIZE:
3836 case ZFS_PROP_RECORDSIZE:
3837 /* Record sizes above 128k need the feature to be enabled */
3838 if (nvpair_value_uint64(pair, &intval) == 0 &&
3839 intval > SPA_OLD_MAXBLOCKSIZE) {
3840 spa_t *spa;
3841
3842 /*
3843 * If this is a bootable dataset then
3844 * we don't allow large (>128K) blocks,
3845 * because GRUB doesn't support them.
3846 */
3847 if (zfs_is_bootfs(dsname) &&
3848 intval > SPA_OLD_MAXBLOCKSIZE) {
3849 return (SET_ERROR(ERANGE));
3850 }
3851
3852 /*
3853 * We don't allow setting the property above 1MB,
3854 * unless the tunable has been changed.
3855 */
3856 if (intval > zfs_max_recordsize ||
3857 intval > SPA_MAXBLOCKSIZE)
3858 return (SET_ERROR(ERANGE));
3859
3860 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3861 return (err);
3862
3863 if (!spa_feature_is_enabled(spa,
3864 SPA_FEATURE_LARGE_BLOCKS)) {
3865 spa_close(spa, FTAG);
3866 return (SET_ERROR(ENOTSUP));
3867 }
3868 spa_close(spa, FTAG);
3869 }
3870 break;
3871
3872 case ZFS_PROP_DNODESIZE:
3873 /* Dnode sizes above 512 need the feature to be enabled */
3874 if (nvpair_value_uint64(pair, &intval) == 0 &&
3875 intval != ZFS_DNSIZE_LEGACY) {
3876 spa_t *spa;
3877
3878 /*
3879 * If this is a bootable dataset then
3880 * we don't allow large (>512B) dnodes,
3881 * because GRUB doesn't support them.
3882 */
3883 if (zfs_is_bootfs(dsname) &&
3884 intval != ZFS_DNSIZE_LEGACY) {
3885 return (SET_ERROR(EDOM));
3886 }
3887
3888 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3889 return (err);
3890
3891 if (!spa_feature_is_enabled(spa,
3892 SPA_FEATURE_LARGE_DNODE)) {
3893 spa_close(spa, FTAG);
3894 return (SET_ERROR(ENOTSUP));
3895 }
3896 spa_close(spa, FTAG);
3897 }
3898 break;
3899
3900 case ZFS_PROP_SHARESMB:
3901 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3902 return (SET_ERROR(ENOTSUP));
3903 break;
3904
3905 case ZFS_PROP_ACLINHERIT:
3906 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3907 nvpair_value_uint64(pair, &intval) == 0) {
3908 if (intval == ZFS_ACL_PASSTHROUGH_X &&
3909 zfs_earlier_version(dsname,
3910 SPA_VERSION_PASSTHROUGH_X))
3911 return (SET_ERROR(ENOTSUP));
3912 }
3913 break;
3914 case ZFS_PROP_CHECKSUM:
3915 case ZFS_PROP_DEDUP:
3916 {
3917 spa_feature_t feature;
3918 spa_t *spa;
3919 uint64_t intval;
3920 int err;
3921
3922 /* dedup feature version checks */
3923 if (prop == ZFS_PROP_DEDUP &&
3924 zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3925 return (SET_ERROR(ENOTSUP));
3926
3927 if (nvpair_value_uint64(pair, &intval) != 0)
3928 return (SET_ERROR(EINVAL));
3929
3930 /* check prop value is enabled in features */
3931 feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
3932 if (feature == SPA_FEATURE_NONE)
3933 break;
3934
3935 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3936 return (err);
3937 /*
3938 * Salted checksums are not supported on root pools.
3939 */
3940 if (spa_bootfs(spa) != 0 &&
3941 intval < ZIO_CHECKSUM_FUNCTIONS &&
3942 (zio_checksum_table[intval].ci_flags &
3943 ZCHECKSUM_FLAG_SALTED)) {
3944 spa_close(spa, FTAG);
3945 return (SET_ERROR(ERANGE));
3946 }
3947 if (!spa_feature_is_enabled(spa, feature)) {
3948 spa_close(spa, FTAG);
3949 return (SET_ERROR(ENOTSUP));
3950 }
3951 spa_close(spa, FTAG);
3952 break;
3953 }
3954
3955 default:
3956 break;
3957 }
3958
3959 return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3960 }
3961
3962 /*
3963 * Removes properties from the given props list that fail permission checks
3964 * needed to clear them and to restore them in case of a receive error. For each
3965 * property, make sure we have both set and inherit permissions.
3966 *
3967 * Returns the first error encountered if any permission checks fail. If the
3968 * caller provides a non-NULL errlist, it also gives the complete list of names
3969 * of all the properties that failed a permission check along with the
3970 * corresponding error numbers. The caller is responsible for freeing the
3971 * returned errlist.
3972 *
3973 * If every property checks out successfully, zero is returned and the list
3974 * pointed at by errlist is NULL.
3975 */
3976 static int
3977 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3978 {
3979 zfs_cmd_t *zc;
3980 nvpair_t *pair, *next_pair;
3981 nvlist_t *errors;
3982 int err, rv = 0;
3983
3984 if (props == NULL)
3985 return (0);
3986
3987 VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3988
3989 zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3990 (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
3991 pair = nvlist_next_nvpair(props, NULL);
3992 while (pair != NULL) {
3993 next_pair = nvlist_next_nvpair(props, pair);
3994
3995 (void) strlcpy(zc->zc_value, nvpair_name(pair),
3996 sizeof (zc->zc_value));
3997 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3998 (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
3999 VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4000 VERIFY(nvlist_add_int32(errors,
4001 zc->zc_value, err) == 0);
4002 }
4003 pair = next_pair;
4004 }
4005 kmem_free(zc, sizeof (zfs_cmd_t));
4006
4007 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4008 nvlist_free(errors);
4009 errors = NULL;
4010 } else {
4011 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4012 }
4013
4014 if (errlist == NULL)
4015 nvlist_free(errors);
4016 else
4017 *errlist = errors;
4018
4019 return (rv);
4020 }
4021
4022 static boolean_t
4023 propval_equals(nvpair_t *p1, nvpair_t *p2)
4024 {
4025 if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4026 /* dsl_prop_get_all_impl() format */
4027 nvlist_t *attrs;
4028 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4029 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4030 &p1) == 0);
4031 }
4032
4033 if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4034 nvlist_t *attrs;
4035 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4036 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4037 &p2) == 0);
4038 }
4039
4040 if (nvpair_type(p1) != nvpair_type(p2))
4041 return (B_FALSE);
4042
4043 if (nvpair_type(p1) == DATA_TYPE_STRING) {
4044 char *valstr1, *valstr2;
4045
4046 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4047 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4048 return (strcmp(valstr1, valstr2) == 0);
4049 } else {
4050 uint64_t intval1, intval2;
4051
4052 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4053 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4054 return (intval1 == intval2);
4055 }
4056 }
4057
4058 /*
4059 * Remove properties from props if they are not going to change (as determined
4060 * by comparison with origprops). Remove them from origprops as well, since we
4061 * do not need to clear or restore properties that won't change.
4062 */
4063 static void
4064 props_reduce(nvlist_t *props, nvlist_t *origprops)
4065 {
4066 nvpair_t *pair, *next_pair;
4067
4068 if (origprops == NULL)
4069 return; /* all props need to be received */
4070
4071 pair = nvlist_next_nvpair(props, NULL);
4072 while (pair != NULL) {
4073 const char *propname = nvpair_name(pair);
4074 nvpair_t *match;
4075
4076 next_pair = nvlist_next_nvpair(props, pair);
4077
4078 if ((nvlist_lookup_nvpair(origprops, propname,
4079 &match) != 0) || !propval_equals(pair, match))
4080 goto next; /* need to set received value */
4081
4082 /* don't clear the existing received value */
4083 (void) nvlist_remove_nvpair(origprops, match);
4084 /* don't bother receiving the property */
4085 (void) nvlist_remove_nvpair(props, pair);
4086 next:
4087 pair = next_pair;
4088 }
4089 }
4090
4091 /*
4092 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4093 * For example, refquota cannot be set until after the receipt of a dataset,
4094 * because in replication streams, an older/earlier snapshot may exceed the
4095 * refquota. We want to receive the older/earlier snapshot, but setting
4096 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4097 * the older/earlier snapshot from being received (with EDQUOT).
4098 *
4099 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4100 *
4101 * libzfs will need to be judicious handling errors encountered by props
4102 * extracted by this function.
4103 */
4104 static nvlist_t *
4105 extract_delay_props(nvlist_t *props)
4106 {
4107 nvlist_t *delayprops;
4108 nvpair_t *nvp, *tmp;
4109 static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4110 int i;
4111
4112 VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4113
4114 for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4115 nvp = nvlist_next_nvpair(props, nvp)) {
4116 /*
4117 * strcmp() is safe because zfs_prop_to_name() always returns
4118 * a bounded string.
4119 */
4120 for (i = 0; delayable[i] != 0; i++) {
4121 if (strcmp(zfs_prop_to_name(delayable[i]),
4122 nvpair_name(nvp)) == 0) {
4123 break;
4124 }
4125 }
4126 if (delayable[i] != 0) {
4127 tmp = nvlist_prev_nvpair(props, nvp);
4128 VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4129 VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4130 nvp = tmp;
4131 }
4132 }
4133
4134 if (nvlist_empty(delayprops)) {
4135 nvlist_free(delayprops);
4136 delayprops = NULL;
4137 }
4138 return (delayprops);
4139 }
4140
#ifdef DEBUG
/*
 * Debug-only error injection switch.  When set, zfs_ioc_recv_impl() clears
 * it again and forces its own result to a nonzero error after the stream
 * has been processed, which exercises the property-restore path.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4144
/*
 * Common implementation of the receive ioctls: receive the stream read from
 * 'input_fd' into the snapshot "<tofs>@<tosnap>", applying the received
 * properties in 'props' (may be NULL).
 *
 * 'force', 'resumable', 'origin' and 'begin_record' are handed to
 * dmu_recv_begin(); 'cleanup_fd' and 'action_handle' are handed to
 * dmu_recv_stream().
 *
 * outputs:
 * *read_bytes	number of bytes consumed from the input file
 * *errflags	ZPROP_ERR_NOCLEAR / ZPROP_ERR_NORESTORE bits
 * *errors	nvlist of property errors
 *
 * nvlist 'errors' is always allocated (even when an error is returned). It
 * will contain descriptions of encountered errors, if any. It's the callers
 * responsibility to free.
 *
 * Returns the receive error if there is one; otherwise the error (if any)
 * from marking the dataset as having received properties.
 */
static int
zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin,
    nvlist_t *props, boolean_t force, boolean_t resumable, int input_fd,
    dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes,
    uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors)
{
	dmu_recv_cookie_t drc;
	int error = 0;
	int props_error = 0;
	offset_t off;
	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
	nvlist_t *origprops = NULL; /* existing properties */
	boolean_t first_recvd_props = B_FALSE;
	file_t *input_fp;

	/* Initialize all outputs before anything can fail. */
	*read_bytes = 0;
	*errflags = 0;
	*errors = fnvlist_alloc();

	input_fp = getf(input_fd);
	if (input_fp == NULL)
		return (SET_ERROR(EBADF));

	error = dmu_recv_begin(tofs, tosnap,
	    begin_record, force, resumable, origin, &drc);
	if (error != 0)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props != NULL && !drc.drc_newfs) {
		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
		    SPA_VERSION_RECVD_PROPS &&
		    !dsl_prop_get_hasrecvd(tofs))
			first_recvd_props = B_TRUE;

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(tofs, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			/* Report properties we are not allowed to clear. */
			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
				(void) nvlist_merge(*errors, errlist, 0);
			nvlist_free(errlist);

			if (clear_received_props(tofs, origprops,
			    first_recvd_props ? NULL : props) != 0)
				*errflags |= ZPROP_ERR_NOCLEAR;
		} else {
			/* Could not read the existing received properties. */
			*errflags |= ZPROP_ERR_NOCLEAR;
		}
	}

	if (props != NULL) {
		props_error = dsl_prop_set_hasrecvd(tofs);

		if (props_error == 0) {
			/*
			 * Hold back the delayable properties; they are set
			 * only after the stream has been received.
			 */
			delayprops = extract_delay_props(props);
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    props, *errors);
		}
	}

	/* 'off' starts at the current file offset and is advanced below. */
	off = input_fp->f_offset;
	error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
	    action_handle);

	if (error == 0) {
		zfs_sb_t *zsb = NULL;

		if (get_zfs_sb(tofs, &zsb) == 0) {
			/* online recv */
			int end_err;

			error = zfs_suspend_fs(zsb);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc, zsb);
			if (error == 0)
				error = zfs_resume_fs(zsb, tofs);
			/* Suspend/resume errors win over the recv_end error. */
			error = error ? error : end_err;
			deactivate_super(zsb->z_sb);
		} else {
			error = dmu_recv_end(&drc, NULL);
		}

		/* Set delayed properties now, after we're done receiving. */
		if (delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    delayprops, *errors);
		}
	}

	if (delayprops != NULL) {
		/*
		 * Merge delayed props back in with initial props, in case
		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
		 * we have to make sure clear_received_props() includes
		 * the delayed properties).
		 *
		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
		 * using ASSERT() will be just like a VERIFY.
		 */
		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
		nvlist_free(delayprops);
	}


	/* Report how far we read and move the file offset accordingly. */
	*read_bytes = off - input_fp->f_offset;
	if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
		input_fp->f_offset = off;

#ifdef DEBUG
	/* One-shot error injection used to test the restore path below. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif

	/*
	 * On error, restore the original props.
	 */
	if (error != 0 && props != NULL && !drc.drc_newfs) {
		if (clear_received_props(tofs, props, NULL) != 0) {
			/*
			 * We failed to clear the received properties.
			 * Since we may have left a $recvd value on the
			 * system, we can't clear the $hasrecvd flag.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		} else if (first_recvd_props) {
			dsl_prop_unset_hasrecvd(tofs);
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			*errflags |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	releasef(input_fd);
	nvlist_free(origprops);

	/* With no receive error, surface the hasrecvd error, if any. */
	if (error == 0)
		error = props_error;

	return (error);
}
4331
4332 /*
4333 * inputs:
4334 * zc_name name of containing filesystem (unused)
4335 * zc_nvlist_src{_size} nvlist of properties to apply
4336 * zc_value name of snapshot to create
4337 * zc_string name of clone origin (if DRR_FLAG_CLONE)
4338 * zc_cookie file descriptor to recv from
4339 * zc_begin_record the BEGIN record of the stream (not byteswapped)
4340 * zc_guid force flag
4341 * zc_cleanup_fd cleanup-on-exit file descriptor
4342 * zc_action_handle handle for this guid/ds mapping (or zero on first call)
4343 *
4344 * outputs:
4345 * zc_cookie number of bytes read
4346 * zc_obj zprop_errflags_t
4347 * zc_action_handle handle for this guid/ds mapping
4348 * zc_nvlist_dst{_size} error for each unapplied received property
4349 */
4350 static int
4351 zfs_ioc_recv(zfs_cmd_t *zc)
4352 {
4353 dmu_replay_record_t begin_record;
4354 nvlist_t *errors = NULL;
4355 nvlist_t *props = NULL;
4356 char *origin = NULL;
4357 char *tosnap;
4358 char tofs[ZFS_MAX_DATASET_NAME_LEN];
4359 int error = 0;
4360
4361 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4362 strchr(zc->zc_value, '@') == NULL ||
4363 strchr(zc->zc_value, '%'))
4364 return (SET_ERROR(EINVAL));
4365
4366 (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
4367 tosnap = strchr(tofs, '@');
4368 *tosnap++ = '\0';
4369
4370 if (zc->zc_nvlist_src != 0 &&
4371 (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4372 zc->zc_iflags, &props)) != 0)
4373 return (error);
4374
4375 if (zc->zc_string[0])
4376 origin = zc->zc_string;
4377
4378 begin_record.drr_type = DRR_BEGIN;
4379 begin_record.drr_payloadlen = 0;
4380 begin_record.drr_u.drr_begin = zc->zc_begin_record;
4381
4382 error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, zc->zc_guid,
4383 B_FALSE, zc->zc_cookie, &begin_record, zc->zc_cleanup_fd,
4384 &zc->zc_cookie, &zc->zc_obj, &zc->zc_action_handle, &errors);
4385 nvlist_free(props);
4386
4387 /*
4388 * Now that all props, initial and delayed, are set, report the prop
4389 * errors to the caller.
4390 */
4391 if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
4392 (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4393 put_nvlist(zc, errors) != 0)) {
4394 /*
4395 * Caller made zc->zc_nvlist_dst less than the minimum expected
4396 * size or supplied an invalid address.
4397 */
4398 error = SET_ERROR(EINVAL);
4399 }
4400
4401 nvlist_free(errors);
4402
4403 return (error);
4404 }
4405
4406 /*
4407 * innvl: {
4408 * "snapname" -> full name of the snapshot to create
4409 * (optional) "props" -> properties to set (nvlist)
4410 * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
4411 * "begin_record" -> non-byteswapped dmu_replay_record_t
4412 * "input_fd" -> file descriptor to read stream from (int32)
4413 * (optional) "force" -> force flag (value ignored)
4414 * (optional) "resumable" -> resumable flag (value ignored)
4415 * (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
4416 * (optional) "action_handle" -> handle for this guid/ds mapping
4417 * }
4418 *
4419 * outnvl: {
4420 * "read_bytes" -> number of bytes read
4421 * "error_flags" -> zprop_errflags_t
4422 * "action_handle" -> handle for this guid/ds mapping
4423 * "errors" -> error for each unapplied received property (nvlist)
4424 * }
4425 */
4426 static int
4427 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4428 {
4429 dmu_replay_record_t *begin_record;
4430 uint_t begin_record_size;
4431 nvlist_t *errors = NULL;
4432 nvlist_t *props = NULL;
4433 char *snapname = NULL;
4434 char *origin = NULL;
4435 char *tosnap;
4436 char tofs[ZFS_MAX_DATASET_NAME_LEN];
4437 boolean_t force;
4438 boolean_t resumable;
4439 uint64_t action_handle = 0;
4440 uint64_t read_bytes = 0;
4441 uint64_t errflags = 0;
4442 int input_fd = -1;
4443 int cleanup_fd = -1;
4444 int error;
4445
4446 error = nvlist_lookup_string(innvl, "snapname", &snapname);
4447 if (error != 0)
4448 return (SET_ERROR(EINVAL));
4449
4450 if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
4451 strchr(snapname, '@') == NULL ||
4452 strchr(snapname, '%'))
4453 return (SET_ERROR(EINVAL));
4454
4455 (void) strcpy(tofs, snapname);
4456 tosnap = strchr(tofs, '@');
4457 *tosnap++ = '\0';
4458
4459 error = nvlist_lookup_string(innvl, "origin", &origin);
4460 if (error && error != ENOENT)
4461 return (error);
4462
4463 error = nvlist_lookup_byte_array(innvl, "begin_record",
4464 (uchar_t **) &begin_record, &begin_record_size);
4465 if (error != 0 || begin_record_size != sizeof (*begin_record))
4466 return (SET_ERROR(EINVAL));
4467
4468 error = nvlist_lookup_int32(innvl, "input_fd", &input_fd);
4469 if (error != 0)
4470 return (SET_ERROR(EINVAL));
4471
4472 force = nvlist_exists(innvl, "force");
4473 resumable = nvlist_exists(innvl, "resumable");
4474
4475 error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
4476 if (error && error != ENOENT)
4477 return (error);
4478
4479 error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
4480 if (error && error != ENOENT)
4481 return (error);
4482
4483 error = nvlist_lookup_nvlist(innvl, "props", &props);
4484 if (error && error != ENOENT)
4485 return (error);
4486
4487 error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, force,
4488 resumable, input_fd, begin_record, cleanup_fd, &read_bytes,
4489 &errflags, &action_handle, &errors);
4490
4491 fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
4492 fnvlist_add_uint64(outnvl, "error_flags", errflags);
4493 fnvlist_add_uint64(outnvl, "action_handle", action_handle);
4494 fnvlist_add_nvlist(outnvl, "errors", errors);
4495
4496 nvlist_free(errors);
4497 nvlist_free(props);
4498
4499 return (error);
4500 }
4501
4502 /*
4503 * inputs:
4504 * zc_name name of snapshot to send
4505 * zc_cookie file descriptor to send stream to
4506 * zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
4507 * zc_sendobj objsetid of snapshot to send
4508 * zc_fromobj objsetid of incremental fromsnap (may be zero)
4509 * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
4510 * output size in zc_objset_type.
4511 * zc_flags lzc_send_flags
4512 *
4513 * outputs:
4514 * zc_objset_type estimated size, if zc_guid is set
4515 */
4516 static int
4517 zfs_ioc_send(zfs_cmd_t *zc)
4518 {
4519 int error;
4520 offset_t off;
4521 boolean_t estimate = (zc->zc_guid != 0);
4522 boolean_t embedok = (zc->zc_flags & 0x1);
4523 boolean_t large_block_ok = (zc->zc_flags & 0x2);
4524 boolean_t compressok = (zc->zc_flags & 0x4);
4525
4526 if (zc->zc_obj != 0) {
4527 dsl_pool_t *dp;
4528 dsl_dataset_t *tosnap;
4529
4530 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4531 if (error != 0)
4532 return (error);
4533
4534 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4535 if (error != 0) {
4536 dsl_pool_rele(dp, FTAG);
4537 return (error);
4538 }
4539
4540 if (dsl_dir_is_clone(tosnap->ds_dir))
4541 zc->zc_fromobj =
4542 dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4543 dsl_dataset_rele(tosnap, FTAG);
4544 dsl_pool_rele(dp, FTAG);
4545 }
4546
4547 if (estimate) {
4548 dsl_pool_t *dp;
4549 dsl_dataset_t *tosnap;
4550 dsl_dataset_t *fromsnap = NULL;
4551
4552 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4553 if (error != 0)
4554 return (error);
4555
4556 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4557 if (error != 0) {
4558 dsl_pool_rele(dp, FTAG);
4559 return (error);
4560 }
4561
4562 if (zc->zc_fromobj != 0) {
4563 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4564 FTAG, &fromsnap);
4565 if (error != 0) {
4566 dsl_dataset_rele(tosnap, FTAG);
4567 dsl_pool_rele(dp, FTAG);
4568 return (error);
4569 }
4570 }
4571
4572 error = dmu_send_estimate(tosnap, fromsnap, compressok,
4573 &zc->zc_objset_type);
4574
4575 if (fromsnap != NULL)
4576 dsl_dataset_rele(fromsnap, FTAG);
4577 dsl_dataset_rele(tosnap, FTAG);
4578 dsl_pool_rele(dp, FTAG);
4579 } else {
4580 file_t *fp = getf(zc->zc_cookie);
4581 if (fp == NULL)
4582 return (SET_ERROR(EBADF));
4583
4584 off = fp->f_offset;
4585 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4586 zc->zc_fromobj, embedok, large_block_ok, compressok,
4587 zc->zc_cookie, fp->f_vnode, &off);
4588
4589 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4590 fp->f_offset = off;
4591 releasef(zc->zc_cookie);
4592 }
4593 return (error);
4594 }
4595
4596 /*
4597 * inputs:
4598 * zc_name name of snapshot on which to report progress
4599 * zc_cookie file descriptor of send stream
4600 *
4601 * outputs:
4602 * zc_cookie number of bytes written in send stream thus far
4603 */
4604 static int
4605 zfs_ioc_send_progress(zfs_cmd_t *zc)
4606 {
4607 dsl_pool_t *dp;
4608 dsl_dataset_t *ds;
4609 dmu_sendarg_t *dsp = NULL;
4610 int error;
4611
4612 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4613 if (error != 0)
4614 return (error);
4615
4616 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4617 if (error != 0) {
4618 dsl_pool_rele(dp, FTAG);
4619 return (error);
4620 }
4621
4622 mutex_enter(&ds->ds_sendstream_lock);
4623
4624 /*
4625 * Iterate over all the send streams currently active on this dataset.
4626 * If there's one which matches the specified file descriptor _and_ the
4627 * stream was started by the current process, return the progress of
4628 * that stream.
4629 */
4630
4631 for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4632 dsp = list_next(&ds->ds_sendstreams, dsp)) {
4633 if (dsp->dsa_outfd == zc->zc_cookie &&
4634 dsp->dsa_proc->group_leader == curproc->group_leader)
4635 break;
4636 }
4637
4638 if (dsp != NULL)
4639 zc->zc_cookie = *(dsp->dsa_off);
4640 else
4641 error = SET_ERROR(ENOENT);
4642
4643 mutex_exit(&ds->ds_sendstream_lock);
4644 dsl_dataset_rele(ds, FTAG);
4645 dsl_pool_rele(dp, FTAG);
4646 return (error);
4647 }
4648
4649 static int
4650 zfs_ioc_inject_fault(zfs_cmd_t *zc)
4651 {
4652 int id, error;
4653
4654 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4655 &zc->zc_inject_record);
4656
4657 if (error == 0)
4658 zc->zc_guid = (uint64_t)id;
4659
4660 return (error);
4661 }
4662
4663 static int
4664 zfs_ioc_clear_fault(zfs_cmd_t *zc)
4665 {
4666 return (zio_clear_fault((int)zc->zc_guid));
4667 }
4668
4669 static int
4670 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4671 {
4672 int id = (int)zc->zc_guid;
4673 int error;
4674
4675 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4676 &zc->zc_inject_record);
4677
4678 zc->zc_guid = id;
4679
4680 return (error);
4681 }
4682
4683 static int
4684 zfs_ioc_error_log(zfs_cmd_t *zc)
4685 {
4686 spa_t *spa;
4687 int error;
4688 size_t count = (size_t)zc->zc_nvlist_dst_size;
4689
4690 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4691 return (error);
4692
4693 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4694 &count);
4695 if (error == 0)
4696 zc->zc_nvlist_dst_size = count;
4697 else
4698 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4699
4700 spa_close(spa, FTAG);
4701
4702 return (error);
4703 }
4704
4705 static int
4706 zfs_ioc_clear(zfs_cmd_t *zc)
4707 {
4708 spa_t *spa;
4709 vdev_t *vd;
4710 int error;
4711
4712 /*
4713 * On zpool clear we also fix up missing slogs
4714 */
4715 mutex_enter(&spa_namespace_lock);
4716 spa = spa_lookup(zc->zc_name);
4717 if (spa == NULL) {
4718 mutex_exit(&spa_namespace_lock);
4719 return (SET_ERROR(EIO));
4720 }
4721 if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4722 /* we need to let spa_open/spa_load clear the chains */
4723 spa_set_log_state(spa, SPA_LOG_CLEAR);
4724 }
4725 spa->spa_last_open_failed = 0;
4726 mutex_exit(&spa_namespace_lock);
4727
4728 if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4729 error = spa_open(zc->zc_name, &spa, FTAG);
4730 } else {
4731 nvlist_t *policy;
4732 nvlist_t *config = NULL;
4733
4734 if (zc->zc_nvlist_src == 0)
4735 return (SET_ERROR(EINVAL));
4736
4737 if ((error = get_nvlist(zc->zc_nvlist_src,
4738 zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4739 error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4740 policy, &config);
4741 if (config != NULL) {
4742 int err;
4743
4744 if ((err = put_nvlist(zc, config)) != 0)
4745 error = err;
4746 nvlist_free(config);
4747 }
4748 nvlist_free(policy);
4749 }
4750 }
4751
4752 if (error != 0)
4753 return (error);
4754
4755 spa_vdev_state_enter(spa, SCL_NONE);
4756
4757 if (zc->zc_guid == 0) {
4758 vd = NULL;
4759 } else {
4760 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4761 if (vd == NULL) {
4762 (void) spa_vdev_state_exit(spa, NULL, ENODEV);
4763 spa_close(spa, FTAG);
4764 return (SET_ERROR(ENODEV));
4765 }
4766 }
4767
4768 vdev_clear(spa, vd);
4769
4770 (void) spa_vdev_state_exit(spa, NULL, 0);
4771
4772 /*
4773 * Resume any suspended I/Os.
4774 */
4775 if (zio_resume(spa) != 0)
4776 error = SET_ERROR(EIO);
4777
4778 spa_close(spa, FTAG);
4779
4780 return (error);
4781 }
4782
4783 static int
4784 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4785 {
4786 spa_t *spa;
4787 int error;
4788
4789 error = spa_open(zc->zc_name, &spa, FTAG);
4790 if (error != 0)
4791 return (error);
4792
4793 spa_vdev_state_enter(spa, SCL_NONE);
4794
4795 /*
4796 * If a resilver is already in progress then set the
4797 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4798 * the scan as a side effect of the reopen. Otherwise, let
4799 * vdev_open() decided if a resilver is required.
4800 */
4801 spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4802 vdev_reopen(spa->spa_root_vdev);
4803 spa->spa_scrub_reopen = B_FALSE;
4804
4805 (void) spa_vdev_state_exit(spa, NULL, 0);
4806 spa_close(spa, FTAG);
4807 return (0);
4808 }
4809 /*
4810 * inputs:
4811 * zc_name name of filesystem
4812 * zc_value name of origin snapshot
4813 *
4814 * outputs:
4815 * zc_string name of conflicting snapshot, if there is one
4816 */
4817 static int
4818 zfs_ioc_promote(zfs_cmd_t *zc)
4819 {
4820 char *cp;
4821
4822 /*
4823 * We don't need to unmount *all* the origin fs's snapshots, but
4824 * it's easier.
4825 */
4826 cp = strchr(zc->zc_value, '@');
4827 if (cp)
4828 *cp = '\0';
4829 (void) dmu_objset_find(zc->zc_value,
4830 zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4831 return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4832 }
4833
4834 /*
4835 * Retrieve a single {user|group}{used|quota}@... property.
4836 *
4837 * inputs:
4838 * zc_name name of filesystem
4839 * zc_objset_type zfs_userquota_prop_t
4840 * zc_value domain name (eg. "S-1-234-567-89")
4841 * zc_guid RID/UID/GID
4842 *
4843 * outputs:
4844 * zc_cookie property value
4845 */
4846 static int
4847 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4848 {
4849 zfs_sb_t *zsb;
4850 int error;
4851
4852 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4853 return (SET_ERROR(EINVAL));
4854
4855 error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
4856 if (error != 0)
4857 return (error);
4858
4859 error = zfs_userspace_one(zsb,
4860 zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4861 zfs_sb_rele(zsb, FTAG);
4862
4863 return (error);
4864 }
4865
4866 /*
4867 * inputs:
4868 * zc_name name of filesystem
4869 * zc_cookie zap cursor
4870 * zc_objset_type zfs_userquota_prop_t
4871 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4872 *
4873 * outputs:
4874 * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
4875 * zc_cookie zap cursor
4876 */
4877 static int
4878 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4879 {
4880 zfs_sb_t *zsb;
4881 int bufsize = zc->zc_nvlist_dst_size;
4882 int error;
4883 void *buf;
4884
4885 if (bufsize <= 0)
4886 return (SET_ERROR(ENOMEM));
4887
4888 error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
4889 if (error != 0)
4890 return (error);
4891
4892 buf = vmem_alloc(bufsize, KM_SLEEP);
4893
4894 error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie,
4895 buf, &zc->zc_nvlist_dst_size);
4896
4897 if (error == 0) {
4898 error = xcopyout(buf,
4899 (void *)(uintptr_t)zc->zc_nvlist_dst,
4900 zc->zc_nvlist_dst_size);
4901 }
4902 vmem_free(buf, bufsize);
4903 zfs_sb_rele(zsb, FTAG);
4904
4905 return (error);
4906 }
4907
4908 /*
4909 * inputs:
4910 * zc_name name of filesystem
4911 *
4912 * outputs:
4913 * none
4914 */
4915 static int
4916 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4917 {
4918 objset_t *os;
4919 int error = 0;
4920 zfs_sb_t *zsb;
4921
4922 if (get_zfs_sb(zc->zc_name, &zsb) == 0) {
4923 if (!dmu_objset_userused_enabled(zsb->z_os)) {
4924 /*
4925 * If userused is not enabled, it may be because the
4926 * objset needs to be closed & reopened (to grow the
4927 * objset_phys_t). Suspend/resume the fs will do that.
4928 */
4929 error = zfs_suspend_fs(zsb);
4930 if (error == 0) {
4931 dmu_objset_refresh_ownership(zsb->z_os,
4932 zsb);
4933 error = zfs_resume_fs(zsb, zc->zc_name);
4934 }
4935 }
4936 if (error == 0)
4937 error = dmu_objset_userspace_upgrade(zsb->z_os);
4938 deactivate_super(zsb->z_sb);
4939 } else {
4940 /* XXX kind of reading contents without owning */
4941 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4942 if (error != 0)
4943 return (error);
4944
4945 error = dmu_objset_userspace_upgrade(os);
4946 dmu_objset_rele(os, FTAG);
4947 }
4948
4949 return (error);
4950 }
4951
4952 /*
4953 * inputs:
4954 * zc_name name of filesystem
4955 *
4956 * outputs:
4957 * none
4958 */
4959 static int
4960 zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc)
4961 {
4962 objset_t *os;
4963 int error;
4964
4965 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4966 if (error != 0)
4967 return (error);
4968
4969 dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
4970 dsl_pool_rele(dmu_objset_pool(os), FTAG);
4971
4972 if (dmu_objset_userobjspace_upgradable(os)) {
4973 mutex_enter(&os->os_upgrade_lock);
4974 if (os->os_upgrade_id == 0) {
4975 /* clear potential error code and retry */
4976 os->os_upgrade_status = 0;
4977 mutex_exit(&os->os_upgrade_lock);
4978
4979 dmu_objset_userobjspace_upgrade(os);
4980 } else {
4981 mutex_exit(&os->os_upgrade_lock);
4982 }
4983
4984 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
4985 error = os->os_upgrade_status;
4986 }
4987
4988 dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
4989 dsl_dataset_rele(dmu_objset_ds(os), FTAG);
4990
4991 return (error);
4992 }
4993
4994 static int
4995 zfs_ioc_share(zfs_cmd_t *zc)
4996 {
4997 return (SET_ERROR(ENOSYS));
4998 }
4999
5000 ace_t full_access[] = {
5001 {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5002 };
5003
5004 /*
5005 * inputs:
5006 * zc_name name of containing filesystem
5007 * zc_obj object # beyond which we want next in-use object #
5008 *
5009 * outputs:
5010 * zc_obj next in-use object #
5011 */
5012 static int
5013 zfs_ioc_next_obj(zfs_cmd_t *zc)
5014 {
5015 objset_t *os = NULL;
5016 int error;
5017
5018 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5019 if (error != 0)
5020 return (error);
5021
5022 error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
5023
5024 dmu_objset_rele(os, FTAG);
5025 return (error);
5026 }
5027
5028 /*
5029 * inputs:
5030 * zc_name name of filesystem
5031 * zc_value prefix name for snapshot
5032 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
5033 *
5034 * outputs:
5035 * zc_value short name of new snapshot
5036 */
5037 static int
5038 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5039 {
5040 char *snap_name;
5041 char *hold_name;
5042 int error;
5043 minor_t minor;
5044
5045 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5046 if (error != 0)
5047 return (error);
5048
5049 snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5050 (u_longlong_t)ddi_get_lbolt64());
5051 hold_name = kmem_asprintf("%%%s", zc->zc_value);
5052
5053 error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5054 hold_name);
5055 if (error == 0)
5056 (void) strlcpy(zc->zc_value, snap_name,
5057 sizeof (zc->zc_value));
5058 strfree(snap_name);
5059 strfree(hold_name);
5060 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5061 return (error);
5062 }
5063
5064 /*
5065 * inputs:
5066 * zc_name name of "to" snapshot
5067 * zc_value name of "from" snapshot
5068 * zc_cookie file descriptor to write diff data on
5069 *
5070 * outputs:
5071 * dmu_diff_record_t's to the file descriptor
5072 */
5073 static int
5074 zfs_ioc_diff(zfs_cmd_t *zc)
5075 {
5076 file_t *fp;
5077 offset_t off;
5078 int error;
5079
5080 fp = getf(zc->zc_cookie);
5081 if (fp == NULL)
5082 return (SET_ERROR(EBADF));
5083
5084 off = fp->f_offset;
5085
5086 error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5087
5088 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5089 fp->f_offset = off;
5090 releasef(zc->zc_cookie);
5091
5092 return (error);
5093 }
5094
/*
 * Remove all ACL files in shares dir
 *
 * Iterates the ZAP of directory 'dzp' and VOP_REMOVE()s every entry by
 * name, stopping at the first removal that fails.
 *
 * NOTE(review): the loop only ends once zap_cursor_retrieve() or
 * VOP_REMOVE() returns non-zero, so the value returned here is never 0 --
 * confirm whether the end-of-directory code should be mapped to success.
 */
#ifdef HAVE_SMB_SHARE
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfs_sb_t *zsb = ZTOZSB(dzp);
	zap_cursor_t cursor;
	zap_attribute_t entry;
	int rc;

	zap_cursor_init(&cursor, zsb->z_os, dzp->z_id);
	while ((rc = zap_cursor_retrieve(&cursor, &entry)) == 0) {
		rc = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred, NULL, 0);
		if (rc != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (rc);
}
#endif /* HAVE_SMB_SHARE */
5118
5119 static int
5120 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5121 {
5122 #ifdef HAVE_SMB_SHARE
5123 vnode_t *vp;
5124 znode_t *dzp;
5125 vnode_t *resourcevp = NULL;
5126 znode_t *sharedir;
5127 zfs_sb_t *zsb;
5128 nvlist_t *nvlist;
5129 char *src, *target;
5130 vattr_t vattr;
5131 vsecattr_t vsec;
5132 int error = 0;
5133
5134 if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5135 NO_FOLLOW, NULL, &vp)) != 0)
5136 return (error);
5137
5138 /* Now make sure mntpnt and dataset are ZFS */
5139
5140 if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5141 (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5142 zc->zc_name) != 0)) {
5143 VN_RELE(vp);
5144 return (SET_ERROR(EINVAL));
5145 }
5146
5147 dzp = VTOZ(vp);
5148 zsb = ZTOZSB(dzp);
5149 ZFS_ENTER(zsb);
5150
5151 /*
5152 * Create share dir if its missing.
5153 */
5154 mutex_enter(&zsb->z_lock);
5155 if (zsb->z_shares_dir == 0) {
5156 dmu_tx_t *tx;
5157
5158 tx = dmu_tx_create(zsb->z_os);
5159 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5160 ZFS_SHARES_DIR);
5161 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5162 error = dmu_tx_assign(tx, TXG_WAIT);
5163 if (error != 0) {
5164 dmu_tx_abort(tx);
5165 } else {
5166 error = zfs_create_share_dir(zsb, tx);
5167 dmu_tx_commit(tx);
5168 }
5169 if (error != 0) {
5170 mutex_exit(&zsb->z_lock);
5171 VN_RELE(vp);
5172 ZFS_EXIT(zsb);
5173 return (error);
5174 }
5175 }
5176 mutex_exit(&zsb->z_lock);
5177
5178 ASSERT(zsb->z_shares_dir);
5179 if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) {
5180 VN_RELE(vp);
5181 ZFS_EXIT(zsb);
5182 return (error);
5183 }
5184
5185 switch (zc->zc_cookie) {
5186 case ZFS_SMB_ACL_ADD:
5187 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5188 vattr.va_mode = S_IFREG|0777;
5189 vattr.va_uid = 0;
5190 vattr.va_gid = 0;
5191
5192 vsec.vsa_mask = VSA_ACE;
5193 vsec.vsa_aclentp = &full_access;
5194 vsec.vsa_aclentsz = sizeof (full_access);
5195 vsec.vsa_aclcnt = 1;
5196
5197 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5198 &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5199 if (resourcevp)
5200 VN_RELE(resourcevp);
5201 break;
5202
5203 case ZFS_SMB_ACL_REMOVE:
5204 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5205 NULL, 0);
5206 break;
5207
5208 case ZFS_SMB_ACL_RENAME:
5209 if ((error = get_nvlist(zc->zc_nvlist_src,
5210 zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5211 VN_RELE(vp);
5212 VN_RELE(ZTOV(sharedir));
5213 ZFS_EXIT(zsb);
5214 return (error);
5215 }
5216 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5217 nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5218 &target)) {
5219 VN_RELE(vp);
5220 VN_RELE(ZTOV(sharedir));
5221 ZFS_EXIT(zsb);
5222 nvlist_free(nvlist);
5223 return (error);
5224 }
5225 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5226 kcred, NULL, 0);
5227 nvlist_free(nvlist);
5228 break;
5229
5230 case ZFS_SMB_ACL_PURGE:
5231 error = zfs_smb_acl_purge(sharedir);
5232 break;
5233
5234 default:
5235 error = SET_ERROR(EINVAL);
5236 break;
5237 }
5238
5239 VN_RELE(vp);
5240 VN_RELE(ZTOV(sharedir));
5241
5242 ZFS_EXIT(zsb);
5243
5244 return (error);
5245 #else
5246 return (SET_ERROR(ENOTSUP));
5247 #endif /* HAVE_SMB_SHARE */
5248 }
5249
5250 /*
5251 * innvl: {
5252 * "holds" -> { snapname -> holdname (string), ... }
5253 * (optional) "cleanup_fd" -> fd (int32)
5254 * }
5255 *
5256 * outnvl: {
5257 * snapname -> error value (int32)
5258 * ...
5259 * }
5260 */
5261 /* ARGSUSED */
5262 static int
5263 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5264 {
5265 nvpair_t *pair;
5266 nvlist_t *holds;
5267 int cleanup_fd = -1;
5268 int error;
5269 minor_t minor = 0;
5270
5271 error = nvlist_lookup_nvlist(args, "holds", &holds);
5272 if (error != 0)
5273 return (SET_ERROR(EINVAL));
5274
5275 /* make sure the user didn't pass us any invalid (empty) tags */
5276 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5277 pair = nvlist_next_nvpair(holds, pair)) {
5278 char *htag;
5279
5280 error = nvpair_value_string(pair, &htag);
5281 if (error != 0)
5282 return (SET_ERROR(error));
5283
5284 if (strlen(htag) == 0)
5285 return (SET_ERROR(EINVAL));
5286 }
5287
5288 if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5289 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5290 if (error != 0)
5291 return (error);
5292 }
5293
5294 error = dsl_dataset_user_hold(holds, minor, errlist);
5295 if (minor != 0)
5296 zfs_onexit_fd_rele(cleanup_fd);
5297 return (error);
5298 }
5299
5300 /*
5301 * innvl is not used.
5302 *
5303 * outnvl: {
5304 * holdname -> time added (uint64 seconds since epoch)
5305 * ...
5306 * }
5307 */
5308 /* ARGSUSED */
5309 static int
5310 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5311 {
5312 return (dsl_dataset_get_holds(snapname, outnvl));
5313 }
5314
5315 /*
5316 * innvl: {
5317 * snapname -> { holdname, ... }
5318 * ...
5319 * }
5320 *
5321 * outnvl: {
5322 * snapname -> error value (int32)
5323 * ...
5324 * }
5325 */
5326 /* ARGSUSED */
5327 static int
5328 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5329 {
5330 return (dsl_dataset_user_release(holds, errlist));
5331 }
5332
5333 /*
5334 * inputs:
5335 * zc_guid flags (ZEVENT_NONBLOCK)
5336 * zc_cleanup_fd zevent file descriptor
5337 *
5338 * outputs:
5339 * zc_nvlist_dst next nvlist event
5340 * zc_cookie dropped events since last get
5341 */
5342 static int
5343 zfs_ioc_events_next(zfs_cmd_t *zc)
5344 {
5345 zfs_zevent_t *ze;
5346 nvlist_t *event = NULL;
5347 minor_t minor;
5348 uint64_t dropped = 0;
5349 int error;
5350
5351 error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
5352 if (error != 0)
5353 return (error);
5354
5355 do {
5356 error = zfs_zevent_next(ze, &event,
5357 &zc->zc_nvlist_dst_size, &dropped);
5358 if (event != NULL) {
5359 zc->zc_cookie = dropped;
5360 error = put_nvlist(zc, event);
5361 nvlist_free(event);
5362 }
5363
5364 if (zc->zc_guid & ZEVENT_NONBLOCK)
5365 break;
5366
5367 if ((error == 0) || (error != ENOENT))
5368 break;
5369
5370 error = zfs_zevent_wait(ze);
5371 if (error != 0)
5372 break;
5373 } while (1);
5374
5375 zfs_zevent_fd_rele(zc->zc_cleanup_fd);
5376
5377 return (error);
5378 }
5379
5380 /*
5381 * outputs:
5382 * zc_cookie cleared events count
5383 */
5384 static int
5385 zfs_ioc_events_clear(zfs_cmd_t *zc)
5386 {
5387 int count;
5388
5389 zfs_zevent_drain_all(&count);
5390 zc->zc_cookie = count;
5391
5392 return (0);
5393 }
5394
5395 /*
5396 * inputs:
5397 * zc_guid eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
5398 * zc_cleanup zevent file descriptor
5399 */
5400 static int
5401 zfs_ioc_events_seek(zfs_cmd_t *zc)
5402 {
5403 zfs_zevent_t *ze;
5404 minor_t minor;
5405 int error;
5406
5407 error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
5408 if (error != 0)
5409 return (error);
5410
5411 error = zfs_zevent_seek(ze, zc->zc_guid);
5412 zfs_zevent_fd_rele(zc->zc_cleanup_fd);
5413
5414 return (error);
5415 }
5416
5417 /*
5418 * inputs:
5419 * zc_name name of new filesystem or snapshot
5420 * zc_value full name of old snapshot
5421 *
5422 * outputs:
5423 * zc_cookie space in bytes
5424 * zc_objset_type compressed space in bytes
5425 * zc_perm_action uncompressed space in bytes
5426 */
5427 static int
5428 zfs_ioc_space_written(zfs_cmd_t *zc)
5429 {
5430 int error;
5431 dsl_pool_t *dp;
5432 dsl_dataset_t *new, *old;
5433
5434 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5435 if (error != 0)
5436 return (error);
5437 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5438 if (error != 0) {
5439 dsl_pool_rele(dp, FTAG);
5440 return (error);
5441 }
5442 error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5443 if (error != 0) {
5444 dsl_dataset_rele(new, FTAG);
5445 dsl_pool_rele(dp, FTAG);
5446 return (error);
5447 }
5448
5449 error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5450 &zc->zc_objset_type, &zc->zc_perm_action);
5451 dsl_dataset_rele(old, FTAG);
5452 dsl_dataset_rele(new, FTAG);
5453 dsl_pool_rele(dp, FTAG);
5454 return (error);
5455 }
5456
5457 /*
5458 * innvl: {
5459 * "firstsnap" -> snapshot name
5460 * }
5461 *
5462 * outnvl: {
5463 * "used" -> space in bytes
5464 * "compressed" -> compressed space in bytes
5465 * "uncompressed" -> uncompressed space in bytes
5466 * }
5467 */
5468 static int
5469 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5470 {
5471 int error;
5472 dsl_pool_t *dp;
5473 dsl_dataset_t *new, *old;
5474 char *firstsnap;
5475 uint64_t used, comp, uncomp;
5476
5477 if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5478 return (SET_ERROR(EINVAL));
5479
5480 error = dsl_pool_hold(lastsnap, FTAG, &dp);
5481 if (error != 0)
5482 return (error);
5483
5484 error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5485 if (error == 0 && !new->ds_is_snapshot) {
5486 dsl_dataset_rele(new, FTAG);
5487 error = SET_ERROR(EINVAL);
5488 }
5489 if (error != 0) {
5490 dsl_pool_rele(dp, FTAG);
5491 return (error);
5492 }
5493 error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5494 if (error == 0 && !old->ds_is_snapshot) {
5495 dsl_dataset_rele(old, FTAG);
5496 error = SET_ERROR(EINVAL);
5497 }
5498 if (error != 0) {
5499 dsl_dataset_rele(new, FTAG);
5500 dsl_pool_rele(dp, FTAG);
5501 return (error);
5502 }
5503
5504 error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5505 dsl_dataset_rele(old, FTAG);
5506 dsl_dataset_rele(new, FTAG);
5507 dsl_pool_rele(dp, FTAG);
5508 fnvlist_add_uint64(outnvl, "used", used);
5509 fnvlist_add_uint64(outnvl, "compressed", comp);
5510 fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5511 return (error);
5512 }
5513
5514 /*
5515 * innvl: {
5516 * "fd" -> file descriptor to write stream to (int32)
5517 * (optional) "fromsnap" -> full snap name to send an incremental from
5518 * (optional) "largeblockok" -> (value ignored)
5519 * indicates that blocks > 128KB are permitted
5520 * (optional) "embedok" -> (value ignored)
5521 * presence indicates DRR_WRITE_EMBEDDED records are permitted
5522 * (optional) "compressok" -> (value ignored)
5523 * presence indicates compressed DRR_WRITE records are permitted
5524 * (optional) "resume_object" and "resume_offset" -> (uint64)
5525 * if present, resume send stream from specified object and offset.
5526 * }
5527 *
5528 * outnvl is unused
5529 */
5530 /* ARGSUSED */
5531 static int
5532 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5533 {
5534 int error;
5535 offset_t off;
5536 char *fromname = NULL;
5537 int fd;
5538 file_t *fp;
5539 boolean_t largeblockok;
5540 boolean_t embedok;
5541 boolean_t compressok;
5542 uint64_t resumeobj = 0;
5543 uint64_t resumeoff = 0;
5544
5545 error = nvlist_lookup_int32(innvl, "fd", &fd);
5546 if (error != 0)
5547 return (SET_ERROR(EINVAL));
5548
5549 (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5550
5551 largeblockok = nvlist_exists(innvl, "largeblockok");
5552 embedok = nvlist_exists(innvl, "embedok");
5553 compressok = nvlist_exists(innvl, "compressok");
5554
5555 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5556 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5557
5558 if ((fp = getf(fd)) == NULL)
5559 return (SET_ERROR(EBADF));
5560
5561 off = fp->f_offset;
5562 error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5563 fd, resumeobj, resumeoff, fp->f_vnode, &off);
5564
5565 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5566 fp->f_offset = off;
5567
5568 releasef(fd);
5569 return (error);
5570 }
5571
5572 /*
5573 * Determine approximately how large a zfs send stream will be -- the number
5574 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5575 *
5576 * innvl: {
5577 * (optional) "from" -> full snap or bookmark name to send an incremental
5578 * from
5579 * (optional) "largeblockok" -> (value ignored)
5580 * indicates that blocks > 128KB are permitted
5581 * (optional) "embedok" -> (value ignored)
5582 * presence indicates DRR_WRITE_EMBEDDED records are permitted
5583 * (optional) "compressok" -> (value ignored)
5584 * presence indicates compressed DRR_WRITE records are permitted
5585 * }
5586 *
5587 * outnvl: {
5588 * "space" -> bytes of space (uint64)
5589 * }
5590 */
5591 static int
5592 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5593 {
5594 dsl_pool_t *dp;
5595 dsl_dataset_t *tosnap;
5596 int error;
5597 char *fromname;
5598 /* LINTED E_FUNC_SET_NOT_USED */
5599 boolean_t largeblockok;
5600 /* LINTED E_FUNC_SET_NOT_USED */
5601 boolean_t embedok;
5602 boolean_t compressok;
5603 uint64_t space;
5604
5605 error = dsl_pool_hold(snapname, FTAG, &dp);
5606 if (error != 0)
5607 return (error);
5608
5609 error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5610 if (error != 0) {
5611 dsl_pool_rele(dp, FTAG);
5612 return (error);
5613 }
5614
5615 largeblockok = nvlist_exists(innvl, "largeblockok");
5616 embedok = nvlist_exists(innvl, "embedok");
5617 compressok = nvlist_exists(innvl, "compressok");
5618
5619 error = nvlist_lookup_string(innvl, "from", &fromname);
5620 if (error == 0) {
5621 if (strchr(fromname, '@') != NULL) {
5622 /*
5623 * If from is a snapshot, hold it and use the more
5624 * efficient dmu_send_estimate to estimate send space
5625 * size using deadlists.
5626 */
5627 dsl_dataset_t *fromsnap;
5628 error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5629 if (error != 0)
5630 goto out;
5631 error = dmu_send_estimate(tosnap, fromsnap, compressok,
5632 &space);
5633 dsl_dataset_rele(fromsnap, FTAG);
5634 } else if (strchr(fromname, '#') != NULL) {
5635 /*
5636 * If from is a bookmark, fetch the creation TXG of the
5637 * snapshot it was created from and use that to find
5638 * blocks that were born after it.
5639 */
5640 zfs_bookmark_phys_t frombm;
5641
5642 error = dsl_bookmark_lookup(dp, fromname, tosnap,
5643 &frombm);
5644 if (error != 0)
5645 goto out;
5646 error = dmu_send_estimate_from_txg(tosnap,
5647 frombm.zbm_creation_txg, compressok, &space);
5648 } else {
5649 /*
5650 * from is not properly formatted as a snapshot or
5651 * bookmark
5652 */
5653 error = SET_ERROR(EINVAL);
5654 goto out;
5655 }
5656 } else {
5657 // If estimating the size of a full send, use dmu_send_estimate
5658 error = dmu_send_estimate(tosnap, NULL, compressok, &space);
5659 }
5660
5661 fnvlist_add_uint64(outnvl, "space", space);
5662
5663 out:
5664 dsl_dataset_rele(tosnap, FTAG);
5665 dsl_pool_rele(dp, FTAG);
5666 return (error);
5667 }
5668
5669 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5670
5671 static void
5672 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5673 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5674 boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5675 {
5676 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5677
5678 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5679 ASSERT3U(ioc, <, ZFS_IOC_LAST);
5680 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5681 ASSERT3P(vec->zvec_func, ==, NULL);
5682
5683 vec->zvec_legacy_func = func;
5684 vec->zvec_secpolicy = secpolicy;
5685 vec->zvec_namecheck = namecheck;
5686 vec->zvec_allow_log = log_history;
5687 vec->zvec_pool_check = pool_check;
5688 }
5689
5690 /*
5691 * See the block comment at the beginning of this file for details on
5692 * each argument to this function.
5693 */
5694 static void
5695 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5696 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5697 zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5698 boolean_t allow_log)
5699 {
5700 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5701
5702 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5703 ASSERT3U(ioc, <, ZFS_IOC_LAST);
5704 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5705 ASSERT3P(vec->zvec_func, ==, NULL);
5706
5707 /* if we are logging, the name must be valid */
5708 ASSERT(!allow_log || namecheck != NO_NAME);
5709
5710 vec->zvec_name = name;
5711 vec->zvec_func = func;
5712 vec->zvec_secpolicy = secpolicy;
5713 vec->zvec_namecheck = namecheck;
5714 vec->zvec_pool_check = pool_check;
5715 vec->zvec_smush_outnvlist = smush_outnvlist;
5716 vec->zvec_allow_log = allow_log;
5717 }
5718
5719 static void
5720 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5721 zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5722 zfs_ioc_poolcheck_t pool_check)
5723 {
5724 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5725 POOL_NAME, log_history, pool_check);
5726 }
5727
5728 static void
5729 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5730 zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5731 {
5732 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5733 DATASET_NAME, B_FALSE, pool_check);
5734 }
5735
5736 static void
5737 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5738 {
5739 zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5740 POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5741 }
5742
5743 static void
5744 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5745 zfs_secpolicy_func_t *secpolicy)
5746 {
5747 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5748 NO_NAME, B_FALSE, POOL_CHECK_NONE);
5749 }
5750
5751 static void
5752 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5753 zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5754 {
5755 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5756 DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5757 }
5758
5759 static void
5760 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5761 {
5762 zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5763 zfs_secpolicy_read);
5764 }
5765
5766 static void
5767 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5768 zfs_secpolicy_func_t *secpolicy)
5769 {
5770 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5771 DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5772 }
5773
5774 static void
5775 zfs_ioctl_init(void)
5776 {
5777 zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
5778 zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
5779 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5780
5781 zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
5782 zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
5783 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
5784
5785 zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
5786 zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
5787 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5788
5789 zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
5790 zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
5791 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5792
5793 zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
5794 zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
5795 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5796
5797 zfs_ioctl_register("create", ZFS_IOC_CREATE,
5798 zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
5799 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5800
5801 zfs_ioctl_register("clone", ZFS_IOC_CLONE,
5802 zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
5803 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5804
5805 zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
5806 zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
5807 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5808
5809 zfs_ioctl_register("hold", ZFS_IOC_HOLD,
5810 zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
5811 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5812 zfs_ioctl_register("release", ZFS_IOC_RELEASE,
5813 zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
5814 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5815
5816 zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
5817 zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
5818 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5819
5820 zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
5821 zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
5822 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
5823
5824 zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
5825 zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
5826 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5827
5828 zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
5829 zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
5830 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5831
5832 zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
5833 zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
5834 POOL_NAME,
5835 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5836
5837 zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
5838 zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
5839 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5840
5841 /* IOCTLS that use the legacy function signature */
5842
5843 zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
5844 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
5845
5846 zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
5847 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
5848 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
5849 zfs_ioc_pool_scan);
5850 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
5851 zfs_ioc_pool_upgrade);
5852 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
5853 zfs_ioc_vdev_add);
5854 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
5855 zfs_ioc_vdev_remove);
5856 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
5857 zfs_ioc_vdev_set_state);
5858 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
5859 zfs_ioc_vdev_attach);
5860 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
5861 zfs_ioc_vdev_detach);
5862 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
5863 zfs_ioc_vdev_setpath);
5864 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
5865 zfs_ioc_vdev_setfru);
5866 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
5867 zfs_ioc_pool_set_props);
5868 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
5869 zfs_ioc_vdev_split);
5870 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
5871 zfs_ioc_pool_reguid);
5872
5873 zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
5874 zfs_ioc_pool_configs, zfs_secpolicy_none);
5875 zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
5876 zfs_ioc_pool_tryimport, zfs_secpolicy_config);
5877 zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
5878 zfs_ioc_inject_fault, zfs_secpolicy_inject);
5879 zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
5880 zfs_ioc_clear_fault, zfs_secpolicy_inject);
5881 zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
5882 zfs_ioc_inject_list_next, zfs_secpolicy_inject);
5883
5884 /*
5885 * pool destroy, and export don't log the history as part of
5886 * zfsdev_ioctl, but rather zfs_ioc_pool_export
5887 * does the logging of those commands.
5888 */
5889 zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
5890 zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
5891 zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
5892 zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
5893
5894 zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
5895 zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
5896 zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
5897 zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
5898
5899 zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
5900 zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
5901 zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
5902 zfs_ioc_dsobj_to_dsname,
5903 zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
5904 zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
5905 zfs_ioc_pool_get_history,
5906 zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
5907
5908 zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
5909 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
5910
5911 zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
5912 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
5913 zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
5914 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
5915
5916 zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
5917 zfs_ioc_space_written);
5918 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
5919 zfs_ioc_objset_recvd_props);
5920 zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
5921 zfs_ioc_next_obj);
5922 zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
5923 zfs_ioc_get_fsacl);
5924 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
5925 zfs_ioc_objset_stats);
5926 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
5927 zfs_ioc_objset_zplprops);
5928 zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
5929 zfs_ioc_dataset_list_next);
5930 zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
5931 zfs_ioc_snapshot_list_next);
5932 zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
5933 zfs_ioc_send_progress);
5934
5935 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
5936 zfs_ioc_diff, zfs_secpolicy_diff);
5937 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
5938 zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
5939 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
5940 zfs_ioc_obj_to_path, zfs_secpolicy_diff);
5941 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
5942 zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
5943 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
5944 zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
5945 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
5946 zfs_ioc_send, zfs_secpolicy_send);
5947
5948 zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
5949 zfs_secpolicy_none);
5950 zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
5951 zfs_secpolicy_destroy);
5952 zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
5953 zfs_secpolicy_rename);
5954 zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
5955 zfs_secpolicy_recv);
5956 zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
5957 zfs_secpolicy_promote);
5958 zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
5959 zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
5960 zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
5961 zfs_secpolicy_set_fsacl);
5962
5963 zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
5964 zfs_secpolicy_share, POOL_CHECK_NONE);
5965 zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
5966 zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
5967 zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
5968 zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
5969 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5970 zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
5971 zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
5972 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5973
5974 /*
5975 * ZoL functions
5976 */
5977 zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
5978 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
5979 zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
5980 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
5981 zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
5982 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
5983 }
5984
5985 int
5986 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
5987 zfs_ioc_poolcheck_t check)
5988 {
5989 spa_t *spa;
5990 int error;
5991
5992 ASSERT(type == POOL_NAME || type == DATASET_NAME);
5993
5994 if (check & POOL_CHECK_NONE)
5995 return (0);
5996
5997 error = spa_open(name, &spa, FTAG);
5998 if (error == 0) {
5999 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6000 error = SET_ERROR(EAGAIN);
6001 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6002 error = SET_ERROR(EROFS);
6003 spa_close(spa, FTAG);
6004 }
6005 return (error);
6006 }
6007
6008 static void *
6009 zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
6010 {
6011 zfsdev_state_t *zs;
6012
6013 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
6014 if (zs->zs_minor == minor) {
6015 smp_rmb();
6016 switch (which) {
6017 case ZST_ONEXIT:
6018 return (zs->zs_onexit);
6019 case ZST_ZEVENT:
6020 return (zs->zs_zevent);
6021 case ZST_ALL:
6022 return (zs);
6023 }
6024 }
6025 }
6026
6027 return (NULL);
6028 }
6029
6030 void *
6031 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
6032 {
6033 void *ptr;
6034
6035 ptr = zfsdev_get_state_impl(minor, which);
6036
6037 return (ptr);
6038 }
6039
6040 int
6041 zfsdev_getminor(struct file *filp, minor_t *minorp)
6042 {
6043 zfsdev_state_t *zs, *fpd;
6044
6045 ASSERT(filp != NULL);
6046 ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
6047
6048 fpd = filp->private_data;
6049 if (fpd == NULL)
6050 return (EBADF);
6051
6052 mutex_enter(&zfsdev_state_lock);
6053
6054 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
6055
6056 if (zs->zs_minor == -1)
6057 continue;
6058
6059 if (fpd == zs) {
6060 *minorp = fpd->zs_minor;
6061 mutex_exit(&zfsdev_state_lock);
6062 return (0);
6063 }
6064 }
6065
6066 mutex_exit(&zfsdev_state_lock);
6067
6068 return (EBADF);
6069 }
6070
6071 /*
6072 * Find a free minor number. The zfsdev_state_list is expected to
6073 * be short since it is only a list of currently open file handles.
6074 */
6075 minor_t
6076 zfsdev_minor_alloc(void)
6077 {
6078 static minor_t last_minor = 0;
6079 minor_t m;
6080
6081 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6082
6083 for (m = last_minor + 1; m != last_minor; m++) {
6084 if (m > ZFSDEV_MAX_MINOR)
6085 m = 1;
6086 if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) {
6087 last_minor = m;
6088 return (m);
6089 }
6090 }
6091
6092 return (0);
6093 }
6094
6095 static int
6096 zfsdev_state_init(struct file *filp)
6097 {
6098 zfsdev_state_t *zs, *zsprev = NULL;
6099 minor_t minor;
6100 boolean_t newzs = B_FALSE;
6101
6102 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6103
6104 minor = zfsdev_minor_alloc();
6105 if (minor == 0)
6106 return (SET_ERROR(ENXIO));
6107
6108 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
6109 if (zs->zs_minor == -1)
6110 break;
6111 zsprev = zs;
6112 }
6113
6114 if (!zs) {
6115 zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
6116 newzs = B_TRUE;
6117 }
6118
6119 zs->zs_file = filp;
6120 filp->private_data = zs;
6121
6122 zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
6123 zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
6124
6125
6126 /*
6127 * In order to provide for lock-free concurrent read access
6128 * to the minor list in zfsdev_get_state_impl(), new entries
6129 * must be completely written before linking them into the
6130 * list whereas existing entries are already linked; the last
6131 * operation must be updating zs_minor (from -1 to the new
6132 * value).
6133 */
6134 if (newzs) {
6135 zs->zs_minor = minor;
6136 smp_wmb();
6137 zsprev->zs_next = zs;
6138 } else {
6139 smp_wmb();
6140 zs->zs_minor = minor;
6141 }
6142
6143 return (0);
6144 }
6145
6146 static int
6147 zfsdev_state_destroy(struct file *filp)
6148 {
6149 zfsdev_state_t *zs;
6150
6151 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6152 ASSERT(filp->private_data != NULL);
6153
6154 zs = filp->private_data;
6155 zs->zs_minor = -1;
6156 zfs_onexit_destroy(zs->zs_onexit);
6157 zfs_zevent_destroy(zs->zs_zevent);
6158
6159 return (0);
6160 }
6161
6162 static int
6163 zfsdev_open(struct inode *ino, struct file *filp)
6164 {
6165 int error;
6166
6167 mutex_enter(&zfsdev_state_lock);
6168 error = zfsdev_state_init(filp);
6169 mutex_exit(&zfsdev_state_lock);
6170
6171 return (-error);
6172 }
6173
6174 static int
6175 zfsdev_release(struct inode *ino, struct file *filp)
6176 {
6177 int error;
6178
6179 mutex_enter(&zfsdev_state_lock);
6180 error = zfsdev_state_destroy(filp);
6181 mutex_exit(&zfsdev_state_lock);
6182
6183 return (-error);
6184 }
6185
6186 static long
6187 zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
6188 {
6189 zfs_cmd_t *zc;
6190 uint_t vecnum;
6191 int error, rc, flag = 0;
6192 const zfs_ioc_vec_t *vec;
6193 char *saved_poolname = NULL;
6194 nvlist_t *innvl = NULL;
6195 fstrans_cookie_t cookie;
6196
6197 vecnum = cmd - ZFS_IOC_FIRST;
6198 if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6199 return (-SET_ERROR(EINVAL));
6200 vec = &zfs_ioc_vec[vecnum];
6201
6202 /*
6203 * The registered ioctl list may be sparse, verify that either
6204 * a normal or legacy handler are registered.
6205 */
6206 if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
6207 return (-SET_ERROR(EINVAL));
6208
6209 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
6210
6211 error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6212 if (error != 0) {
6213 error = SET_ERROR(EFAULT);
6214 goto out;
6215 }
6216
6217 zc->zc_iflags = flag & FKIOCTL;
6218 if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
6219 /*
6220 * Make sure the user doesn't pass in an insane value for
6221 * zc_nvlist_src_size. We have to check, since we will end
6222 * up allocating that much memory inside of get_nvlist(). This
6223 * prevents a nefarious user from allocating tons of kernel
6224 * memory.
6225 *
6226 * Also, we return EINVAL instead of ENOMEM here. The reason
6227 * being that returning ENOMEM from an ioctl() has a special
6228 * connotation; that the user's size value is too small and
6229 * needs to be expanded to hold the nvlist. See
6230 * zcmd_expand_dst_nvlist() for details.
6231 */
6232 error = SET_ERROR(EINVAL); /* User's size too big */
6233
6234 } else if (zc->zc_nvlist_src_size != 0) {
6235 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6236 zc->zc_iflags, &innvl);
6237 if (error != 0)
6238 goto out;
6239 }
6240
6241 /*
6242 * Ensure that all pool/dataset names are valid before we pass down to
6243 * the lower layers.
6244 */
6245 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6246 switch (vec->zvec_namecheck) {
6247 case POOL_NAME:
6248 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6249 error = SET_ERROR(EINVAL);
6250 else
6251 error = pool_status_check(zc->zc_name,
6252 vec->zvec_namecheck, vec->zvec_pool_check);
6253 break;
6254
6255 case DATASET_NAME:
6256 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6257 error = SET_ERROR(EINVAL);
6258 else
6259 error = pool_status_check(zc->zc_name,
6260 vec->zvec_namecheck, vec->zvec_pool_check);
6261 break;
6262
6263 case NO_NAME:
6264 break;
6265 }
6266
6267
6268 if (error == 0 && !(flag & FKIOCTL)) {
6269 cookie = spl_fstrans_mark();
6270 error = vec->zvec_secpolicy(zc, innvl, CRED());
6271 spl_fstrans_unmark(cookie);
6272 }
6273
6274 if (error != 0)
6275 goto out;
6276
6277 /* legacy ioctls can modify zc_name */
6278 saved_poolname = strdup(zc->zc_name);
6279 if (saved_poolname == NULL) {
6280 error = SET_ERROR(ENOMEM);
6281 goto out;
6282 } else {
6283 saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
6284 }
6285
6286 if (vec->zvec_func != NULL) {
6287 nvlist_t *outnvl;
6288 int puterror = 0;
6289 spa_t *spa;
6290 nvlist_t *lognv = NULL;
6291
6292 ASSERT(vec->zvec_legacy_func == NULL);
6293
6294 /*
6295 * Add the innvl to the lognv before calling the func,
6296 * in case the func changes the innvl.
6297 */
6298 if (vec->zvec_allow_log) {
6299 lognv = fnvlist_alloc();
6300 fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6301 vec->zvec_name);
6302 if (!nvlist_empty(innvl)) {
6303 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6304 innvl);
6305 }
6306 }
6307
6308 outnvl = fnvlist_alloc();
6309 cookie = spl_fstrans_mark();
6310 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6311 spl_fstrans_unmark(cookie);
6312
6313 if (error == 0 && vec->zvec_allow_log &&
6314 spa_open(zc->zc_name, &spa, FTAG) == 0) {
6315 if (!nvlist_empty(outnvl)) {
6316 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6317 outnvl);
6318 }
6319 (void) spa_history_log_nvl(spa, lognv);
6320 spa_close(spa, FTAG);
6321 }
6322 fnvlist_free(lognv);
6323
6324 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6325 int smusherror = 0;
6326 if (vec->zvec_smush_outnvlist) {
6327 smusherror = nvlist_smush(outnvl,
6328 zc->zc_nvlist_dst_size);
6329 }
6330 if (smusherror == 0)
6331 puterror = put_nvlist(zc, outnvl);
6332 }
6333
6334 if (puterror != 0)
6335 error = puterror;
6336
6337 nvlist_free(outnvl);
6338 } else {
6339 cookie = spl_fstrans_mark();
6340 error = vec->zvec_legacy_func(zc);
6341 spl_fstrans_unmark(cookie);
6342 }
6343
6344 out:
6345 nvlist_free(innvl);
6346 rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6347 if (error == 0 && rc != 0)
6348 error = SET_ERROR(EFAULT);
6349 if (error == 0 && vec->zvec_allow_log) {
6350 char *s = tsd_get(zfs_allow_log_key);
6351 if (s != NULL)
6352 strfree(s);
6353 (void) tsd_set(zfs_allow_log_key, saved_poolname);
6354 } else {
6355 if (saved_poolname != NULL)
6356 strfree(saved_poolname);
6357 }
6358
6359 kmem_free(zc, sizeof (zfs_cmd_t));
6360 return (-error);
6361 }
6362
#ifdef CONFIG_COMPAT
/*
 * compat_ioctl handler: forwards the request unchanged to
 * zfsdev_ioctl().
 */
static long
zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
{
	long rc = zfsdev_ioctl(filp, cmd, arg);

	return (rc);
}
#else
/* No compat support configured: leave the file_operations slot empty. */
#define	zfsdev_compat_ioctl	NULL
#endif
6372
6373 static const struct file_operations zfsdev_fops = {
6374 .open = zfsdev_open,
6375 .release = zfsdev_release,
6376 .unlocked_ioctl = zfsdev_ioctl,
6377 .compat_ioctl = zfsdev_compat_ioctl,
6378 .owner = THIS_MODULE,
6379 };
6380
6381 static struct miscdevice zfs_misc = {
6382 .minor = MISC_DYNAMIC_MINOR,
6383 .name = ZFS_DRIVER,
6384 .fops = &zfsdev_fops,
6385 };
6386
6387 static int
6388 zfs_attach(void)
6389 {
6390 int error;
6391
6392 mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
6393 zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
6394 zfsdev_state_list->zs_minor = -1;
6395
6396 error = misc_register(&zfs_misc);
6397 if (error != 0) {
6398 printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
6399 return (error);
6400 }
6401
6402 return (0);
6403 }
6404
6405 static void
6406 zfs_detach(void)
6407 {
6408 zfsdev_state_t *zs, *zsprev = NULL;
6409
6410 misc_deregister(&zfs_misc);
6411 mutex_destroy(&zfsdev_state_lock);
6412
6413 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
6414 if (zsprev)
6415 kmem_free(zsprev, sizeof (zfsdev_state_t));
6416 zsprev = zs;
6417 }
6418 if (zsprev)
6419 kmem_free(zsprev, sizeof (zfsdev_state_t));
6420 }
6421
6422 static void
6423 zfs_allow_log_destroy(void *arg)
6424 {
6425 char *poolname = arg;
6426
6427 if (poolname != NULL)
6428 strfree(poolname);
6429 }
6430
/* Suffix appended to the version string in module load/unload messages. */
#ifndef DEBUG
#define	ZFS_DEBUG_STR	""
#else
#define	ZFS_DEBUG_STR	" (DEBUG mode)"
#endif
6436
6437 static int __init
6438 _init(void)
6439 {
6440 int error;
6441
6442 error = -vn_set_pwd("/");
6443 if (error) {
6444 printk(KERN_NOTICE
6445 "ZFS: Warning unable to set pwd to '/': %d\n", error);
6446 return (error);
6447 }
6448
6449 if ((error = -zvol_init()) != 0)
6450 return (error);
6451
6452 spa_init(FREAD | FWRITE);
6453 zfs_init();
6454
6455 zfs_ioctl_init();
6456
6457 if ((error = zfs_attach()) != 0)
6458 goto out;
6459
6460 tsd_create(&zfs_fsyncer_key, NULL);
6461 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6462 tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6463
6464 printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
6465 "ZFS pool version %s, ZFS filesystem version %s\n",
6466 ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
6467 SPA_VERSION_STRING, ZPL_VERSION_STRING);
6468 #ifndef CONFIG_FS_POSIX_ACL
6469 printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
6470 #endif /* CONFIG_FS_POSIX_ACL */
6471
6472 return (0);
6473
6474 out:
6475 zfs_fini();
6476 spa_fini();
6477 (void) zvol_fini();
6478 printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
6479 ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
6480 ZFS_DEBUG_STR, error);
6481
6482 return (error);
6483 }
6484
6485 static void __exit
6486 _fini(void)
6487 {
6488 zfs_detach();
6489 zfs_fini();
6490 spa_fini();
6491 zvol_fini();
6492
6493 tsd_destroy(&zfs_fsyncer_key);
6494 tsd_destroy(&rrw_tsd_key);
6495 tsd_destroy(&zfs_allow_log_key);
6496
6497 printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
6498 ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
6499 }
6500
#ifdef HAVE_SPL
/* Kernel module entry/exit hooks and module metadata. */
module_init(_init);
module_exit(_fini);

MODULE_DESCRIPTION("ZFS");
MODULE_AUTHOR(ZFS_META_AUTHOR);
MODULE_LICENSE(ZFS_META_LICENSE);
MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
#endif /* HAVE_SPL */