]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/zfs_ioctl.c
OpenZFS 6101 - attempt to lzc_create() a filesystem under a volume results in a panic
[mirror_zfs.git] / module / zfs / zfs_ioctl.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 Martin Matuska
25 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
26 * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
27 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
28 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
29 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
31 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
32 * Copyright (c) 2013 Steven Hartland. All rights reserved.
33 * Copyright (c) 2014 Integros [integros.com]
34 * Copyright 2016 Toomas Soome <tsoome@me.com>
35 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
36 */
37
38 /*
39 * ZFS ioctls.
40 *
41 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
42 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
43 *
44 * There are two ways that we handle ioctls: the legacy way where almost
45 * all of the logic is in the ioctl callback, and the new way where most
46 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
47 *
48 * Non-legacy ioctls should be registered by calling
49 * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked
50 * from userland by lzc_ioctl().
51 *
52 * The registration arguments are as follows:
53 *
54 * const char *name
55 * The name of the ioctl. This is used for history logging. If the
56 * ioctl returns successfully (the callback returns 0), and allow_log
57 * is true, then a history log entry will be recorded with the input &
58 * output nvlists. The log entry can be printed with "zpool history -i".
59 *
60 * zfs_ioc_t ioc
61 * The ioctl request number, which userland will pass to ioctl(2).
62 * The ioctl numbers can change from release to release, because
63 * the caller (libzfs) must be matched to the kernel.
64 *
65 * zfs_secpolicy_func_t *secpolicy
66 * This function will be called before the zfs_ioc_func_t, to
67 * determine if this operation is permitted. It should return EPERM
68 * on failure, and 0 on success. Checks include determining if the
69 * dataset is visible in this zone, and if the user has either all
70 * zfs privileges in the zone (SYS_MOUNT), or has been granted permission
71 * to do this operation on this dataset with "zfs allow".
72 *
73 * zfs_ioc_namecheck_t namecheck
74 * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
75 * name, a dataset name, or nothing. If the name is not well-formed,
76 * the ioctl will fail and the callback will not be called.
77 * Therefore, the callback can assume that the name is well-formed
78 * (e.g. is null-terminated, doesn't have more than one '@' character,
79 * doesn't have invalid characters).
80 *
81 * zfs_ioc_poolcheck_t pool_check
82 * This specifies requirements on the pool state. If the pool does
83 * not meet them (is suspended or is readonly), the ioctl will fail
84 * and the callback will not be called. If any checks are specified
85 * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
86 * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
87 * POOL_CHECK_READONLY).
88 *
89 * boolean_t smush_outnvlist
90 * If smush_outnvlist is true, then the output is presumed to be a
91 * list of errors, and it will be "smushed" down to fit into the
92 * caller's buffer, by removing some entries and replacing them with a
93 * single "N_MORE_ERRORS" entry indicating how many were removed. See
94 * nvlist_smush() for details. If smush_outnvlist is false, and the
95 * outnvlist does not fit into the userland-provided buffer, then the
96 * ioctl will fail with ENOMEM.
97 *
98 * zfs_ioc_func_t *func
99 * The callback function that will perform the operation.
100 *
101 * The callback should return 0 on success, or an error number on
102 * failure. If the function fails, the userland ioctl will return -1,
103 * and errno will be set to the callback's return value. The callback
104 * will be called with the following arguments:
105 *
106 * const char *name
107 * The name of the pool or dataset to operate on, from
108 * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the
109 * expected type (pool, dataset, or none).
110 *
111 * nvlist_t *innvl
112 * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or
113 * NULL if no input nvlist was provided. Changes to this nvlist are
114 * ignored. If the input nvlist could not be deserialized, the
115 * ioctl will fail and the callback will not be called.
116 *
117 * nvlist_t *outnvl
118 * The output nvlist, initially empty. The callback can fill it in,
119 * and it will be returned to userland by serializing it into
120 * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization
121 * fails (e.g. because the caller didn't supply a large enough
122 * buffer), then the overall ioctl will fail. See the
123 * 'smush_outnvlist' argument above for additional behaviors.
124 *
125 * There are two typical uses of the output nvlist:
126 * - To return state, e.g. property values. In this case,
127 * smush_outnvlist should be false. If the buffer was not large
128 * enough, the caller will reallocate a larger buffer and try
129 * the ioctl again.
130 *
131 * - To return multiple errors from an ioctl which makes on-disk
132 * changes. In this case, smush_outnvlist should be true.
133 * Ioctls which make on-disk modifications should generally not
134 * use the outnvl if they succeed, because the caller can not
135 * distinguish between the operation failing, and
136 * deserialization failing.
137 */
138
139 #include <sys/types.h>
140 #include <sys/param.h>
141 #include <sys/errno.h>
142 #include <sys/uio.h>
143 #include <sys/buf.h>
144 #include <sys/modctl.h>
145 #include <sys/open.h>
146 #include <sys/file.h>
147 #include <sys/kmem.h>
148 #include <sys/conf.h>
149 #include <sys/cmn_err.h>
150 #include <sys/stat.h>
151 #include <sys/zfs_ioctl.h>
152 #include <sys/zfs_vfsops.h>
153 #include <sys/zfs_znode.h>
154 #include <sys/zap.h>
155 #include <sys/spa.h>
156 #include <sys/spa_impl.h>
157 #include <sys/vdev.h>
158 #include <sys/priv_impl.h>
159 #include <sys/dmu.h>
160 #include <sys/dsl_dir.h>
161 #include <sys/dsl_dataset.h>
162 #include <sys/dsl_prop.h>
163 #include <sys/dsl_deleg.h>
164 #include <sys/dmu_objset.h>
165 #include <sys/dmu_impl.h>
166 #include <sys/dmu_tx.h>
167 #include <sys/ddi.h>
168 #include <sys/sunddi.h>
169 #include <sys/sunldi.h>
170 #include <sys/policy.h>
171 #include <sys/zone.h>
172 #include <sys/nvpair.h>
173 #include <sys/pathname.h>
174 #include <sys/mount.h>
175 #include <sys/sdt.h>
176 #include <sys/fs/zfs.h>
177 #include <sys/zfs_ctldir.h>
178 #include <sys/zfs_dir.h>
179 #include <sys/zfs_onexit.h>
180 #include <sys/zvol.h>
181 #include <sys/dsl_scan.h>
182 #include <sharefs/share.h>
183 #include <sys/fm/util.h>
184
185 #include <sys/dmu_send.h>
186 #include <sys/dsl_destroy.h>
187 #include <sys/dsl_bookmark.h>
188 #include <sys/dsl_userhold.h>
189 #include <sys/zfeature.h>
190 #include <sys/zio_checksum.h>
191
192 #include <linux/miscdevice.h>
193 #include <linux/slab.h>
194
195 #include "zfs_namecheck.h"
196 #include "zfs_prop.h"
197 #include "zfs_deleg.h"
198 #include "zfs_comutil.h"
199
200 /*
201 * Limit maximum nvlist size. We don't want users passing in insane values
202 * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
203 */
204 #define MAX_NVLIST_SRC_SIZE KMALLOC_MAX_SIZE
205
206 kmutex_t zfsdev_state_lock;
207 zfsdev_state_t *zfsdev_state_list;
208
209 extern void zfs_init(void);
210 extern void zfs_fini(void);
211
212 uint_t zfs_fsyncer_key;
213 extern uint_t rrw_tsd_key;
214 static uint_t zfs_allow_log_key;
215
216 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
217 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
218 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
219
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name -- nothing, a pool
 * name, or a dataset name.
 */
typedef enum {
	NO_NAME = 0,
	POOL_NAME = 1,
	DATASET_NAME = 2
} zfs_ioc_namecheck_t;
225
/*
 * Requirements on pool state that an ioctl may ask for.  The values are
 * distinct bits so that checks can be or-ed together.  Note that
 * POOL_CHECK_NONE is itself a non-zero bit (bit 0), not the value 0.
 */
typedef enum {
	POOL_CHECK_NONE		= 0x1,
	POOL_CHECK_SUSPENDED	= 0x2,
	POOL_CHECK_READONLY	= 0x4,
} zfs_ioc_poolcheck_t;
231
232 typedef struct zfs_ioc_vec {
233 zfs_ioc_legacy_func_t *zvec_legacy_func;
234 zfs_ioc_func_t *zvec_func;
235 zfs_secpolicy_func_t *zvec_secpolicy;
236 zfs_ioc_namecheck_t zvec_namecheck;
237 boolean_t zvec_allow_log;
238 zfs_ioc_poolcheck_t zvec_pool_check;
239 boolean_t zvec_smush_outnvlist;
240 const char *zvec_name;
241 } zfs_ioc_vec_t;
242
243 /* This array is indexed by zfs_userquota_prop_t */
244 static const char *userquota_perms[] = {
245 ZFS_DELEG_PERM_USERUSED,
246 ZFS_DELEG_PERM_USERQUOTA,
247 ZFS_DELEG_PERM_GROUPUSED,
248 ZFS_DELEG_PERM_GROUPQUOTA,
249 ZFS_DELEG_PERM_USEROBJUSED,
250 ZFS_DELEG_PERM_USEROBJQUOTA,
251 ZFS_DELEG_PERM_GROUPOBJUSED,
252 ZFS_DELEG_PERM_GROUPOBJQUOTA,
253 };
254
255 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
256 static int zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc);
257 static int zfs_check_settable(const char *name, nvpair_t *property,
258 cred_t *cr);
259 static int zfs_check_clearable(char *dataset, nvlist_t *props,
260 nvlist_t **errors);
261 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
262 boolean_t *);
263 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
264 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
265
266 static void
267 history_str_free(char *buf)
268 {
269 kmem_free(buf, HIS_MAX_RECORD_LEN);
270 }
271
272 static char *
273 history_str_get(zfs_cmd_t *zc)
274 {
275 char *buf;
276
277 if (zc->zc_history == 0)
278 return (NULL);
279
280 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
281 if (copyinstr((void *)(uintptr_t)zc->zc_history,
282 buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
283 history_str_free(buf);
284 return (NULL);
285 }
286
287 buf[HIS_MAX_RECORD_LEN -1] = '\0';
288
289 return (buf);
290 }
291
292 /*
293 * Check to see if the named dataset is currently defined as bootable
294 */
295 static boolean_t
296 zfs_is_bootfs(const char *name)
297 {
298 objset_t *os;
299
300 if (dmu_objset_hold(name, FTAG, &os) == 0) {
301 boolean_t ret;
302 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
303 dmu_objset_rele(os, FTAG);
304 return (ret);
305 }
306 return (B_FALSE);
307 }
308
309 /*
310 * Return non-zero if the spa version is less than requested version.
311 */
312 static int
313 zfs_earlier_version(const char *name, int version)
314 {
315 spa_t *spa;
316
317 if (spa_open(name, &spa, FTAG) == 0) {
318 if (spa_version(spa) < version) {
319 spa_close(spa, FTAG);
320 return (1);
321 }
322 spa_close(spa, FTAG);
323 }
324 return (0);
325 }
326
327 /*
328 * Return TRUE if the ZPL version is less than requested version.
329 */
330 static boolean_t
331 zpl_earlier_version(const char *name, int version)
332 {
333 objset_t *os;
334 boolean_t rc = B_TRUE;
335
336 if (dmu_objset_hold(name, FTAG, &os) == 0) {
337 uint64_t zplversion;
338
339 if (dmu_objset_type(os) != DMU_OST_ZFS) {
340 dmu_objset_rele(os, FTAG);
341 return (B_TRUE);
342 }
343 /* XXX reading from non-owned objset */
344 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
345 rc = zplversion < version;
346 dmu_objset_rele(os, FTAG);
347 }
348 return (rc);
349 }
350
351 static void
352 zfs_log_history(zfs_cmd_t *zc)
353 {
354 spa_t *spa;
355 char *buf;
356
357 if ((buf = history_str_get(zc)) == NULL)
358 return;
359
360 if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
361 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
362 (void) spa_history_log(spa, buf);
363 spa_close(spa, FTAG);
364 }
365 history_str_free(buf);
366 }
367
368 /*
369 * Policy for top-level read operations (list pools). Requires no privileges,
370 * and can be used in the local zone, as there is no associated dataset.
371 */
372 /* ARGSUSED */
373 static int
374 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
375 {
376 return (0);
377 }
378
379 /*
380 * Policy for dataset read operations (list children, get statistics). Requires
381 * no privileges, but must be visible in the local zone.
382 */
383 /* ARGSUSED */
384 static int
385 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
386 {
387 if (INGLOBALZONE(curproc) ||
388 zone_dataset_visible(zc->zc_name, NULL))
389 return (0);
390
391 return (SET_ERROR(ENOENT));
392 }
393
394 static int
395 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
396 {
397 int writable = 1;
398
399 /*
400 * The dataset must be visible by this zone -- check this first
401 * so they don't see EPERM on something they shouldn't know about.
402 */
403 if (!INGLOBALZONE(curproc) &&
404 !zone_dataset_visible(dataset, &writable))
405 return (SET_ERROR(ENOENT));
406
407 if (INGLOBALZONE(curproc)) {
408 /*
409 * If the fs is zoned, only root can access it from the
410 * global zone.
411 */
412 if (secpolicy_zfs(cr) && zoned)
413 return (SET_ERROR(EPERM));
414 } else {
415 /*
416 * If we are in a local zone, the 'zoned' property must be set.
417 */
418 if (!zoned)
419 return (SET_ERROR(EPERM));
420
421 /* must be writable by this zone */
422 if (!writable)
423 return (SET_ERROR(EPERM));
424 }
425 return (0);
426 }
427
428 static int
429 zfs_dozonecheck(const char *dataset, cred_t *cr)
430 {
431 uint64_t zoned;
432
433 if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
434 return (SET_ERROR(ENOENT));
435
436 return (zfs_dozonecheck_impl(dataset, zoned, cr));
437 }
438
439 static int
440 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
441 {
442 uint64_t zoned;
443
444 if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
445 return (SET_ERROR(ENOENT));
446
447 return (zfs_dozonecheck_impl(dataset, zoned, cr));
448 }
449
450 static int
451 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
452 const char *perm, cred_t *cr)
453 {
454 int error;
455
456 error = zfs_dozonecheck_ds(name, ds, cr);
457 if (error == 0) {
458 error = secpolicy_zfs(cr);
459 if (error != 0)
460 error = dsl_deleg_access_impl(ds, perm, cr);
461 }
462 return (error);
463 }
464
465 static int
466 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
467 {
468 int error;
469 dsl_dataset_t *ds;
470 dsl_pool_t *dp;
471
472 /*
473 * First do a quick check for root in the global zone, which
474 * is allowed to do all write_perms. This ensures that zfs_ioc_*
475 * will get to handle nonexistent datasets.
476 */
477 if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
478 return (0);
479
480 error = dsl_pool_hold(name, FTAG, &dp);
481 if (error != 0)
482 return (error);
483
484 error = dsl_dataset_hold(dp, name, FTAG, &ds);
485 if (error != 0) {
486 dsl_pool_rele(dp, FTAG);
487 return (error);
488 }
489
490 error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
491
492 dsl_dataset_rele(ds, FTAG);
493 dsl_pool_rele(dp, FTAG);
494 return (error);
495 }
496
497 /*
498 * Policy for setting the security label property.
499 *
500 * Returns 0 for success, non-zero for access and other errors.
501 */
502 static int
503 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
504 {
505 #ifdef HAVE_MLSLABEL
506 char ds_hexsl[MAXNAMELEN];
507 bslabel_t ds_sl, new_sl;
508 boolean_t new_default = FALSE;
509 uint64_t zoned;
510 int needed_priv = -1;
511 int error;
512
513 /* First get the existing dataset label. */
514 error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
515 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
516 if (error != 0)
517 return (SET_ERROR(EPERM));
518
519 if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
520 new_default = TRUE;
521
522 /* The label must be translatable */
523 if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
524 return (SET_ERROR(EINVAL));
525
526 /*
527 * In a non-global zone, disallow attempts to set a label that
528 * doesn't match that of the zone; otherwise no other checks
529 * are needed.
530 */
531 if (!INGLOBALZONE(curproc)) {
532 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
533 return (SET_ERROR(EPERM));
534 return (0);
535 }
536
537 /*
538 * For global-zone datasets (i.e., those whose zoned property is
539 * "off", verify that the specified new label is valid for the
540 * global zone.
541 */
542 if (dsl_prop_get_integer(name,
543 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
544 return (SET_ERROR(EPERM));
545 if (!zoned) {
546 if (zfs_check_global_label(name, strval) != 0)
547 return (SET_ERROR(EPERM));
548 }
549
550 /*
551 * If the existing dataset label is nondefault, check if the
552 * dataset is mounted (label cannot be changed while mounted).
553 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
554 * mounted (or isn't a dataset, doesn't exist, ...).
555 */
556 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
557 objset_t *os;
558 static char *setsl_tag = "setsl_tag";
559
560 /*
561 * Try to own the dataset; abort if there is any error,
562 * (e.g., already mounted, in use, or other error).
563 */
564 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
565 setsl_tag, &os);
566 if (error != 0)
567 return (SET_ERROR(EPERM));
568
569 dmu_objset_disown(os, setsl_tag);
570
571 if (new_default) {
572 needed_priv = PRIV_FILE_DOWNGRADE_SL;
573 goto out_check;
574 }
575
576 if (hexstr_to_label(strval, &new_sl) != 0)
577 return (SET_ERROR(EPERM));
578
579 if (blstrictdom(&ds_sl, &new_sl))
580 needed_priv = PRIV_FILE_DOWNGRADE_SL;
581 else if (blstrictdom(&new_sl, &ds_sl))
582 needed_priv = PRIV_FILE_UPGRADE_SL;
583 } else {
584 /* dataset currently has a default label */
585 if (!new_default)
586 needed_priv = PRIV_FILE_UPGRADE_SL;
587 }
588
589 out_check:
590 if (needed_priv != -1)
591 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
592 return (0);
593 #else
594 return (ENOTSUP);
595 #endif /* HAVE_MLSLABEL */
596 }
597
598 static int
599 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
600 cred_t *cr)
601 {
602 char *strval;
603
604 /*
605 * Check permissions for special properties.
606 */
607 switch (prop) {
608 default:
609 break;
610 case ZFS_PROP_ZONED:
611 /*
612 * Disallow setting of 'zoned' from within a local zone.
613 */
614 if (!INGLOBALZONE(curproc))
615 return (SET_ERROR(EPERM));
616 break;
617
618 case ZFS_PROP_QUOTA:
619 case ZFS_PROP_FILESYSTEM_LIMIT:
620 case ZFS_PROP_SNAPSHOT_LIMIT:
621 if (!INGLOBALZONE(curproc)) {
622 uint64_t zoned;
623 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
624 /*
625 * Unprivileged users are allowed to modify the
626 * limit on things *under* (ie. contained by)
627 * the thing they own.
628 */
629 if (dsl_prop_get_integer(dsname, "zoned", &zoned,
630 setpoint))
631 return (SET_ERROR(EPERM));
632 if (!zoned || strlen(dsname) <= strlen(setpoint))
633 return (SET_ERROR(EPERM));
634 }
635 break;
636
637 case ZFS_PROP_MLSLABEL:
638 if (!is_system_labeled())
639 return (SET_ERROR(EPERM));
640
641 if (nvpair_value_string(propval, &strval) == 0) {
642 int err;
643
644 err = zfs_set_slabel_policy(dsname, strval, CRED());
645 if (err != 0)
646 return (err);
647 }
648 break;
649 }
650
651 return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
652 }
653
654 /* ARGSUSED */
655 static int
656 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
657 {
658 int error;
659
660 error = zfs_dozonecheck(zc->zc_name, cr);
661 if (error != 0)
662 return (error);
663
664 /*
665 * permission to set permissions will be evaluated later in
666 * dsl_deleg_can_allow()
667 */
668 return (0);
669 }
670
671 /* ARGSUSED */
672 static int
673 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
674 {
675 return (zfs_secpolicy_write_perms(zc->zc_name,
676 ZFS_DELEG_PERM_ROLLBACK, cr));
677 }
678
679 /* ARGSUSED */
680 static int
681 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
682 {
683 dsl_pool_t *dp;
684 dsl_dataset_t *ds;
685 char *cp;
686 int error;
687
688 /*
689 * Generate the current snapshot name from the given objsetid, then
690 * use that name for the secpolicy/zone checks.
691 */
692 cp = strchr(zc->zc_name, '@');
693 if (cp == NULL)
694 return (SET_ERROR(EINVAL));
695 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
696 if (error != 0)
697 return (error);
698
699 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
700 if (error != 0) {
701 dsl_pool_rele(dp, FTAG);
702 return (error);
703 }
704
705 dsl_dataset_name(ds, zc->zc_name);
706
707 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
708 ZFS_DELEG_PERM_SEND, cr);
709 dsl_dataset_rele(ds, FTAG);
710 dsl_pool_rele(dp, FTAG);
711
712 return (error);
713 }
714
715 /* ARGSUSED */
716 static int
717 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
718 {
719 return (zfs_secpolicy_write_perms(zc->zc_name,
720 ZFS_DELEG_PERM_SEND, cr));
721 }
722
#ifdef HAVE_SMB_SHARE
/*
 * Delegated 'share' check.  zc_value is looked up as a path; the vnode
 * found must live on a ZFS filesystem whose resource name equals zc_name,
 * otherwise EPERM is returned.  If it does, the result of
 * dsl_deleg_access() for the 'share' permission is returned.
 */
/* ARGSUSED */
static int
zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	vnode_t *vp;
	int is_ours;
	int error;

	error = lookupname(zc->zc_value, UIO_SYSSPACE,
	    NO_FOLLOW, NULL, &vp);
	if (error != 0)
		return (error);

	/* Now make sure mntpnt and dataset are ZFS */
	is_ours = (vp->v_vfsp->vfs_fstype == zfsfstype &&
	    strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
	    zc->zc_name) == 0);

	VN_RELE(vp);

	if (!is_ours)
		return (SET_ERROR(EPERM));

	return (dsl_deleg_access(zc->zc_name,
	    ZFS_DELEG_PERM_SHARE, cr));
}
#endif /* HAVE_SMB_SHARE */
749
750 int
751 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
752 {
753 #ifdef HAVE_SMB_SHARE
754 if (!INGLOBALZONE(curproc))
755 return (SET_ERROR(EPERM));
756
757 if (secpolicy_nfs(cr) == 0) {
758 return (0);
759 } else {
760 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
761 }
762 #else
763 return (SET_ERROR(ENOTSUP));
764 #endif /* HAVE_SMB_SHARE */
765 }
766
767 int
768 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
769 {
770 #ifdef HAVE_SMB_SHARE
771 if (!INGLOBALZONE(curproc))
772 return (SET_ERROR(EPERM));
773
774 if (secpolicy_smb(cr) == 0) {
775 return (0);
776 } else {
777 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
778 }
779 #else
780 return (SET_ERROR(ENOTSUP));
781 #endif /* HAVE_SMB_SHARE */
782 }
783
784 static int
785 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
786 {
787 char *cp;
788
789 /*
790 * Remove the @bla or /bla from the end of the name to get the parent.
791 */
792 (void) strncpy(parent, datasetname, parentsize);
793 cp = strrchr(parent, '@');
794 if (cp != NULL) {
795 cp[0] = '\0';
796 } else {
797 cp = strrchr(parent, '/');
798 if (cp == NULL)
799 return (SET_ERROR(ENOENT));
800 cp[0] = '\0';
801 }
802
803 return (0);
804 }
805
806 int
807 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
808 {
809 int error;
810
811 if ((error = zfs_secpolicy_write_perms(name,
812 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
813 return (error);
814
815 return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
816 }
817
818 /* ARGSUSED */
819 static int
820 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
821 {
822 return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
823 }
824
825 /*
826 * Destroying snapshots with delegated permissions requires
827 * descendant mount and destroy permissions.
828 */
829 /* ARGSUSED */
830 static int
831 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
832 {
833 nvlist_t *snaps;
834 nvpair_t *pair, *nextpair;
835 int error = 0;
836
837 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
838 return (SET_ERROR(EINVAL));
839 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
840 pair = nextpair) {
841 nextpair = nvlist_next_nvpair(snaps, pair);
842 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
843 if (error == ENOENT) {
844 /*
845 * Ignore any snapshots that don't exist (we consider
846 * them "already destroyed"). Remove the name from the
847 * nvl here in case the snapshot is created between
848 * now and when we try to destroy it (in which case
849 * we don't want to destroy it since we haven't
850 * checked for permission).
851 */
852 fnvlist_remove_nvpair(snaps, pair);
853 error = 0;
854 }
855 if (error != 0)
856 break;
857 }
858
859 return (error);
860 }
861
862 int
863 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
864 {
865 char parentname[ZFS_MAX_DATASET_NAME_LEN];
866 int error;
867
868 if ((error = zfs_secpolicy_write_perms(from,
869 ZFS_DELEG_PERM_RENAME, cr)) != 0)
870 return (error);
871
872 if ((error = zfs_secpolicy_write_perms(from,
873 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
874 return (error);
875
876 if ((error = zfs_get_parent(to, parentname,
877 sizeof (parentname))) != 0)
878 return (error);
879
880 if ((error = zfs_secpolicy_write_perms(parentname,
881 ZFS_DELEG_PERM_CREATE, cr)) != 0)
882 return (error);
883
884 if ((error = zfs_secpolicy_write_perms(parentname,
885 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
886 return (error);
887
888 return (error);
889 }
890
891 /* ARGSUSED */
892 static int
893 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
894 {
895 return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
896 }
897
898 /* ARGSUSED */
899 static int
900 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
901 {
902 dsl_pool_t *dp;
903 dsl_dataset_t *clone;
904 int error;
905
906 error = zfs_secpolicy_write_perms(zc->zc_name,
907 ZFS_DELEG_PERM_PROMOTE, cr);
908 if (error != 0)
909 return (error);
910
911 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
912 if (error != 0)
913 return (error);
914
915 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
916
917 if (error == 0) {
918 char parentname[ZFS_MAX_DATASET_NAME_LEN];
919 dsl_dataset_t *origin = NULL;
920 dsl_dir_t *dd;
921 dd = clone->ds_dir;
922
923 error = dsl_dataset_hold_obj(dd->dd_pool,
924 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
925 if (error != 0) {
926 dsl_dataset_rele(clone, FTAG);
927 dsl_pool_rele(dp, FTAG);
928 return (error);
929 }
930
931 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
932 ZFS_DELEG_PERM_MOUNT, cr);
933
934 dsl_dataset_name(origin, parentname);
935 if (error == 0) {
936 error = zfs_secpolicy_write_perms_ds(parentname, origin,
937 ZFS_DELEG_PERM_PROMOTE, cr);
938 }
939 dsl_dataset_rele(clone, FTAG);
940 dsl_dataset_rele(origin, FTAG);
941 }
942 dsl_pool_rele(dp, FTAG);
943 return (error);
944 }
945
946 /* ARGSUSED */
947 static int
948 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
949 {
950 int error;
951
952 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
953 ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
954 return (error);
955
956 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
957 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
958 return (error);
959
960 return (zfs_secpolicy_write_perms(zc->zc_name,
961 ZFS_DELEG_PERM_CREATE, cr));
962 }
963
964 /* ARGSUSED */
965 static int
966 zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
967 {
968 return (zfs_secpolicy_recv(zc, innvl, cr));
969 }
970
971 int
972 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
973 {
974 return (zfs_secpolicy_write_perms(name,
975 ZFS_DELEG_PERM_SNAPSHOT, cr));
976 }
977
978 /*
979 * Check for permission to create each snapshot in the nvlist.
980 */
981 /* ARGSUSED */
982 static int
983 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
984 {
985 nvlist_t *snaps;
986 int error = 0;
987 nvpair_t *pair;
988
989 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
990 return (SET_ERROR(EINVAL));
991 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
992 pair = nvlist_next_nvpair(snaps, pair)) {
993 char *name = nvpair_name(pair);
994 char *atp = strchr(name, '@');
995
996 if (atp == NULL) {
997 error = SET_ERROR(EINVAL);
998 break;
999 }
1000 *atp = '\0';
1001 error = zfs_secpolicy_snapshot_perms(name, cr);
1002 *atp = '@';
1003 if (error != 0)
1004 break;
1005 }
1006 return (error);
1007 }
1008
1009 /*
1010 * Check for permission to create each snapshot in the nvlist.
1011 */
1012 /* ARGSUSED */
1013 static int
1014 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1015 {
1016 int error = 0;
1017 nvpair_t *pair;
1018
1019 for (pair = nvlist_next_nvpair(innvl, NULL);
1020 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1021 char *name = nvpair_name(pair);
1022 char *hashp = strchr(name, '#');
1023
1024 if (hashp == NULL) {
1025 error = SET_ERROR(EINVAL);
1026 break;
1027 }
1028 *hashp = '\0';
1029 error = zfs_secpolicy_write_perms(name,
1030 ZFS_DELEG_PERM_BOOKMARK, cr);
1031 *hashp = '#';
1032 if (error != 0)
1033 break;
1034 }
1035 return (error);
1036 }
1037
1038 /* ARGSUSED */
1039 static int
1040 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1041 {
1042 nvpair_t *pair, *nextpair;
1043 int error = 0;
1044
1045 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1046 pair = nextpair) {
1047 char *name = nvpair_name(pair);
1048 char *hashp = strchr(name, '#');
1049 nextpair = nvlist_next_nvpair(innvl, pair);
1050
1051 if (hashp == NULL) {
1052 error = SET_ERROR(EINVAL);
1053 break;
1054 }
1055
1056 *hashp = '\0';
1057 error = zfs_secpolicy_write_perms(name,
1058 ZFS_DELEG_PERM_DESTROY, cr);
1059 *hashp = '#';
1060 if (error == ENOENT) {
1061 /*
1062 * Ignore any filesystems that don't exist (we consider
1063 * their bookmarks "already destroyed"). Remove
1064 * the name from the nvl here in case the filesystem
1065 * is created between now and when we try to destroy
1066 * the bookmark (in which case we don't want to
1067 * destroy it since we haven't checked for permission).
1068 */
1069 fnvlist_remove_nvpair(innvl, pair);
1070 error = 0;
1071 }
1072 if (error != 0)
1073 break;
1074 }
1075
1076 return (error);
1077 }
1078
1079 /* ARGSUSED */
1080 static int
1081 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1082 {
1083 /*
1084 * Even root must have a proper TSD so that we know what pool
1085 * to log to.
1086 */
1087 if (tsd_get(zfs_allow_log_key) == NULL)
1088 return (SET_ERROR(EPERM));
1089 return (0);
1090 }
1091
1092 static int
1093 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1094 {
1095 char parentname[ZFS_MAX_DATASET_NAME_LEN];
1096 int error;
1097 char *origin;
1098
1099 if ((error = zfs_get_parent(zc->zc_name, parentname,
1100 sizeof (parentname))) != 0)
1101 return (error);
1102
1103 if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1104 (error = zfs_secpolicy_write_perms(origin,
1105 ZFS_DELEG_PERM_CLONE, cr)) != 0)
1106 return (error);
1107
1108 if ((error = zfs_secpolicy_write_perms(parentname,
1109 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1110 return (error);
1111
1112 return (zfs_secpolicy_write_perms(parentname,
1113 ZFS_DELEG_PERM_MOUNT, cr));
1114 }
1115
1116 /*
1117 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
1118 * SYS_CONFIG privilege, which is not available in a local zone.
1119 */
1120 /* ARGSUSED */
1121 static int
1122 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1123 {
1124 if (secpolicy_sys_config(cr, B_FALSE) != 0)
1125 return (SET_ERROR(EPERM));
1126
1127 return (0);
1128 }
1129
1130 /*
1131 * Policy for object to name lookups.
1132 */
1133 /* ARGSUSED */
1134 static int
1135 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1136 {
1137 int error;
1138
1139 if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1140 return (0);
1141
1142 error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1143 return (error);
1144 }
1145
1146 /*
1147 * Policy for fault injection. Requires all privileges.
1148 */
1149 /* ARGSUSED */
1150 static int
1151 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1152 {
1153 return (secpolicy_zinject(cr));
1154 }
1155
1156 /* ARGSUSED */
1157 static int
1158 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1159 {
1160 zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1161
1162 if (prop == ZPROP_INVAL) {
1163 if (!zfs_prop_user(zc->zc_value))
1164 return (SET_ERROR(EINVAL));
1165 return (zfs_secpolicy_write_perms(zc->zc_name,
1166 ZFS_DELEG_PERM_USERPROP, cr));
1167 } else {
1168 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1169 NULL, cr));
1170 }
1171 }
1172
1173 static int
1174 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1175 {
1176 int err = zfs_secpolicy_read(zc, innvl, cr);
1177 if (err)
1178 return (err);
1179
1180 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1181 return (SET_ERROR(EINVAL));
1182
1183 if (zc->zc_value[0] == 0) {
1184 /*
1185 * They are asking about a posix uid/gid. If it's
1186 * themself, allow it.
1187 */
1188 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1189 zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1190 zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1191 zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1192 if (zc->zc_guid == crgetuid(cr))
1193 return (0);
1194 } else {
1195 if (groupmember(zc->zc_guid, cr))
1196 return (0);
1197 }
1198 }
1199
1200 return (zfs_secpolicy_write_perms(zc->zc_name,
1201 userquota_perms[zc->zc_objset_type], cr));
1202 }
1203
1204 static int
1205 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1206 {
1207 int err = zfs_secpolicy_read(zc, innvl, cr);
1208 if (err)
1209 return (err);
1210
1211 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1212 return (SET_ERROR(EINVAL));
1213
1214 return (zfs_secpolicy_write_perms(zc->zc_name,
1215 userquota_perms[zc->zc_objset_type], cr));
1216 }
1217
1218 /* ARGSUSED */
1219 static int
1220 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1221 {
1222 return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1223 NULL, cr));
1224 }
1225
1226 /* ARGSUSED */
1227 static int
1228 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1229 {
1230 nvpair_t *pair;
1231 nvlist_t *holds;
1232 int error;
1233
1234 error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1235 if (error != 0)
1236 return (SET_ERROR(EINVAL));
1237
1238 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1239 pair = nvlist_next_nvpair(holds, pair)) {
1240 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1241 error = dmu_fsname(nvpair_name(pair), fsname);
1242 if (error != 0)
1243 return (error);
1244 error = zfs_secpolicy_write_perms(fsname,
1245 ZFS_DELEG_PERM_HOLD, cr);
1246 if (error != 0)
1247 return (error);
1248 }
1249 return (0);
1250 }
1251
1252 /* ARGSUSED */
1253 static int
1254 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1255 {
1256 nvpair_t *pair;
1257 int error;
1258
1259 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1260 pair = nvlist_next_nvpair(innvl, pair)) {
1261 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1262 error = dmu_fsname(nvpair_name(pair), fsname);
1263 if (error != 0)
1264 return (error);
1265 error = zfs_secpolicy_write_perms(fsname,
1266 ZFS_DELEG_PERM_RELEASE, cr);
1267 if (error != 0)
1268 return (error);
1269 }
1270 return (0);
1271 }
1272
1273 /*
1274 * Policy for allowing temporary snapshots to be taken or released
1275 */
1276 static int
1277 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1278 {
1279 /*
1280 * A temporary snapshot is the same as a snapshot,
1281 * hold, destroy and release all rolled into one.
1282 * Delegated diff alone is sufficient that we allow this.
1283 */
1284 int error;
1285
1286 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1287 ZFS_DELEG_PERM_DIFF, cr)) == 0)
1288 return (0);
1289
1290 error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1291 if (error == 0)
1292 error = zfs_secpolicy_hold(zc, innvl, cr);
1293 if (error == 0)
1294 error = zfs_secpolicy_release(zc, innvl, cr);
1295 if (error == 0)
1296 error = zfs_secpolicy_destroy(zc, innvl, cr);
1297 return (error);
1298 }
1299
1300 /*
1301 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1302 */
1303 static int
1304 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1305 {
1306 char *packed;
1307 int error;
1308 nvlist_t *list = NULL;
1309
1310 /*
1311 * Read in and unpack the user-supplied nvlist.
1312 */
1313 if (size == 0)
1314 return (SET_ERROR(EINVAL));
1315
1316 packed = vmem_alloc(size, KM_SLEEP);
1317
1318 if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1319 iflag)) != 0) {
1320 vmem_free(packed, size);
1321 return (SET_ERROR(EFAULT));
1322 }
1323
1324 if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1325 vmem_free(packed, size);
1326 return (error);
1327 }
1328
1329 vmem_free(packed, size);
1330
1331 *nvp = list;
1332 return (0);
1333 }
1334
1335 /*
1336 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1337 * Entries will be removed from the end of the nvlist, and one int32 entry
1338 * named "N_MORE_ERRORS" will be added indicating how many entries were
1339 * removed.
1340 */
1341 static int
1342 nvlist_smush(nvlist_t *errors, size_t max)
1343 {
1344 size_t size;
1345
1346 size = fnvlist_size(errors);
1347
1348 if (size > max) {
1349 nvpair_t *more_errors;
1350 int n = 0;
1351
1352 if (max < 1024)
1353 return (SET_ERROR(ENOMEM));
1354
1355 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1356 more_errors = nvlist_prev_nvpair(errors, NULL);
1357
1358 do {
1359 nvpair_t *pair = nvlist_prev_nvpair(errors,
1360 more_errors);
1361 fnvlist_remove_nvpair(errors, pair);
1362 n++;
1363 size = fnvlist_size(errors);
1364 } while (size > max);
1365
1366 fnvlist_remove_nvpair(errors, more_errors);
1367 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1368 ASSERT3U(fnvlist_size(errors), <=, max);
1369 }
1370
1371 return (0);
1372 }
1373
1374 static int
1375 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1376 {
1377 char *packed = NULL;
1378 int error = 0;
1379 size_t size;
1380
1381 size = fnvlist_size(nvl);
1382
1383 if (size > zc->zc_nvlist_dst_size) {
1384 error = SET_ERROR(ENOMEM);
1385 } else {
1386 packed = fnvlist_pack(nvl, &size);
1387 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1388 size, zc->zc_iflags) != 0)
1389 error = SET_ERROR(EFAULT);
1390 fnvlist_pack_free(packed, size);
1391 }
1392
1393 zc->zc_nvlist_dst_size = size;
1394 zc->zc_nvlist_dst_filled = B_TRUE;
1395 return (error);
1396 }
1397
1398 static int
1399 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1400 {
1401 objset_t *os;
1402 int error;
1403
1404 error = dmu_objset_hold(dsname, FTAG, &os);
1405 if (error != 0)
1406 return (error);
1407 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1408 dmu_objset_rele(os, FTAG);
1409 return (SET_ERROR(EINVAL));
1410 }
1411
1412 mutex_enter(&os->os_user_ptr_lock);
1413 *zfvp = dmu_objset_get_user(os);
1414 /* bump s_active only when non-zero to prevent umount race */
1415 if (*zfvp == NULL || (*zfvp)->z_sb == NULL ||
1416 !atomic_inc_not_zero(&((*zfvp)->z_sb->s_active))) {
1417 error = SET_ERROR(ESRCH);
1418 }
1419 mutex_exit(&os->os_user_ptr_lock);
1420 dmu_objset_rele(os, FTAG);
1421 return (error);
1422 }
1423
1424 /*
1425 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1426 * case its z_sb will be NULL, and it will be opened as the owner.
1427 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1428 * which prevents all inode ops from running.
1429 */
1430 static int
1431 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1432 {
1433 int error = 0;
1434
1435 if (getzfsvfs(name, zfvp) != 0)
1436 error = zfsvfs_create(name, zfvp);
1437 if (error == 0) {
1438 rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1439 RW_READER, tag);
1440 if ((*zfvp)->z_unmounted) {
1441 /*
1442 * XXX we could probably try again, since the unmounting
1443 * thread should be just about to disassociate the
1444 * objset from the zfsvfs.
1445 */
1446 rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1447 return (SET_ERROR(EBUSY));
1448 }
1449 }
1450 return (error);
1451 }
1452
1453 static void
1454 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1455 {
1456 rrm_exit(&zfsvfs->z_teardown_lock, tag);
1457
1458 if (zfsvfs->z_sb) {
1459 deactivate_super(zfsvfs->z_sb);
1460 } else {
1461 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1462 zfsvfs_free(zfsvfs);
1463 }
1464 }
1465
1466 static int
1467 zfs_ioc_pool_create(zfs_cmd_t *zc)
1468 {
1469 int error;
1470 nvlist_t *config, *props = NULL;
1471 nvlist_t *rootprops = NULL;
1472 nvlist_t *zplprops = NULL;
1473
1474 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1475 zc->zc_iflags, &config)))
1476 return (error);
1477
1478 if (zc->zc_nvlist_src_size != 0 && (error =
1479 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1480 zc->zc_iflags, &props))) {
1481 nvlist_free(config);
1482 return (error);
1483 }
1484
1485 if (props) {
1486 nvlist_t *nvl = NULL;
1487 uint64_t version = SPA_VERSION;
1488
1489 (void) nvlist_lookup_uint64(props,
1490 zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1491 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1492 error = SET_ERROR(EINVAL);
1493 goto pool_props_bad;
1494 }
1495 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1496 if (nvl) {
1497 error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1498 if (error != 0) {
1499 nvlist_free(config);
1500 nvlist_free(props);
1501 return (error);
1502 }
1503 (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1504 }
1505 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1506 error = zfs_fill_zplprops_root(version, rootprops,
1507 zplprops, NULL);
1508 if (error != 0)
1509 goto pool_props_bad;
1510 }
1511
1512 error = spa_create(zc->zc_name, config, props, zplprops);
1513
1514 /*
1515 * Set the remaining root properties
1516 */
1517 if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1518 ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1519 (void) spa_destroy(zc->zc_name);
1520
1521 pool_props_bad:
1522 nvlist_free(rootprops);
1523 nvlist_free(zplprops);
1524 nvlist_free(config);
1525 nvlist_free(props);
1526
1527 return (error);
1528 }
1529
1530 static int
1531 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1532 {
1533 int error;
1534 zfs_log_history(zc);
1535 error = spa_destroy(zc->zc_name);
1536
1537 return (error);
1538 }
1539
1540 static int
1541 zfs_ioc_pool_import(zfs_cmd_t *zc)
1542 {
1543 nvlist_t *config, *props = NULL;
1544 uint64_t guid;
1545 int error;
1546
1547 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1548 zc->zc_iflags, &config)) != 0)
1549 return (error);
1550
1551 if (zc->zc_nvlist_src_size != 0 && (error =
1552 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1553 zc->zc_iflags, &props))) {
1554 nvlist_free(config);
1555 return (error);
1556 }
1557
1558 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1559 guid != zc->zc_guid)
1560 error = SET_ERROR(EINVAL);
1561 else
1562 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1563
1564 if (zc->zc_nvlist_dst != 0) {
1565 int err;
1566
1567 if ((err = put_nvlist(zc, config)) != 0)
1568 error = err;
1569 }
1570
1571 nvlist_free(config);
1572 nvlist_free(props);
1573
1574 return (error);
1575 }
1576
1577 static int
1578 zfs_ioc_pool_export(zfs_cmd_t *zc)
1579 {
1580 int error;
1581 boolean_t force = (boolean_t)zc->zc_cookie;
1582 boolean_t hardforce = (boolean_t)zc->zc_guid;
1583
1584 zfs_log_history(zc);
1585 error = spa_export(zc->zc_name, NULL, force, hardforce);
1586
1587 return (error);
1588 }
1589
1590 static int
1591 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1592 {
1593 nvlist_t *configs;
1594 int error;
1595
1596 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1597 return (SET_ERROR(EEXIST));
1598
1599 error = put_nvlist(zc, configs);
1600
1601 nvlist_free(configs);
1602
1603 return (error);
1604 }
1605
1606 /*
1607 * inputs:
1608 * zc_name name of the pool
1609 *
1610 * outputs:
1611 * zc_cookie real errno
1612 * zc_nvlist_dst config nvlist
1613 * zc_nvlist_dst_size size of config nvlist
1614 */
1615 static int
1616 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1617 {
1618 nvlist_t *config;
1619 int error;
1620 int ret = 0;
1621
1622 error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1623 sizeof (zc->zc_value));
1624
1625 if (config != NULL) {
1626 ret = put_nvlist(zc, config);
1627 nvlist_free(config);
1628
1629 /*
1630 * The config may be present even if 'error' is non-zero.
1631 * In this case we return success, and preserve the real errno
1632 * in 'zc_cookie'.
1633 */
1634 zc->zc_cookie = error;
1635 } else {
1636 ret = error;
1637 }
1638
1639 return (ret);
1640 }
1641
1642 /*
1643 * Try to import the given pool, returning pool stats as appropriate so that
1644 * user land knows which devices are available and overall pool health.
1645 */
1646 static int
1647 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1648 {
1649 nvlist_t *tryconfig, *config;
1650 int error;
1651
1652 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1653 zc->zc_iflags, &tryconfig)) != 0)
1654 return (error);
1655
1656 config = spa_tryimport(tryconfig);
1657
1658 nvlist_free(tryconfig);
1659
1660 if (config == NULL)
1661 return (SET_ERROR(EINVAL));
1662
1663 error = put_nvlist(zc, config);
1664 nvlist_free(config);
1665
1666 return (error);
1667 }
1668
1669 /*
1670 * inputs:
1671 * zc_name name of the pool
1672 * zc_cookie scan func (pool_scan_func_t)
1673 */
1674 static int
1675 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1676 {
1677 spa_t *spa;
1678 int error;
1679
1680 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1681 return (error);
1682
1683 if (zc->zc_cookie == POOL_SCAN_NONE)
1684 error = spa_scan_stop(spa);
1685 else
1686 error = spa_scan(spa, zc->zc_cookie);
1687
1688 spa_close(spa, FTAG);
1689
1690 return (error);
1691 }
1692
1693 static int
1694 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1695 {
1696 spa_t *spa;
1697 int error;
1698
1699 error = spa_open(zc->zc_name, &spa, FTAG);
1700 if (error == 0) {
1701 spa_freeze(spa);
1702 spa_close(spa, FTAG);
1703 }
1704 return (error);
1705 }
1706
1707 static int
1708 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1709 {
1710 spa_t *spa;
1711 int error;
1712
1713 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1714 return (error);
1715
1716 if (zc->zc_cookie < spa_version(spa) ||
1717 !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1718 spa_close(spa, FTAG);
1719 return (SET_ERROR(EINVAL));
1720 }
1721
1722 spa_upgrade(spa, zc->zc_cookie);
1723 spa_close(spa, FTAG);
1724
1725 return (error);
1726 }
1727
1728 static int
1729 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1730 {
1731 spa_t *spa;
1732 char *hist_buf;
1733 uint64_t size;
1734 int error;
1735
1736 if ((size = zc->zc_history_len) == 0)
1737 return (SET_ERROR(EINVAL));
1738
1739 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1740 return (error);
1741
1742 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1743 spa_close(spa, FTAG);
1744 return (SET_ERROR(ENOTSUP));
1745 }
1746
1747 hist_buf = vmem_alloc(size, KM_SLEEP);
1748 if ((error = spa_history_get(spa, &zc->zc_history_offset,
1749 &zc->zc_history_len, hist_buf)) == 0) {
1750 error = ddi_copyout(hist_buf,
1751 (void *)(uintptr_t)zc->zc_history,
1752 zc->zc_history_len, zc->zc_iflags);
1753 }
1754
1755 spa_close(spa, FTAG);
1756 vmem_free(hist_buf, size);
1757 return (error);
1758 }
1759
1760 static int
1761 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1762 {
1763 spa_t *spa;
1764 int error;
1765
1766 error = spa_open(zc->zc_name, &spa, FTAG);
1767 if (error == 0) {
1768 error = spa_change_guid(spa);
1769 spa_close(spa, FTAG);
1770 }
1771 return (error);
1772 }
1773
1774 static int
1775 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1776 {
1777 return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1778 }
1779
1780 /*
1781 * inputs:
1782 * zc_name name of filesystem
1783 * zc_obj object to find
1784 *
1785 * outputs:
1786 * zc_value name of object
1787 */
1788 static int
1789 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1790 {
1791 objset_t *os;
1792 int error;
1793
1794 /* XXX reading from objset not owned */
1795 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1796 return (error);
1797 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1798 dmu_objset_rele(os, FTAG);
1799 return (SET_ERROR(EINVAL));
1800 }
1801 error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1802 sizeof (zc->zc_value));
1803 dmu_objset_rele(os, FTAG);
1804
1805 return (error);
1806 }
1807
1808 /*
1809 * inputs:
1810 * zc_name name of filesystem
1811 * zc_obj object to find
1812 *
1813 * outputs:
1814 * zc_stat stats on object
1815 * zc_value path to object
1816 */
1817 static int
1818 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1819 {
1820 objset_t *os;
1821 int error;
1822
1823 /* XXX reading from objset not owned */
1824 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1825 return (error);
1826 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1827 dmu_objset_rele(os, FTAG);
1828 return (SET_ERROR(EINVAL));
1829 }
1830 error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1831 sizeof (zc->zc_value));
1832 dmu_objset_rele(os, FTAG);
1833
1834 return (error);
1835 }
1836
1837 static int
1838 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1839 {
1840 spa_t *spa;
1841 int error;
1842 nvlist_t *config;
1843
1844 error = spa_open(zc->zc_name, &spa, FTAG);
1845 if (error != 0)
1846 return (error);
1847
1848 error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1849 zc->zc_iflags, &config);
1850 if (error == 0) {
1851 error = spa_vdev_add(spa, config);
1852 nvlist_free(config);
1853 }
1854 spa_close(spa, FTAG);
1855 return (error);
1856 }
1857
1858 /*
1859 * inputs:
1860 * zc_name name of the pool
1861 * zc_nvlist_conf nvlist of devices to remove
1862 * zc_cookie to stop the remove?
1863 */
1864 static int
1865 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1866 {
1867 spa_t *spa;
1868 int error;
1869
1870 error = spa_open(zc->zc_name, &spa, FTAG);
1871 if (error != 0)
1872 return (error);
1873 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1874 spa_close(spa, FTAG);
1875 return (error);
1876 }
1877
1878 static int
1879 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1880 {
1881 spa_t *spa;
1882 int error;
1883 vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1884
1885 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1886 return (error);
1887 switch (zc->zc_cookie) {
1888 case VDEV_STATE_ONLINE:
1889 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1890 break;
1891
1892 case VDEV_STATE_OFFLINE:
1893 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1894 break;
1895
1896 case VDEV_STATE_FAULTED:
1897 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1898 zc->zc_obj != VDEV_AUX_EXTERNAL)
1899 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1900
1901 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1902 break;
1903
1904 case VDEV_STATE_DEGRADED:
1905 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1906 zc->zc_obj != VDEV_AUX_EXTERNAL)
1907 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1908
1909 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1910 break;
1911
1912 default:
1913 error = SET_ERROR(EINVAL);
1914 }
1915 zc->zc_cookie = newstate;
1916 spa_close(spa, FTAG);
1917 return (error);
1918 }
1919
1920 static int
1921 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1922 {
1923 spa_t *spa;
1924 int replacing = zc->zc_cookie;
1925 nvlist_t *config;
1926 int error;
1927
1928 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1929 return (error);
1930
1931 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1932 zc->zc_iflags, &config)) == 0) {
1933 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1934 nvlist_free(config);
1935 }
1936
1937 spa_close(spa, FTAG);
1938 return (error);
1939 }
1940
1941 static int
1942 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1943 {
1944 spa_t *spa;
1945 int error;
1946
1947 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1948 return (error);
1949
1950 error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1951
1952 spa_close(spa, FTAG);
1953 return (error);
1954 }
1955
1956 static int
1957 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1958 {
1959 spa_t *spa;
1960 nvlist_t *config, *props = NULL;
1961 int error;
1962 boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1963
1964 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1965 return (error);
1966
1967 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1968 zc->zc_iflags, &config))) {
1969 spa_close(spa, FTAG);
1970 return (error);
1971 }
1972
1973 if (zc->zc_nvlist_src_size != 0 && (error =
1974 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1975 zc->zc_iflags, &props))) {
1976 spa_close(spa, FTAG);
1977 nvlist_free(config);
1978 return (error);
1979 }
1980
1981 error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1982
1983 spa_close(spa, FTAG);
1984
1985 nvlist_free(config);
1986 nvlist_free(props);
1987
1988 return (error);
1989 }
1990
1991 static int
1992 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1993 {
1994 spa_t *spa;
1995 char *path = zc->zc_value;
1996 uint64_t guid = zc->zc_guid;
1997 int error;
1998
1999 error = spa_open(zc->zc_name, &spa, FTAG);
2000 if (error != 0)
2001 return (error);
2002
2003 error = spa_vdev_setpath(spa, guid, path);
2004 spa_close(spa, FTAG);
2005 return (error);
2006 }
2007
2008 static int
2009 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2010 {
2011 spa_t *spa;
2012 char *fru = zc->zc_value;
2013 uint64_t guid = zc->zc_guid;
2014 int error;
2015
2016 error = spa_open(zc->zc_name, &spa, FTAG);
2017 if (error != 0)
2018 return (error);
2019
2020 error = spa_vdev_setfru(spa, guid, fru);
2021 spa_close(spa, FTAG);
2022 return (error);
2023 }
2024
2025 static int
2026 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2027 {
2028 int error = 0;
2029 nvlist_t *nv;
2030
2031 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2032
2033 if (zc->zc_nvlist_dst != 0 &&
2034 (error = dsl_prop_get_all(os, &nv)) == 0) {
2035 dmu_objset_stats(os, nv);
2036 /*
2037 * NB: zvol_get_stats() will read the objset contents,
2038 * which we aren't supposed to do with a
2039 * DS_MODE_USER hold, because it could be
2040 * inconsistent. So this is a bit of a workaround...
2041 * XXX reading with out owning
2042 */
2043 if (!zc->zc_objset_stats.dds_inconsistent &&
2044 dmu_objset_type(os) == DMU_OST_ZVOL) {
2045 error = zvol_get_stats(os, nv);
2046 if (error == EIO) {
2047 nvlist_free(nv);
2048 return (error);
2049 }
2050 VERIFY0(error);
2051 }
2052 if (error == 0)
2053 error = put_nvlist(zc, nv);
2054 nvlist_free(nv);
2055 }
2056
2057 return (error);
2058 }
2059
2060 /*
2061 * inputs:
2062 * zc_name name of filesystem
2063 * zc_nvlist_dst_size size of buffer for property nvlist
2064 *
2065 * outputs:
2066 * zc_objset_stats stats
2067 * zc_nvlist_dst property nvlist
2068 * zc_nvlist_dst_size size of property nvlist
2069 */
2070 static int
2071 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2072 {
2073 objset_t *os;
2074 int error;
2075
2076 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2077 if (error == 0) {
2078 error = zfs_ioc_objset_stats_impl(zc, os);
2079 dmu_objset_rele(os, FTAG);
2080 }
2081
2082 return (error);
2083 }
2084
2085 /*
2086 * inputs:
2087 * zc_name name of filesystem
2088 * zc_nvlist_dst_size size of buffer for property nvlist
2089 *
2090 * outputs:
2091 * zc_nvlist_dst received property nvlist
2092 * zc_nvlist_dst_size size of received property nvlist
2093 *
2094 * Gets received properties (distinct from local properties on or after
2095 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2096 * local property values.
2097 */
2098 static int
2099 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2100 {
2101 int error = 0;
2102 nvlist_t *nv;
2103
2104 /*
2105 * Without this check, we would return local property values if the
2106 * caller has not already received properties on or after
2107 * SPA_VERSION_RECVD_PROPS.
2108 */
2109 if (!dsl_prop_get_hasrecvd(zc->zc_name))
2110 return (SET_ERROR(ENOTSUP));
2111
2112 if (zc->zc_nvlist_dst != 0 &&
2113 (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2114 error = put_nvlist(zc, nv);
2115 nvlist_free(nv);
2116 }
2117
2118 return (error);
2119 }
2120
2121 static int
2122 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2123 {
2124 uint64_t value;
2125 int error;
2126
2127 /*
2128 * zfs_get_zplprop() will either find a value or give us
2129 * the default value (if there is one).
2130 */
2131 if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2132 return (error);
2133 VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2134 return (0);
2135 }
2136
2137 /*
2138 * inputs:
2139 * zc_name name of filesystem
2140 * zc_nvlist_dst_size size of buffer for zpl property nvlist
2141 *
2142 * outputs:
2143 * zc_nvlist_dst zpl property nvlist
2144 * zc_nvlist_dst_size size of zpl property nvlist
2145 */
2146 static int
2147 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2148 {
2149 objset_t *os;
2150 int err;
2151
2152 /* XXX reading without owning */
2153 if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2154 return (err);
2155
2156 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2157
2158 /*
2159 * NB: nvl_add_zplprop() will read the objset contents,
2160 * which we aren't supposed to do with a DS_MODE_USER
2161 * hold, because it could be inconsistent.
2162 */
2163 if (zc->zc_nvlist_dst != 0 &&
2164 !zc->zc_objset_stats.dds_inconsistent &&
2165 dmu_objset_type(os) == DMU_OST_ZFS) {
2166 nvlist_t *nv;
2167
2168 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2169 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2170 (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2171 (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2172 (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2173 err = put_nvlist(zc, nv);
2174 nvlist_free(nv);
2175 } else {
2176 err = SET_ERROR(ENOENT);
2177 }
2178 dmu_objset_rele(os, FTAG);
2179 return (err);
2180 }
2181
2182 boolean_t
2183 dataset_name_hidden(const char *name)
2184 {
2185 /*
2186 * Skip over datasets that are not visible in this zone,
2187 * internal datasets (which have a $ in their name), and
2188 * temporary datasets (which have a % in their name).
2189 */
2190 if (strchr(name, '$') != NULL)
2191 return (B_TRUE);
2192 if (strchr(name, '%') != NULL)
2193 return (B_TRUE);
2194 if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2195 return (B_TRUE);
2196 return (B_FALSE);
2197 }
2198
2199 /*
2200 * inputs:
2201 * zc_name name of filesystem
2202 * zc_cookie zap cursor
2203 * zc_nvlist_dst_size size of buffer for property nvlist
2204 *
2205 * outputs:
2206 * zc_name name of next filesystem
2207 * zc_cookie zap cursor
2208 * zc_objset_stats stats
2209 * zc_nvlist_dst property nvlist
2210 * zc_nvlist_dst_size size of property nvlist
2211 */
2212 static int
2213 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2214 {
2215 objset_t *os;
2216 int error;
2217 char *p;
2218 size_t orig_len = strlen(zc->zc_name);
2219
2220 top:
2221 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2222 if (error == ENOENT)
2223 error = SET_ERROR(ESRCH);
2224 return (error);
2225 }
2226
2227 p = strrchr(zc->zc_name, '/');
2228 if (p == NULL || p[1] != '\0')
2229 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2230 p = zc->zc_name + strlen(zc->zc_name);
2231
2232 do {
2233 error = dmu_dir_list_next(os,
2234 sizeof (zc->zc_name) - (p - zc->zc_name), p,
2235 NULL, &zc->zc_cookie);
2236 if (error == ENOENT)
2237 error = SET_ERROR(ESRCH);
2238 } while (error == 0 && dataset_name_hidden(zc->zc_name));
2239 dmu_objset_rele(os, FTAG);
2240
2241 /*
2242 * If it's an internal dataset (ie. with a '$' in its name),
2243 * don't try to get stats for it, otherwise we'll return ENOENT.
2244 */
2245 if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2246 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2247 if (error == ENOENT) {
2248 /* We lost a race with destroy, get the next one. */
2249 zc->zc_name[orig_len] = '\0';
2250 goto top;
2251 }
2252 }
2253 return (error);
2254 }
2255
2256 /*
2257 * inputs:
2258 * zc_name name of filesystem
2259 * zc_cookie zap cursor
2260 * zc_nvlist_dst_size size of buffer for property nvlist
2261 *
2262 * outputs:
2263 * zc_name name of next snapshot
2264 * zc_objset_stats stats
2265 * zc_nvlist_dst property nvlist
2266 * zc_nvlist_dst_size size of property nvlist
2267 */
2268 static int
2269 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2270 {
2271 objset_t *os;
2272 int error;
2273
2274 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2275 if (error != 0) {
2276 return (error == ENOENT ? ESRCH : error);
2277 }
2278
2279 /*
2280 * A dataset name of maximum length cannot have any snapshots,
2281 * so exit immediately.
2282 */
2283 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2284 ZFS_MAX_DATASET_NAME_LEN) {
2285 dmu_objset_rele(os, FTAG);
2286 return (SET_ERROR(ESRCH));
2287 }
2288
2289 error = dmu_snapshot_list_next(os,
2290 sizeof (zc->zc_name) - strlen(zc->zc_name),
2291 zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2292 NULL);
2293
2294 if (error == 0 && !zc->zc_simple) {
2295 dsl_dataset_t *ds;
2296 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2297
2298 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2299 if (error == 0) {
2300 objset_t *ossnap;
2301
2302 error = dmu_objset_from_ds(ds, &ossnap);
2303 if (error == 0)
2304 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2305 dsl_dataset_rele(ds, FTAG);
2306 }
2307 } else if (error == ENOENT) {
2308 error = SET_ERROR(ESRCH);
2309 }
2310
2311 dmu_objset_rele(os, FTAG);
2312 /* if we failed, undo the @ that we tacked on to zc_name */
2313 if (error != 0)
2314 *strchr(zc->zc_name, '@') = '\0';
2315 return (error);
2316 }
2317
2318 static int
2319 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2320 {
2321 const char *propname = nvpair_name(pair);
2322 uint64_t *valary;
2323 unsigned int vallen;
2324 const char *domain;
2325 char *dash;
2326 zfs_userquota_prop_t type;
2327 uint64_t rid;
2328 uint64_t quota;
2329 zfsvfs_t *zfsvfs;
2330 int err;
2331
2332 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2333 nvlist_t *attrs;
2334 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2335 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2336 &pair) != 0)
2337 return (SET_ERROR(EINVAL));
2338 }
2339
2340 /*
2341 * A correctly constructed propname is encoded as
2342 * userquota@<rid>-<domain>.
2343 */
2344 if ((dash = strchr(propname, '-')) == NULL ||
2345 nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2346 vallen != 3)
2347 return (SET_ERROR(EINVAL));
2348
2349 domain = dash + 1;
2350 type = valary[0];
2351 rid = valary[1];
2352 quota = valary[2];
2353
2354 err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2355 if (err == 0) {
2356 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2357 zfsvfs_rele(zfsvfs, FTAG);
2358 }
2359
2360 return (err);
2361 }
2362
2363 /*
2364 * If the named property is one that has a special function to set its value,
2365 * return 0 on success and a positive error code on failure; otherwise if it is
2366 * not one of the special properties handled by this function, return -1.
2367 *
2368 * XXX: It would be better for callers of the property interface if we handled
2369 * these special cases in dsl_prop.c (in the dsl layer).
2370 */
2371 static int
2372 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2373 nvpair_t *pair)
2374 {
2375 const char *propname = nvpair_name(pair);
2376 zfs_prop_t prop = zfs_name_to_prop(propname);
2377 uint64_t intval;
2378 int err = -1;
2379
2380 if (prop == ZPROP_INVAL) {
2381 if (zfs_prop_userquota(propname))
2382 return (zfs_prop_set_userquota(dsname, pair));
2383 return (-1);
2384 }
2385
2386 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2387 nvlist_t *attrs;
2388 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2389 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2390 &pair) == 0);
2391 }
2392
2393 if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2394 return (-1);
2395
2396 VERIFY(0 == nvpair_value_uint64(pair, &intval));
2397
2398 switch (prop) {
2399 case ZFS_PROP_QUOTA:
2400 err = dsl_dir_set_quota(dsname, source, intval);
2401 break;
2402 case ZFS_PROP_REFQUOTA:
2403 err = dsl_dataset_set_refquota(dsname, source, intval);
2404 break;
2405 case ZFS_PROP_FILESYSTEM_LIMIT:
2406 case ZFS_PROP_SNAPSHOT_LIMIT:
2407 if (intval == UINT64_MAX) {
2408 /* clearing the limit, just do it */
2409 err = 0;
2410 } else {
2411 err = dsl_dir_activate_fs_ss_limit(dsname);
2412 }
2413 /*
2414 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2415 * default path to set the value in the nvlist.
2416 */
2417 if (err == 0)
2418 err = -1;
2419 break;
2420 case ZFS_PROP_RESERVATION:
2421 err = dsl_dir_set_reservation(dsname, source, intval);
2422 break;
2423 case ZFS_PROP_REFRESERVATION:
2424 err = dsl_dataset_set_refreservation(dsname, source, intval);
2425 break;
2426 case ZFS_PROP_VOLSIZE:
2427 err = zvol_set_volsize(dsname, intval);
2428 break;
2429 case ZFS_PROP_SNAPDEV:
2430 err = zvol_set_snapdev(dsname, source, intval);
2431 break;
2432 case ZFS_PROP_VERSION:
2433 {
2434 zfsvfs_t *zfsvfs;
2435
2436 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2437 break;
2438
2439 err = zfs_set_version(zfsvfs, intval);
2440 zfsvfs_rele(zfsvfs, FTAG);
2441
2442 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2443 zfs_cmd_t *zc;
2444
2445 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2446 (void) strcpy(zc->zc_name, dsname);
2447 (void) zfs_ioc_userspace_upgrade(zc);
2448 (void) zfs_ioc_userobjspace_upgrade(zc);
2449 kmem_free(zc, sizeof (zfs_cmd_t));
2450 }
2451 break;
2452 }
2453 default:
2454 err = -1;
2455 }
2456
2457 return (err);
2458 }
2459
2460 /*
2461 * This function is best effort. If it fails to set any of the given properties,
2462 * it continues to set as many as it can and returns the last error
2463 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2464 * with the list of names of all the properties that failed along with the
2465 * corresponding error numbers.
2466 *
2467 * If every property is set successfully, zero is returned and errlist is not
2468 * modified.
2469 */
2470 int
2471 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2472 nvlist_t *errlist)
2473 {
2474 nvpair_t *pair;
2475 nvpair_t *propval;
2476 int rv = 0;
2477 uint64_t intval;
2478 char *strval;
2479
2480 nvlist_t *genericnvl = fnvlist_alloc();
2481 nvlist_t *retrynvl = fnvlist_alloc();
2482 retry:
2483 pair = NULL;
2484 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2485 const char *propname = nvpair_name(pair);
2486 zfs_prop_t prop = zfs_name_to_prop(propname);
2487 int err = 0;
2488
2489 /* decode the property value */
2490 propval = pair;
2491 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2492 nvlist_t *attrs;
2493 attrs = fnvpair_value_nvlist(pair);
2494 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2495 &propval) != 0)
2496 err = SET_ERROR(EINVAL);
2497 }
2498
2499 /* Validate value type */
2500 if (err == 0 && prop == ZPROP_INVAL) {
2501 if (zfs_prop_user(propname)) {
2502 if (nvpair_type(propval) != DATA_TYPE_STRING)
2503 err = SET_ERROR(EINVAL);
2504 } else if (zfs_prop_userquota(propname)) {
2505 if (nvpair_type(propval) !=
2506 DATA_TYPE_UINT64_ARRAY)
2507 err = SET_ERROR(EINVAL);
2508 } else {
2509 err = SET_ERROR(EINVAL);
2510 }
2511 } else if (err == 0) {
2512 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2513 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2514 err = SET_ERROR(EINVAL);
2515 } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2516 const char *unused;
2517
2518 intval = fnvpair_value_uint64(propval);
2519
2520 switch (zfs_prop_get_type(prop)) {
2521 case PROP_TYPE_NUMBER:
2522 break;
2523 case PROP_TYPE_STRING:
2524 err = SET_ERROR(EINVAL);
2525 break;
2526 case PROP_TYPE_INDEX:
2527 if (zfs_prop_index_to_string(prop,
2528 intval, &unused) != 0)
2529 err = SET_ERROR(EINVAL);
2530 break;
2531 default:
2532 cmn_err(CE_PANIC,
2533 "unknown property type");
2534 }
2535 } else {
2536 err = SET_ERROR(EINVAL);
2537 }
2538 }
2539
2540 /* Validate permissions */
2541 if (err == 0)
2542 err = zfs_check_settable(dsname, pair, CRED());
2543
2544 if (err == 0) {
2545 err = zfs_prop_set_special(dsname, source, pair);
2546 if (err == -1) {
2547 /*
2548 * For better performance we build up a list of
2549 * properties to set in a single transaction.
2550 */
2551 err = nvlist_add_nvpair(genericnvl, pair);
2552 } else if (err != 0 && nvl != retrynvl) {
2553 /*
2554 * This may be a spurious error caused by
2555 * receiving quota and reservation out of order.
2556 * Try again in a second pass.
2557 */
2558 err = nvlist_add_nvpair(retrynvl, pair);
2559 }
2560 }
2561
2562 if (err != 0) {
2563 if (errlist != NULL)
2564 fnvlist_add_int32(errlist, propname, err);
2565 rv = err;
2566 }
2567 }
2568
2569 if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2570 nvl = retrynvl;
2571 goto retry;
2572 }
2573
2574 if (!nvlist_empty(genericnvl) &&
2575 dsl_props_set(dsname, source, genericnvl) != 0) {
2576 /*
2577 * If this fails, we still want to set as many properties as we
2578 * can, so try setting them individually.
2579 */
2580 pair = NULL;
2581 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2582 const char *propname = nvpair_name(pair);
2583 int err = 0;
2584
2585 propval = pair;
2586 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2587 nvlist_t *attrs;
2588 attrs = fnvpair_value_nvlist(pair);
2589 propval = fnvlist_lookup_nvpair(attrs,
2590 ZPROP_VALUE);
2591 }
2592
2593 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2594 strval = fnvpair_value_string(propval);
2595 err = dsl_prop_set_string(dsname, propname,
2596 source, strval);
2597 } else {
2598 intval = fnvpair_value_uint64(propval);
2599 err = dsl_prop_set_int(dsname, propname, source,
2600 intval);
2601 }
2602
2603 if (err != 0) {
2604 if (errlist != NULL) {
2605 fnvlist_add_int32(errlist, propname,
2606 err);
2607 }
2608 rv = err;
2609 }
2610 }
2611 }
2612 nvlist_free(genericnvl);
2613 nvlist_free(retrynvl);
2614
2615 return (rv);
2616 }
2617
2618 /*
2619 * Check that all the properties are valid user properties.
2620 */
2621 static int
2622 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2623 {
2624 nvpair_t *pair = NULL;
2625 int error = 0;
2626
2627 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2628 const char *propname = nvpair_name(pair);
2629
2630 if (!zfs_prop_user(propname) ||
2631 nvpair_type(pair) != DATA_TYPE_STRING)
2632 return (SET_ERROR(EINVAL));
2633
2634 if ((error = zfs_secpolicy_write_perms(fsname,
2635 ZFS_DELEG_PERM_USERPROP, CRED())))
2636 return (error);
2637
2638 if (strlen(propname) >= ZAP_MAXNAMELEN)
2639 return (SET_ERROR(ENAMETOOLONG));
2640
2641 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2642 return (SET_ERROR(E2BIG));
2643 }
2644 return (0);
2645 }
2646
2647 static void
2648 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2649 {
2650 nvpair_t *pair;
2651
2652 VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2653
2654 pair = NULL;
2655 while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2656 if (nvlist_exists(skipped, nvpair_name(pair)))
2657 continue;
2658
2659 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2660 }
2661 }
2662
2663 static int
2664 clear_received_props(const char *dsname, nvlist_t *props,
2665 nvlist_t *skipped)
2666 {
2667 int err = 0;
2668 nvlist_t *cleared_props = NULL;
2669 props_skip(props, skipped, &cleared_props);
2670 if (!nvlist_empty(cleared_props)) {
2671 /*
2672 * Acts on local properties until the dataset has received
2673 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2674 */
2675 zprop_source_t flags = (ZPROP_SRC_NONE |
2676 (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2677 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2678 }
2679 nvlist_free(cleared_props);
2680 return (err);
2681 }
2682
2683 /*
2684 * inputs:
2685 * zc_name name of filesystem
2686 * zc_value name of property to set
2687 * zc_nvlist_src{_size} nvlist of properties to apply
2688 * zc_cookie received properties flag
2689 *
2690 * outputs:
2691 * zc_nvlist_dst{_size} error for each unapplied received property
2692 */
2693 static int
2694 zfs_ioc_set_prop(zfs_cmd_t *zc)
2695 {
2696 nvlist_t *nvl;
2697 boolean_t received = zc->zc_cookie;
2698 zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2699 ZPROP_SRC_LOCAL);
2700 nvlist_t *errors;
2701 int error;
2702
2703 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2704 zc->zc_iflags, &nvl)) != 0)
2705 return (error);
2706
2707 if (received) {
2708 nvlist_t *origprops;
2709
2710 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2711 (void) clear_received_props(zc->zc_name,
2712 origprops, nvl);
2713 nvlist_free(origprops);
2714 }
2715
2716 error = dsl_prop_set_hasrecvd(zc->zc_name);
2717 }
2718
2719 errors = fnvlist_alloc();
2720 if (error == 0)
2721 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2722
2723 if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2724 (void) put_nvlist(zc, errors);
2725 }
2726
2727 nvlist_free(errors);
2728 nvlist_free(nvl);
2729 return (error);
2730 }
2731
2732 /*
2733 * inputs:
2734 * zc_name name of filesystem
2735 * zc_value name of property to inherit
2736 * zc_cookie revert to received value if TRUE
2737 *
2738 * outputs: none
2739 */
2740 static int
2741 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2742 {
2743 const char *propname = zc->zc_value;
2744 zfs_prop_t prop = zfs_name_to_prop(propname);
2745 boolean_t received = zc->zc_cookie;
2746 zprop_source_t source = (received
2747 ? ZPROP_SRC_NONE /* revert to received value, if any */
2748 : ZPROP_SRC_INHERITED); /* explicitly inherit */
2749
2750 if (received) {
2751 nvlist_t *dummy;
2752 nvpair_t *pair;
2753 zprop_type_t type;
2754 int err;
2755
2756 /*
2757 * zfs_prop_set_special() expects properties in the form of an
2758 * nvpair with type info.
2759 */
2760 if (prop == ZPROP_INVAL) {
2761 if (!zfs_prop_user(propname))
2762 return (SET_ERROR(EINVAL));
2763
2764 type = PROP_TYPE_STRING;
2765 } else if (prop == ZFS_PROP_VOLSIZE ||
2766 prop == ZFS_PROP_VERSION) {
2767 return (SET_ERROR(EINVAL));
2768 } else {
2769 type = zfs_prop_get_type(prop);
2770 }
2771
2772 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2773
2774 switch (type) {
2775 case PROP_TYPE_STRING:
2776 VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2777 break;
2778 case PROP_TYPE_NUMBER:
2779 case PROP_TYPE_INDEX:
2780 VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2781 break;
2782 default:
2783 nvlist_free(dummy);
2784 return (SET_ERROR(EINVAL));
2785 }
2786
2787 pair = nvlist_next_nvpair(dummy, NULL);
2788 if (pair == NULL) {
2789 nvlist_free(dummy);
2790 return (SET_ERROR(EINVAL));
2791 }
2792 err = zfs_prop_set_special(zc->zc_name, source, pair);
2793 nvlist_free(dummy);
2794 if (err != -1)
2795 return (err); /* special property already handled */
2796 } else {
2797 /*
2798 * Only check this in the non-received case. We want to allow
2799 * 'inherit -S' to revert non-inheritable properties like quota
2800 * and reservation to the received or default values even though
2801 * they are not considered inheritable.
2802 */
2803 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2804 return (SET_ERROR(EINVAL));
2805 }
2806
2807 /* property name has been validated by zfs_secpolicy_inherit_prop() */
2808 return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2809 }
2810
2811 static int
2812 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2813 {
2814 nvlist_t *props;
2815 spa_t *spa;
2816 int error;
2817 nvpair_t *pair;
2818
2819 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2820 zc->zc_iflags, &props)))
2821 return (error);
2822
2823 /*
2824 * If the only property is the configfile, then just do a spa_lookup()
2825 * to handle the faulted case.
2826 */
2827 pair = nvlist_next_nvpair(props, NULL);
2828 if (pair != NULL && strcmp(nvpair_name(pair),
2829 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2830 nvlist_next_nvpair(props, pair) == NULL) {
2831 mutex_enter(&spa_namespace_lock);
2832 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2833 spa_configfile_set(spa, props, B_FALSE);
2834 spa_config_sync(spa, B_FALSE, B_TRUE);
2835 }
2836 mutex_exit(&spa_namespace_lock);
2837 if (spa != NULL) {
2838 nvlist_free(props);
2839 return (0);
2840 }
2841 }
2842
2843 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2844 nvlist_free(props);
2845 return (error);
2846 }
2847
2848 error = spa_prop_set(spa, props);
2849
2850 nvlist_free(props);
2851 spa_close(spa, FTAG);
2852
2853 return (error);
2854 }
2855
2856 static int
2857 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2858 {
2859 spa_t *spa;
2860 int error;
2861 nvlist_t *nvp = NULL;
2862
2863 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2864 /*
2865 * If the pool is faulted, there may be properties we can still
2866 * get (such as altroot and cachefile), so attempt to get them
2867 * anyway.
2868 */
2869 mutex_enter(&spa_namespace_lock);
2870 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2871 error = spa_prop_get(spa, &nvp);
2872 mutex_exit(&spa_namespace_lock);
2873 } else {
2874 error = spa_prop_get(spa, &nvp);
2875 spa_close(spa, FTAG);
2876 }
2877
2878 if (error == 0 && zc->zc_nvlist_dst != 0)
2879 error = put_nvlist(zc, nvp);
2880 else
2881 error = SET_ERROR(EFAULT);
2882
2883 nvlist_free(nvp);
2884 return (error);
2885 }
2886
2887 /*
2888 * inputs:
2889 * zc_name name of filesystem
2890 * zc_nvlist_src{_size} nvlist of delegated permissions
2891 * zc_perm_action allow/unallow flag
2892 *
2893 * outputs: none
2894 */
2895 static int
2896 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2897 {
2898 int error;
2899 nvlist_t *fsaclnv = NULL;
2900
2901 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2902 zc->zc_iflags, &fsaclnv)) != 0)
2903 return (error);
2904
2905 /*
2906 * Verify nvlist is constructed correctly
2907 */
2908 if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2909 nvlist_free(fsaclnv);
2910 return (SET_ERROR(EINVAL));
2911 }
2912
2913 /*
2914 * If we don't have PRIV_SYS_MOUNT, then validate
2915 * that user is allowed to hand out each permission in
2916 * the nvlist(s)
2917 */
2918
2919 error = secpolicy_zfs(CRED());
2920 if (error != 0) {
2921 if (zc->zc_perm_action == B_FALSE) {
2922 error = dsl_deleg_can_allow(zc->zc_name,
2923 fsaclnv, CRED());
2924 } else {
2925 error = dsl_deleg_can_unallow(zc->zc_name,
2926 fsaclnv, CRED());
2927 }
2928 }
2929
2930 if (error == 0)
2931 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2932
2933 nvlist_free(fsaclnv);
2934 return (error);
2935 }
2936
2937 /*
2938 * inputs:
2939 * zc_name name of filesystem
2940 *
2941 * outputs:
2942 * zc_nvlist_src{_size} nvlist of delegated permissions
2943 */
2944 static int
2945 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2946 {
2947 nvlist_t *nvp;
2948 int error;
2949
2950 if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2951 error = put_nvlist(zc, nvp);
2952 nvlist_free(nvp);
2953 }
2954
2955 return (error);
2956 }
2957
2958 /* ARGSUSED */
2959 static void
2960 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2961 {
2962 zfs_creat_t *zct = arg;
2963
2964 zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2965 }
2966
2967 #define ZFS_PROP_UNDEFINED ((uint64_t)-1)
2968
2969 /*
2970 * inputs:
2971 * os parent objset pointer (NULL if root fs)
2972 * fuids_ok fuids allowed in this version of the spa?
2973 * sa_ok SAs allowed in this version of the spa?
2974 * createprops list of properties requested by creator
2975 *
2976 * outputs:
2977 * zplprops values for the zplprops we attach to the master node object
2978 * is_ci true if requested file system will be purely case-insensitive
2979 *
2980 * Determine the settings for utf8only, normalization and
2981 * casesensitivity. Specific values may have been requested by the
2982 * creator and/or we can inherit values from the parent dataset. If
2983 * the file system is of too early a vintage, a creator can not
2984 * request settings for these properties, even if the requested
2985 * setting is the default value. We don't actually want to create dsl
2986 * properties for these, so remove them from the source nvlist after
2987 * processing.
2988 */
2989 static int
2990 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2991 boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2992 nvlist_t *zplprops, boolean_t *is_ci)
2993 {
2994 uint64_t sense = ZFS_PROP_UNDEFINED;
2995 uint64_t norm = ZFS_PROP_UNDEFINED;
2996 uint64_t u8 = ZFS_PROP_UNDEFINED;
2997 int error;
2998
2999 ASSERT(zplprops != NULL);
3000
3001 if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3002 return (SET_ERROR(EINVAL));
3003
3004 /*
3005 * Pull out creator prop choices, if any.
3006 */
3007 if (createprops) {
3008 (void) nvlist_lookup_uint64(createprops,
3009 zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3010 (void) nvlist_lookup_uint64(createprops,
3011 zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3012 (void) nvlist_remove_all(createprops,
3013 zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3014 (void) nvlist_lookup_uint64(createprops,
3015 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3016 (void) nvlist_remove_all(createprops,
3017 zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3018 (void) nvlist_lookup_uint64(createprops,
3019 zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3020 (void) nvlist_remove_all(createprops,
3021 zfs_prop_to_name(ZFS_PROP_CASE));
3022 }
3023
3024 /*
3025 * If the zpl version requested is whacky or the file system
3026 * or pool is version is too "young" to support normalization
3027 * and the creator tried to set a value for one of the props,
3028 * error out.
3029 */
3030 if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3031 (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3032 (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3033 (zplver < ZPL_VERSION_NORMALIZATION &&
3034 (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3035 sense != ZFS_PROP_UNDEFINED)))
3036 return (SET_ERROR(ENOTSUP));
3037
3038 /*
3039 * Put the version in the zplprops
3040 */
3041 VERIFY(nvlist_add_uint64(zplprops,
3042 zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3043
3044 if (norm == ZFS_PROP_UNDEFINED &&
3045 (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3046 return (error);
3047 VERIFY(nvlist_add_uint64(zplprops,
3048 zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3049
3050 /*
3051 * If we're normalizing, names must always be valid UTF-8 strings.
3052 */
3053 if (norm)
3054 u8 = 1;
3055 if (u8 == ZFS_PROP_UNDEFINED &&
3056 (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3057 return (error);
3058 VERIFY(nvlist_add_uint64(zplprops,
3059 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3060
3061 if (sense == ZFS_PROP_UNDEFINED &&
3062 (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3063 return (error);
3064 VERIFY(nvlist_add_uint64(zplprops,
3065 zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3066
3067 if (is_ci)
3068 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3069
3070 return (0);
3071 }
3072
3073 static int
3074 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3075 nvlist_t *zplprops, boolean_t *is_ci)
3076 {
3077 boolean_t fuids_ok, sa_ok;
3078 uint64_t zplver = ZPL_VERSION;
3079 objset_t *os = NULL;
3080 char parentname[ZFS_MAX_DATASET_NAME_LEN];
3081 char *cp;
3082 spa_t *spa;
3083 uint64_t spa_vers;
3084 int error;
3085
3086 (void) strlcpy(parentname, dataset, sizeof (parentname));
3087 cp = strrchr(parentname, '/');
3088 ASSERT(cp != NULL);
3089 cp[0] = '\0';
3090
3091 if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3092 return (error);
3093
3094 spa_vers = spa_version(spa);
3095 spa_close(spa, FTAG);
3096
3097 zplver = zfs_zpl_version_map(spa_vers);
3098 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3099 sa_ok = (zplver >= ZPL_VERSION_SA);
3100
3101 /*
3102 * Open parent object set so we can inherit zplprop values.
3103 */
3104 if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3105 return (error);
3106
3107 error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3108 zplprops, is_ci);
3109 dmu_objset_rele(os, FTAG);
3110 return (error);
3111 }
3112
3113 static int
3114 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3115 nvlist_t *zplprops, boolean_t *is_ci)
3116 {
3117 boolean_t fuids_ok;
3118 boolean_t sa_ok;
3119 uint64_t zplver = ZPL_VERSION;
3120 int error;
3121
3122 zplver = zfs_zpl_version_map(spa_vers);
3123 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3124 sa_ok = (zplver >= ZPL_VERSION_SA);
3125
3126 error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3127 createprops, zplprops, is_ci);
3128 return (error);
3129 }
3130
3131 /*
3132 * innvl: {
3133 * "type" -> dmu_objset_type_t (int32)
3134 * (optional) "props" -> { prop -> value }
3135 * }
3136 *
3137 * outnvl: propname -> error code (int32)
3138 */
3139 static int
3140 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3141 {
3142 int error = 0;
3143 zfs_creat_t zct = { 0 };
3144 nvlist_t *nvprops = NULL;
3145 void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3146 int32_t type32;
3147 dmu_objset_type_t type;
3148 boolean_t is_insensitive = B_FALSE;
3149
3150 if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3151 return (SET_ERROR(EINVAL));
3152 type = type32;
3153 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3154
3155 switch (type) {
3156 case DMU_OST_ZFS:
3157 cbfunc = zfs_create_cb;
3158 break;
3159
3160 case DMU_OST_ZVOL:
3161 cbfunc = zvol_create_cb;
3162 break;
3163
3164 default:
3165 cbfunc = NULL;
3166 break;
3167 }
3168 if (strchr(fsname, '@') ||
3169 strchr(fsname, '%'))
3170 return (SET_ERROR(EINVAL));
3171
3172 zct.zct_props = nvprops;
3173
3174 if (cbfunc == NULL)
3175 return (SET_ERROR(EINVAL));
3176
3177 if (type == DMU_OST_ZVOL) {
3178 uint64_t volsize, volblocksize;
3179
3180 if (nvprops == NULL)
3181 return (SET_ERROR(EINVAL));
3182 if (nvlist_lookup_uint64(nvprops,
3183 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3184 return (SET_ERROR(EINVAL));
3185
3186 if ((error = nvlist_lookup_uint64(nvprops,
3187 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3188 &volblocksize)) != 0 && error != ENOENT)
3189 return (SET_ERROR(EINVAL));
3190
3191 if (error != 0)
3192 volblocksize = zfs_prop_default_numeric(
3193 ZFS_PROP_VOLBLOCKSIZE);
3194
3195 if ((error = zvol_check_volblocksize(fsname,
3196 volblocksize)) != 0 ||
3197 (error = zvol_check_volsize(volsize,
3198 volblocksize)) != 0)
3199 return (error);
3200 } else if (type == DMU_OST_ZFS) {
3201 int error;
3202
3203 /*
3204 * We have to have normalization and
3205 * case-folding flags correct when we do the
3206 * file system creation, so go figure them out
3207 * now.
3208 */
3209 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3210 NV_UNIQUE_NAME, KM_SLEEP) == 0);
3211 error = zfs_fill_zplprops(fsname, nvprops,
3212 zct.zct_zplprops, &is_insensitive);
3213 if (error != 0) {
3214 nvlist_free(zct.zct_zplprops);
3215 return (error);
3216 }
3217 }
3218
3219 error = dmu_objset_create(fsname, type,
3220 is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3221 nvlist_free(zct.zct_zplprops);
3222
3223 /*
3224 * It would be nice to do this atomically.
3225 */
3226 if (error == 0) {
3227 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3228 nvprops, outnvl);
3229 if (error != 0) {
3230 spa_t *spa;
3231 int error2;
3232
3233 /*
3234 * Volumes will return EBUSY and cannot be destroyed
3235 * until all asynchronous minor handling has completed.
3236 * Wait for the spa_zvol_taskq to drain then retry.
3237 */
3238 error2 = dsl_destroy_head(fsname);
3239 while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3240 error2 = spa_open(fsname, &spa, FTAG);
3241 if (error2 == 0) {
3242 taskq_wait(spa->spa_zvol_taskq);
3243 spa_close(spa, FTAG);
3244 }
3245 error2 = dsl_destroy_head(fsname);
3246 }
3247 }
3248 }
3249 return (error);
3250 }
3251
3252 /*
3253 * innvl: {
3254 * "origin" -> name of origin snapshot
3255 * (optional) "props" -> { prop -> value }
3256 * }
3257 *
3258 * outputs:
3259 * outnvl: propname -> error code (int32)
3260 */
3261 static int
3262 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3263 {
3264 int error = 0;
3265 nvlist_t *nvprops = NULL;
3266 char *origin_name;
3267
3268 if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3269 return (SET_ERROR(EINVAL));
3270 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3271
3272 if (strchr(fsname, '@') ||
3273 strchr(fsname, '%'))
3274 return (SET_ERROR(EINVAL));
3275
3276 if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3277 return (SET_ERROR(EINVAL));
3278 error = dmu_objset_clone(fsname, origin_name);
3279 if (error != 0)
3280 return (error);
3281
3282 /*
3283 * It would be nice to do this atomically.
3284 */
3285 if (error == 0) {
3286 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3287 nvprops, outnvl);
3288 if (error != 0)
3289 (void) dsl_destroy_head(fsname);
3290 }
3291 return (error);
3292 }
3293
3294 /*
3295 * innvl: {
3296 * "snaps" -> { snapshot1, snapshot2 }
3297 * (optional) "props" -> { prop -> value (string) }
3298 * }
3299 *
3300 * outnvl: snapshot -> error code (int32)
3301 */
3302 static int
3303 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3304 {
3305 nvlist_t *snaps;
3306 nvlist_t *props = NULL;
3307 int error, poollen;
3308 nvpair_t *pair, *pair2;
3309
3310 (void) nvlist_lookup_nvlist(innvl, "props", &props);
3311 if ((error = zfs_check_userprops(poolname, props)) != 0)
3312 return (error);
3313
3314 if (!nvlist_empty(props) &&
3315 zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3316 return (SET_ERROR(ENOTSUP));
3317
3318 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3319 return (SET_ERROR(EINVAL));
3320 poollen = strlen(poolname);
3321 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3322 pair = nvlist_next_nvpair(snaps, pair)) {
3323 const char *name = nvpair_name(pair);
3324 const char *cp = strchr(name, '@');
3325
3326 /*
3327 * The snap name must contain an @, and the part after it must
3328 * contain only valid characters.
3329 */
3330 if (cp == NULL ||
3331 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3332 return (SET_ERROR(EINVAL));
3333
3334 /*
3335 * The snap must be in the specified pool.
3336 */
3337 if (strncmp(name, poolname, poollen) != 0 ||
3338 (name[poollen] != '/' && name[poollen] != '@'))
3339 return (SET_ERROR(EXDEV));
3340
3341 /* This must be the only snap of this fs. */
3342 for (pair2 = nvlist_next_nvpair(snaps, pair);
3343 pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3344 if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3345 == 0) {
3346 return (SET_ERROR(EXDEV));
3347 }
3348 }
3349 }
3350
3351 error = dsl_dataset_snapshot(snaps, props, outnvl);
3352
3353 return (error);
3354 }
3355
3356 /*
3357 * innvl: "message" -> string
3358 */
3359 /* ARGSUSED */
3360 static int
3361 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3362 {
3363 char *message;
3364 spa_t *spa;
3365 int error;
3366 char *poolname;
3367
3368 /*
3369 * The poolname in the ioctl is not set, we get it from the TSD,
3370 * which was set at the end of the last successful ioctl that allows
3371 * logging. The secpolicy func already checked that it is set.
3372 * Only one log ioctl is allowed after each successful ioctl, so
3373 * we clear the TSD here.
3374 */
3375 poolname = tsd_get(zfs_allow_log_key);
3376 if (poolname == NULL)
3377 return (SET_ERROR(EINVAL));
3378 (void) tsd_set(zfs_allow_log_key, NULL);
3379 error = spa_open(poolname, &spa, FTAG);
3380 strfree(poolname);
3381 if (error != 0)
3382 return (error);
3383
3384 if (nvlist_lookup_string(innvl, "message", &message) != 0) {
3385 spa_close(spa, FTAG);
3386 return (SET_ERROR(EINVAL));
3387 }
3388
3389 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3390 spa_close(spa, FTAG);
3391 return (SET_ERROR(ENOTSUP));
3392 }
3393
3394 error = spa_history_log(spa, message);
3395 spa_close(spa, FTAG);
3396 return (error);
3397 }
3398
3399 /*
3400 * The dp_config_rwlock must not be held when calling this, because the
3401 * unmount may need to write out data.
3402 *
3403 * This function is best-effort. Callers must deal gracefully if it
3404 * remains mounted (or is remounted after this call).
3405 *
3406 * Returns 0 if the argument is not a snapshot, or it is not currently a
3407 * filesystem, or we were able to unmount it. Returns error code otherwise.
3408 */
3409 int
3410 zfs_unmount_snap(const char *snapname)
3411 {
3412 int err;
3413
3414 if (strchr(snapname, '@') == NULL)
3415 return (0);
3416
3417 err = zfsctl_snapshot_unmount((char *)snapname, MNT_FORCE);
3418 if (err != 0 && err != ENOENT)
3419 return (SET_ERROR(err));
3420
3421 return (0);
3422 }
3423
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); arg is ignored.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int err = zfs_unmount_snap(snapname);

	return (err);
}
3430
3431 /*
3432 * When a clone is destroyed, its origin may also need to be destroyed,
3433 * in which case it must be unmounted. This routine will do that unmount
3434 * if necessary.
3435 */
3436 void
3437 zfs_destroy_unmount_origin(const char *fsname)
3438 {
3439 int error;
3440 objset_t *os;
3441 dsl_dataset_t *ds;
3442
3443 error = dmu_objset_hold(fsname, FTAG, &os);
3444 if (error != 0)
3445 return;
3446 ds = dmu_objset_ds(os);
3447 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3448 char originname[ZFS_MAX_DATASET_NAME_LEN];
3449 dsl_dataset_name(ds->ds_prev, originname);
3450 dmu_objset_rele(os, FTAG);
3451 (void) zfs_unmount_snap(originname);
3452 } else {
3453 dmu_objset_rele(os, FTAG);
3454 }
3455 }
3456
3457 /*
3458 * innvl: {
3459 * "snaps" -> { snapshot1, snapshot2 }
3460 * (optional boolean) "defer"
3461 * }
3462 *
3463 * outnvl: snapshot -> error code (int32)
3464 */
3465 /* ARGSUSED */
3466 static int
3467 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3468 {
3469 nvlist_t *snaps;
3470 nvpair_t *pair;
3471 boolean_t defer;
3472
3473 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3474 return (SET_ERROR(EINVAL));
3475 defer = nvlist_exists(innvl, "defer");
3476
3477 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3478 pair = nvlist_next_nvpair(snaps, pair)) {
3479 (void) zfs_unmount_snap(nvpair_name(pair));
3480 }
3481
3482 return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3483 }
3484
3485 /*
3486 * Create bookmarks. Bookmark names are of the form <fs>#<bmark>.
3487 * All bookmarks must be in the same pool.
3488 *
3489 * innvl: {
3490 * bookmark1 -> snapshot1, bookmark2 -> snapshot2
3491 * }
3492 *
3493 * outnvl: bookmark -> error code (int32)
3494 *
3495 */
3496 /* ARGSUSED */
3497 static int
3498 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3499 {
3500 nvpair_t *pair, *pair2;
3501
3502 for (pair = nvlist_next_nvpair(innvl, NULL);
3503 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3504 char *snap_name;
3505
3506 /*
3507 * Verify the snapshot argument.
3508 */
3509 if (nvpair_value_string(pair, &snap_name) != 0)
3510 return (SET_ERROR(EINVAL));
3511
3512
3513 /* Verify that the keys (bookmarks) are unique */
3514 for (pair2 = nvlist_next_nvpair(innvl, pair);
3515 pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3516 if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3517 return (SET_ERROR(EINVAL));
3518 }
3519 }
3520
3521 return (dsl_bookmark_create(innvl, outnvl));
3522 }
3523
3524 /*
3525 * innvl: {
3526 * property 1, property 2, ...
3527 * }
3528 *
3529 * outnvl: {
3530 * bookmark name 1 -> { property 1, property 2, ... },
3531 * bookmark name 2 -> { property 1, property 2, ... }
3532 * }
3533 *
3534 */
3535 static int
3536 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3537 {
3538 return (dsl_get_bookmarks(fsname, innvl, outnvl));
3539 }
3540
3541 /*
3542 * innvl: {
3543 * bookmark name 1, bookmark name 2
3544 * }
3545 *
3546 * outnvl: bookmark -> error code (int32)
3547 *
3548 */
3549 static int
3550 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3551 nvlist_t *outnvl)
3552 {
3553 int error, poollen;
3554 nvpair_t *pair;
3555
3556 poollen = strlen(poolname);
3557 for (pair = nvlist_next_nvpair(innvl, NULL);
3558 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3559 const char *name = nvpair_name(pair);
3560 const char *cp = strchr(name, '#');
3561
3562 /*
3563 * The bookmark name must contain an #, and the part after it
3564 * must contain only valid characters.
3565 */
3566 if (cp == NULL ||
3567 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3568 return (SET_ERROR(EINVAL));
3569
3570 /*
3571 * The bookmark must be in the specified pool.
3572 */
3573 if (strncmp(name, poolname, poollen) != 0 ||
3574 (name[poollen] != '/' && name[poollen] != '#'))
3575 return (SET_ERROR(EXDEV));
3576 }
3577
3578 error = dsl_bookmark_destroy(innvl, outnvl);
3579 return (error);
3580 }
3581
3582 /*
3583 * inputs:
3584 * zc_name name of dataset to destroy
3585 * zc_objset_type type of objset
3586 * zc_defer_destroy mark for deferred destroy
3587 *
3588 * outputs: none
3589 */
3590 static int
3591 zfs_ioc_destroy(zfs_cmd_t *zc)
3592 {
3593 int err;
3594
3595 if (zc->zc_objset_type == DMU_OST_ZFS) {
3596 err = zfs_unmount_snap(zc->zc_name);
3597 if (err != 0)
3598 return (err);
3599 }
3600
3601 if (strchr(zc->zc_name, '@')) {
3602 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3603 } else {
3604 err = dsl_destroy_head(zc->zc_name);
3605 if (err == EEXIST) {
3606 /*
3607 * It is possible that the given DS may have
3608 * hidden child (%recv) datasets - "leftovers"
3609 * resulting from the previously interrupted
3610 * 'zfs receive'.
3611 *
3612 * 6 extra bytes for /%recv
3613 */
3614 char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3615
3616 (void) snprintf(namebuf, sizeof (namebuf),
3617 "%s/%s", zc->zc_name, recv_clone_name);
3618
3619 /*
3620 * Try to remove the hidden child (%recv) and after
3621 * that try to remove the target dataset.
3622 * If the hidden child (%recv) does not exist
3623 * the original error (EEXIST) will be returned
3624 */
3625 err = dsl_destroy_head(namebuf);
3626 if (err == 0)
3627 err = dsl_destroy_head(zc->zc_name);
3628 else if (err == ENOENT)
3629 err = EEXIST;
3630 }
3631 }
3632
3633 return (err);
3634 }
3635
3636 /*
3637 * fsname is name of dataset to rollback (to most recent snapshot)
3638 *
3639 * innvl is not used.
3640 *
3641 * outnvl: "target" -> name of most recent snapshot
3642 * }
3643 */
3644 /* ARGSUSED */
3645 static int
3646 zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
3647 {
3648 zfsvfs_t *zfsvfs;
3649 zvol_state_t *zv;
3650 int error;
3651
3652 if (getzfsvfs(fsname, &zfsvfs) == 0) {
3653 dsl_dataset_t *ds;
3654
3655 ds = dmu_objset_ds(zfsvfs->z_os);
3656 error = zfs_suspend_fs(zfsvfs);
3657 if (error == 0) {
3658 int resume_err;
3659
3660 error = dsl_dataset_rollback(fsname, zfsvfs, outnvl);
3661 resume_err = zfs_resume_fs(zfsvfs, ds);
3662 error = error ? error : resume_err;
3663 }
3664 deactivate_super(zfsvfs->z_sb);
3665 } else if ((zv = zvol_suspend(fsname)) != NULL) {
3666 error = dsl_dataset_rollback(fsname, zvol_tag(zv), outnvl);
3667 zvol_resume(zv);
3668 } else {
3669 error = dsl_dataset_rollback(fsname, NULL, outnvl);
3670 }
3671 return (error);
3672 }
3673
/*
 * Callback taking a dataset name and an opaque argument: 'arg' is a
 * snapshot name, and the snapshot "<fsname>@<arg>" is unmounted.  Returns
 * the result of zfs_unmount_snap().
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	const char *snapname = arg;
	char *fullname = kmem_asprintf("%s@%s", fsname, snapname);
	int error = zfs_unmount_snap(fullname);

	strfree(fullname);
	return (error);
}
3687
3688 /*
3689 * inputs:
3690 * zc_name old name of dataset
3691 * zc_value new name of dataset
3692 * zc_cookie recursive flag (only valid for snapshots)
3693 *
3694 * outputs: none
3695 */
3696 static int
3697 zfs_ioc_rename(zfs_cmd_t *zc)
3698 {
3699 boolean_t recursive = zc->zc_cookie & 1;
3700 char *at;
3701
3702 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3703 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3704 strchr(zc->zc_value, '%'))
3705 return (SET_ERROR(EINVAL));
3706
3707 at = strchr(zc->zc_name, '@');
3708 if (at != NULL) {
3709 /* snaps must be in same fs */
3710 int error;
3711
3712 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3713 return (SET_ERROR(EXDEV));
3714 *at = '\0';
3715 if (zc->zc_objset_type == DMU_OST_ZFS) {
3716 error = dmu_objset_find(zc->zc_name,
3717 recursive_unmount, at + 1,
3718 recursive ? DS_FIND_CHILDREN : 0);
3719 if (error != 0) {
3720 *at = '@';
3721 return (error);
3722 }
3723 }
3724 error = dsl_dataset_rename_snapshot(zc->zc_name,
3725 at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3726 *at = '@';
3727
3728 return (error);
3729 } else {
3730 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3731 }
3732 }
3733
3734 static int
3735 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3736 {
3737 const char *propname = nvpair_name(pair);
3738 boolean_t issnap = (strchr(dsname, '@') != NULL);
3739 zfs_prop_t prop = zfs_name_to_prop(propname);
3740 uint64_t intval;
3741 int err;
3742
3743 if (prop == ZPROP_INVAL) {
3744 if (zfs_prop_user(propname)) {
3745 if ((err = zfs_secpolicy_write_perms(dsname,
3746 ZFS_DELEG_PERM_USERPROP, cr)))
3747 return (err);
3748 return (0);
3749 }
3750
3751 if (!issnap && zfs_prop_userquota(propname)) {
3752 const char *perm = NULL;
3753 const char *uq_prefix =
3754 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3755 const char *gq_prefix =
3756 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3757 const char *uiq_prefix =
3758 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
3759 const char *giq_prefix =
3760 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
3761
3762 if (strncmp(propname, uq_prefix,
3763 strlen(uq_prefix)) == 0) {
3764 perm = ZFS_DELEG_PERM_USERQUOTA;
3765 } else if (strncmp(propname, uiq_prefix,
3766 strlen(uiq_prefix)) == 0) {
3767 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
3768 } else if (strncmp(propname, gq_prefix,
3769 strlen(gq_prefix)) == 0) {
3770 perm = ZFS_DELEG_PERM_GROUPQUOTA;
3771 } else if (strncmp(propname, giq_prefix,
3772 strlen(giq_prefix)) == 0) {
3773 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
3774 } else {
3775 /* USERUSED and GROUPUSED are read-only */
3776 return (SET_ERROR(EINVAL));
3777 }
3778
3779 if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
3780 return (err);
3781 return (0);
3782 }
3783
3784 return (SET_ERROR(EINVAL));
3785 }
3786
3787 if (issnap)
3788 return (SET_ERROR(EINVAL));
3789
3790 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3791 /*
3792 * dsl_prop_get_all_impl() returns properties in this
3793 * format.
3794 */
3795 nvlist_t *attrs;
3796 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3797 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3798 &pair) == 0);
3799 }
3800
3801 /*
3802 * Check that this value is valid for this pool version
3803 */
3804 switch (prop) {
3805 case ZFS_PROP_COMPRESSION:
3806 /*
3807 * If the user specified gzip compression, make sure
3808 * the SPA supports it. We ignore any errors here since
3809 * we'll catch them later.
3810 */
3811 if (nvpair_value_uint64(pair, &intval) == 0) {
3812 if (intval >= ZIO_COMPRESS_GZIP_1 &&
3813 intval <= ZIO_COMPRESS_GZIP_9 &&
3814 zfs_earlier_version(dsname,
3815 SPA_VERSION_GZIP_COMPRESSION)) {
3816 return (SET_ERROR(ENOTSUP));
3817 }
3818
3819 if (intval == ZIO_COMPRESS_ZLE &&
3820 zfs_earlier_version(dsname,
3821 SPA_VERSION_ZLE_COMPRESSION))
3822 return (SET_ERROR(ENOTSUP));
3823
3824 if (intval == ZIO_COMPRESS_LZ4) {
3825 spa_t *spa;
3826
3827 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3828 return (err);
3829
3830 if (!spa_feature_is_enabled(spa,
3831 SPA_FEATURE_LZ4_COMPRESS)) {
3832 spa_close(spa, FTAG);
3833 return (SET_ERROR(ENOTSUP));
3834 }
3835 spa_close(spa, FTAG);
3836 }
3837
3838 /*
3839 * If this is a bootable dataset then
3840 * verify that the compression algorithm
3841 * is supported for booting. We must return
3842 * something other than ENOTSUP since it
3843 * implies a downrev pool version.
3844 */
3845 if (zfs_is_bootfs(dsname) &&
3846 !BOOTFS_COMPRESS_VALID(intval)) {
3847 return (SET_ERROR(ERANGE));
3848 }
3849 }
3850 break;
3851
3852 case ZFS_PROP_COPIES:
3853 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3854 return (SET_ERROR(ENOTSUP));
3855 break;
3856
3857 case ZFS_PROP_VOLBLOCKSIZE:
3858 case ZFS_PROP_RECORDSIZE:
3859 /* Record sizes above 128k need the feature to be enabled */
3860 if (nvpair_value_uint64(pair, &intval) == 0 &&
3861 intval > SPA_OLD_MAXBLOCKSIZE) {
3862 spa_t *spa;
3863
3864 /*
3865 * We don't allow setting the property above 1MB,
3866 * unless the tunable has been changed.
3867 */
3868 if (intval > zfs_max_recordsize ||
3869 intval > SPA_MAXBLOCKSIZE)
3870 return (SET_ERROR(ERANGE));
3871
3872 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3873 return (err);
3874
3875 if (!spa_feature_is_enabled(spa,
3876 SPA_FEATURE_LARGE_BLOCKS)) {
3877 spa_close(spa, FTAG);
3878 return (SET_ERROR(ENOTSUP));
3879 }
3880 spa_close(spa, FTAG);
3881 }
3882 break;
3883
3884 case ZFS_PROP_DNODESIZE:
3885 /* Dnode sizes above 512 need the feature to be enabled */
3886 if (nvpair_value_uint64(pair, &intval) == 0 &&
3887 intval != ZFS_DNSIZE_LEGACY) {
3888 spa_t *spa;
3889
3890 /*
3891 * If this is a bootable dataset then
3892 * we don't allow large (>512B) dnodes,
3893 * because GRUB doesn't support them.
3894 */
3895 if (zfs_is_bootfs(dsname) &&
3896 intval != ZFS_DNSIZE_LEGACY) {
3897 return (SET_ERROR(EDOM));
3898 }
3899
3900 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3901 return (err);
3902
3903 if (!spa_feature_is_enabled(spa,
3904 SPA_FEATURE_LARGE_DNODE)) {
3905 spa_close(spa, FTAG);
3906 return (SET_ERROR(ENOTSUP));
3907 }
3908 spa_close(spa, FTAG);
3909 }
3910 break;
3911
3912 case ZFS_PROP_SHARESMB:
3913 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3914 return (SET_ERROR(ENOTSUP));
3915 break;
3916
3917 case ZFS_PROP_ACLINHERIT:
3918 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3919 nvpair_value_uint64(pair, &intval) == 0) {
3920 if (intval == ZFS_ACL_PASSTHROUGH_X &&
3921 zfs_earlier_version(dsname,
3922 SPA_VERSION_PASSTHROUGH_X))
3923 return (SET_ERROR(ENOTSUP));
3924 }
3925 break;
3926 case ZFS_PROP_CHECKSUM:
3927 case ZFS_PROP_DEDUP:
3928 {
3929 spa_feature_t feature;
3930 spa_t *spa;
3931 uint64_t intval;
3932 int err;
3933
3934 /* dedup feature version checks */
3935 if (prop == ZFS_PROP_DEDUP &&
3936 zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3937 return (SET_ERROR(ENOTSUP));
3938
3939 if (nvpair_value_uint64(pair, &intval) != 0)
3940 return (SET_ERROR(EINVAL));
3941
3942 /* check prop value is enabled in features */
3943 feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
3944 if (feature == SPA_FEATURE_NONE)
3945 break;
3946
3947 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3948 return (err);
3949 /*
3950 * Salted checksums are not supported on root pools.
3951 */
3952 if (spa_bootfs(spa) != 0 &&
3953 intval < ZIO_CHECKSUM_FUNCTIONS &&
3954 (zio_checksum_table[intval].ci_flags &
3955 ZCHECKSUM_FLAG_SALTED)) {
3956 spa_close(spa, FTAG);
3957 return (SET_ERROR(ERANGE));
3958 }
3959 if (!spa_feature_is_enabled(spa, feature)) {
3960 spa_close(spa, FTAG);
3961 return (SET_ERROR(ENOTSUP));
3962 }
3963 spa_close(spa, FTAG);
3964 break;
3965 }
3966
3967 default:
3968 break;
3969 }
3970
3971 return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3972 }
3973
3974 /*
3975 * Removes properties from the given props list that fail permission checks
3976 * needed to clear them and to restore them in case of a receive error. For each
3977 * property, make sure we have both set and inherit permissions.
3978 *
3979 * Returns the first error encountered if any permission checks fail. If the
3980 * caller provides a non-NULL errlist, it also gives the complete list of names
3981 * of all the properties that failed a permission check along with the
3982 * corresponding error numbers. The caller is responsible for freeing the
3983 * returned errlist.
3984 *
3985 * If every property checks out successfully, zero is returned and the list
3986 * pointed at by errlist is NULL.
3987 */
3988 static int
3989 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3990 {
3991 zfs_cmd_t *zc;
3992 nvpair_t *pair, *next_pair;
3993 nvlist_t *errors;
3994 int err, rv = 0;
3995
3996 if (props == NULL)
3997 return (0);
3998
3999 VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4000
4001 zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4002 (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
4003 pair = nvlist_next_nvpair(props, NULL);
4004 while (pair != NULL) {
4005 next_pair = nvlist_next_nvpair(props, pair);
4006
4007 (void) strlcpy(zc->zc_value, nvpair_name(pair),
4008 sizeof (zc->zc_value));
4009 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4010 (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4011 VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4012 VERIFY(nvlist_add_int32(errors,
4013 zc->zc_value, err) == 0);
4014 }
4015 pair = next_pair;
4016 }
4017 kmem_free(zc, sizeof (zfs_cmd_t));
4018
4019 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4020 nvlist_free(errors);
4021 errors = NULL;
4022 } else {
4023 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4024 }
4025
4026 if (errlist == NULL)
4027 nvlist_free(errors);
4028 else
4029 *errlist = errors;
4030
4031 return (rv);
4032 }
4033
4034 static boolean_t
4035 propval_equals(nvpair_t *p1, nvpair_t *p2)
4036 {
4037 if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4038 /* dsl_prop_get_all_impl() format */
4039 nvlist_t *attrs;
4040 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4041 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4042 &p1) == 0);
4043 }
4044
4045 if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4046 nvlist_t *attrs;
4047 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4048 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4049 &p2) == 0);
4050 }
4051
4052 if (nvpair_type(p1) != nvpair_type(p2))
4053 return (B_FALSE);
4054
4055 if (nvpair_type(p1) == DATA_TYPE_STRING) {
4056 char *valstr1, *valstr2;
4057
4058 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4059 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4060 return (strcmp(valstr1, valstr2) == 0);
4061 } else {
4062 uint64_t intval1, intval2;
4063
4064 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4065 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4066 return (intval1 == intval2);
4067 }
4068 }
4069
4070 /*
4071 * Remove properties from props if they are not going to change (as determined
4072 * by comparison with origprops). Remove them from origprops as well, since we
4073 * do not need to clear or restore properties that won't change.
4074 */
4075 static void
4076 props_reduce(nvlist_t *props, nvlist_t *origprops)
4077 {
4078 nvpair_t *pair, *next_pair;
4079
4080 if (origprops == NULL)
4081 return; /* all props need to be received */
4082
4083 pair = nvlist_next_nvpair(props, NULL);
4084 while (pair != NULL) {
4085 const char *propname = nvpair_name(pair);
4086 nvpair_t *match;
4087
4088 next_pair = nvlist_next_nvpair(props, pair);
4089
4090 if ((nvlist_lookup_nvpair(origprops, propname,
4091 &match) != 0) || !propval_equals(pair, match))
4092 goto next; /* need to set received value */
4093
4094 /* don't clear the existing received value */
4095 (void) nvlist_remove_nvpair(origprops, match);
4096 /* don't bother receiving the property */
4097 (void) nvlist_remove_nvpair(props, pair);
4098 next:
4099 pair = next_pair;
4100 }
4101 }
4102
4103 /*
4104 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4105 * For example, refquota cannot be set until after the receipt of a dataset,
4106 * because in replication streams, an older/earlier snapshot may exceed the
4107 * refquota. We want to receive the older/earlier snapshot, but setting
4108 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4109 * the older/earlier snapshot from being received (with EDQUOT).
4110 *
4111 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4112 *
4113 * libzfs will need to be judicious handling errors encountered by props
4114 * extracted by this function.
4115 */
4116 static nvlist_t *
4117 extract_delay_props(nvlist_t *props)
4118 {
4119 nvlist_t *delayprops;
4120 nvpair_t *nvp, *tmp;
4121 static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4122 int i;
4123
4124 VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4125
4126 for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4127 nvp = nvlist_next_nvpair(props, nvp)) {
4128 /*
4129 * strcmp() is safe because zfs_prop_to_name() always returns
4130 * a bounded string.
4131 */
4132 for (i = 0; delayable[i] != 0; i++) {
4133 if (strcmp(zfs_prop_to_name(delayable[i]),
4134 nvpair_name(nvp)) == 0) {
4135 break;
4136 }
4137 }
4138 if (delayable[i] != 0) {
4139 tmp = nvlist_prev_nvpair(props, nvp);
4140 VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4141 VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4142 nvp = tmp;
4143 }
4144 }
4145
4146 if (nvlist_empty(delayprops)) {
4147 nvlist_free(delayprops);
4148 delayprops = NULL;
4149 }
4150 return (delayprops);
4151 }
4152
4153 #ifdef DEBUG
4154 static boolean_t zfs_ioc_recv_inject_err;
4155 #endif
4156
4157 /*
4158 * nvlist 'errors' is always allocated. It will contain descriptions of
4159 * encountered errors, if any. It's the callers responsibility to free.
4160 */
4161 static int
4162 zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin,
4163 nvlist_t *props, boolean_t force, boolean_t resumable, int input_fd,
4164 dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes,
4165 uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors)
4166 {
4167 dmu_recv_cookie_t drc;
4168 int error = 0;
4169 int props_error = 0;
4170 offset_t off;
4171 nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4172 nvlist_t *origprops = NULL; /* existing properties */
4173 boolean_t first_recvd_props = B_FALSE;
4174 file_t *input_fp;
4175
4176 *read_bytes = 0;
4177 *errflags = 0;
4178 *errors = fnvlist_alloc();
4179
4180 input_fp = getf(input_fd);
4181 if (input_fp == NULL)
4182 return (SET_ERROR(EBADF));
4183
4184 error = dmu_recv_begin(tofs, tosnap,
4185 begin_record, force, resumable, origin, &drc);
4186 if (error != 0)
4187 goto out;
4188
4189 /*
4190 * Set properties before we receive the stream so that they are applied
4191 * to the new data. Note that we must call dmu_recv_stream() if
4192 * dmu_recv_begin() succeeds.
4193 */
4194 if (props != NULL && !drc.drc_newfs) {
4195 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4196 SPA_VERSION_RECVD_PROPS &&
4197 !dsl_prop_get_hasrecvd(tofs))
4198 first_recvd_props = B_TRUE;
4199
4200 /*
4201 * If new received properties are supplied, they are to
4202 * completely replace the existing received properties, so stash
4203 * away the existing ones.
4204 */
4205 if (dsl_prop_get_received(tofs, &origprops) == 0) {
4206 nvlist_t *errlist = NULL;
4207 /*
4208 * Don't bother writing a property if its value won't
4209 * change (and avoid the unnecessary security checks).
4210 *
4211 * The first receive after SPA_VERSION_RECVD_PROPS is a
4212 * special case where we blow away all local properties
4213 * regardless.
4214 */
4215 if (!first_recvd_props)
4216 props_reduce(props, origprops);
4217 if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4218 (void) nvlist_merge(*errors, errlist, 0);
4219 nvlist_free(errlist);
4220
4221 if (clear_received_props(tofs, origprops,
4222 first_recvd_props ? NULL : props) != 0)
4223 *errflags |= ZPROP_ERR_NOCLEAR;
4224 } else {
4225 *errflags |= ZPROP_ERR_NOCLEAR;
4226 }
4227 }
4228
4229 if (props != NULL) {
4230 props_error = dsl_prop_set_hasrecvd(tofs);
4231
4232 if (props_error == 0) {
4233 delayprops = extract_delay_props(props);
4234 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4235 props, *errors);
4236 }
4237 }
4238
4239 off = input_fp->f_offset;
4240 error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
4241 action_handle);
4242
4243 if (error == 0) {
4244 zfsvfs_t *zfsvfs = NULL;
4245 zvol_state_t *zv = NULL;
4246
4247 if (getzfsvfs(tofs, &zfsvfs) == 0) {
4248 /* online recv */
4249 dsl_dataset_t *ds;
4250 int end_err;
4251
4252 ds = dmu_objset_ds(zfsvfs->z_os);
4253 error = zfs_suspend_fs(zfsvfs);
4254 /*
4255 * If the suspend fails, then the recv_end will
4256 * likely also fail, and clean up after itself.
4257 */
4258 end_err = dmu_recv_end(&drc, zfsvfs);
4259 if (error == 0)
4260 error = zfs_resume_fs(zfsvfs, ds);
4261 error = error ? error : end_err;
4262 deactivate_super(zfsvfs->z_sb);
4263 } else if ((zv = zvol_suspend(tofs)) != NULL) {
4264 error = dmu_recv_end(&drc, zvol_tag(zv));
4265 zvol_resume(zv);
4266 } else {
4267 error = dmu_recv_end(&drc, NULL);
4268 }
4269
4270 /* Set delayed properties now, after we're done receiving. */
4271 if (delayprops != NULL && error == 0) {
4272 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4273 delayprops, *errors);
4274 }
4275 }
4276
4277 if (delayprops != NULL) {
4278 /*
4279 * Merge delayed props back in with initial props, in case
4280 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4281 * we have to make sure clear_received_props() includes
4282 * the delayed properties).
4283 *
4284 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4285 * using ASSERT() will be just like a VERIFY.
4286 */
4287 ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4288 nvlist_free(delayprops);
4289 }
4290
4291
4292 *read_bytes = off - input_fp->f_offset;
4293 if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
4294 input_fp->f_offset = off;
4295
4296 #ifdef DEBUG
4297 if (zfs_ioc_recv_inject_err) {
4298 zfs_ioc_recv_inject_err = B_FALSE;
4299 error = 1;
4300 }
4301 #endif
4302
4303 /*
4304 * On error, restore the original props.
4305 */
4306 if (error != 0 && props != NULL && !drc.drc_newfs) {
4307 if (clear_received_props(tofs, props, NULL) != 0) {
4308 /*
4309 * We failed to clear the received properties.
4310 * Since we may have left a $recvd value on the
4311 * system, we can't clear the $hasrecvd flag.
4312 */
4313 *errflags |= ZPROP_ERR_NORESTORE;
4314 } else if (first_recvd_props) {
4315 dsl_prop_unset_hasrecvd(tofs);
4316 }
4317
4318 if (origprops == NULL && !drc.drc_newfs) {
4319 /* We failed to stash the original properties. */
4320 *errflags |= ZPROP_ERR_NORESTORE;
4321 }
4322
4323 /*
4324 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4325 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4326 * explicitly if we're restoring local properties cleared in the
4327 * first new-style receive.
4328 */
4329 if (origprops != NULL &&
4330 zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4331 ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4332 origprops, NULL) != 0) {
4333 /*
4334 * We stashed the original properties but failed to
4335 * restore them.
4336 */
4337 *errflags |= ZPROP_ERR_NORESTORE;
4338 }
4339 }
4340 out:
4341 releasef(input_fd);
4342 nvlist_free(origprops);
4343
4344 if (error == 0)
4345 error = props_error;
4346
4347 return (error);
4348 }
4349
4350 /*
4351 * inputs:
4352 * zc_name name of containing filesystem (unused)
4353 * zc_nvlist_src{_size} nvlist of properties to apply
4354 * zc_value name of snapshot to create
4355 * zc_string name of clone origin (if DRR_FLAG_CLONE)
4356 * zc_cookie file descriptor to recv from
4357 * zc_begin_record the BEGIN record of the stream (not byteswapped)
4358 * zc_guid force flag
4359 * zc_cleanup_fd cleanup-on-exit file descriptor
4360 * zc_action_handle handle for this guid/ds mapping (or zero on first call)
4361 *
4362 * outputs:
4363 * zc_cookie number of bytes read
4364 * zc_obj zprop_errflags_t
4365 * zc_action_handle handle for this guid/ds mapping
4366 * zc_nvlist_dst{_size} error for each unapplied received property
4367 */
4368 static int
4369 zfs_ioc_recv(zfs_cmd_t *zc)
4370 {
4371 dmu_replay_record_t begin_record;
4372 nvlist_t *errors = NULL;
4373 nvlist_t *props = NULL;
4374 char *origin = NULL;
4375 char *tosnap;
4376 char tofs[ZFS_MAX_DATASET_NAME_LEN];
4377 int error = 0;
4378
4379 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4380 strchr(zc->zc_value, '@') == NULL ||
4381 strchr(zc->zc_value, '%'))
4382 return (SET_ERROR(EINVAL));
4383
4384 (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
4385 tosnap = strchr(tofs, '@');
4386 *tosnap++ = '\0';
4387
4388 if (zc->zc_nvlist_src != 0 &&
4389 (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4390 zc->zc_iflags, &props)) != 0)
4391 return (error);
4392
4393 if (zc->zc_string[0])
4394 origin = zc->zc_string;
4395
4396 begin_record.drr_type = DRR_BEGIN;
4397 begin_record.drr_payloadlen = 0;
4398 begin_record.drr_u.drr_begin = zc->zc_begin_record;
4399
4400 error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, zc->zc_guid,
4401 B_FALSE, zc->zc_cookie, &begin_record, zc->zc_cleanup_fd,
4402 &zc->zc_cookie, &zc->zc_obj, &zc->zc_action_handle, &errors);
4403 nvlist_free(props);
4404
4405 /*
4406 * Now that all props, initial and delayed, are set, report the prop
4407 * errors to the caller.
4408 */
4409 if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
4410 (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4411 put_nvlist(zc, errors) != 0)) {
4412 /*
4413 * Caller made zc->zc_nvlist_dst less than the minimum expected
4414 * size or supplied an invalid address.
4415 */
4416 error = SET_ERROR(EINVAL);
4417 }
4418
4419 nvlist_free(errors);
4420
4421 return (error);
4422 }
4423
4424 /*
4425 * innvl: {
4426 * "snapname" -> full name of the snapshot to create
4427 * (optional) "props" -> properties to set (nvlist)
4428 * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
4429 * "begin_record" -> non-byteswapped dmu_replay_record_t
4430 * "input_fd" -> file descriptor to read stream from (int32)
4431 * (optional) "force" -> force flag (value ignored)
4432 * (optional) "resumable" -> resumable flag (value ignored)
4433 * (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
4434 * (optional) "action_handle" -> handle for this guid/ds mapping
4435 * }
4436 *
4437 * outnvl: {
4438 * "read_bytes" -> number of bytes read
4439 * "error_flags" -> zprop_errflags_t
4440 * "action_handle" -> handle for this guid/ds mapping
4441 * "errors" -> error for each unapplied received property (nvlist)
4442 * }
4443 */
4444 static int
4445 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4446 {
4447 dmu_replay_record_t *begin_record;
4448 uint_t begin_record_size;
4449 nvlist_t *errors = NULL;
4450 nvlist_t *props = NULL;
4451 char *snapname = NULL;
4452 char *origin = NULL;
4453 char *tosnap;
4454 char tofs[ZFS_MAX_DATASET_NAME_LEN];
4455 boolean_t force;
4456 boolean_t resumable;
4457 uint64_t action_handle = 0;
4458 uint64_t read_bytes = 0;
4459 uint64_t errflags = 0;
4460 int input_fd = -1;
4461 int cleanup_fd = -1;
4462 int error;
4463
4464 error = nvlist_lookup_string(innvl, "snapname", &snapname);
4465 if (error != 0)
4466 return (SET_ERROR(EINVAL));
4467
4468 if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
4469 strchr(snapname, '@') == NULL ||
4470 strchr(snapname, '%'))
4471 return (SET_ERROR(EINVAL));
4472
4473 (void) strcpy(tofs, snapname);
4474 tosnap = strchr(tofs, '@');
4475 *tosnap++ = '\0';
4476
4477 error = nvlist_lookup_string(innvl, "origin", &origin);
4478 if (error && error != ENOENT)
4479 return (error);
4480
4481 error = nvlist_lookup_byte_array(innvl, "begin_record",
4482 (uchar_t **)&begin_record, &begin_record_size);
4483 if (error != 0 || begin_record_size != sizeof (*begin_record))
4484 return (SET_ERROR(EINVAL));
4485
4486 error = nvlist_lookup_int32(innvl, "input_fd", &input_fd);
4487 if (error != 0)
4488 return (SET_ERROR(EINVAL));
4489
4490 force = nvlist_exists(innvl, "force");
4491 resumable = nvlist_exists(innvl, "resumable");
4492
4493 error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
4494 if (error && error != ENOENT)
4495 return (error);
4496
4497 error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
4498 if (error && error != ENOENT)
4499 return (error);
4500
4501 error = nvlist_lookup_nvlist(innvl, "props", &props);
4502 if (error && error != ENOENT)
4503 return (error);
4504
4505 error = zfs_ioc_recv_impl(tofs, tosnap, origin, props, force,
4506 resumable, input_fd, begin_record, cleanup_fd, &read_bytes,
4507 &errflags, &action_handle, &errors);
4508
4509 fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
4510 fnvlist_add_uint64(outnvl, "error_flags", errflags);
4511 fnvlist_add_uint64(outnvl, "action_handle", action_handle);
4512 fnvlist_add_nvlist(outnvl, "errors", errors);
4513
4514 nvlist_free(errors);
4515 nvlist_free(props);
4516
4517 return (error);
4518 }
4519
4520 /*
4521 * inputs:
4522 * zc_name name of snapshot to send
4523 * zc_cookie file descriptor to send stream to
4524 * zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
4525 * zc_sendobj objsetid of snapshot to send
4526 * zc_fromobj objsetid of incremental fromsnap (may be zero)
4527 * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
4528 * output size in zc_objset_type.
4529 * zc_flags lzc_send_flags
4530 *
4531 * outputs:
4532 * zc_objset_type estimated size, if zc_guid is set
4533 */
4534 static int
4535 zfs_ioc_send(zfs_cmd_t *zc)
4536 {
4537 int error;
4538 offset_t off;
4539 boolean_t estimate = (zc->zc_guid != 0);
4540 boolean_t embedok = (zc->zc_flags & 0x1);
4541 boolean_t large_block_ok = (zc->zc_flags & 0x2);
4542 boolean_t compressok = (zc->zc_flags & 0x4);
4543
4544 if (zc->zc_obj != 0) {
4545 dsl_pool_t *dp;
4546 dsl_dataset_t *tosnap;
4547
4548 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4549 if (error != 0)
4550 return (error);
4551
4552 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4553 if (error != 0) {
4554 dsl_pool_rele(dp, FTAG);
4555 return (error);
4556 }
4557
4558 if (dsl_dir_is_clone(tosnap->ds_dir))
4559 zc->zc_fromobj =
4560 dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4561 dsl_dataset_rele(tosnap, FTAG);
4562 dsl_pool_rele(dp, FTAG);
4563 }
4564
4565 if (estimate) {
4566 dsl_pool_t *dp;
4567 dsl_dataset_t *tosnap;
4568 dsl_dataset_t *fromsnap = NULL;
4569
4570 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4571 if (error != 0)
4572 return (error);
4573
4574 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4575 if (error != 0) {
4576 dsl_pool_rele(dp, FTAG);
4577 return (error);
4578 }
4579
4580 if (zc->zc_fromobj != 0) {
4581 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4582 FTAG, &fromsnap);
4583 if (error != 0) {
4584 dsl_dataset_rele(tosnap, FTAG);
4585 dsl_pool_rele(dp, FTAG);
4586 return (error);
4587 }
4588 }
4589
4590 error = dmu_send_estimate(tosnap, fromsnap, compressok,
4591 &zc->zc_objset_type);
4592
4593 if (fromsnap != NULL)
4594 dsl_dataset_rele(fromsnap, FTAG);
4595 dsl_dataset_rele(tosnap, FTAG);
4596 dsl_pool_rele(dp, FTAG);
4597 } else {
4598 file_t *fp = getf(zc->zc_cookie);
4599 if (fp == NULL)
4600 return (SET_ERROR(EBADF));
4601
4602 off = fp->f_offset;
4603 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4604 zc->zc_fromobj, embedok, large_block_ok, compressok,
4605 zc->zc_cookie, fp->f_vnode, &off);
4606
4607 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4608 fp->f_offset = off;
4609 releasef(zc->zc_cookie);
4610 }
4611 return (error);
4612 }
4613
4614 /*
4615 * inputs:
4616 * zc_name name of snapshot on which to report progress
4617 * zc_cookie file descriptor of send stream
4618 *
4619 * outputs:
4620 * zc_cookie number of bytes written in send stream thus far
4621 */
4622 static int
4623 zfs_ioc_send_progress(zfs_cmd_t *zc)
4624 {
4625 dsl_pool_t *dp;
4626 dsl_dataset_t *ds;
4627 dmu_sendarg_t *dsp = NULL;
4628 int error;
4629
4630 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4631 if (error != 0)
4632 return (error);
4633
4634 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4635 if (error != 0) {
4636 dsl_pool_rele(dp, FTAG);
4637 return (error);
4638 }
4639
4640 mutex_enter(&ds->ds_sendstream_lock);
4641
4642 /*
4643 * Iterate over all the send streams currently active on this dataset.
4644 * If there's one which matches the specified file descriptor _and_ the
4645 * stream was started by the current process, return the progress of
4646 * that stream.
4647 */
4648
4649 for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4650 dsp = list_next(&ds->ds_sendstreams, dsp)) {
4651 if (dsp->dsa_outfd == zc->zc_cookie &&
4652 dsp->dsa_proc->group_leader == curproc->group_leader)
4653 break;
4654 }
4655
4656 if (dsp != NULL)
4657 zc->zc_cookie = *(dsp->dsa_off);
4658 else
4659 error = SET_ERROR(ENOENT);
4660
4661 mutex_exit(&ds->ds_sendstream_lock);
4662 dsl_dataset_rele(ds, FTAG);
4663 dsl_pool_rele(dp, FTAG);
4664 return (error);
4665 }
4666
4667 static int
4668 zfs_ioc_inject_fault(zfs_cmd_t *zc)
4669 {
4670 int id, error;
4671
4672 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4673 &zc->zc_inject_record);
4674
4675 if (error == 0)
4676 zc->zc_guid = (uint64_t)id;
4677
4678 return (error);
4679 }
4680
4681 static int
4682 zfs_ioc_clear_fault(zfs_cmd_t *zc)
4683 {
4684 return (zio_clear_fault((int)zc->zc_guid));
4685 }
4686
4687 static int
4688 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4689 {
4690 int id = (int)zc->zc_guid;
4691 int error;
4692
4693 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4694 &zc->zc_inject_record);
4695
4696 zc->zc_guid = id;
4697
4698 return (error);
4699 }
4700
4701 static int
4702 zfs_ioc_error_log(zfs_cmd_t *zc)
4703 {
4704 spa_t *spa;
4705 int error;
4706 size_t count = (size_t)zc->zc_nvlist_dst_size;
4707
4708 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4709 return (error);
4710
4711 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4712 &count);
4713 if (error == 0)
4714 zc->zc_nvlist_dst_size = count;
4715 else
4716 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4717
4718 spa_close(spa, FTAG);
4719
4720 return (error);
4721 }
4722
4723 static int
4724 zfs_ioc_clear(zfs_cmd_t *zc)
4725 {
4726 spa_t *spa;
4727 vdev_t *vd;
4728 int error;
4729
4730 /*
4731 * On zpool clear we also fix up missing slogs
4732 */
4733 mutex_enter(&spa_namespace_lock);
4734 spa = spa_lookup(zc->zc_name);
4735 if (spa == NULL) {
4736 mutex_exit(&spa_namespace_lock);
4737 return (SET_ERROR(EIO));
4738 }
4739 if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4740 /* we need to let spa_open/spa_load clear the chains */
4741 spa_set_log_state(spa, SPA_LOG_CLEAR);
4742 }
4743 spa->spa_last_open_failed = 0;
4744 mutex_exit(&spa_namespace_lock);
4745
4746 if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4747 error = spa_open(zc->zc_name, &spa, FTAG);
4748 } else {
4749 nvlist_t *policy;
4750 nvlist_t *config = NULL;
4751
4752 if (zc->zc_nvlist_src == 0)
4753 return (SET_ERROR(EINVAL));
4754
4755 if ((error = get_nvlist(zc->zc_nvlist_src,
4756 zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4757 error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4758 policy, &config);
4759 if (config != NULL) {
4760 int err;
4761
4762 if ((err = put_nvlist(zc, config)) != 0)
4763 error = err;
4764 nvlist_free(config);
4765 }
4766 nvlist_free(policy);
4767 }
4768 }
4769
4770 if (error != 0)
4771 return (error);
4772
4773 spa_vdev_state_enter(spa, SCL_NONE);
4774
4775 if (zc->zc_guid == 0) {
4776 vd = NULL;
4777 } else {
4778 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4779 if (vd == NULL) {
4780 (void) spa_vdev_state_exit(spa, NULL, ENODEV);
4781 spa_close(spa, FTAG);
4782 return (SET_ERROR(ENODEV));
4783 }
4784 }
4785
4786 vdev_clear(spa, vd);
4787
4788 (void) spa_vdev_state_exit(spa, NULL, 0);
4789
4790 /*
4791 * Resume any suspended I/Os.
4792 */
4793 if (zio_resume(spa) != 0)
4794 error = SET_ERROR(EIO);
4795
4796 spa_close(spa, FTAG);
4797
4798 return (error);
4799 }
4800
4801 static int
4802 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4803 {
4804 spa_t *spa;
4805 int error;
4806
4807 error = spa_open(zc->zc_name, &spa, FTAG);
4808 if (error != 0)
4809 return (error);
4810
4811 spa_vdev_state_enter(spa, SCL_NONE);
4812
4813 /*
4814 * If a resilver is already in progress then set the
4815 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4816 * the scan as a side effect of the reopen. Otherwise, let
4817 * vdev_open() decided if a resilver is required.
4818 */
4819 spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4820 vdev_reopen(spa->spa_root_vdev);
4821 spa->spa_scrub_reopen = B_FALSE;
4822
4823 (void) spa_vdev_state_exit(spa, NULL, 0);
4824 spa_close(spa, FTAG);
4825 return (0);
4826 }
4827 /*
4828 * inputs:
4829 * zc_name name of filesystem
4830 * zc_value name of origin snapshot
4831 *
4832 * outputs:
4833 * zc_string name of conflicting snapshot, if there is one
4834 */
4835 static int
4836 zfs_ioc_promote(zfs_cmd_t *zc)
4837 {
4838 char *cp;
4839
4840 /*
4841 * We don't need to unmount *all* the origin fs's snapshots, but
4842 * it's easier.
4843 */
4844 cp = strchr(zc->zc_value, '@');
4845 if (cp)
4846 *cp = '\0';
4847 (void) dmu_objset_find(zc->zc_value,
4848 zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4849 return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4850 }
4851
4852 /*
4853 * Retrieve a single {user|group}{used|quota}@... property.
4854 *
4855 * inputs:
4856 * zc_name name of filesystem
4857 * zc_objset_type zfs_userquota_prop_t
4858 * zc_value domain name (eg. "S-1-234-567-89")
4859 * zc_guid RID/UID/GID
4860 *
4861 * outputs:
4862 * zc_cookie property value
4863 */
4864 static int
4865 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4866 {
4867 zfsvfs_t *zfsvfs;
4868 int error;
4869
4870 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4871 return (SET_ERROR(EINVAL));
4872
4873 error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4874 if (error != 0)
4875 return (error);
4876
4877 error = zfs_userspace_one(zfsvfs,
4878 zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4879 zfsvfs_rele(zfsvfs, FTAG);
4880
4881 return (error);
4882 }
4883
4884 /*
4885 * inputs:
4886 * zc_name name of filesystem
4887 * zc_cookie zap cursor
4888 * zc_objset_type zfs_userquota_prop_t
4889 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4890 *
4891 * outputs:
4892 * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
4893 * zc_cookie zap cursor
4894 */
4895 static int
4896 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4897 {
4898 zfsvfs_t *zfsvfs;
4899 int bufsize = zc->zc_nvlist_dst_size;
4900 int error;
4901 void *buf;
4902
4903 if (bufsize <= 0)
4904 return (SET_ERROR(ENOMEM));
4905
4906 error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4907 if (error != 0)
4908 return (error);
4909
4910 buf = vmem_alloc(bufsize, KM_SLEEP);
4911
4912 error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4913 buf, &zc->zc_nvlist_dst_size);
4914
4915 if (error == 0) {
4916 error = xcopyout(buf,
4917 (void *)(uintptr_t)zc->zc_nvlist_dst,
4918 zc->zc_nvlist_dst_size);
4919 }
4920 vmem_free(buf, bufsize);
4921 zfsvfs_rele(zfsvfs, FTAG);
4922
4923 return (error);
4924 }
4925
4926 /*
4927 * inputs:
4928 * zc_name name of filesystem
4929 *
4930 * outputs:
4931 * none
4932 */
4933 static int
4934 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4935 {
4936 objset_t *os;
4937 int error = 0;
4938 zfsvfs_t *zfsvfs;
4939
4940 if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4941 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4942 /*
4943 * If userused is not enabled, it may be because the
4944 * objset needs to be closed & reopened (to grow the
4945 * objset_phys_t). Suspend/resume the fs will do that.
4946 */
4947 dsl_dataset_t *ds;
4948
4949 ds = dmu_objset_ds(zfsvfs->z_os);
4950 error = zfs_suspend_fs(zfsvfs);
4951 if (error == 0) {
4952 dmu_objset_refresh_ownership(zfsvfs->z_os,
4953 zfsvfs);
4954 error = zfs_resume_fs(zfsvfs, ds);
4955 }
4956 }
4957 if (error == 0)
4958 error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4959 deactivate_super(zfsvfs->z_sb);
4960 } else {
4961 /* XXX kind of reading contents without owning */
4962 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4963 if (error != 0)
4964 return (error);
4965
4966 error = dmu_objset_userspace_upgrade(os);
4967 dmu_objset_rele(os, FTAG);
4968 }
4969
4970 return (error);
4971 }
4972
4973 /*
4974 * inputs:
4975 * zc_name name of filesystem
4976 *
4977 * outputs:
4978 * none
4979 */
4980 static int
4981 zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc)
4982 {
4983 objset_t *os;
4984 int error;
4985
4986 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4987 if (error != 0)
4988 return (error);
4989
4990 dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
4991 dsl_pool_rele(dmu_objset_pool(os), FTAG);
4992
4993 if (dmu_objset_userobjspace_upgradable(os)) {
4994 mutex_enter(&os->os_upgrade_lock);
4995 if (os->os_upgrade_id == 0) {
4996 /* clear potential error code and retry */
4997 os->os_upgrade_status = 0;
4998 mutex_exit(&os->os_upgrade_lock);
4999
5000 dmu_objset_userobjspace_upgrade(os);
5001 } else {
5002 mutex_exit(&os->os_upgrade_lock);
5003 }
5004
5005 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
5006 error = os->os_upgrade_status;
5007 }
5008
5009 dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
5010 dsl_dataset_rele(dmu_objset_ds(os), FTAG);
5011
5012 return (error);
5013 }
5014
5015 static int
5016 zfs_ioc_share(zfs_cmd_t *zc)
5017 {
5018 return (SET_ERROR(ENOSYS));
5019 }
5020
5021 ace_t full_access[] = {
5022 {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5023 };
5024
5025 /*
5026 * inputs:
5027 * zc_name name of containing filesystem
5028 * zc_obj object # beyond which we want next in-use object #
5029 *
5030 * outputs:
5031 * zc_obj next in-use object #
5032 */
5033 static int
5034 zfs_ioc_next_obj(zfs_cmd_t *zc)
5035 {
5036 objset_t *os = NULL;
5037 int error;
5038
5039 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5040 if (error != 0)
5041 return (error);
5042
5043 error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
5044
5045 dmu_objset_rele(os, FTAG);
5046 return (error);
5047 }
5048
5049 /*
5050 * inputs:
5051 * zc_name name of filesystem
5052 * zc_value prefix name for snapshot
5053 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
5054 *
5055 * outputs:
5056 * zc_value short name of new snapshot
5057 */
5058 static int
5059 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5060 {
5061 char *snap_name;
5062 char *hold_name;
5063 int error;
5064 minor_t minor;
5065
5066 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5067 if (error != 0)
5068 return (error);
5069
5070 snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5071 (u_longlong_t)ddi_get_lbolt64());
5072 hold_name = kmem_asprintf("%%%s", zc->zc_value);
5073
5074 error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5075 hold_name);
5076 if (error == 0)
5077 (void) strlcpy(zc->zc_value, snap_name,
5078 sizeof (zc->zc_value));
5079 strfree(snap_name);
5080 strfree(hold_name);
5081 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5082 return (error);
5083 }
5084
5085 /*
5086 * inputs:
5087 * zc_name name of "to" snapshot
5088 * zc_value name of "from" snapshot
5089 * zc_cookie file descriptor to write diff data on
5090 *
5091 * outputs:
5092 * dmu_diff_record_t's to the file descriptor
5093 */
5094 static int
5095 zfs_ioc_diff(zfs_cmd_t *zc)
5096 {
5097 file_t *fp;
5098 offset_t off;
5099 int error;
5100
5101 fp = getf(zc->zc_cookie);
5102 if (fp == NULL)
5103 return (SET_ERROR(EBADF));
5104
5105 off = fp->f_offset;
5106
5107 error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5108
5109 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5110 fp->f_offset = off;
5111 releasef(zc->zc_cookie);
5112
5113 return (error);
5114 }
5115
/*
 * Remove all ACL files in shares dir
 *
 * Walks the ZAP entries of directory 'dzp' and removes each named entry.
 * The value returned is whatever ended the walk: the VOP_REMOVE() error,
 * or the non-zero result of zap_cursor_retrieve().
 * NOTE(review): a complete walk therefore also returns non-zero
 * (presumably ENOENT from the exhausted cursor) -- confirm callers
 * expect that.
 */
#ifdef HAVE_SMB_SHARE
static int
zfs_smb_acl_purge(znode_t *dzp)
{
	zfsvfs_t *zfsvfs = ZTOZSB(dzp);
	zap_cursor_t cursor;
	zap_attribute_t attr;
	int error;

	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
	while ((error = zap_cursor_retrieve(&cursor, &attr)) == 0) {
		error = VOP_REMOVE(ZTOV(dzp), attr.za_name, kcred, NULL, 0);
		if (error != 0)
			break;
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	return (error);
}
#endif /* HAVE_SMB_SHARE */
5139
5140 static int
5141 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5142 {
5143 #ifdef HAVE_SMB_SHARE
5144 vnode_t *vp;
5145 znode_t *dzp;
5146 vnode_t *resourcevp = NULL;
5147 znode_t *sharedir;
5148 zfsvfs_t *zfsvfs;
5149 nvlist_t *nvlist;
5150 char *src, *target;
5151 vattr_t vattr;
5152 vsecattr_t vsec;
5153 int error = 0;
5154
5155 if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5156 NO_FOLLOW, NULL, &vp)) != 0)
5157 return (error);
5158
5159 /* Now make sure mntpnt and dataset are ZFS */
5160
5161 if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5162 (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5163 zc->zc_name) != 0)) {
5164 VN_RELE(vp);
5165 return (SET_ERROR(EINVAL));
5166 }
5167
5168 dzp = VTOZ(vp);
5169 zfsvfs = ZTOZSB(dzp);
5170 ZFS_ENTER(zfsvfs);
5171
5172 /*
5173 * Create share dir if its missing.
5174 */
5175 mutex_enter(&zfsvfs->z_lock);
5176 if (zfsvfs->z_shares_dir == 0) {
5177 dmu_tx_t *tx;
5178
5179 tx = dmu_tx_create(zfsvfs->z_os);
5180 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5181 ZFS_SHARES_DIR);
5182 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5183 error = dmu_tx_assign(tx, TXG_WAIT);
5184 if (error != 0) {
5185 dmu_tx_abort(tx);
5186 } else {
5187 error = zfs_create_share_dir(zfsvfs, tx);
5188 dmu_tx_commit(tx);
5189 }
5190 if (error != 0) {
5191 mutex_exit(&zfsvfs->z_lock);
5192 VN_RELE(vp);
5193 ZFS_EXIT(zfsvfs);
5194 return (error);
5195 }
5196 }
5197 mutex_exit(&zfsvfs->z_lock);
5198
5199 ASSERT(zfsvfs->z_shares_dir);
5200 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5201 VN_RELE(vp);
5202 ZFS_EXIT(zfsvfs);
5203 return (error);
5204 }
5205
5206 switch (zc->zc_cookie) {
5207 case ZFS_SMB_ACL_ADD:
5208 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5209 vattr.va_mode = S_IFREG|0777;
5210 vattr.va_uid = 0;
5211 vattr.va_gid = 0;
5212
5213 vsec.vsa_mask = VSA_ACE;
5214 vsec.vsa_aclentp = &full_access;
5215 vsec.vsa_aclentsz = sizeof (full_access);
5216 vsec.vsa_aclcnt = 1;
5217
5218 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5219 &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5220 if (resourcevp)
5221 VN_RELE(resourcevp);
5222 break;
5223
5224 case ZFS_SMB_ACL_REMOVE:
5225 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5226 NULL, 0);
5227 break;
5228
5229 case ZFS_SMB_ACL_RENAME:
5230 if ((error = get_nvlist(zc->zc_nvlist_src,
5231 zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5232 VN_RELE(vp);
5233 VN_RELE(ZTOV(sharedir));
5234 ZFS_EXIT(zfsvfs);
5235 return (error);
5236 }
5237 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5238 nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5239 &target)) {
5240 VN_RELE(vp);
5241 VN_RELE(ZTOV(sharedir));
5242 ZFS_EXIT(zfsvfs);
5243 nvlist_free(nvlist);
5244 return (error);
5245 }
5246 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5247 kcred, NULL, 0);
5248 nvlist_free(nvlist);
5249 break;
5250
5251 case ZFS_SMB_ACL_PURGE:
5252 error = zfs_smb_acl_purge(sharedir);
5253 break;
5254
5255 default:
5256 error = SET_ERROR(EINVAL);
5257 break;
5258 }
5259
5260 VN_RELE(vp);
5261 VN_RELE(ZTOV(sharedir));
5262
5263 ZFS_EXIT(zfsvfs);
5264
5265 return (error);
5266 #else
5267 return (SET_ERROR(ENOTSUP));
5268 #endif /* HAVE_SMB_SHARE */
5269 }
5270
5271 /*
5272 * innvl: {
5273 * "holds" -> { snapname -> holdname (string), ... }
5274 * (optional) "cleanup_fd" -> fd (int32)
5275 * }
5276 *
5277 * outnvl: {
5278 * snapname -> error value (int32)
5279 * ...
5280 * }
5281 */
5282 /* ARGSUSED */
5283 static int
5284 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5285 {
5286 nvpair_t *pair;
5287 nvlist_t *holds;
5288 int cleanup_fd = -1;
5289 int error;
5290 minor_t minor = 0;
5291
5292 error = nvlist_lookup_nvlist(args, "holds", &holds);
5293 if (error != 0)
5294 return (SET_ERROR(EINVAL));
5295
5296 /* make sure the user didn't pass us any invalid (empty) tags */
5297 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5298 pair = nvlist_next_nvpair(holds, pair)) {
5299 char *htag;
5300
5301 error = nvpair_value_string(pair, &htag);
5302 if (error != 0)
5303 return (SET_ERROR(error));
5304
5305 if (strlen(htag) == 0)
5306 return (SET_ERROR(EINVAL));
5307 }
5308
5309 if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5310 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5311 if (error != 0)
5312 return (error);
5313 }
5314
5315 error = dsl_dataset_user_hold(holds, minor, errlist);
5316 if (minor != 0)
5317 zfs_onexit_fd_rele(cleanup_fd);
5318 return (error);
5319 }
5320
5321 /*
5322 * innvl is not used.
5323 *
5324 * outnvl: {
5325 * holdname -> time added (uint64 seconds since epoch)
5326 * ...
5327 * }
5328 */
5329 /* ARGSUSED */
5330 static int
5331 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5332 {
5333 return (dsl_dataset_get_holds(snapname, outnvl));
5334 }
5335
5336 /*
5337 * innvl: {
5338 * snapname -> { holdname, ... }
5339 * ...
5340 * }
5341 *
5342 * outnvl: {
5343 * snapname -> error value (int32)
5344 * ...
5345 * }
5346 */
5347 /* ARGSUSED */
5348 static int
5349 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5350 {
5351 return (dsl_dataset_user_release(holds, errlist));
5352 }
5353
5354 /*
5355 * inputs:
5356 * zc_guid flags (ZEVENT_NONBLOCK)
5357 * zc_cleanup_fd zevent file descriptor
5358 *
5359 * outputs:
5360 * zc_nvlist_dst next nvlist event
5361 * zc_cookie dropped events since last get
5362 */
5363 static int
5364 zfs_ioc_events_next(zfs_cmd_t *zc)
5365 {
5366 zfs_zevent_t *ze;
5367 nvlist_t *event = NULL;
5368 minor_t minor;
5369 uint64_t dropped = 0;
5370 int error;
5371
5372 error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
5373 if (error != 0)
5374 return (error);
5375
5376 do {
5377 error = zfs_zevent_next(ze, &event,
5378 &zc->zc_nvlist_dst_size, &dropped);
5379 if (event != NULL) {
5380 zc->zc_cookie = dropped;
5381 error = put_nvlist(zc, event);
5382 nvlist_free(event);
5383 }
5384
5385 if (zc->zc_guid & ZEVENT_NONBLOCK)
5386 break;
5387
5388 if ((error == 0) || (error != ENOENT))
5389 break;
5390
5391 error = zfs_zevent_wait(ze);
5392 if (error != 0)
5393 break;
5394 } while (1);
5395
5396 zfs_zevent_fd_rele(zc->zc_cleanup_fd);
5397
5398 return (error);
5399 }
5400
5401 /*
5402 * outputs:
5403 * zc_cookie cleared events count
5404 */
5405 static int
5406 zfs_ioc_events_clear(zfs_cmd_t *zc)
5407 {
5408 int count;
5409
5410 zfs_zevent_drain_all(&count);
5411 zc->zc_cookie = count;
5412
5413 return (0);
5414 }
5415
5416 /*
5417 * inputs:
5418 * zc_guid eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
5419 * zc_cleanup zevent file descriptor
5420 */
5421 static int
5422 zfs_ioc_events_seek(zfs_cmd_t *zc)
5423 {
5424 zfs_zevent_t *ze;
5425 minor_t minor;
5426 int error;
5427
5428 error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
5429 if (error != 0)
5430 return (error);
5431
5432 error = zfs_zevent_seek(ze, zc->zc_guid);
5433 zfs_zevent_fd_rele(zc->zc_cleanup_fd);
5434
5435 return (error);
5436 }
5437
5438 /*
5439 * inputs:
5440 * zc_name name of new filesystem or snapshot
5441 * zc_value full name of old snapshot
5442 *
5443 * outputs:
5444 * zc_cookie space in bytes
5445 * zc_objset_type compressed space in bytes
5446 * zc_perm_action uncompressed space in bytes
5447 */
5448 static int
5449 zfs_ioc_space_written(zfs_cmd_t *zc)
5450 {
5451 int error;
5452 dsl_pool_t *dp;
5453 dsl_dataset_t *new, *old;
5454
5455 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5456 if (error != 0)
5457 return (error);
5458 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5459 if (error != 0) {
5460 dsl_pool_rele(dp, FTAG);
5461 return (error);
5462 }
5463 error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5464 if (error != 0) {
5465 dsl_dataset_rele(new, FTAG);
5466 dsl_pool_rele(dp, FTAG);
5467 return (error);
5468 }
5469
5470 error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5471 &zc->zc_objset_type, &zc->zc_perm_action);
5472 dsl_dataset_rele(old, FTAG);
5473 dsl_dataset_rele(new, FTAG);
5474 dsl_pool_rele(dp, FTAG);
5475 return (error);
5476 }
5477
5478 /*
5479 * innvl: {
5480 * "firstsnap" -> snapshot name
5481 * }
5482 *
5483 * outnvl: {
5484 * "used" -> space in bytes
5485 * "compressed" -> compressed space in bytes
5486 * "uncompressed" -> uncompressed space in bytes
5487 * }
5488 */
5489 static int
5490 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5491 {
5492 int error;
5493 dsl_pool_t *dp;
5494 dsl_dataset_t *new, *old;
5495 char *firstsnap;
5496 uint64_t used, comp, uncomp;
5497
5498 if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5499 return (SET_ERROR(EINVAL));
5500
5501 error = dsl_pool_hold(lastsnap, FTAG, &dp);
5502 if (error != 0)
5503 return (error);
5504
5505 error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5506 if (error == 0 && !new->ds_is_snapshot) {
5507 dsl_dataset_rele(new, FTAG);
5508 error = SET_ERROR(EINVAL);
5509 }
5510 if (error != 0) {
5511 dsl_pool_rele(dp, FTAG);
5512 return (error);
5513 }
5514 error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5515 if (error == 0 && !old->ds_is_snapshot) {
5516 dsl_dataset_rele(old, FTAG);
5517 error = SET_ERROR(EINVAL);
5518 }
5519 if (error != 0) {
5520 dsl_dataset_rele(new, FTAG);
5521 dsl_pool_rele(dp, FTAG);
5522 return (error);
5523 }
5524
5525 error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5526 dsl_dataset_rele(old, FTAG);
5527 dsl_dataset_rele(new, FTAG);
5528 dsl_pool_rele(dp, FTAG);
5529 fnvlist_add_uint64(outnvl, "used", used);
5530 fnvlist_add_uint64(outnvl, "compressed", comp);
5531 fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5532 return (error);
5533 }
5534
5535 /*
5536 * innvl: {
5537 * "fd" -> file descriptor to write stream to (int32)
5538 * (optional) "fromsnap" -> full snap name to send an incremental from
5539 * (optional) "largeblockok" -> (value ignored)
5540 * indicates that blocks > 128KB are permitted
5541 * (optional) "embedok" -> (value ignored)
5542 * presence indicates DRR_WRITE_EMBEDDED records are permitted
5543 * (optional) "compressok" -> (value ignored)
5544 * presence indicates compressed DRR_WRITE records are permitted
5545 * (optional) "resume_object" and "resume_offset" -> (uint64)
5546 * if present, resume send stream from specified object and offset.
5547 * }
5548 *
5549 * outnvl is unused
5550 */
5551 /* ARGSUSED */
5552 static int
5553 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5554 {
5555 int error;
5556 offset_t off;
5557 char *fromname = NULL;
5558 int fd;
5559 file_t *fp;
5560 boolean_t largeblockok;
5561 boolean_t embedok;
5562 boolean_t compressok;
5563 uint64_t resumeobj = 0;
5564 uint64_t resumeoff = 0;
5565
5566 error = nvlist_lookup_int32(innvl, "fd", &fd);
5567 if (error != 0)
5568 return (SET_ERROR(EINVAL));
5569
5570 (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5571
5572 largeblockok = nvlist_exists(innvl, "largeblockok");
5573 embedok = nvlist_exists(innvl, "embedok");
5574 compressok = nvlist_exists(innvl, "compressok");
5575
5576 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5577 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5578
5579 if ((fp = getf(fd)) == NULL)
5580 return (SET_ERROR(EBADF));
5581
5582 off = fp->f_offset;
5583 error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5584 fd, resumeobj, resumeoff, fp->f_vnode, &off);
5585
5586 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5587 fp->f_offset = off;
5588
5589 releasef(fd);
5590 return (error);
5591 }
5592
5593 /*
5594 * Determine approximately how large a zfs send stream will be -- the number
5595 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5596 *
5597 * innvl: {
5598 * (optional) "from" -> full snap or bookmark name to send an incremental
5599 * from
5600 * (optional) "largeblockok" -> (value ignored)
5601 * indicates that blocks > 128KB are permitted
5602 * (optional) "embedok" -> (value ignored)
5603 * presence indicates DRR_WRITE_EMBEDDED records are permitted
5604 * (optional) "compressok" -> (value ignored)
5605 * presence indicates compressed DRR_WRITE records are permitted
5606 * }
5607 *
5608 * outnvl: {
5609 * "space" -> bytes of space (uint64)
5610 * }
5611 */
5612 static int
5613 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5614 {
5615 dsl_pool_t *dp;
5616 dsl_dataset_t *tosnap;
5617 int error;
5618 char *fromname;
5619 /* LINTED E_FUNC_SET_NOT_USED */
5620 boolean_t largeblockok;
5621 /* LINTED E_FUNC_SET_NOT_USED */
5622 boolean_t embedok;
5623 boolean_t compressok;
5624 uint64_t space;
5625
5626 error = dsl_pool_hold(snapname, FTAG, &dp);
5627 if (error != 0)
5628 return (error);
5629
5630 error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5631 if (error != 0) {
5632 dsl_pool_rele(dp, FTAG);
5633 return (error);
5634 }
5635
5636 largeblockok = nvlist_exists(innvl, "largeblockok");
5637 embedok = nvlist_exists(innvl, "embedok");
5638 compressok = nvlist_exists(innvl, "compressok");
5639
5640 error = nvlist_lookup_string(innvl, "from", &fromname);
5641 if (error == 0) {
5642 if (strchr(fromname, '@') != NULL) {
5643 /*
5644 * If from is a snapshot, hold it and use the more
5645 * efficient dmu_send_estimate to estimate send space
5646 * size using deadlists.
5647 */
5648 dsl_dataset_t *fromsnap;
5649 error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5650 if (error != 0)
5651 goto out;
5652 error = dmu_send_estimate(tosnap, fromsnap, compressok,
5653 &space);
5654 dsl_dataset_rele(fromsnap, FTAG);
5655 } else if (strchr(fromname, '#') != NULL) {
5656 /*
5657 * If from is a bookmark, fetch the creation TXG of the
5658 * snapshot it was created from and use that to find
5659 * blocks that were born after it.
5660 */
5661 zfs_bookmark_phys_t frombm;
5662
5663 error = dsl_bookmark_lookup(dp, fromname, tosnap,
5664 &frombm);
5665 if (error != 0)
5666 goto out;
5667 error = dmu_send_estimate_from_txg(tosnap,
5668 frombm.zbm_creation_txg, compressok, &space);
5669 } else {
5670 /*
5671 * from is not properly formatted as a snapshot or
5672 * bookmark
5673 */
5674 error = SET_ERROR(EINVAL);
5675 goto out;
5676 }
5677 } else {
5678 // If estimating the size of a full send, use dmu_send_estimate
5679 error = dmu_send_estimate(tosnap, NULL, compressok, &space);
5680 }
5681
5682 fnvlist_add_uint64(outnvl, "space", space);
5683
5684 out:
5685 dsl_dataset_rele(tosnap, FTAG);
5686 dsl_pool_rele(dp, FTAG);
5687 return (error);
5688 }
5689
5690 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5691
5692 static void
5693 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5694 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5695 boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5696 {
5697 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5698
5699 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5700 ASSERT3U(ioc, <, ZFS_IOC_LAST);
5701 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5702 ASSERT3P(vec->zvec_func, ==, NULL);
5703
5704 vec->zvec_legacy_func = func;
5705 vec->zvec_secpolicy = secpolicy;
5706 vec->zvec_namecheck = namecheck;
5707 vec->zvec_allow_log = log_history;
5708 vec->zvec_pool_check = pool_check;
5709 }
5710
5711 /*
5712 * See the block comment at the beginning of this file for details on
5713 * each argument to this function.
5714 */
5715 static void
5716 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5717 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5718 zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5719 boolean_t allow_log)
5720 {
5721 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5722
5723 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5724 ASSERT3U(ioc, <, ZFS_IOC_LAST);
5725 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5726 ASSERT3P(vec->zvec_func, ==, NULL);
5727
5728 /* if we are logging, the name must be valid */
5729 ASSERT(!allow_log || namecheck != NO_NAME);
5730
5731 vec->zvec_name = name;
5732 vec->zvec_func = func;
5733 vec->zvec_secpolicy = secpolicy;
5734 vec->zvec_namecheck = namecheck;
5735 vec->zvec_pool_check = pool_check;
5736 vec->zvec_smush_outnvlist = smush_outnvlist;
5737 vec->zvec_allow_log = allow_log;
5738 }
5739
5740 static void
5741 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5742 zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5743 zfs_ioc_poolcheck_t pool_check)
5744 {
5745 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5746 POOL_NAME, log_history, pool_check);
5747 }
5748
5749 static void
5750 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5751 zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5752 {
5753 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5754 DATASET_NAME, B_FALSE, pool_check);
5755 }
5756
5757 static void
5758 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5759 {
5760 zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5761 POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5762 }
5763
5764 static void
5765 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5766 zfs_secpolicy_func_t *secpolicy)
5767 {
5768 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5769 NO_NAME, B_FALSE, POOL_CHECK_NONE);
5770 }
5771
5772 static void
5773 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5774 zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5775 {
5776 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5777 DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5778 }
5779
5780 static void
5781 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5782 {
5783 zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5784 zfs_secpolicy_read);
5785 }
5786
5787 static void
5788 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5789 zfs_secpolicy_func_t *secpolicy)
5790 {
5791 zfs_ioctl_register_legacy(ioc, func, secpolicy,
5792 DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5793 }
5794
/*
 * Register every ZFS ioctl handler.
 *
 * The first group uses zfs_ioctl_register(), which takes a command name,
 * an nvlist-based handler, a security policy, the kind of name carried in
 * zc_name, the pool state checks to run and two trailing booleans.
 * NOTE(review): the two booleans presumably map to zvec_smush_outnvlist
 * and zvec_allow_log (both consulted by zfsdev_ioctl()); confirm their
 * order against zfs_ioctl_register().
 *
 * The second group uses the legacy handler signature and is registered
 * through zfs_ioctl_register_legacy() or one of its convenience wrappers.
 */
static void
zfs_ioctl_init(void)
{
	/* IOCTLS that use the nvlist-based function signature */

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
	    zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	/* Pool-level operations that modify pool configuration or state. */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* Operations that do not name a pool or dataset in zc_name. */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

	/* Dataset reads guarded by the generic zfs_secpolicy_read policy. */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* Dataset reads that need an operation-specific security policy. */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* Dataset modifications: logged, and refused on read-only pools. */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* Dataset operations that are never logged to the pool history. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

	/*
	 * ZoL functions
	 */
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
}
6005
6006 int
6007 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6008 zfs_ioc_poolcheck_t check)
6009 {
6010 spa_t *spa;
6011 int error;
6012
6013 ASSERT(type == POOL_NAME || type == DATASET_NAME);
6014
6015 if (check & POOL_CHECK_NONE)
6016 return (0);
6017
6018 error = spa_open(name, &spa, FTAG);
6019 if (error == 0) {
6020 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6021 error = SET_ERROR(EAGAIN);
6022 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6023 error = SET_ERROR(EROFS);
6024 spa_close(spa, FTAG);
6025 }
6026 return (error);
6027 }
6028
6029 static void *
6030 zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which)
6031 {
6032 zfsdev_state_t *zs;
6033
6034 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
6035 if (zs->zs_minor == minor) {
6036 smp_rmb();
6037 switch (which) {
6038 case ZST_ONEXIT:
6039 return (zs->zs_onexit);
6040 case ZST_ZEVENT:
6041 return (zs->zs_zevent);
6042 case ZST_ALL:
6043 return (zs);
6044 }
6045 }
6046 }
6047
6048 return (NULL);
6049 }
6050
6051 void *
6052 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
6053 {
6054 void *ptr;
6055
6056 ptr = zfsdev_get_state_impl(minor, which);
6057
6058 return (ptr);
6059 }
6060
6061 int
6062 zfsdev_getminor(struct file *filp, minor_t *minorp)
6063 {
6064 zfsdev_state_t *zs, *fpd;
6065
6066 ASSERT(filp != NULL);
6067 ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
6068
6069 fpd = filp->private_data;
6070 if (fpd == NULL)
6071 return (EBADF);
6072
6073 mutex_enter(&zfsdev_state_lock);
6074
6075 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
6076
6077 if (zs->zs_minor == -1)
6078 continue;
6079
6080 if (fpd == zs) {
6081 *minorp = fpd->zs_minor;
6082 mutex_exit(&zfsdev_state_lock);
6083 return (0);
6084 }
6085 }
6086
6087 mutex_exit(&zfsdev_state_lock);
6088
6089 return (EBADF);
6090 }
6091
6092 /*
6093 * Find a free minor number. The zfsdev_state_list is expected to
6094 * be short since it is only a list of currently open file handles.
6095 */
6096 minor_t
6097 zfsdev_minor_alloc(void)
6098 {
6099 static minor_t last_minor = 0;
6100 minor_t m;
6101
6102 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6103
6104 for (m = last_minor + 1; m != last_minor; m++) {
6105 if (m > ZFSDEV_MAX_MINOR)
6106 m = 1;
6107 if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) {
6108 last_minor = m;
6109 return (m);
6110 }
6111 }
6112
6113 return (0);
6114 }
6115
/*
 * Set up the per-open state for a newly opened file on the ZFS device.
 *
 * Allocates a minor number, then either reuses the first free entry of
 * zfsdev_state_list (zs_minor == -1) or allocates a new entry and appends
 * it to the list.  The entry is attached to filp->private_data and its
 * onexit and zevent state are initialized.
 *
 * Returns 0 on success or ENXIO when no minor number is available.
 * The caller must hold zfsdev_state_lock.
 */
static int
zfsdev_state_init(struct file *filp)
{
	zfsdev_state_t *zs, *zsprev = NULL;
	minor_t minor;
	boolean_t newzs = B_FALSE;

	ASSERT(MUTEX_HELD(&zfsdev_state_lock));

	minor = zfsdev_minor_alloc();
	if (minor == 0)
		return (SET_ERROR(ENXIO));

	/*
	 * Look for a free slot to reuse; zsprev ends up at the list tail
	 * when none is found.
	 */
	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
		if (zs->zs_minor == -1)
			break;
		zsprev = zs;
	}

	if (!zs) {
		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
		newzs = B_TRUE;
	}

	zs->zs_file = filp;
	filp->private_data = zs;

	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);


	/*
	 * In order to provide for lock-free concurrent read access
	 * to the minor list in zfsdev_get_state_impl(), new entries
	 * must be completely written before linking them into the
	 * list whereas existing entries are already linked; the last
	 * operation must be updating zs_minor (from -1 to the new
	 * value).
	 */
	if (newzs) {
		zs->zs_minor = minor;
		smp_wmb();
		/*
		 * NOTE(review): zsprev is non-NULL here only if the list is
		 * never empty; zfs_attach() seeds it with one free entry.
		 */
		zsprev->zs_next = zs;
	} else {
		smp_wmb();
		zs->zs_minor = minor;
	}

	return (0);
}
6166
6167 static int
6168 zfsdev_state_destroy(struct file *filp)
6169 {
6170 zfsdev_state_t *zs;
6171
6172 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6173 ASSERT(filp->private_data != NULL);
6174
6175 zs = filp->private_data;
6176 zs->zs_minor = -1;
6177 zfs_onexit_destroy(zs->zs_onexit);
6178 zfs_zevent_destroy(zs->zs_zevent);
6179
6180 return (0);
6181 }
6182
6183 static int
6184 zfsdev_open(struct inode *ino, struct file *filp)
6185 {
6186 int error;
6187
6188 mutex_enter(&zfsdev_state_lock);
6189 error = zfsdev_state_init(filp);
6190 mutex_exit(&zfsdev_state_lock);
6191
6192 return (-error);
6193 }
6194
6195 static int
6196 zfsdev_release(struct inode *ino, struct file *filp)
6197 {
6198 int error;
6199
6200 mutex_enter(&zfsdev_state_lock);
6201 error = zfsdev_state_destroy(filp);
6202 mutex_exit(&zfsdev_state_lock);
6203
6204 return (-error);
6205 }
6206
6207 static long
6208 zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
6209 {
6210 zfs_cmd_t *zc;
6211 uint_t vecnum;
6212 int error, rc, flag = 0;
6213 const zfs_ioc_vec_t *vec;
6214 char *saved_poolname = NULL;
6215 nvlist_t *innvl = NULL;
6216 fstrans_cookie_t cookie;
6217
6218 vecnum = cmd - ZFS_IOC_FIRST;
6219 if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6220 return (-SET_ERROR(EINVAL));
6221 vec = &zfs_ioc_vec[vecnum];
6222
6223 /*
6224 * The registered ioctl list may be sparse, verify that either
6225 * a normal or legacy handler are registered.
6226 */
6227 if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
6228 return (-SET_ERROR(EINVAL));
6229
6230 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
6231
6232 error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6233 if (error != 0) {
6234 error = SET_ERROR(EFAULT);
6235 goto out;
6236 }
6237
6238 zc->zc_iflags = flag & FKIOCTL;
6239 if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
6240 /*
6241 * Make sure the user doesn't pass in an insane value for
6242 * zc_nvlist_src_size. We have to check, since we will end
6243 * up allocating that much memory inside of get_nvlist(). This
6244 * prevents a nefarious user from allocating tons of kernel
6245 * memory.
6246 *
6247 * Also, we return EINVAL instead of ENOMEM here. The reason
6248 * being that returning ENOMEM from an ioctl() has a special
6249 * connotation; that the user's size value is too small and
6250 * needs to be expanded to hold the nvlist. See
6251 * zcmd_expand_dst_nvlist() for details.
6252 */
6253 error = SET_ERROR(EINVAL); /* User's size too big */
6254
6255 } else if (zc->zc_nvlist_src_size != 0) {
6256 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6257 zc->zc_iflags, &innvl);
6258 if (error != 0)
6259 goto out;
6260 }
6261
6262 /*
6263 * Ensure that all pool/dataset names are valid before we pass down to
6264 * the lower layers.
6265 */
6266 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6267 switch (vec->zvec_namecheck) {
6268 case POOL_NAME:
6269 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6270 error = SET_ERROR(EINVAL);
6271 else
6272 error = pool_status_check(zc->zc_name,
6273 vec->zvec_namecheck, vec->zvec_pool_check);
6274 break;
6275
6276 case DATASET_NAME:
6277 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6278 error = SET_ERROR(EINVAL);
6279 else
6280 error = pool_status_check(zc->zc_name,
6281 vec->zvec_namecheck, vec->zvec_pool_check);
6282 break;
6283
6284 case NO_NAME:
6285 break;
6286 }
6287
6288
6289 if (error == 0) {
6290 cookie = spl_fstrans_mark();
6291 error = vec->zvec_secpolicy(zc, innvl, CRED());
6292 spl_fstrans_unmark(cookie);
6293 }
6294
6295 if (error != 0)
6296 goto out;
6297
6298 /* legacy ioctls can modify zc_name */
6299 saved_poolname = strdup(zc->zc_name);
6300 if (saved_poolname == NULL) {
6301 error = SET_ERROR(ENOMEM);
6302 goto out;
6303 } else {
6304 saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
6305 }
6306
6307 if (vec->zvec_func != NULL) {
6308 nvlist_t *outnvl;
6309 int puterror = 0;
6310 spa_t *spa;
6311 nvlist_t *lognv = NULL;
6312
6313 ASSERT(vec->zvec_legacy_func == NULL);
6314
6315 /*
6316 * Add the innvl to the lognv before calling the func,
6317 * in case the func changes the innvl.
6318 */
6319 if (vec->zvec_allow_log) {
6320 lognv = fnvlist_alloc();
6321 fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6322 vec->zvec_name);
6323 if (!nvlist_empty(innvl)) {
6324 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6325 innvl);
6326 }
6327 }
6328
6329 outnvl = fnvlist_alloc();
6330 cookie = spl_fstrans_mark();
6331 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6332 spl_fstrans_unmark(cookie);
6333
6334 if (error == 0 && vec->zvec_allow_log &&
6335 spa_open(zc->zc_name, &spa, FTAG) == 0) {
6336 if (!nvlist_empty(outnvl)) {
6337 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6338 outnvl);
6339 }
6340 (void) spa_history_log_nvl(spa, lognv);
6341 spa_close(spa, FTAG);
6342 }
6343 fnvlist_free(lognv);
6344
6345 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6346 int smusherror = 0;
6347 if (vec->zvec_smush_outnvlist) {
6348 smusherror = nvlist_smush(outnvl,
6349 zc->zc_nvlist_dst_size);
6350 }
6351 if (smusherror == 0)
6352 puterror = put_nvlist(zc, outnvl);
6353 }
6354
6355 if (puterror != 0)
6356 error = puterror;
6357
6358 nvlist_free(outnvl);
6359 } else {
6360 cookie = spl_fstrans_mark();
6361 error = vec->zvec_legacy_func(zc);
6362 spl_fstrans_unmark(cookie);
6363 }
6364
6365 out:
6366 nvlist_free(innvl);
6367 rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6368 if (error == 0 && rc != 0)
6369 error = SET_ERROR(EFAULT);
6370 if (error == 0 && vec->zvec_allow_log) {
6371 char *s = tsd_get(zfs_allow_log_key);
6372 if (s != NULL)
6373 strfree(s);
6374 (void) tsd_set(zfs_allow_log_key, saved_poolname);
6375 } else {
6376 if (saved_poolname != NULL)
6377 strfree(saved_poolname);
6378 }
6379
6380 kmem_free(zc, sizeof (zfs_cmd_t));
6381 return (-error);
6382 }
6383
/*
 * Compat ioctl entry point.  When CONFIG_COMPAT is set it forwards to
 * zfsdev_ioctl() unchanged; otherwise no compat handler is installed.
 */
#ifdef CONFIG_COMPAT
static long
zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
{
	long rc = zfsdev_ioctl(filp, cmd, arg);

	return (rc);
}
#else
#define	zfsdev_compat_ioctl	NULL
#endif
6393
6394 static const struct file_operations zfsdev_fops = {
6395 .open = zfsdev_open,
6396 .release = zfsdev_release,
6397 .unlocked_ioctl = zfsdev_ioctl,
6398 .compat_ioctl = zfsdev_compat_ioctl,
6399 .owner = THIS_MODULE,
6400 };
6401
6402 static struct miscdevice zfs_misc = {
6403 .minor = MISC_DYNAMIC_MINOR,
6404 .name = ZFS_DRIVER,
6405 .fops = &zfsdev_fops,
6406 };
6407
6408 static int
6409 zfs_attach(void)
6410 {
6411 int error;
6412
6413 mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
6414 zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
6415 zfsdev_state_list->zs_minor = -1;
6416
6417 error = misc_register(&zfs_misc);
6418 if (error != 0) {
6419 printk(KERN_INFO "ZFS: misc_register() failed %d\n", error);
6420 return (error);
6421 }
6422
6423 return (0);
6424 }
6425
6426 static void
6427 zfs_detach(void)
6428 {
6429 zfsdev_state_t *zs, *zsprev = NULL;
6430
6431 misc_deregister(&zfs_misc);
6432 mutex_destroy(&zfsdev_state_lock);
6433
6434 for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
6435 if (zsprev)
6436 kmem_free(zsprev, sizeof (zfsdev_state_t));
6437 zsprev = zs;
6438 }
6439 if (zsprev)
6440 kmem_free(zsprev, sizeof (zfsdev_state_t));
6441 }
6442
6443 static void
6444 zfs_allow_log_destroy(void *arg)
6445 {
6446 char *poolname = arg;
6447
6448 if (poolname != NULL)
6449 strfree(poolname);
6450 }
6451
/* Suffix appended to module load/unload messages in DEBUG builds. */
#ifndef DEBUG
#define	ZFS_DEBUG_STR	""
#else
#define	ZFS_DEBUG_STR	" (DEBUG mode)"
#endif
6457
6458 static int __init
6459 _init(void)
6460 {
6461 int error;
6462
6463 error = -vn_set_pwd("/");
6464 if (error) {
6465 printk(KERN_NOTICE
6466 "ZFS: Warning unable to set pwd to '/': %d\n", error);
6467 return (error);
6468 }
6469
6470 if ((error = -zvol_init()) != 0)
6471 return (error);
6472
6473 spa_init(FREAD | FWRITE);
6474 zfs_init();
6475
6476 zfs_ioctl_init();
6477
6478 if ((error = zfs_attach()) != 0)
6479 goto out;
6480
6481 tsd_create(&zfs_fsyncer_key, NULL);
6482 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6483 tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6484
6485 printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, "
6486 "ZFS pool version %s, ZFS filesystem version %s\n",
6487 ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR,
6488 SPA_VERSION_STRING, ZPL_VERSION_STRING);
6489 #ifndef CONFIG_FS_POSIX_ACL
6490 printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
6491 #endif /* CONFIG_FS_POSIX_ACL */
6492
6493 return (0);
6494
6495 out:
6496 zfs_fini();
6497 spa_fini();
6498 (void) zvol_fini();
6499 printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s"
6500 ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE,
6501 ZFS_DEBUG_STR, error);
6502
6503 return (error);
6504 }
6505
/*
 * Module unload entry point.
 *
 * Detaches the control device first, then tears down the ZFS, SPA and
 * zvol layers, and finally destroys the thread-specific-data keys that
 * _init() created.
 */
static void __exit
_fini(void)
{
	/* Remove the device node before dismantling what backs it. */
	zfs_detach();
	zfs_fini();
	spa_fini();
	zvol_fini();

	tsd_destroy(&zfs_fsyncer_key);
	tsd_destroy(&rrw_tsd_key);
	tsd_destroy(&zfs_allow_log_key);

	printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n",
	    ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR);
}
6521
/* Kernel module entry points and metadata; only built when HAVE_SPL is set. */
#if defined(HAVE_SPL)
module_init(_init);
module_exit(_fini);

MODULE_DESCRIPTION("ZFS");
MODULE_AUTHOR(ZFS_META_AUTHOR);
MODULE_LICENSE(ZFS_META_LICENSE);
MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
#endif /* defined(HAVE_SPL) */