]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libzfs/libzfs_pool.c
OpenZFS 7613 - ms_freetree[4] is only used in syncing context
[mirror_zfs.git] / lib / libzfs / libzfs_pool.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
0fdd8d64 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
428870ff 24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
a05dfd00 25 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
34dc7c2f
BB
26 */
27
34dc7c2f
BB
28#include <ctype.h>
29#include <errno.h>
30#include <devid.h>
34dc7c2f
BB
31#include <fcntl.h>
32#include <libintl.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <strings.h>
36#include <unistd.h>
6f1ffb06 37#include <libgen.h>
d603ed6c
BB
38#include <zone.h>
39#include <sys/stat.h>
34dc7c2f
BB
40#include <sys/efi_partition.h>
41#include <sys/vtoc.h>
42#include <sys/zfs_ioctl.h>
9babb374 43#include <dlfcn.h>
34dc7c2f
BB
44
45#include "zfs_namecheck.h"
46#include "zfs_prop.h"
47#include "libzfs_impl.h"
428870ff 48#include "zfs_comutil.h"
9ae529ec 49#include "zfeature_common.h"
34dc7c2f 50
b128c09f
BB
51static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
52
572e2857
BB
53typedef struct prop_flags {
54 int create:1; /* Validate property on creation */
55 int import:1; /* Validate property on import */
56} prop_flags_t;
57
34dc7c2f
BB
58/*
59 * ====================================================================
60 * zpool property functions
61 * ====================================================================
62 */
63
64static int
65zpool_get_all_props(zpool_handle_t *zhp)
66{
13fe0198 67 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
68 libzfs_handle_t *hdl = zhp->zpool_hdl;
69
70 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
71
72 if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
73 return (-1);
74
75 while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
76 if (errno == ENOMEM) {
77 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
78 zcmd_free_nvlists(&zc);
79 return (-1);
80 }
81 } else {
82 zcmd_free_nvlists(&zc);
83 return (-1);
84 }
85 }
86
87 if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
88 zcmd_free_nvlists(&zc);
89 return (-1);
90 }
91
92 zcmd_free_nvlists(&zc);
93
94 return (0);
95}
96
97static int
98zpool_props_refresh(zpool_handle_t *zhp)
99{
100 nvlist_t *old_props;
101
102 old_props = zhp->zpool_props;
103
104 if (zpool_get_all_props(zhp) != 0)
105 return (-1);
106
107 nvlist_free(old_props);
108 return (0);
109}
110
111static char *
112zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
113 zprop_source_t *src)
114{
115 nvlist_t *nv, *nvl;
116 uint64_t ival;
117 char *value;
118 zprop_source_t source;
119
120 nvl = zhp->zpool_props;
121 if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
122 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
123 source = ival;
124 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
125 } else {
126 source = ZPROP_SRC_DEFAULT;
127 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
128 value = "-";
129 }
130
131 if (src)
132 *src = source;
133
134 return (value);
135}
136
137uint64_t
138zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
139{
140 nvlist_t *nv, *nvl;
141 uint64_t value;
142 zprop_source_t source;
143
b128c09f
BB
144 if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
145 /*
146 * zpool_get_all_props() has most likely failed because
147 * the pool is faulted, but if all we need is the top level
148 * vdev's guid then get it from the zhp config nvlist.
149 */
150 if ((prop == ZPOOL_PROP_GUID) &&
151 (nvlist_lookup_nvlist(zhp->zpool_config,
152 ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
153 (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
154 == 0)) {
155 return (value);
156 }
34dc7c2f 157 return (zpool_prop_default_numeric(prop));
b128c09f 158 }
34dc7c2f
BB
159
160 nvl = zhp->zpool_props;
161 if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
162 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
163 source = value;
164 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
165 } else {
166 source = ZPROP_SRC_DEFAULT;
167 value = zpool_prop_default_numeric(prop);
168 }
169
170 if (src)
171 *src = source;
172
173 return (value);
174}
175
176/*
177 * Map VDEV STATE to printed strings.
178 */
179char *
180zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
181{
182 switch (state) {
e75c13c3
BB
183 default:
184 break;
34dc7c2f
BB
185 case VDEV_STATE_CLOSED:
186 case VDEV_STATE_OFFLINE:
187 return (gettext("OFFLINE"));
188 case VDEV_STATE_REMOVED:
189 return (gettext("REMOVED"));
190 case VDEV_STATE_CANT_OPEN:
b128c09f 191 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
34dc7c2f 192 return (gettext("FAULTED"));
428870ff
BB
193 else if (aux == VDEV_AUX_SPLIT_POOL)
194 return (gettext("SPLIT"));
34dc7c2f
BB
195 else
196 return (gettext("UNAVAIL"));
197 case VDEV_STATE_FAULTED:
198 return (gettext("FAULTED"));
199 case VDEV_STATE_DEGRADED:
200 return (gettext("DEGRADED"));
201 case VDEV_STATE_HEALTHY:
202 return (gettext("ONLINE"));
203 }
204
205 return (gettext("UNKNOWN"));
206}
207
131cc95c
DK
208/*
209 * Map POOL STATE to printed strings.
210 */
211const char *
212zpool_pool_state_to_name(pool_state_t state)
213{
214 switch (state) {
215 default:
216 break;
217 case POOL_STATE_ACTIVE:
218 return (gettext("ACTIVE"));
219 case POOL_STATE_EXPORTED:
220 return (gettext("EXPORTED"));
221 case POOL_STATE_DESTROYED:
222 return (gettext("DESTROYED"));
223 case POOL_STATE_SPARE:
224 return (gettext("SPARE"));
225 case POOL_STATE_L2CACHE:
226 return (gettext("L2CACHE"));
227 case POOL_STATE_UNINITIALIZED:
228 return (gettext("UNINITIALIZED"));
229 case POOL_STATE_UNAVAIL:
230 return (gettext("UNAVAIL"));
231 case POOL_STATE_POTENTIALLY_ACTIVE:
232 return (gettext("POTENTIALLY_ACTIVE"));
233 }
234
235 return (gettext("UNKNOWN"));
236}
237
8b921f66
RE
/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 *
 * 'literal' selects raw numeric output over human-readable formatting
 * (nicenum/percentage/ratio suffixes).  The value's source is reported
 * through 'srctype' when non-NULL.  Returns 0 on success, -1 on failure.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
    size_t len, zprop_source_t *srctype, boolean_t literal)
{
	uint64_t intval;
	const char *strval;
	zprop_source_t src = ZPROP_SRC_NONE;
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	uint_t vsc;

	/*
	 * A faulted pool may lack a property cache entirely; serve the few
	 * properties that are still recoverable and "-" for the rest.
	 */
	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
		switch (prop) {
		case ZPOOL_PROP_NAME:
			(void) strlcpy(buf, zpool_get_name(zhp), len);
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, "FAULTED", len);
			break;

		case ZPOOL_PROP_GUID:
			intval = zpool_get_prop_int(zhp, prop, &src);
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_ALTROOT:
		case ZPOOL_PROP_CACHEFILE:
		case ZPOOL_PROP_COMMENT:
			/* these may still be available from the cache */
			if (zhp->zpool_props != NULL ||
			    zpool_get_all_props(zhp) == 0) {
				(void) strlcpy(buf,
				    zpool_get_prop_string(zhp, prop, &src),
				    len);
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) strlcpy(buf, "-", len);
			break;
		}

		if (srctype != NULL)
			*srctype = src;
		return (0);
	}

	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
	    prop != ZPOOL_PROP_NAME)
		return (-1);

	switch (zpool_prop_get_type(prop)) {
	case PROP_TYPE_STRING:
		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
		    len);
		break;

	case PROP_TYPE_NUMBER:
		intval = zpool_get_prop_int(zhp, prop, &src);

		switch (prop) {
		/* byte counts: raw or human-readable via zfs_nicenum() */
		case ZPOOL_PROP_SIZE:
		case ZPOOL_PROP_ALLOCATED:
		case ZPOOL_PROP_FREE:
		case ZPOOL_PROP_FREEING:
		case ZPOOL_PROP_LEAKED:
		case ZPOOL_PROP_ASHIFT:
			if (literal)
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			else
				(void) zfs_nicenum(intval, buf, len);
			break;

		case ZPOOL_PROP_EXPANDSZ:
			/* zero means "no expansion possible" */
			if (intval == 0) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) zfs_nicenum(intval, buf, len);
			}
			break;

		case ZPOOL_PROP_CAPACITY:
			/* percentage; literal output drops the '%' suffix */
			if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_FRAGMENTATION:
			/* UINT64_MAX is the "unknown" sentinel */
			if (intval == UINT64_MAX) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_DEDUPRATIO:
			/* stored as ratio * 100; print as D.DD[x] */
			if (literal)
				(void) snprintf(buf, len, "%llu.%02llu",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			else
				(void) snprintf(buf, len, "%llu.%02llux",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			break;

		case ZPOOL_PROP_HEALTH:
			/* derive the state string from the root vdev stats */
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
			verify(nvlist_lookup_uint64_array(nvroot,
			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
			    == 0);

			(void) strlcpy(buf, zpool_state_to_name(intval,
			    vs->vs_aux), len);
			break;
		case ZPOOL_PROP_VERSION:
			/* feature-flag pools have no meaningful version */
			if (intval >= SPA_VERSION_FEATURES) {
				(void) snprintf(buf, len, "-");
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
		}
		break;

	case PROP_TYPE_INDEX:
		intval = zpool_get_prop_int(zhp, prop, &src);
		if (zpool_prop_index_to_string(prop, intval, &strval)
		    != 0)
			return (-1);
		(void) strlcpy(buf, strval, len);
		break;

	default:
		abort();
	}

	if (srctype)
		*srctype = src;

	return (0);
}
398
399/*
400 * Check if the bootfs name has the same pool name as it is set to.
401 * Assuming bootfs is a valid dataset name.
402 */
403static boolean_t
404bootfs_name_valid(const char *pool, char *bootfs)
405{
406 int len = strlen(pool);
407
b128c09f 408 if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
34dc7c2f
BB
409 return (B_FALSE);
410
411 if (strncmp(pool, bootfs, len) == 0 &&
412 (bootfs[len] == '/' || bootfs[len] == '\0'))
413 return (B_TRUE);
414
415 return (B_FALSE);
416}
417
#if defined(__sun__) || defined(__sun)
/*
 * Inspect the configuration to determine if any of the devices contain
 * an EFI label.
 *
 * Recurses through the vdev tree; a leaf (no ZPOOL_CONFIG_CHILDREN) is
 * checked directly by attempting to read an EFI label from it.
 */
static boolean_t
pool_uses_efi(nvlist_t *config)
{
	nvlist_t **child;
	uint_t c, children;

	/* leaf vdev: probe the device itself */
	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (read_efi_label(config, NULL) >= 0);

	for (c = 0; c < children; c++) {
		if (pool_uses_efi(child[c]))
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
b128c09f 440
1bd201e7
CS
441boolean_t
442zpool_is_bootable(zpool_handle_t *zhp)
b128c09f 443{
eca7b760 444 char bootfs[ZFS_MAX_DATASET_NAME_LEN];
b128c09f
BB
445
446 return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
2a8b84b7 447 sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
b128c09f
BB
448 sizeof (bootfs)) != 0);
449}
450
451
34dc7c2f
BB
/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 *
 * Returns a newly allocated nvlist of parsed properties, or NULL on any
 * validation failure (an error will have been reported via 'hdl').
 * 'flags' indicates whether validation happens at create or import time,
 * which restricts which properties may be set.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
	nvpair_t *elem;
	nvlist_t *retprops;
	zpool_prop_t prop;
	char *strval;
	uint64_t intval;
	char *slash, *check;
	struct stat64 statbuf;
	zpool_handle_t *zhp;
	nvlist_t *nvroot;

	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
		(void) no_memory(hdl);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		const char *propname = nvpair_name(elem);

		prop = zpool_name_to_prop(propname);
		/* "feature@..." properties are not in the regular table */
		if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
			int err;
			char *fname = strchr(propname, '@') + 1;

			err = zfeature_lookup_name(fname, NULL);
			if (err != 0) {
				ASSERT3U(err, ==, ENOENT);
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid feature '%s'"), fname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (nvpair_type(elem) != DATA_TYPE_STRING) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be a string"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			(void) nvpair_value_string(elem, &strval);
			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
			    strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set to "
				    "'enabled' or 'disabled'"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
				(void) no_memory(hdl);
				goto error;
			}
			continue;
		}

		/*
		 * Make sure this property is valid and applies to this type.
		 */
		if (prop == ZPROP_INVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid property '%s'"), propname);
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
			goto error;
		}

		if (zpool_prop_readonly(prop)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
			    "is readonly"), propname);
			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
			goto error;
		}

		/* parses string forms of numeric/index values into retprops */
		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
		    &strval, &intval, errbuf) != 0)
			goto error;

		/*
		 * Perform additional checking for specific properties.
		 */
		switch (prop) {
		default:
			break;
		case ZPOOL_PROP_VERSION:
			/* may only upgrade, and only to a supported version */
			if (intval < version ||
			    !SPA_VERSION_IS_SUPPORTED(intval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %d is invalid."),
				    propname, intval);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_ASHIFT:
			if (!flags.create) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/* 0 means auto-detect; otherwise 2^9..2^13 sectors */
			if (intval != 0 && (intval < 9 || intval > 13)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %d is invalid."),
				    propname, intval);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_BOOTFS:
			if (flags.create || flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' cannot be set at creation "
				    "or import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (version < SPA_VERSION_BOOTFS) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool must be upgraded to support "
				    "'%s' property"), propname);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}

			/*
			 * bootfs property value has to be a dataset name and
			 * the dataset has to be in the same pool as it sets to.
			 */
			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
			    strval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
				    "is an invalid name"), strval);
				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
				goto error;
			}

			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "could not open pool '%s'"), poolname);
				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
				goto error;
			}
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

#if defined(__sun__) || defined(__sun)
			/*
			 * bootfs property cannot be set on a disk which has
			 * been EFI labeled.
			 */
			if (pool_uses_efi(nvroot)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' not supported on "
				    "EFI labeled devices"), propname);
				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
				zpool_close(zhp);
				goto error;
			}
#endif
			zpool_close(zhp);
			break;

		case ZPOOL_PROP_ALTROOT:
			if (!flags.create && !flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set during pool "
				    "creation or import"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "bad alternate root '%s'"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			/* empty string and "none" are both accepted as-is */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' must be empty, an "
				    "absolute path, or 'none'"), propname);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			slash = strrchr(strval, '/');

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid file"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			/*
			 * Temporarily truncate at the final slash so the
			 * parent directory can be stat'ed; restored below.
			 */
			*slash = '\0';

			if (strval[0] != '\0' &&
			    (stat64(strval, &statbuf) != 0 ||
			    !S_ISDIR(statbuf.st_mode))) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid directory"),
				    strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			*slash = '/';
			break;

		case ZPOOL_PROP_COMMENT:
			for (check = strval; *check != '\0'; check++) {
				if (!isprint(*check)) {
					zfs_error_aux(hdl,
					    dgettext(TEXT_DOMAIN,
					    "comment may only have printable "
					    "characters"));
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}
			}
			if (strlen(strval) > ZPROP_MAX_COMMENT) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "comment must not exceed %d characters"),
				    ZPROP_MAX_COMMENT);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_READONLY:
			if (!flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_TNAME:
			if (!flags.create) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		}
	}

	return (retprops);
error:
	nvlist_free(retprops);
	return (NULL);
}
732
/*
 * Set zpool property : propname=propval.
 *
 * Validates the property via zpool_valid_proplist(), issues the
 * ZFS_IOC_POOL_SET_PROPS ioctl, and refreshes the cached property list
 * on success.  Returns 0 on success, non-zero on failure (with the error
 * reported through the libzfs handle).
 */
int
zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
{
	zfs_cmd_t zc = {"\0"};
	int ret = -1;
	char errbuf[1024];
	nvlist_t *nvl = NULL;
	nvlist_t *realprops;
	uint64_t version;
	prop_flags_t flags = { 0 };

	(void) snprintf(errbuf, sizeof (errbuf),
	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
	    zhp->zpool_name);

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
		return (no_memory(zhp->zpool_hdl));

	if (nvlist_add_string(nvl, propname, propval) != 0) {
		nvlist_free(nvl);
		return (no_memory(zhp->zpool_hdl));
	}

	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
	    zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
		nvlist_free(nvl);
		return (-1);
	}

	/* swap the raw list for the validated/parsed one */
	nvlist_free(nvl);
	nvl = realprops;

	/*
	 * Execute the corresponding ioctl() to set this property.
	 */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
		nvlist_free(nvl);
		return (-1);
	}

	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);

	zcmd_free_nvlists(&zc);
	nvlist_free(nvl);

	if (ret)
		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
	else
		(void) zpool_props_refresh(zhp);

	return (ret);
}
791
/*
 * Expand the property list '*plp' for display purposes: append one entry
 * per supported feature (on the first expansion only) plus one entry per
 * unsupported feature found on this pool, then widen each entry's column
 * width to fit this pool's values.  Returns 0 on success, -1 on failure.
 */
int
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
{
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	zprop_list_t *entry;
	char buf[ZFS_MAXPROPLEN];
	nvlist_t *features = NULL;
	nvpair_t *nvp;
	zprop_list_t **last;
	boolean_t firstexpand = (NULL == *plp);
	int i;

	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
		return (-1);

	/* find the tail so new entries are appended in order */
	last = plp;
	while (*last != NULL)
		last = &(*last)->pl_next;

	if ((*plp)->pl_all)
		features = zpool_get_features(zhp);

	/* add "feature@..." entries once, on the first expansion */
	if ((*plp)->pl_all && firstexpand) {
		for (i = 0; i < SPA_FEATURES; i++) {
			zprop_list_t *entry = zfs_alloc(hdl,
			    sizeof (zprop_list_t));
			entry->pl_prop = ZPROP_INVAL;
			entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
			    spa_feature_table[i].fi_uname);
			entry->pl_width = strlen(entry->pl_user_prop);
			entry->pl_all = B_TRUE;

			*last = entry;
			last = &entry->pl_next;
		}
	}

	/* add any unsupported features */
	for (nvp = nvlist_next_nvpair(features, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
		char *propname;
		boolean_t found;
		zprop_list_t *entry;

		if (zfeature_is_supported(nvpair_name(nvp)))
			continue;

		propname = zfs_asprintf(hdl, "unsupported@%s",
		    nvpair_name(nvp));

		/*
		 * Before adding the property to the list make sure that no
		 * other pool already added the same property.
		 */
		found = B_FALSE;
		entry = *plp;
		while (entry != NULL) {
			if (entry->pl_user_prop != NULL &&
			    strcmp(propname, entry->pl_user_prop) == 0) {
				found = B_TRUE;
				break;
			}
			entry = entry->pl_next;
		}
		if (found) {
			free(propname);
			continue;
		}

		entry = zfs_alloc(hdl, sizeof (zprop_list_t));
		entry->pl_prop = ZPROP_INVAL;
		entry->pl_user_prop = propname;
		entry->pl_width = strlen(entry->pl_user_prop);
		entry->pl_all = B_TRUE;

		*last = entry;
		last = &entry->pl_next;
	}

	/* widen columns to fit this pool's formatted values */
	for (entry = *plp; entry != NULL; entry = entry->pl_next) {

		if (entry->pl_fixed)
			continue;

		if (entry->pl_prop != ZPROP_INVAL &&
		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
		    NULL, B_FALSE) == 0) {
			if (strlen(buf) > entry->pl_width)
				entry->pl_width = strlen(buf);
		}
	}

	return (0);
}
886
9ae529ec
CS
887/*
888 * Get the state for the given feature on the given ZFS pool.
889 */
890int
891zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
892 size_t len)
893{
894 uint64_t refcount;
895 boolean_t found = B_FALSE;
896 nvlist_t *features = zpool_get_features(zhp);
897 boolean_t supported;
898 const char *feature = strchr(propname, '@') + 1;
899
900 supported = zpool_prop_feature(propname);
901 ASSERT(supported || zpool_prop_unsupported(propname));
902
903 /*
904 * Convert from feature name to feature guid. This conversion is
4e33ba4c 905 * unnecessary for unsupported@... properties because they already
9ae529ec
CS
906 * use guids.
907 */
908 if (supported) {
909 int ret;
fa86b5db 910 spa_feature_t fid;
9ae529ec 911
fa86b5db 912 ret = zfeature_lookup_name(feature, &fid);
9ae529ec
CS
913 if (ret != 0) {
914 (void) strlcpy(buf, "-", len);
915 return (ENOTSUP);
916 }
fa86b5db 917 feature = spa_feature_table[fid].fi_guid;
9ae529ec
CS
918 }
919
920 if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
921 found = B_TRUE;
922
923 if (supported) {
924 if (!found) {
925 (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
926 } else {
927 if (refcount == 0)
928 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
929 else
930 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
931 }
932 } else {
933 if (found) {
934 if (refcount == 0) {
935 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
936 } else {
937 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
938 }
939 } else {
940 (void) strlcpy(buf, "-", len);
941 return (ENOTSUP);
942 }
943 }
944
945 return (0);
946}
34dc7c2f 947
9babb374
BB
948/*
949 * Don't start the slice at the default block of 34; many storage
d603ed6c
BB
950 * devices will use a stripe width of 128k, other vendors prefer a 1m
951 * alignment. It is best to play it safe and ensure a 1m alignment
613d88ed
NB
952 * given 512B blocks. When the block size is larger by a power of 2
953 * we will still be 1m aligned. Some devices are sensitive to the
954 * partition ending alignment as well.
9babb374 955 */
613d88ed
NB
956#define NEW_START_BLOCK 2048
957#define PARTITION_END_ALIGNMENT 2048
9babb374 958
34dc7c2f
BB
/*
 * Validate the given pool name, optionally putting an extended error message in
 * 'buf'.
 *
 * 'isopen' indicates the name refers to an existing pool being opened, in
 * which case the extended reserved-name checks are skipped (see below).
 * Returns B_TRUE if the name is acceptable; on failure an error message is
 * attached to 'hdl' when it is non-NULL.
 */
boolean_t
zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
	namecheck_err_t why;
	char what;
	int ret;

	ret = pool_namecheck(pool, &why, &what);

	/*
	 * The rules for reserved pool names were extended at a later point.
	 * But we need to support users with existing pools that may now be
	 * invalid. So we only check for this expanded set of names during a
	 * create (or import), and only in userland.
	 */
	if (ret == 0 && !isopen &&
	    (strncmp(pool, "mirror", 6) == 0 ||
	    strncmp(pool, "raidz", 5) == 0 ||
	    strncmp(pool, "spare", 5) == 0 ||
	    strcmp(pool, "log") == 0)) {
		if (hdl != NULL)
			zfs_error_aux(hdl,
			    dgettext(TEXT_DOMAIN, "name is reserved"));
		return (B_FALSE);
	}


	if (ret != 0) {
		/* translate the namecheck failure into a user-facing message */
		if (hdl != NULL) {
			switch (why) {
			case NAME_ERR_TOOLONG:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "name is too long"));
				break;

			case NAME_ERR_INVALCHAR:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "invalid character "
				    "'%c' in pool name"), what);
				break;

			case NAME_ERR_NOLETTER:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name must begin with a letter"));
				break;

			case NAME_ERR_RESERVED:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name is reserved"));
				break;

			case NAME_ERR_DISKLIKE:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool name is reserved"));
				break;

			case NAME_ERR_LEADING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "leading slash in name"));
				break;

			case NAME_ERR_EMPTY_COMPONENT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "empty component in name"));
				break;

			case NAME_ERR_TRAILING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "trailing slash in name"));
				break;

			case NAME_ERR_MULTIPLE_DELIMITERS:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "multiple '@' and/or '#' delimiters in "
				    "name"));
				break;
			case NAME_ERR_NO_AT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "permission set is missing '@'"));
				break;
			}
		}
		return (B_FALSE);
	}

	return (B_TRUE);
}
1050
1051/*
1052 * Open a handle to the given pool, even if the pool is currently in the FAULTED
1053 * state.
1054 */
1055zpool_handle_t *
1056zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1057{
1058 zpool_handle_t *zhp;
1059 boolean_t missing;
1060
1061 /*
1062 * Make sure the pool name is valid.
1063 */
1064 if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1065 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1066 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1067 pool);
1068 return (NULL);
1069 }
1070
1071 if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1072 return (NULL);
1073
1074 zhp->zpool_hdl = hdl;
1075 (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1076
1077 if (zpool_refresh_stats(zhp, &missing) != 0) {
1078 zpool_close(zhp);
1079 return (NULL);
1080 }
1081
1082 if (missing) {
1083 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1084 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1085 dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1086 zpool_close(zhp);
1087 return (NULL);
1088 }
1089
1090 return (zhp);
1091}
1092
1093/*
1094 * Like the above, but silent on error. Used when iterating over pools (because
1095 * the configuration cache may be out of date).
1096 */
1097int
1098zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1099{
1100 zpool_handle_t *zhp;
1101 boolean_t missing;
1102
1103 if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1104 return (-1);
1105
1106 zhp->zpool_hdl = hdl;
1107 (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1108
1109 if (zpool_refresh_stats(zhp, &missing) != 0) {
1110 zpool_close(zhp);
1111 return (-1);
1112 }
1113
1114 if (missing) {
1115 zpool_close(zhp);
1116 *ret = NULL;
1117 return (0);
1118 }
1119
1120 *ret = zhp;
1121 return (0);
1122}
1123
1124/*
1125 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1126 * state.
1127 */
1128zpool_handle_t *
1129zpool_open(libzfs_handle_t *hdl, const char *pool)
1130{
1131 zpool_handle_t *zhp;
1132
1133 if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1134 return (NULL);
1135
1136 if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1137 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1138 dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1139 zpool_close(zhp);
1140 return (NULL);
1141 }
1142
1143 return (zhp);
1144}
1145
/*
 * Close the handle. Simply frees the memory associated with the handle.
 */
void
zpool_close(zpool_handle_t *zhp)
{
	/* nvlist_free() accepts NULL, so unset lists are safe to free */
	nvlist_free(zhp->zpool_config);
	nvlist_free(zhp->zpool_old_config);
	nvlist_free(zhp->zpool_props);
	free(zhp);
}
1157
/*
 * Return the name of the pool.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	return (zhp->zpool_name);
}
1166

/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE).  This is the cached
 * zpool_state value captured when the handle's stats were last refreshed.
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
	return (zhp->zpool_state);
}
1176
1177/*
1178 * Create the named pool, using the provided vdev list. It is assumed
1179 * that the consumer has already validated the contents of the nvlist, so we
1180 * don't have to worry about error semantics.
1181 */
1182int
1183zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
b128c09f 1184 nvlist_t *props, nvlist_t *fsprops)
34dc7c2f 1185{
13fe0198 1186 zfs_cmd_t zc = {"\0"};
b128c09f
BB
1187 nvlist_t *zc_fsprops = NULL;
1188 nvlist_t *zc_props = NULL;
34dc7c2f 1189 char msg[1024];
b128c09f 1190 int ret = -1;
34dc7c2f
BB
1191
1192 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1193 "cannot create '%s'"), pool);
1194
1195 if (!zpool_name_valid(hdl, B_FALSE, pool))
1196 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
1197
1198 if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1199 return (-1);
1200
b128c09f 1201 if (props) {
572e2857
BB
1202 prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
1203
b128c09f 1204 if ((zc_props = zpool_valid_proplist(hdl, pool, props,
572e2857 1205 SPA_VERSION_1, flags, msg)) == NULL) {
b128c09f
BB
1206 goto create_failed;
1207 }
1208 }
34dc7c2f 1209
b128c09f
BB
1210 if (fsprops) {
1211 uint64_t zoned;
1212 char *zonestr;
1213
1214 zoned = ((nvlist_lookup_string(fsprops,
1215 zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
1216 strcmp(zonestr, "on") == 0);
1217
82f6f6e6
JS
1218 if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
1219 fsprops, zoned, NULL, NULL, msg)) == NULL) {
b128c09f
BB
1220 goto create_failed;
1221 }
1222 if (!zc_props &&
1223 (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
1224 goto create_failed;
1225 }
1226 if (nvlist_add_nvlist(zc_props,
1227 ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
1228 goto create_failed;
1229 }
34dc7c2f
BB
1230 }
1231
b128c09f
BB
1232 if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
1233 goto create_failed;
1234
34dc7c2f
BB
1235 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
1236
b128c09f 1237 if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
34dc7c2f
BB
1238
1239 zcmd_free_nvlists(&zc);
b128c09f
BB
1240 nvlist_free(zc_props);
1241 nvlist_free(zc_fsprops);
34dc7c2f
BB
1242
1243 switch (errno) {
1244 case EBUSY:
1245 /*
1246 * This can happen if the user has specified the same
1247 * device multiple times. We can't reliably detect this
1248 * until we try to add it and see we already have a
d603ed6c
BB
1249 * label. This can also happen under if the device is
1250 * part of an active md or lvm device.
34dc7c2f
BB
1251 */
1252 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
d1d7e268
MK
1253 "one or more vdevs refer to the same device, or "
1254 "one of\nthe devices is part of an active md or "
1255 "lvm device"));
34dc7c2f
BB
1256 return (zfs_error(hdl, EZFS_BADDEV, msg));
1257
82f6f6e6
JS
1258 case ERANGE:
1259 /*
1260 * This happens if the record size is smaller or larger
1261 * than the allowed size range, or not a power of 2.
1262 *
1263 * NOTE: although zfs_valid_proplist is called earlier,
1264 * this case may have slipped through since the
1265 * pool does not exist yet and it is therefore
1266 * impossible to read properties e.g. max blocksize
1267 * from the pool.
1268 */
1269 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1270 "record size invalid"));
1271 return (zfs_error(hdl, EZFS_BADPROP, msg));
1272
34dc7c2f
BB
1273 case EOVERFLOW:
1274 /*
1275 * This occurs when one of the devices is below
1276 * SPA_MINDEVSIZE. Unfortunately, we can't detect which
1277 * device was the problem device since there's no
1278 * reliable way to determine device size from userland.
1279 */
1280 {
1281 char buf[64];
1282
1283 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1284
1285 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1286 "one or more devices is less than the "
1287 "minimum size (%s)"), buf);
1288 }
1289 return (zfs_error(hdl, EZFS_BADDEV, msg));
1290
1291 case ENOSPC:
1292 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1293 "one or more devices is out of space"));
1294 return (zfs_error(hdl, EZFS_BADDEV, msg));
1295
1296 case ENOTBLK:
1297 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1298 "cache device must be a disk or disk slice"));
1299 return (zfs_error(hdl, EZFS_BADDEV, msg));
1300
1301 default:
1302 return (zpool_standard_error(hdl, errno, msg));
1303 }
1304 }
1305
b128c09f 1306create_failed:
34dc7c2f 1307 zcmd_free_nvlists(&zc);
b128c09f
BB
1308 nvlist_free(zc_props);
1309 nvlist_free(zc_fsprops);
1310 return (ret);
34dc7c2f
BB
1311}
1312
1313/*
1314 * Destroy the given pool. It is up to the caller to ensure that there are no
1315 * datasets left in the pool.
1316 */
1317int
6f1ffb06 1318zpool_destroy(zpool_handle_t *zhp, const char *log_str)
34dc7c2f 1319{
13fe0198 1320 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
1321 zfs_handle_t *zfp = NULL;
1322 libzfs_handle_t *hdl = zhp->zpool_hdl;
1323 char msg[1024];
1324
1325 if (zhp->zpool_state == POOL_STATE_ACTIVE &&
572e2857 1326 (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
34dc7c2f
BB
1327 return (-1);
1328
34dc7c2f 1329 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
6f1ffb06 1330 zc.zc_history = (uint64_t)(uintptr_t)log_str;
34dc7c2f 1331
572e2857 1332 if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
34dc7c2f
BB
1333 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1334 "cannot destroy '%s'"), zhp->zpool_name);
1335
1336 if (errno == EROFS) {
1337 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1338 "one or more devices is read only"));
1339 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1340 } else {
1341 (void) zpool_standard_error(hdl, errno, msg);
1342 }
1343
1344 if (zfp)
1345 zfs_close(zfp);
1346 return (-1);
1347 }
1348
1349 if (zfp) {
1350 remove_mountpoint(zfp);
1351 zfs_close(zfp);
1352 }
1353
1354 return (0);
1355}
1356
1357/*
1358 * Add the given vdevs to the pool. The caller must have already performed the
1359 * necessary verification to ensure that the vdev specification is well-formed.
1360 */
1361int
1362zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
1363{
13fe0198 1364 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
1365 int ret;
1366 libzfs_handle_t *hdl = zhp->zpool_hdl;
1367 char msg[1024];
1368 nvlist_t **spares, **l2cache;
1369 uint_t nspares, nl2cache;
1370
1371 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1372 "cannot add to '%s'"), zhp->zpool_name);
1373
1374 if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1375 SPA_VERSION_SPARES &&
1376 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1377 &spares, &nspares) == 0) {
1378 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1379 "upgraded to add hot spares"));
1380 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1381 }
1382
c372b36e 1383#if defined(__sun__) || defined(__sun)
1bd201e7 1384 if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
b128c09f
BB
1385 ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
1386 uint64_t s;
1387
1388 for (s = 0; s < nspares; s++) {
1389 char *path;
1390
1391 if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
1392 &path) == 0 && pool_uses_efi(spares[s])) {
1393 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1394 "device '%s' contains an EFI label and "
1395 "cannot be used on root pools."),
d2f3e292 1396 zpool_vdev_name(hdl, NULL, spares[s], 0));
b128c09f
BB
1397 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
1398 }
1399 }
1400 }
c372b36e 1401#endif
b128c09f 1402
34dc7c2f
BB
1403 if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1404 SPA_VERSION_L2CACHE &&
1405 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1406 &l2cache, &nl2cache) == 0) {
1407 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1408 "upgraded to add cache devices"));
1409 return (zfs_error(hdl, EZFS_BADVERSION, msg));
1410 }
1411
1412 if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1413 return (-1);
1414 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1415
572e2857 1416 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
34dc7c2f
BB
1417 switch (errno) {
1418 case EBUSY:
1419 /*
1420 * This can happen if the user has specified the same
1421 * device multiple times. We can't reliably detect this
1422 * until we try to add it and see we already have a
1423 * label.
1424 */
1425 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1426 "one or more vdevs refer to the same device"));
1427 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1428 break;
1429
1430 case EOVERFLOW:
1431 /*
1432 * This occurrs when one of the devices is below
1433 * SPA_MINDEVSIZE. Unfortunately, we can't detect which
1434 * device was the problem device since there's no
1435 * reliable way to determine device size from userland.
1436 */
1437 {
1438 char buf[64];
1439
1440 zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1441
1442 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1443 "device is less than the minimum "
1444 "size (%s)"), buf);
1445 }
1446 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1447 break;
1448
1449 case ENOTSUP:
1450 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1451 "pool must be upgraded to add these vdevs"));
1452 (void) zfs_error(hdl, EZFS_BADVERSION, msg);
1453 break;
1454
34dc7c2f
BB
1455 case ENOTBLK:
1456 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1457 "cache device must be a disk or disk slice"));
1458 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1459 break;
1460
1461 default:
1462 (void) zpool_standard_error(hdl, errno, msg);
1463 }
1464
1465 ret = -1;
1466 } else {
1467 ret = 0;
1468 }
1469
1470 zcmd_free_nvlists(&zc);
1471
1472 return (ret);
1473}
1474
1475/*
1476 * Exports the pool from the system. The caller must ensure that there are no
1477 * mounted datasets in the pool.
1478 */
6f1ffb06
MA
1479static int
1480zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1481 const char *log_str)
34dc7c2f 1482{
13fe0198 1483 zfs_cmd_t zc = {"\0"};
b128c09f 1484 char msg[1024];
34dc7c2f 1485
b128c09f
BB
1486 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1487 "cannot export '%s'"), zhp->zpool_name);
1488
34dc7c2f 1489 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f 1490 zc.zc_cookie = force;
fb5f0bc8 1491 zc.zc_guid = hardforce;
6f1ffb06 1492 zc.zc_history = (uint64_t)(uintptr_t)log_str;
b128c09f
BB
1493
1494 if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1495 switch (errno) {
1496 case EXDEV:
1497 zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1498 "use '-f' to override the following errors:\n"
1499 "'%s' has an active shared spare which could be"
1500 " used by other pools once '%s' is exported."),
1501 zhp->zpool_name, zhp->zpool_name);
1502 return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1503 msg));
1504 default:
1505 return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1506 msg));
1507 }
1508 }
34dc7c2f 1509
34dc7c2f
BB
1510 return (0);
1511}
1512
fb5f0bc8 1513int
6f1ffb06 1514zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
fb5f0bc8 1515{
6f1ffb06 1516 return (zpool_export_common(zhp, force, B_FALSE, log_str));
fb5f0bc8
BB
1517}
1518
/*
 * Export the pool with both the force and hardforce flags set.  'log_str' is
 * recorded in the pool history.
 */
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1524
/*
 * Print a message describing the outcome of a pool rewind (or, if 'dryrun',
 * the outcome a rewind would have), based on the rewind info nvlist in
 * 'config'.  Silent if error printing is disabled or no rewind info exists.
 */
static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	uint64_t rewindto;
	int64_t loss = -1;		/* seconds of transactions discarded */
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr || config == NULL)
		return;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
		return;
	}

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		return;
	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);

	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		if (dryrun) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Would be able to return %s "
			    "to its state as of %s.\n"),
			    name, timestr);
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Pool %s returned to its state as of %s.\n"),
			    name, timestr);
		}
		if (loss > 120) {
			/* Over two minutes: report in minutes, rounded. */
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    ((longlong_t)loss + 30) / 60);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "minutes of transactions.\n"));
		} else if (loss > 0) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    (longlong_t)loss);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "seconds of transactions.\n"));
		}
	}
}
1576
/*
 * Print recovery advice for a pool that failed to open/import cleanly.
 * 'reason' >= 0 indicates this is part of a status "action:" section;
 * negative means an import-time explanation.  If the kernel supplied no
 * rewind info, fall through to advising restore from backup.
 */
void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	int64_t loss = -1;
	uint64_t edata = UINT64_MAX;	/* persistent data-error count */
	uint64_t rewindto;
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr)
		return;

	if (reason >= 0)
		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
	else
		(void) printf(dgettext(TEXT_DOMAIN, "\t"));

	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		goto no_info;

	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
	    &edata);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery is possible, but will result in some data loss.\n"));

	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReturning the pool to its state as of %s\n"
		    "\tshould correct the problem.  "),
		    timestr);
	} else {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReverting the pool to an earlier state "
		    "should correct the problem.\n\t"));
	}

	if (loss > 120) {
		/* Over two minutes: report in minutes, rounded. */
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld minutes of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    ((longlong_t)loss + 30) / 60);
	} else if (loss > 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld seconds of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    (longlong_t)loss);
	}
	if (edata != 0 && edata != UINT64_MAX) {
		if (edata == 1) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, at least\n"
			    "\tone persistent user-data error will remain.  "));
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, several\n"
			    "\tpersistent user-data errors will remain.  "));
		}
	}
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
	    reason >= 0 ? "clear" : "import", name);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "A scrub of the pool\n"
	    "\tis strongly recommended after recovery.\n"));
	return;

no_info:
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Destroy and re-create the pool from\n\ta backup source.\n"));
}
1656
34dc7c2f
BB
1657/*
1658 * zpool_import() is a contracted interface. Should be kept the same
1659 * if possible.
1660 *
1661 * Applications should use zpool_import_props() to import a pool with
1662 * new properties value to be set.
1663 */
1664int
1665zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1666 char *altroot)
1667{
1668 nvlist_t *props = NULL;
1669 int ret;
1670
1671 if (altroot != NULL) {
1672 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1673 return (zfs_error_fmt(hdl, EZFS_NOMEM,
1674 dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1675 newname));
1676 }
1677
1678 if (nvlist_add_string(props,
fb5f0bc8
BB
1679 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1680 nvlist_add_string(props,
1681 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
34dc7c2f
BB
1682 nvlist_free(props);
1683 return (zfs_error_fmt(hdl, EZFS_NOMEM,
1684 dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1685 newname));
1686 }
1687 }
1688
572e2857
BB
1689 ret = zpool_import_props(hdl, config, newname, props,
1690 ZFS_IMPORT_NORMAL);
8a5fc748 1691 nvlist_free(props);
34dc7c2f
BB
1692 return (ret);
1693}
1694
572e2857
BB
1695static void
1696print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1697 int indent)
1698{
1699 nvlist_t **child;
1700 uint_t c, children;
1701 char *vname;
1702 uint64_t is_log = 0;
1703
1704 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1705 &is_log);
1706
1707 if (name != NULL)
1708 (void) printf("\t%*s%s%s\n", indent, "", name,
1709 is_log ? " [log]" : "");
1710
1711 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1712 &child, &children) != 0)
1713 return;
1714
1715 for (c = 0; c < children; c++) {
d2f3e292 1716 vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
572e2857
BB
1717 print_vdev_tree(hdl, vname, child[c], indent + 2);
1718 free(vname);
1719 }
1720}
1721
9ae529ec
CS
1722void
1723zpool_print_unsup_feat(nvlist_t *config)
1724{
1725 nvlist_t *nvinfo, *unsup_feat;
1726 nvpair_t *nvp;
1727
1728 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1729 0);
1730 verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1731 &unsup_feat) == 0);
1732
1733 for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1734 nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1735 char *desc;
1736
1737 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1738 verify(nvpair_value_string(nvp, &desc) == 0);
1739
1740 if (strlen(desc) > 0)
1741 (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1742 else
1743 (void) printf("\t%s\n", nvpair_name(nvp));
1744 }
1745}
1746
/*
 * Import the given pool using the known configuration and a list of
 * properties to be set.  The configuration should have come from
 * zpool_find_import().  The 'newname' parameters control whether the pool
 * is imported with a different name.
 *
 * 'flags' is a mask of ZFS_IMPORT_* values passed through to the kernel in
 * zc_cookie.  Returns 0 on success, -1 (with an error set on 'hdl') on
 * failure.
 */
int
zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    nvlist_t *props, int flags)
{
	zfs_cmd_t zc = {"\0"};
	zpool_rewind_policy_t policy;
	nvlist_t *nv = NULL;		/* kernel's returned load info */
	nvlist_t *nvinfo = NULL;
	nvlist_t *missing = NULL;
	char *thename;			/* name the pool is imported under */
	char *origname;
	int ret;
	int error = 0;
	char errbuf[1024];

	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &origname) == 0);

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "cannot import pool '%s'"), origname);

	if (newname != NULL) {
		if (!zpool_name_valid(hdl, B_FALSE, newname))
			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		thename = (char *)newname;
	} else {
		thename = origname;
	}

	if (props != NULL) {
		uint64_t version;
		/* Shadows the 'flags' parameter deliberately (prop flags). */
		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
		    &version) == 0);

		/* Validation returns a new nvlist; the caller's is untouched. */
		if ((props = zpool_valid_proplist(hdl, origname,
		    props, version, flags, errbuf)) == NULL)
			return (-1);
		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
			nvlist_free(props);
			return (-1);
		}
		nvlist_free(props);
	}

	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &zc.zc_guid) == 0);

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}
	if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}

	zc.zc_cookie = flags;
	/* Grow the destination buffer until the kernel's reply fits. */
	while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
	    errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}
	if (ret != 0)
		error = errno;

	(void) zcmd_read_dst_nvlist(hdl, &zc, &nv);

	zcmd_free_nvlists(&zc);

	zpool_get_rewind_policy(config, &policy);

	if (error) {
		char desc[1024];

		/*
		 * Dry-run failed, but we print out what success
		 * looks like if we found a best txg
		 */
		if (policy.zrp_request & ZPOOL_TRY_REWIND) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    B_TRUE, nv);
			nvlist_free(nv);
			return (-1);
		}

		if (newname == NULL)
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    thename);
		else
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
			    origname, thename);

		switch (error) {
		case ENOTSUP:
			if (nv != NULL && nvlist_lookup_nvlist(nv,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
			    nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
				(void) printf(dgettext(TEXT_DOMAIN, "This "
				    "pool uses the following feature(s) not "
				    "supported by this system:\n"));
				zpool_print_unsup_feat(nv);
				if (nvlist_exists(nvinfo,
				    ZPOOL_CONFIG_CAN_RDONLY)) {
					(void) printf(dgettext(TEXT_DOMAIN,
					    "All unsupported features are only "
					    "required for writing to the pool."
					    "\nThe pool can be imported using "
					    "'-o readonly=on'.\n"));
				}
			}
			/*
			 * Unsupported version.
			 */
			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
			break;

		case EINVAL:
			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
			break;

		case EROFS:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is read only"));
			(void) zfs_error(hdl, EZFS_BADDEV, desc);
			break;

		case ENXIO:
			/* Kernel may report exactly which vdevs are absent. */
			if (nv && nvlist_lookup_nvlist(nv,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
			    nvlist_lookup_nvlist(nvinfo,
			    ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
				(void) printf(dgettext(TEXT_DOMAIN,
				    "The devices below are missing, use "
				    "'-m' to import the pool anyway:\n"));
				print_vdev_tree(hdl, NULL, missing, 2);
				(void) printf("\n");
			}
			(void) zpool_standard_error(hdl, error, desc);
			break;

		case EEXIST:
			(void) zpool_standard_error(hdl, error, desc);
			break;

		case EBUSY:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices are already in use\n"));
			(void) zfs_error(hdl, EZFS_BADDEV, desc);
			break;
		case ENAMETOOLONG:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "new name of at least one dataset is longer than "
			    "the maximum allowable length"));
			(void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
			break;
		default:
			(void) zpool_standard_error(hdl, error, desc);
			zpool_explain_recover(hdl,
			    newname ? origname : thename, -error, nv);
			break;
		}

		nvlist_free(nv);
		ret = -1;
	} else {
		zpool_handle_t *zhp;

		/*
		 * This should never fail, but play it safe anyway.
		 */
		if (zpool_open_silent(hdl, thename, &zhp) != 0)
			ret = -1;
		else if (zhp != NULL)
			zpool_close(zhp);
		if (policy.zrp_request &
		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
		}
		nvlist_free(nv);
		return (0);
	}

	return (ret);
}
1948
1949/*
428870ff 1950 * Scan the pool.
34dc7c2f
BB
1951 */
1952int
428870ff 1953zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
34dc7c2f 1954{
13fe0198 1955 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
1956 char msg[1024];
1957 libzfs_handle_t *hdl = zhp->zpool_hdl;
1958
1959 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
428870ff 1960 zc.zc_cookie = func;
34dc7c2f 1961
572e2857 1962 if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
428870ff 1963 (errno == ENOENT && func != POOL_SCAN_NONE))
34dc7c2f
BB
1964 return (0);
1965
428870ff
BB
1966 if (func == POOL_SCAN_SCRUB) {
1967 (void) snprintf(msg, sizeof (msg),
1968 dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1969 } else if (func == POOL_SCAN_NONE) {
1970 (void) snprintf(msg, sizeof (msg),
1971 dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
1972 zc.zc_name);
1973 } else {
1974 assert(!"unexpected result");
1975 }
34dc7c2f 1976
428870ff
BB
1977 if (errno == EBUSY) {
1978 nvlist_t *nvroot;
1979 pool_scan_stat_t *ps = NULL;
1980 uint_t psc;
1981
1982 verify(nvlist_lookup_nvlist(zhp->zpool_config,
1983 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
1984 (void) nvlist_lookup_uint64_array(nvroot,
1985 ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
1986 if (ps && ps->pss_func == POOL_SCAN_SCRUB)
1987 return (zfs_error(hdl, EZFS_SCRUBBING, msg));
1988 else
1989 return (zfs_error(hdl, EZFS_RESILVERING, msg));
1990 } else if (errno == ENOENT) {
1991 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
1992 } else {
34dc7c2f 1993 return (zpool_standard_error(hdl, errno, msg));
428870ff
BB
1994 }
1995}
1996
/*
 * Find a vdev that matches the search criteria specified.  We use the
 * the nvpair name to determine how we should look for the device.
 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
 * spare; but FALSE if its an INUSE spare.
 *
 * Recursively walks the vdev tree rooted at 'nv'; the spare and l2cache
 * child lists are searched as well, setting *avail_spare / *l2cache when
 * the match is found there.  'log' (when non-NULL) is set at the top-level
 * recursion only, since ZPOOL_CONFIG_IS_LOG lives on top-level vdevs.
 */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	uint_t c, children;
	nvlist_t **child;
	nvlist_t *ret;
	uint64_t is_log;
	char *srchkey;
	nvpair_t *pair = nvlist_next_nvpair(search, NULL);

	/* Nothing to look for */
	if (search == NULL || pair == NULL)
		return (NULL);

	/* Obtain the key we will use to search */
	srchkey = nvpair_name(pair);

	switch (nvpair_type(pair)) {
	case DATA_TYPE_UINT64:
		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
			uint64_t srchval, theguid;

			verify(nvpair_value_uint64(pair, &srchval) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &theguid) == 0);
			if (theguid == srchval)
				return (nv);
		}
		break;

	case DATA_TYPE_STRING: {
		char *srchval, *val;

		verify(nvpair_value_string(pair, &srchval) == 0);
		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
			break;

		/*
		 * Search for the requested value. Special cases:
		 *
		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
		 *   "-part1", or "p1".  The suffix is hidden from the user,
		 *   but included in the string, so this matches around it.
		 * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
		 *   is used to check all possible expanded paths.
		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
		 *
		 * Otherwise, all other searches are simple string compares.
		 */
		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
			uint64_t wholedisk = 0;

			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
			    &wholedisk);
			if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
				return (nv);

		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
			char *type, *idx, *end, *p;
			uint64_t id, vdev_id;

			/*
			 * Determine our vdev type, keeping in mind
			 * that the srchval is composed of a type and
			 * vdev id pair (i.e. mirror-4).
			 */
			if ((type = strdup(srchval)) == NULL)
				return (NULL);

			if ((p = strrchr(type, '-')) == NULL) {
				free(type);
				break;
			}
			idx = p + 1;
			*p = '\0';

			/*
			 * If the types don't match then keep looking.
			 */
			if (strncmp(val, type, strlen(val)) != 0) {
				free(type);
				break;
			}

			verify(strncmp(type, VDEV_TYPE_RAIDZ,
			    strlen(VDEV_TYPE_RAIDZ)) == 0 ||
			    strncmp(type, VDEV_TYPE_MIRROR,
			    strlen(VDEV_TYPE_MIRROR)) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);

			/*
			 * NOTE(review): only errno is checked after
			 * strtoull; a non-numeric idx would silently
			 * parse as 0 — presumably callers always supply
			 * well-formed "type-N" names; verify upstream.
			 */
			errno = 0;
			vdev_id = strtoull(idx, &end, 10);

			free(type);
			if (errno != 0)
				return (NULL);

			/*
			 * Now verify that we have the correct vdev id.
			 */
			if (vdev_id == id)
				return (nv);
		}

		/*
		 * Common case
		 */
		if (strcmp(srchval, val) == 0)
			return (nv);
		break;
	}

	default:
		break;
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = vdev_to_nvlist_iter(child[c], search,
		    avail_spare, l2cache, NULL)) != NULL) {
			/*
			 * The 'is_log' value is only set for the toplevel
			 * vdev, not the leaf vdevs.  So we always lookup the
			 * log device from the root of the vdev tree (where
			 * 'log' is non-NULL).
			 */
			if (log != NULL &&
			    nvlist_lookup_uint64(child[c],
			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
			    is_log) {
				*log = B_TRUE;
			}
			return (ret);
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*avail_spare = B_TRUE;
				return (ret);
			}
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*l2cache = B_TRUE;
				return (ret);
			}
		}
	}

	return (NULL);
}
2168
9babb374
BB
2169/*
2170 * Given a physical path (minus the "/devices" prefix), find the
2171 * associated vdev.
2172 */
2173nvlist_t *
2174zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2175 boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2176{
2177 nvlist_t *search, *nvroot, *ret;
2178
2179 verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2180 verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2181
2182 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2183 &nvroot) == 0);
2184
2185 *avail_spare = B_FALSE;
572e2857
BB
2186 *l2cache = B_FALSE;
2187 if (log != NULL)
2188 *log = B_FALSE;
9babb374
BB
2189 ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2190 nvlist_free(search);
2191
2192 return (ret);
2193}
2194
428870ff
BB
2195/*
2196 * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2197 */
2198boolean_t
2199zpool_vdev_is_interior(const char *name)
2200{
2201 if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2202 strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2203 return (B_TRUE);
2204 return (B_FALSE);
2205}
2206
34dc7c2f
BB
2207nvlist_t *
2208zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
b128c09f 2209 boolean_t *l2cache, boolean_t *log)
34dc7c2f 2210{
34dc7c2f 2211 char *end;
9babb374 2212 nvlist_t *nvroot, *search, *ret;
34dc7c2f
BB
2213 uint64_t guid;
2214
9babb374
BB
2215 verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2216
1a5c611a 2217 guid = strtoull(path, &end, 0);
34dc7c2f 2218 if (guid != 0 && *end == '\0') {
9babb374 2219 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
428870ff
BB
2220 } else if (zpool_vdev_is_interior(path)) {
2221 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
34dc7c2f 2222 } else {
9babb374 2223 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
34dc7c2f
BB
2224 }
2225
2226 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2227 &nvroot) == 0);
2228
2229 *avail_spare = B_FALSE;
2230 *l2cache = B_FALSE;
b128c09f
BB
2231 if (log != NULL)
2232 *log = B_FALSE;
9babb374
BB
2233 ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2234 nvlist_free(search);
2235
2236 return (ret);
b128c09f
BB
2237}
2238
2239static int
2240vdev_online(nvlist_t *nv)
2241{
2242 uint64_t ival;
2243
2244 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2245 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2246 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2247 return (0);
2248
2249 return (1);
2250}
2251
2252/*
9babb374 2253 * Helper function for zpool_get_physpaths().
b128c09f 2254 */
9babb374
BB
2255static int
2256vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2257 size_t *bytes_written)
2258{
2259 size_t bytes_left, pos, rsz;
2260 char *tmppath;
2261 const char *format;
2262
2263 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2264 &tmppath) != 0)
2265 return (EZFS_NODEVICE);
2266
2267 pos = *bytes_written;
2268 bytes_left = physpath_size - pos;
2269 format = (pos == 0) ? "%s" : " %s";
2270
2271 rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2272 *bytes_written += rsz;
2273
2274 if (rsz >= bytes_left) {
2275 /* if physpath was not copied properly, clear it */
2276 if (bytes_left != 0) {
2277 physpath[pos] = 0;
2278 }
2279 return (EZFS_NOSPC);
2280 }
2281 return (0);
2282}
2283
/*
 * Recursively walk the vdev tree rooted at 'nv', appending the physical
 * path of every online disk to 'physpath' (space separated); '*rsz'
 * tracks the total number of bytes written.  'is_spare' is set while
 * descending beneath a spare vdev so that only the active spare device
 * is recorded.
 *
 * NOTE(review): this function deliberately falls through to
 * EZFS_POOL_INVALARG even after successfully appending paths; callers
 * appear to judge success by '*rsz' and only EZFS_NOSPC is propagated
 * out of the recursion -- confirm before relying on the return value.
 */
static int
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
    size_t *rsz, boolean_t is_spare)
{
	char *type;
	int ret;

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EZFS_INVALCONFIG);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		/*
		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
		 * For a spare vdev, we only want to boot from the active
		 * spare device.
		 */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare);
			if (!spare)
				return (EZFS_INVALCONFIG);
		}

		/* Only online disks contribute a physical path. */
		if (vdev_online(nv)) {
			if ((ret = vdev_get_one_physpath(nv, physpath,
			    phypath_size, rsz)) != 0)
				return (ret);
		}
	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		/*
		 * Interior vdev: recurse into each child.  Note that
		 * 'is_spare' is (re)assigned in the condition above so the
		 * children of a spare vdev are walked with is_spare set.
		 */
		nvlist_t **child;
		uint_t count;
		int i, ret;

		if (nvlist_lookup_nvlist_array(nv,
		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
			return (EZFS_INVALCONFIG);

		for (i = 0; i < count; i++) {
			ret = vdev_get_physpaths(child[i], physpath,
			    phypath_size, rsz, is_spare);
			/* Only buffer exhaustion aborts the walk. */
			if (ret == EZFS_NOSPC)
				return (ret);
		}
	}

	return (EZFS_POOL_INVALARG);
}
2335
2336/*
2337 * Get phys_path for a root pool config.
2338 * Return 0 on success; non-zero on failure.
2339 */
2340static int
2341zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
b128c09f 2342{
9babb374 2343 size_t rsz;
b128c09f
BB
2344 nvlist_t *vdev_root;
2345 nvlist_t **child;
2346 uint_t count;
9babb374 2347 char *type;
b128c09f 2348
9babb374 2349 rsz = 0;
b128c09f 2350
9babb374
BB
2351 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2352 &vdev_root) != 0)
2353 return (EZFS_INVALCONFIG);
b128c09f 2354
9babb374
BB
2355 if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2356 nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
b128c09f 2357 &child, &count) != 0)
9babb374 2358 return (EZFS_INVALCONFIG);
b128c09f 2359
c372b36e 2360#if defined(__sun__) || defined(__sun)
9babb374
BB
2361 /*
2362 * root pool can not have EFI labeled disks and can only have
2363 * a single top-level vdev.
2364 */
2365 if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2366 pool_uses_efi(vdev_root))
2367 return (EZFS_POOL_INVALARG);
c372b36e 2368#endif
b128c09f 2369
9babb374
BB
2370 (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2371 B_FALSE);
2372
2373 /* No online devices */
2374 if (rsz == 0)
2375 return (EZFS_NODEVICE);
b128c09f
BB
2376
2377 return (0);
34dc7c2f
BB
2378}
2379
9babb374
BB
2380/*
2381 * Get phys_path for a root pool
2382 * Return 0 on success; non-zero on failure.
2383 */
2384int
2385zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2386{
2387 return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2388 phypath_size));
2389}
2390
9babb374
BB
2391/*
2392 * If the device has being dynamically expanded then we need to relabel
2393 * the disk to use the new unallocated space.
2394 */
2395static int
8adf4864 2396zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
9babb374 2397{
9babb374 2398 int fd, error;
9babb374 2399
d603ed6c 2400 if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
9babb374 2401 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
109491a8 2402 "relabel '%s': unable to open device: %d"), path, errno);
8adf4864 2403 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
9babb374
BB
2404 }
2405
2406 /*
2407 * It's possible that we might encounter an error if the device
2408 * does not have any unallocated space left. If so, we simply
2409 * ignore that error and continue on.
b5a28807
ED
2410 *
2411 * Also, we don't call efi_rescan() - that would just return EBUSY.
2412 * The module will do it for us in vdev_disk_open().
9babb374 2413 */
d603ed6c 2414 error = efi_use_whole_disk(fd);
9babb374
BB
2415 (void) close(fd);
2416 if (error && error != VT_ENOSPC) {
2417 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
d603ed6c 2418 "relabel '%s': unable to read disk capacity"), path);
8adf4864 2419 return (zfs_error(hdl, EZFS_NOCAP, msg));
9babb374
BB
2420 }
2421 return (0);
2422}
2423
34dc7c2f
BB
2424/*
2425 * Bring the specified vdev online. The 'flags' parameter is a set of the
2426 * ZFS_ONLINE_* flags.
2427 */
2428int
2429zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
2430 vdev_state_t *newstate)
2431{
13fe0198 2432 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2433 char msg[1024];
2434 nvlist_t *tgt;
9babb374 2435 boolean_t avail_spare, l2cache, islog;
34dc7c2f 2436 libzfs_handle_t *hdl = zhp->zpool_hdl;
8adf4864 2437 int error;
34dc7c2f 2438
9babb374
BB
2439 if (flags & ZFS_ONLINE_EXPAND) {
2440 (void) snprintf(msg, sizeof (msg),
2441 dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
2442 } else {
2443 (void) snprintf(msg, sizeof (msg),
2444 dgettext(TEXT_DOMAIN, "cannot online %s"), path);
2445 }
34dc7c2f
BB
2446
2447 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f 2448 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
9babb374 2449 &islog)) == NULL)
34dc7c2f
BB
2450 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2451
2452 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2453
428870ff 2454 if (avail_spare)
34dc7c2f
BB
2455 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2456
9babb374
BB
2457 if (flags & ZFS_ONLINE_EXPAND ||
2458 zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
9babb374
BB
2459 uint64_t wholedisk = 0;
2460
2461 (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
2462 &wholedisk);
9babb374
BB
2463
2464 /*
2465 * XXX - L2ARC 1.0 devices can't support expansion.
2466 */
2467 if (l2cache) {
2468 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2469 "cannot expand cache devices"));
2470 return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
2471 }
2472
2473 if (wholedisk) {
7608bd0d
ED
2474 const char *fullpath = path;
2475 char buf[MAXPATHLEN];
2476
2477 if (path[0] != '/') {
2478 error = zfs_resolve_shortname(path, buf,
d1d7e268 2479 sizeof (buf));
7608bd0d
ED
2480 if (error != 0)
2481 return (zfs_error(hdl, EZFS_NODEVICE,
2482 msg));
2483
2484 fullpath = buf;
2485 }
2486
2487 error = zpool_relabel_disk(hdl, fullpath, msg);
8adf4864
ED
2488 if (error != 0)
2489 return (error);
9babb374
BB
2490 }
2491 }
2492
34dc7c2f
BB
2493 zc.zc_cookie = VDEV_STATE_ONLINE;
2494 zc.zc_obj = flags;
2495
572e2857 2496 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
428870ff
BB
2497 if (errno == EINVAL) {
2498 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
2499 "from this pool into a new one. Use '%s' "
2500 "instead"), "zpool detach");
2501 return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
2502 }
34dc7c2f 2503 return (zpool_standard_error(hdl, errno, msg));
428870ff 2504 }
34dc7c2f
BB
2505
2506 *newstate = zc.zc_cookie;
2507 return (0);
2508}
2509
2510/*
2511 * Take the specified vdev offline
2512 */
2513int
2514zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2515{
13fe0198 2516 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2517 char msg[1024];
2518 nvlist_t *tgt;
2519 boolean_t avail_spare, l2cache;
2520 libzfs_handle_t *hdl = zhp->zpool_hdl;
2521
2522 (void) snprintf(msg, sizeof (msg),
2523 dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2524
2525 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f
BB
2526 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2527 NULL)) == NULL)
34dc7c2f
BB
2528 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2529
2530 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2531
428870ff 2532 if (avail_spare)
34dc7c2f
BB
2533 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2534
34dc7c2f
BB
2535 zc.zc_cookie = VDEV_STATE_OFFLINE;
2536 zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2537
572e2857 2538 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
34dc7c2f
BB
2539 return (0);
2540
2541 switch (errno) {
2542 case EBUSY:
2543
2544 /*
2545 * There are no other replicas of this device.
2546 */
2547 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2548
9babb374
BB
2549 case EEXIST:
2550 /*
2551 * The log device has unplayed logs
2552 */
2553 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2554
34dc7c2f
BB
2555 default:
2556 return (zpool_standard_error(hdl, errno, msg));
2557 }
2558}
2559
2560/*
2561 * Mark the given vdev faulted.
2562 */
2563int
428870ff 2564zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
34dc7c2f 2565{
13fe0198 2566 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2567 char msg[1024];
2568 libzfs_handle_t *hdl = zhp->zpool_hdl;
2569
2570 (void) snprintf(msg, sizeof (msg),
d1d7e268 2571 dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
34dc7c2f
BB
2572
2573 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2574 zc.zc_guid = guid;
2575 zc.zc_cookie = VDEV_STATE_FAULTED;
428870ff 2576 zc.zc_obj = aux;
34dc7c2f 2577
572e2857 2578 if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
34dc7c2f
BB
2579 return (0);
2580
2581 switch (errno) {
2582 case EBUSY:
2583
2584 /*
2585 * There are no other replicas of this device.
2586 */
2587 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2588
2589 default:
2590 return (zpool_standard_error(hdl, errno, msg));
2591 }
2592
2593}
2594
2595/*
2596 * Mark the given vdev degraded.
2597 */
2598int
428870ff 2599zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
34dc7c2f 2600{
13fe0198 2601 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2602 char msg[1024];
2603 libzfs_handle_t *hdl = zhp->zpool_hdl;
2604
2605 (void) snprintf(msg, sizeof (msg),
d1d7e268 2606 dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
34dc7c2f
BB
2607
2608 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2609 zc.zc_guid = guid;
2610 zc.zc_cookie = VDEV_STATE_DEGRADED;
428870ff 2611 zc.zc_obj = aux;
34dc7c2f 2612
572e2857 2613 if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
34dc7c2f
BB
2614 return (0);
2615
2616 return (zpool_standard_error(hdl, errno, msg));
2617}
2618
2619/*
2620 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2621 * a hot spare.
2622 */
2623static boolean_t
2624is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2625{
2626 nvlist_t **child;
2627 uint_t c, children;
2628 char *type;
2629
2630 if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2631 &children) == 0) {
2632 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2633 &type) == 0);
2634
2635 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2636 children == 2 && child[which] == tgt)
2637 return (B_TRUE);
2638
2639 for (c = 0; c < children; c++)
2640 if (is_replacing_spare(child[c], tgt, which))
2641 return (B_TRUE);
2642 }
2643
2644 return (B_FALSE);
2645}
2646
2647/*
2648 * Attach new_disk (fully described by nvroot) to old_disk.
2649 * If 'replacing' is specified, the new disk will replace the old one.
2650 */
2651int
2652zpool_vdev_attach(zpool_handle_t *zhp,
2653 const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
2654{
13fe0198 2655 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2656 char msg[1024];
2657 int ret;
2658 nvlist_t *tgt;
b128c09f
BB
2659 boolean_t avail_spare, l2cache, islog;
2660 uint64_t val;
572e2857 2661 char *newname;
34dc7c2f
BB
2662 nvlist_t **child;
2663 uint_t children;
2664 nvlist_t *config_root;
2665 libzfs_handle_t *hdl = zhp->zpool_hdl;
1bd201e7 2666 boolean_t rootpool = zpool_is_bootable(zhp);
34dc7c2f
BB
2667
2668 if (replacing)
2669 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2670 "cannot replace %s with %s"), old_disk, new_disk);
2671 else
2672 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2673 "cannot attach %s to %s"), new_disk, old_disk);
2674
c372b36e 2675#if defined(__sun__) || defined(__sun)
b128c09f
BB
2676 /*
2677 * If this is a root pool, make sure that we're not attaching an
2678 * EFI labeled device.
2679 */
2680 if (rootpool && pool_uses_efi(nvroot)) {
2681 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2682 "EFI labeled devices are not supported on root pools."));
2683 return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
2684 }
c372b36e 2685#endif
b128c09f 2686
34dc7c2f 2687 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f
BB
2688 if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
2689 &islog)) == 0)
34dc7c2f
BB
2690 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2691
2692 if (avail_spare)
2693 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2694
2695 if (l2cache)
2696 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2697
2698 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2699 zc.zc_cookie = replacing;
2700
2701 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2702 &child, &children) != 0 || children != 1) {
2703 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2704 "new device must be a single disk"));
2705 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
2706 }
2707
2708 verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
2709 ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
2710
d2f3e292 2711 if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
b128c09f
BB
2712 return (-1);
2713
34dc7c2f
BB
2714 /*
2715 * If the target is a hot spare that has been swapped in, we can only
2716 * replace it with another hot spare.
2717 */
2718 if (replacing &&
2719 nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
b128c09f
BB
2720 (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
2721 NULL) == NULL || !avail_spare) &&
2722 is_replacing_spare(config_root, tgt, 1)) {
34dc7c2f
BB
2723 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2724 "can only be replaced by another hot spare"));
b128c09f 2725 free(newname);
34dc7c2f
BB
2726 return (zfs_error(hdl, EZFS_BADTARGET, msg));
2727 }
2728
b128c09f
BB
2729 free(newname);
2730
34dc7c2f
BB
2731 if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
2732 return (-1);
2733
572e2857 2734 ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
34dc7c2f
BB
2735
2736 zcmd_free_nvlists(&zc);
2737
b128c09f
BB
2738 if (ret == 0) {
2739 if (rootpool) {
9babb374
BB
2740 /*
2741 * XXX need a better way to prevent user from
2742 * booting up a half-baked vdev.
2743 */
2744 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
2745 "sure to wait until resilver is done "
2746 "before rebooting.\n"));
b128c09f 2747 }
34dc7c2f 2748 return (0);
b128c09f 2749 }
34dc7c2f
BB
2750
2751 switch (errno) {
2752 case ENOTSUP:
2753 /*
2754 * Can't attach to or replace this type of vdev.
2755 */
2756 if (replacing) {
572e2857
BB
2757 uint64_t version = zpool_get_prop_int(zhp,
2758 ZPOOL_PROP_VERSION, NULL);
2759
b128c09f 2760 if (islog)
34dc7c2f
BB
2761 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2762 "cannot replace a log with a spare"));
572e2857
BB
2763 else if (version >= SPA_VERSION_MULTI_REPLACE)
2764 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2765 "already in replacing/spare config; wait "
2766 "for completion or use 'zpool detach'"));
34dc7c2f
BB
2767 else
2768 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2769 "cannot replace a replacing device"));
2770 } else {
2771 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2772 "can only attach to mirrors and top-level "
2773 "disks"));
2774 }
2775 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2776 break;
2777
2778 case EINVAL:
2779 /*
2780 * The new device must be a single disk.
2781 */
2782 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2783 "new device must be a single disk"));
2784 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
2785 break;
2786
2787 case EBUSY:
2788 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
2789 new_disk);
2790 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2791 break;
2792
2793 case EOVERFLOW:
2794 /*
2795 * The new device is too small.
2796 */
2797 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2798 "device is too small"));
2799 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2800 break;
2801
2802 case EDOM:
2803 /*
d4aae2a0 2804 * The new device has a different optimal sector size.
34dc7c2f
BB
2805 */
2806 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
d4aae2a0
BB
2807 "new device has a different optimal sector size; use the "
2808 "option '-o ashift=N' to override the optimal size"));
34dc7c2f
BB
2809 (void) zfs_error(hdl, EZFS_BADDEV, msg);
2810 break;
2811
2812 case ENAMETOOLONG:
2813 /*
2814 * The resulting top-level vdev spec won't fit in the label.
2815 */
2816 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
2817 break;
2818
2819 default:
2820 (void) zpool_standard_error(hdl, errno, msg);
2821 }
2822
2823 return (-1);
2824}
2825
2826/*
2827 * Detach the specified device.
2828 */
2829int
2830zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2831{
13fe0198 2832 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2833 char msg[1024];
2834 nvlist_t *tgt;
2835 boolean_t avail_spare, l2cache;
2836 libzfs_handle_t *hdl = zhp->zpool_hdl;
2837
2838 (void) snprintf(msg, sizeof (msg),
2839 dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2840
2841 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f
BB
2842 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2843 NULL)) == 0)
34dc7c2f
BB
2844 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2845
2846 if (avail_spare)
2847 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2848
2849 if (l2cache)
2850 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2851
2852 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2853
2854 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2855 return (0);
2856
2857 switch (errno) {
2858
2859 case ENOTSUP:
2860 /*
2861 * Can't detach from this type of vdev.
2862 */
2863 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2864 "applicable to mirror and replacing vdevs"));
572e2857 2865 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
34dc7c2f
BB
2866 break;
2867
2868 case EBUSY:
2869 /*
2870 * There are no other replicas of this device.
2871 */
2872 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2873 break;
2874
2875 default:
2876 (void) zpool_standard_error(hdl, errno, msg);
2877 }
2878
2879 return (-1);
2880}
2881
428870ff
BB
2882/*
2883 * Find a mirror vdev in the source nvlist.
2884 *
2885 * The mchild array contains a list of disks in one of the top-level mirrors
2886 * of the source pool. The schild array contains a list of disks that the
2887 * user specified on the command line. We loop over the mchild array to
2888 * see if any entry in the schild array matches.
2889 *
2890 * If a disk in the mchild array is found in the schild array, we return
2891 * the index of that entry. Otherwise we return -1.
2892 */
2893static int
2894find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2895 nvlist_t **schild, uint_t schildren)
2896{
2897 uint_t mc;
2898
2899 for (mc = 0; mc < mchildren; mc++) {
2900 uint_t sc;
2901 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
d2f3e292 2902 mchild[mc], 0);
428870ff
BB
2903
2904 for (sc = 0; sc < schildren; sc++) {
2905 char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
d2f3e292 2906 schild[sc], 0);
428870ff
BB
2907 boolean_t result = (strcmp(mpath, spath) == 0);
2908
2909 free(spath);
2910 if (result) {
2911 free(mpath);
2912 return (mc);
2913 }
2914 }
2915
2916 free(mpath);
2917 }
2918
2919 return (-1);
2920}
2921
2922/*
2923 * Split a mirror pool. If newroot points to null, then a new nvlist
2924 * is generated and it is the responsibility of the caller to free it.
2925 */
2926int
2927zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
2928 nvlist_t *props, splitflags_t flags)
2929{
13fe0198 2930 zfs_cmd_t zc = {"\0"};
428870ff
BB
2931 char msg[1024];
2932 nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
2933 nvlist_t **varray = NULL, *zc_props = NULL;
2934 uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
2935 libzfs_handle_t *hdl = zhp->zpool_hdl;
2936 uint64_t vers;
2937 boolean_t freelist = B_FALSE, memory_err = B_TRUE;
2938 int retval = 0;
2939
2940 (void) snprintf(msg, sizeof (msg),
2941 dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
2942
2943 if (!zpool_name_valid(hdl, B_FALSE, newname))
2944 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
2945
2946 if ((config = zpool_get_config(zhp, NULL)) == NULL) {
2947 (void) fprintf(stderr, gettext("Internal error: unable to "
2948 "retrieve pool configuration\n"));
2949 return (-1);
2950 }
2951
2952 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
2953 == 0);
2954 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
2955
2956 if (props) {
572e2857 2957 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
428870ff 2958 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
572e2857 2959 props, vers, flags, msg)) == NULL)
428870ff
BB
2960 return (-1);
2961 }
2962
2963 if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2964 &children) != 0) {
2965 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2966 "Source pool is missing vdev tree"));
8a5fc748 2967 nvlist_free(zc_props);
428870ff
BB
2968 return (-1);
2969 }
2970
2971 varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
2972 vcount = 0;
2973
2974 if (*newroot == NULL ||
2975 nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
2976 &newchild, &newchildren) != 0)
2977 newchildren = 0;
2978
2979 for (c = 0; c < children; c++) {
2980 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
2981 char *type;
2982 nvlist_t **mchild, *vdev;
2983 uint_t mchildren;
2984 int entry;
2985
2986 /*
2987 * Unlike cache & spares, slogs are stored in the
2988 * ZPOOL_CONFIG_CHILDREN array. We filter them out here.
2989 */
2990 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
2991 &is_log);
2992 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
2993 &is_hole);
2994 if (is_log || is_hole) {
2995 /*
2996 * Create a hole vdev and put it in the config.
2997 */
2998 if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
2999 goto out;
3000 if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
3001 VDEV_TYPE_HOLE) != 0)
3002 goto out;
3003 if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
3004 1) != 0)
3005 goto out;
3006 if (lastlog == 0)
3007 lastlog = vcount;
3008 varray[vcount++] = vdev;
3009 continue;
3010 }
3011 lastlog = 0;
3012 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
3013 == 0);
3014 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
3015 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3016 "Source pool must be composed only of mirrors\n"));
3017 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3018 goto out;
3019 }
3020
3021 verify(nvlist_lookup_nvlist_array(child[c],
3022 ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
3023
3024 /* find or add an entry for this top-level vdev */
3025 if (newchildren > 0 &&
3026 (entry = find_vdev_entry(zhp, mchild, mchildren,
3027 newchild, newchildren)) >= 0) {
3028 /* We found a disk that the user specified. */
3029 vdev = mchild[entry];
3030 ++found;
3031 } else {
3032 /* User didn't specify a disk for this vdev. */
3033 vdev = mchild[mchildren - 1];
3034 }
3035
3036 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
3037 goto out;
3038 }
3039
3040 /* did we find every disk the user specified? */
3041 if (found != newchildren) {
3042 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
3043 "include at most one disk from each mirror"));
3044 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3045 goto out;
3046 }
3047
3048 /* Prepare the nvlist for populating. */
3049 if (*newroot == NULL) {
3050 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3051 goto out;
3052 freelist = B_TRUE;
3053 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3054 VDEV_TYPE_ROOT) != 0)
3055 goto out;
3056 } else {
3057 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3058 }
3059
3060 /* Add all the children we found */
3061 if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3062 lastlog == 0 ? vcount : lastlog) != 0)
3063 goto out;
3064
3065 /*
3066 * If we're just doing a dry run, exit now with success.
3067 */
3068 if (flags.dryrun) {
3069 memory_err = B_FALSE;
3070 freelist = B_FALSE;
3071 goto out;
3072 }
3073
3074 /* now build up the config list & call the ioctl */
3075 if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3076 goto out;
3077
3078 if (nvlist_add_nvlist(newconfig,
3079 ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3080 nvlist_add_string(newconfig,
3081 ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3082 nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3083 goto out;
3084
3085 /*
3086 * The new pool is automatically part of the namespace unless we
3087 * explicitly export it.
3088 */
3089 if (!flags.import)
3090 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3091 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3092 (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3093 if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3094 goto out;
3095 if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3096 goto out;
3097
3098 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3099 retval = zpool_standard_error(hdl, errno, msg);
3100 goto out;
3101 }
3102
3103 freelist = B_FALSE;
3104 memory_err = B_FALSE;
3105
3106out:
3107 if (varray != NULL) {
3108 int v;
3109
3110 for (v = 0; v < vcount; v++)
3111 nvlist_free(varray[v]);
3112 free(varray);
3113 }
3114 zcmd_free_nvlists(&zc);
8a5fc748
JJS
3115 nvlist_free(zc_props);
3116 nvlist_free(newconfig);
428870ff
BB
3117 if (freelist) {
3118 nvlist_free(*newroot);
3119 *newroot = NULL;
3120 }
3121
3122 if (retval != 0)
3123 return (retval);
3124
3125 if (memory_err)
3126 return (no_memory(hdl));
3127
3128 return (0);
3129}
3130
34dc7c2f 3131/*
d1502e9e
RL
3132 * Remove the given device. Currently, this is supported only for hot spares,
3133 * cache, and log devices.
34dc7c2f
BB
3134 */
3135int
3136zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3137{
13fe0198 3138 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3139 char msg[1024];
3140 nvlist_t *tgt;
428870ff 3141 boolean_t avail_spare, l2cache, islog;
34dc7c2f 3142 libzfs_handle_t *hdl = zhp->zpool_hdl;
428870ff 3143 uint64_t version;
34dc7c2f
BB
3144
3145 (void) snprintf(msg, sizeof (msg),
3146 dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3147
3148 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f 3149 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
428870ff 3150 &islog)) == 0)
34dc7c2f 3151 return (zfs_error(hdl, EZFS_NODEVICE, msg));
428870ff
BB
3152 /*
3153 * XXX - this should just go away.
3154 */
3155 if (!avail_spare && !l2cache && !islog) {
34dc7c2f 3156 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
d1502e9e 3157 "only inactive hot spares, cache, "
428870ff 3158 "or log devices can be removed"));
34dc7c2f
BB
3159 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3160 }
3161
428870ff
BB
3162 version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3163 if (islog && version < SPA_VERSION_HOLES) {
3164 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3165 "pool must be upgrade to support log removal"));
3166 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3167 }
3168
34dc7c2f
BB
3169 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3170
3171 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3172 return (0);
3173
3174 return (zpool_standard_error(hdl, errno, msg));
3175}
3176
3177/*
3178 * Clear the errors for the pool, or the particular device if specified.
3179 */
3180int
428870ff 3181zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
34dc7c2f 3182{
13fe0198 3183 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3184 char msg[1024];
3185 nvlist_t *tgt;
428870ff 3186 zpool_rewind_policy_t policy;
34dc7c2f
BB
3187 boolean_t avail_spare, l2cache;
3188 libzfs_handle_t *hdl = zhp->zpool_hdl;
428870ff 3189 nvlist_t *nvi = NULL;
572e2857 3190 int error;
34dc7c2f
BB
3191
3192 if (path)
3193 (void) snprintf(msg, sizeof (msg),
3194 dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3195 path);
3196 else
3197 (void) snprintf(msg, sizeof (msg),
3198 dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3199 zhp->zpool_name);
3200
3201 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3202 if (path) {
3203 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
b128c09f 3204 &l2cache, NULL)) == 0)
34dc7c2f
BB
3205 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3206
3207 /*
3208 * Don't allow error clearing for hot spares. Do allow
3209 * error clearing for l2cache devices.
3210 */
3211 if (avail_spare)
3212 return (zfs_error(hdl, EZFS_ISSPARE, msg));
3213
3214 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3215 &zc.zc_guid) == 0);
3216 }
3217
428870ff
BB
3218 zpool_get_rewind_policy(rewindnvl, &policy);
3219 zc.zc_cookie = policy.zrp_request;
3220
572e2857 3221 if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
428870ff
BB
3222 return (-1);
3223
572e2857 3224 if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
428870ff
BB
3225 return (-1);
3226
572e2857
BB
3227 while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3228 errno == ENOMEM) {
3229 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3230 zcmd_free_nvlists(&zc);
3231 return (-1);
3232 }
3233 }
3234
3235 if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
428870ff
BB
3236 errno != EPERM && errno != EACCES)) {
3237 if (policy.zrp_request &
3238 (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3239 (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3240 zpool_rewind_exclaim(hdl, zc.zc_name,
3241 ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
3242 nvi);
3243 nvlist_free(nvi);
3244 }
3245 zcmd_free_nvlists(&zc);
34dc7c2f 3246 return (0);
428870ff 3247 }
34dc7c2f 3248
428870ff 3249 zcmd_free_nvlists(&zc);
34dc7c2f
BB
3250 return (zpool_standard_error(hdl, errno, msg));
3251}
3252
3253/*
3254 * Similar to zpool_clear(), but takes a GUID (used by fmd).
3255 */
3256int
3257zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3258{
13fe0198 3259 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3260 char msg[1024];
3261 libzfs_handle_t *hdl = zhp->zpool_hdl;
3262
3263 (void) snprintf(msg, sizeof (msg),
3264 dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
d1d7e268 3265 (u_longlong_t)guid);
34dc7c2f
BB
3266
3267 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3268 zc.zc_guid = guid;
428870ff 3269 zc.zc_cookie = ZPOOL_NO_REWIND;
34dc7c2f
BB
3270
3271 if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3272 return (0);
3273
3274 return (zpool_standard_error(hdl, errno, msg));
3275}
3276
3541dc6d
GA
3277/*
3278 * Change the GUID for a pool.
3279 */
3280int
3281zpool_reguid(zpool_handle_t *zhp)
3282{
3283 char msg[1024];
3284 libzfs_handle_t *hdl = zhp->zpool_hdl;
13fe0198 3285 zfs_cmd_t zc = {"\0"};
3541dc6d
GA
3286
3287 (void) snprintf(msg, sizeof (msg),
3288 dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3289
3290 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3291 if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3292 return (0);
3293
3294 return (zpool_standard_error(hdl, errno, msg));
3295}
3296
1bd201e7
CS
3297/*
3298 * Reopen the pool.
3299 */
3300int
3301zpool_reopen(zpool_handle_t *zhp)
3302{
13fe0198 3303 zfs_cmd_t zc = {"\0"};
1bd201e7
CS
3304 char msg[1024];
3305 libzfs_handle_t *hdl = zhp->zpool_hdl;
3306
3307 (void) snprintf(msg, sizeof (msg),
3308 dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3309 zhp->zpool_name);
3310
3311 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3312 if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3313 return (0);
3314 return (zpool_standard_error(hdl, errno, msg));
3315}
3316
39fc0cb5 3317#if defined(__sun__) || defined(__sun)
34dc7c2f
BB
3318/*
3319 * Convert from a devid string to a path.
3320 */
3321static char *
3322devid_to_path(char *devid_str)
3323{
3324 ddi_devid_t devid;
3325 char *minor;
3326 char *path;
3327 devid_nmlist_t *list = NULL;
3328 int ret;
3329
3330 if (devid_str_decode(devid_str, &devid, &minor) != 0)
3331 return (NULL);
3332
3333 ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3334
3335 devid_str_free(minor);
3336 devid_free(devid);
3337
3338 if (ret != 0)
3339 return (NULL);
3340
0fdd8d64
MT
3341 /*
3342 * In a case the strdup() fails, we will just return NULL below.
3343 */
3344 path = strdup(list[0].devname);
34dc7c2f
BB
3345
3346 devid_free_nmlist(list);
3347
3348 return (path);
3349}
3350
3351/*
3352 * Convert from a path to a devid string.
3353 */
3354static char *
3355path_to_devid(const char *path)
3356{
3357 int fd;
3358 ddi_devid_t devid;
3359 char *minor, *ret;
3360
3361 if ((fd = open(path, O_RDONLY)) < 0)
3362 return (NULL);
3363
3364 minor = NULL;
3365 ret = NULL;
3366 if (devid_get(fd, &devid) == 0) {
3367 if (devid_get_minor_name(fd, &minor) == 0)
3368 ret = devid_str_encode(devid, minor);
3369 if (minor != NULL)
3370 devid_str_free(minor);
3371 devid_free(devid);
3372 }
3373 (void) close(fd);
3374
3375 return (ret);
3376}
3377
3378/*
3379 * Issue the necessary ioctl() to update the stored path value for the vdev. We
3380 * ignore any failure here, since a common case is for an unprivileged user to
3381 * type 'zpool status', and we'll display the correct information anyway.
3382 */
3383static void
3384set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3385{
13fe0198 3386 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3387
3388 (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3389 (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3390 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3391 &zc.zc_guid) == 0);
3392
3393 (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3394}
39fc0cb5 3395#endif /* sun */
34dc7c2f 3396
83c62c93
NB
3397/*
3398 * Remove partition suffix from a vdev path. Partition suffixes may take three
3399 * forms: "-partX", "pX", or "X", where X is a string of digits. The second
3400 * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
3401 * third case only occurs when preceded by a string matching the regular
541da993 3402 * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
d02ca379
DB
3403 *
3404 * caller must free the returned string
83c62c93 3405 */
/*
 * Remove a partition suffix from a vdev path.  Suffixes take three forms:
 * "-partX", "pX" (when preceded by a digit, e.g. "md0p0"), or a bare "X"
 * when the name matches "^([hsv]|xv)d[a-z]+" (scsi/ide/virtio/xen disks).
 *
 * Returns a newly allocated string the caller must free, or NULL on
 * allocation failure.
 */
char *
zfs_strip_partition(char *path)
{
	char *copy = strdup(path);
	char *cut = NULL;	/* where the name would be truncated */
	char *digits = NULL;	/* first character of the numeric suffix */

	if (copy == NULL)
		return (NULL);

	if ((cut = strstr(copy, "-part")) != NULL && cut != copy) {
		digits = cut + 5;
	} else if ((cut = strrchr(copy, 'p')) != NULL &&
	    cut > copy + 1 && isdigit(*(cut - 1))) {
		digits = cut + 1;
	} else if ((copy[0] == 'h' || copy[0] == 's' || copy[0] == 'v') &&
	    copy[1] == 'd') {
		for (digits = &copy[2]; isalpha(*digits); cut = ++digits)
			;
	} else if (strncmp("xvd", copy, 3) == 0) {
		for (digits = &copy[3]; isalpha(*digits); cut = ++digits)
			;
	}

	/* Only strip when the suffix is digits running to end-of-string. */
	if (cut != NULL && digits != NULL && *digits != '\0') {
		while (isdigit(*digits))
			digits++;
		if (*digits == '\0')
			*cut = '\0';
	}

	return (copy);
}
3433
8720e9e7
TH
3434/*
3435 * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
3436 *
3437 * path: /dev/sda1
3438 * returns: /dev/sda
3439 *
3440 * Returned string must be freed.
3441 */
/*
 * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
 *
 * path: /dev/sda1
 * returns: /dev/sda
 *
 * Returned string must be freed.
 */
char *
zfs_strip_partition_path(char *path)
{
	char *result = strdup(path);
	char *base;
	char *stripped;

	if (result == NULL)
		return (NULL);

	/* Locate the "sda1" component of "/dev/sda1" */
	base = strrchr(result, '/') + 1;

	/* Compute the partition-less name, e.g. "sda" */
	stripped = zfs_strip_partition(base);
	if (stripped == NULL) {
		free(result);
		return (NULL);
	}

	/* Overwrite "sda1" with "sda" in place */
	strlcpy(base, stripped, strlen(base) + 1);

	free(stripped);

	return (result);
}
3470
858219cc
NB
3471#define PATH_BUF_LEN 64
3472
34dc7c2f
BB
3473/*
3474 * Given a vdev, return the name to display in iostat. If the vdev has a path,
3475 * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3476 * We also check if this is a whole disk, in which case we strip off the
3477 * trailing 's0' slice name.
3478 *
3479 * This routine is also responsible for identifying when disks have been
3480 * reconfigured in a new location. The kernel will have opened the device by
3481 * devid, but the path will still refer to the old location. To catch this, we
3482 * first do a path -> devid translation (which is fast for the common case). If
3483 * the devid matches, we're done. If not, we do a reverse devid -> path
3484 * translation and issue the appropriate ioctl() to update the path of the vdev.
3485 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3486 * of these checks.
3487 */
char *
zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
    int name_flags)
{
	char *path, *type, *env;
	uint64_t value;
	char buf[PATH_BUF_LEN];
	char tmpbuf[PATH_BUF_LEN];

	/*
	 * The ZPOOL_VDEV_NAME_* environment variables force the matching
	 * VDEV_NAME_* flag on for every caller; a positive number, "YES",
	 * or "ON" (case-insensitive prefix match) all count as enabled.
	 */
	env = getenv("ZPOOL_VDEV_NAME_PATH");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
		name_flags |= VDEV_NAME_PATH;

	env = getenv("ZPOOL_VDEV_NAME_GUID");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
		name_flags |= VDEV_NAME_GUID;

	env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
		name_flags |= VDEV_NAME_GUID;

	/*
	 * A device that is not present (or an explicit GUID-name request)
	 * is displayed by its GUID.
	 */
	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
	    name_flags & VDEV_NAME_GUID) {
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
		(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
		path = buf;
	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
#if defined(__sun__) || defined(__sun)
		/*
		 * Live VDEV path updates to a kernel VDEV during a
		 * zpool_vdev_name lookup are not supported on Linux.
		 */
		char *devid;
		vdev_stat_t *vs;
		uint_t vsc;

		/*
		 * If the device is dead (faulted, offline, etc) then don't
		 * bother opening it.  Otherwise we may be forcing the user to
		 * open a misbehaving device, which can have undesirable
		 * effects.
		 */
		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
		    (uint64_t **)&vs, &vsc) != 0 ||
		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
		    zhp != NULL &&
		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
			/*
			 * Determine if the current path is correct.
			 */
			char *newdevid = path_to_devid(path);

			if (newdevid == NULL ||
			    strcmp(devid, newdevid) != 0) {
				char *newpath;

				if ((newpath = devid_to_path(devid)) != NULL) {
					/*
					 * Update the path appropriately.
					 */
					set_path(zhp, nv, newpath);
					if (nvlist_add_string(nv,
					    ZPOOL_CONFIG_PATH, newpath) == 0)
						verify(nvlist_lookup_string(nv,
						    ZPOOL_CONFIG_PATH,
						    &path) == 0);
					free(newpath);
				}
			}

			if (newdevid)
				devid_str_free(newdevid);
		}
#endif /* sun */

		/* Resolve symlinks when requested; keep stored path on failure */
		if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
			char *rp = realpath(path, NULL);
			if (rp) {
				strlcpy(buf, rp, sizeof (buf));
				path = buf;
				free(rp);
			}
		}

		/*
		 * For a block device only use the name.
		 */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
		if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
		    !(name_flags & VDEV_NAME_PATH)) {
			path = strrchr(path, '/');
			path++;
		}

		/*
		 * Remove the partition from the path if this is a whole disk.
		 */
		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
		    == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
			/* zfs_strip_partition() returns an allocated copy */
			return (zfs_strip_partition(path));
		}
	} else {
		/* No path at all: fall back to the vdev type string */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);

		/*
		 * If it's a raidz device, we need to stick in the parity level.
		 */
		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
			    &value) == 0);
			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
			    (u_longlong_t)value);
			path = buf;
		}

		/*
		 * We identify each top-level vdev by using a <type-id>
		 * naming convention.
		 */
		if (name_flags & VDEV_NAME_TYPE_ID) {
			uint64_t id;
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
			    path, (u_longlong_t)id);
			path = tmpbuf;
		}
	}

	/* Always hand back an allocated copy owned by the caller */
	return (zfs_strdup(hdl, path));
}
3622
3623static int
fcff0f35 3624zbookmark_mem_compare(const void *a, const void *b)
34dc7c2f 3625{
5dbd68a3 3626 return (memcmp(a, b, sizeof (zbookmark_phys_t)));
34dc7c2f
BB
3627}
3628
3629/*
3630 * Retrieve the persistent error log, uniquify the members, and return to the
3631 * caller.
3632 */
int
zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
{
	zfs_cmd_t zc = {"\0"};
	uint64_t count;
	zbookmark_phys_t *zb = NULL;
	int i;

	/*
	 * Retrieve the raw error list from the kernel. If the number of errors
	 * has increased, allocate more space and continue until we get the
	 * entire list.
	 */
	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
	    &count) == 0);
	if (count == 0)
		return (0);
	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
	    count * sizeof (zbookmark_phys_t))) == (uintptr_t)NULL)
		return (-1);
	zc.zc_nvlist_dst_size = count;
	(void) strcpy(zc.zc_name, zhp->zpool_name);
	for (;;) {
		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
		    &zc) != 0) {
			/* old buffer is unusable either way */
			free((void *)(uintptr_t)zc.zc_nvlist_dst);
			if (errno == ENOMEM) {
				/* more errors appeared; grow and retry */
				void *dst;

				count = zc.zc_nvlist_dst_size;
				dst = zfs_alloc(zhp->zpool_hdl, count *
				    sizeof (zbookmark_phys_t));
				if (dst == NULL)
					return (-1);
				zc.zc_nvlist_dst = (uintptr_t)dst;
			} else {
				return (-1);
			}
		} else {
			break;
		}
	}

	/*
	 * Sort the resulting bookmarks. This is a little confusing due to the
	 * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last
	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
	 * _not_ copied as part of the process. So we point the start of our
	 * array appropriately and decrement the total number of elements.
	 */
	zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
	    zc.zc_nvlist_dst_size;
	count -= zc.zc_nvlist_dst_size;

	qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);

	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);

	/*
	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
	 */
	for (i = 0; i < count; i++) {
		nvlist_t *nv;

		/* ignoring zb_blkid and zb_level for now */
		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
		    zb[i-1].zb_object == zb[i].zb_object)
			continue;

		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
			goto nomem;
		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
		    zb[i].zb_objset) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
		    zb[i].zb_object) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		nvlist_free(nv);
	}

	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (0);

nomem:
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (no_memory(zhp->zpool_hdl));
}
3728
3729/*
3730 * Upgrade a ZFS pool to the latest on-disk version.
3731 */
3732int
3733zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3734{
13fe0198 3735 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3736 libzfs_handle_t *hdl = zhp->zpool_hdl;
3737
3738 (void) strcpy(zc.zc_name, zhp->zpool_name);
3739 zc.zc_cookie = new_version;
3740
3741 if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3742 return (zpool_standard_error_fmt(hdl, errno,
3743 dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3744 zhp->zpool_name));
3745 return (0);
3746}
3747
/*
 * Flatten argv[] into 'string' (at most 'len' bytes, always NUL-terminated):
 * the basename of argv[0] followed by each remaining argument separated by
 * a single space.
 */
void
zfs_save_arguments(int argc, char **argv, char *string, int len)
{
	int idx;

	(void) strlcpy(string, basename(argv[0]), len);
	for (idx = 1; idx < argc; idx++) {
		(void) strlcat(string, " ", len);
		(void) strlcat(string, argv[idx], len);
	}
}
3759
34dc7c2f 3760int
6f1ffb06
MA
3761zpool_log_history(libzfs_handle_t *hdl, const char *message)
3762{
13fe0198 3763 zfs_cmd_t zc = {"\0"};
6f1ffb06
MA
3764 nvlist_t *args;
3765 int err;
3766
3767 args = fnvlist_alloc();
3768 fnvlist_add_string(args, "message", message);
3769 err = zcmd_write_src_nvlist(hdl, &zc, args);
3770 if (err == 0)
3771 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
3772 nvlist_free(args);
3773 zcmd_free_nvlists(&zc);
3774 return (err);
34dc7c2f
BB
3775}
3776
3777/*
3778 * Perform ioctl to get some command history of a pool.
3779 *
3780 * 'buf' is the buffer to fill up to 'len' bytes. 'off' is the
3781 * logical offset of the history buffer to start reading from.
3782 *
3783 * Upon return, 'off' is the next logical offset to read from and
3784 * 'len' is the actual amount of bytes read into 'buf'.
3785 */
3786static int
3787get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3788{
13fe0198 3789 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3790 libzfs_handle_t *hdl = zhp->zpool_hdl;
3791
3792 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3793
3794 zc.zc_history = (uint64_t)(uintptr_t)buf;
3795 zc.zc_history_len = *len;
3796 zc.zc_history_offset = *off;
3797
3798 if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3799 switch (errno) {
3800 case EPERM:
3801 return (zfs_error_fmt(hdl, EZFS_PERM,
3802 dgettext(TEXT_DOMAIN,
3803 "cannot show history for pool '%s'"),
3804 zhp->zpool_name));
3805 case ENOENT:
3806 return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3807 dgettext(TEXT_DOMAIN, "cannot get history for pool "
3808 "'%s'"), zhp->zpool_name));
3809 case ENOTSUP:
3810 return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3811 dgettext(TEXT_DOMAIN, "cannot get history for pool "
3812 "'%s', pool must be upgraded"), zhp->zpool_name));
3813 default:
3814 return (zpool_standard_error_fmt(hdl, errno,
3815 dgettext(TEXT_DOMAIN,
3816 "cannot get history for '%s'"), zhp->zpool_name));
3817 }
3818 }
3819
3820 *len = zc.zc_history_len;
3821 *off = zc.zc_history_offset;
3822
3823 return (0);
3824}
3825
3826/*
3827 * Process the buffer of nvlists, unpacking and storing each nvlist record
3828 * into 'records'. 'leftover' is set to the number of bytes that weren't
3829 * processed as there wasn't a complete record.
3830 */
428870ff 3831int
34dc7c2f
BB
3832zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3833 nvlist_t ***records, uint_t *numrecords)
3834{
3835 uint64_t reclen;
3836 nvlist_t *nv;
3837 int i;
3838
3839 while (bytes_read > sizeof (reclen)) {
3840
3841 /* get length of packed record (stored as little endian) */
3842 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3843 reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3844
3845 if (bytes_read < sizeof (reclen) + reclen)
3846 break;
3847
3848 /* unpack record */
3849 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3850 return (ENOMEM);
3851 bytes_read -= sizeof (reclen) + reclen;
3852 buf += sizeof (reclen) + reclen;
3853
3854 /* add record to nvlist array */
3855 (*numrecords)++;
3856 if (ISP2(*numrecords + 1)) {
3857 *records = realloc(*records,
3858 *numrecords * 2 * sizeof (nvlist_t *));
3859 }
3860 (*records)[*numrecords - 1] = nv;
3861 }
3862
3863 *leftover = bytes_read;
3864 return (0);
3865}
3866
34dc7c2f
BB
3867/*
3868 * Retrieve the command history of a pool.
3869 */
3870int
3871zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3872{
1f6f97f3
MA
3873 char *buf;
3874 int buflen = 128 * 1024;
34dc7c2f
BB
3875 uint64_t off = 0;
3876 nvlist_t **records = NULL;
3877 uint_t numrecords = 0;
3878 int err, i;
3879
1f6f97f3
MA
3880 buf = malloc(buflen);
3881 if (buf == NULL)
3882 return (ENOMEM);
34dc7c2f 3883 do {
1f6f97f3 3884 uint64_t bytes_read = buflen;
34dc7c2f
BB
3885 uint64_t leftover;
3886
3887 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3888 break;
3889
3890 /* if nothing else was read in, we're at EOF, just return */
3891 if (!bytes_read)
3892 break;
3893
3894 if ((err = zpool_history_unpack(buf, bytes_read,
3895 &leftover, &records, &numrecords)) != 0)
3896 break;
3897 off -= leftover;
1f6f97f3
MA
3898 if (leftover == bytes_read) {
3899 /*
3900 * no progress made, because buffer is not big enough
3901 * to hold this record; resize and retry.
3902 */
3903 buflen *= 2;
3904 free(buf);
3905 buf = malloc(buflen);
3906 if (buf == NULL)
3907 return (ENOMEM);
3908 }
34dc7c2f
BB
3909
3910 /* CONSTCOND */
3911 } while (1);
3912
1f6f97f3
MA
3913 free(buf);
3914
34dc7c2f
BB
3915 if (!err) {
3916 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3917 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3918 records, numrecords) == 0);
3919 }
3920 for (i = 0; i < numrecords; i++)
3921 nvlist_free(records[i]);
3922 free(records);
3923
3924 return (err);
3925}
3926
26685276 3927/*
9b101a73
BB
3928 * Retrieve the next event given the passed 'zevent_fd' file descriptor.
3929 * If there is a new event available 'nvp' will contain a newly allocated
3930 * nvlist and 'dropped' will be set to the number of missed events since
3931 * the last call to this function. When 'nvp' is set to NULL it indicates
3932 * no new events are available. In either case the function returns 0 and
3933 * it is up to the caller to free 'nvp'. In the case of a fatal error the
3934 * function will return a non-zero value. When the function is called in
8c7aa0cf
CD
3935 * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
3936 * it will not return until a new event is available.
26685276
BB
3937 */
int
zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
    int *dropped, unsigned flags, int zevent_fd)
{
	zfs_cmd_t zc = {"\0"};
	int error = 0;

	/* out-params default to "no event" until one is successfully read */
	*nvp = NULL;
	*dropped = 0;
	zc.zc_cleanup_fd = zevent_fd;

	if (flags & ZEVENT_NONBLOCK)
		zc.zc_guid = ZEVENT_NONBLOCK;

	if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
		return (-1);

retry:
	if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
		switch (errno) {
		case ESHUTDOWN:
			error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
			    dgettext(TEXT_DOMAIN, "zfs shutdown"));
			goto out;
		case ENOENT:
			/* Blocking error case should not occur */
			if (!(flags & ZEVENT_NONBLOCK))
				error = zpool_standard_error_fmt(hdl, errno,
				    dgettext(TEXT_DOMAIN, "cannot get event"));

			/* non-blocking: no event available, return 0/NULL */
			goto out;
		case ENOMEM:
			/* destination buffer too small: grow it and retry */
			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
				error = zfs_error_fmt(hdl, EZFS_NOMEM,
				    dgettext(TEXT_DOMAIN, "cannot get event"));
				goto out;
			} else {
				goto retry;
			}
		default:
			error = zpool_standard_error_fmt(hdl, errno,
			    dgettext(TEXT_DOMAIN, "cannot get event"));
			goto out;
		}
	}

	error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
	if (error != 0)
		goto out;

	/* zc_cookie carries the count of events missed since the last call */
	*dropped = (int)zc.zc_cookie;
out:
	zcmd_free_nvlists(&zc);

	return (error);
}
3994
3995/*
3996 * Clear all events.
3997 */
3998int
3999zpool_events_clear(libzfs_handle_t *hdl, int *count)
4000{
13fe0198 4001 zfs_cmd_t zc = {"\0"};
26685276
BB
4002 char msg[1024];
4003
4004 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
4005 "cannot clear events"));
4006
4007 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
4008 return (zpool_standard_error_fmt(hdl, errno, msg));
4009
4010 if (count != NULL)
4011 *count = (int)zc.zc_cookie; /* # of events cleared */
4012
4013 return (0);
4014}
4015
75e3ff58
BB
4016/*
4017 * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
4018 * the passed zevent_fd file handle. On success zero is returned,
4019 * otherwise -1 is returned and hdl->libzfs_error is set to the errno.
4020 */
4021int
4022zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
4023{
4024 zfs_cmd_t zc = {"\0"};
4025 int error = 0;
4026
4027 zc.zc_guid = eid;
4028 zc.zc_cleanup_fd = zevent_fd;
4029
4030 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) {
4031 switch (errno) {
4032 case ENOENT:
4033 error = zfs_error_fmt(hdl, EZFS_NOENT,
4034 dgettext(TEXT_DOMAIN, "cannot get event"));
4035 break;
4036
4037 case ENOMEM:
4038 error = zfs_error_fmt(hdl, EZFS_NOMEM,
4039 dgettext(TEXT_DOMAIN, "cannot get event"));
4040 break;
4041
4042 default:
4043 error = zpool_standard_error_fmt(hdl, errno,
4044 dgettext(TEXT_DOMAIN, "cannot get event"));
4045 break;
4046 }
4047 }
4048
4049 return (error);
4050}
4051
34dc7c2f
BB
/*
 * Translate a <dataset-object, object> pair from the persistent error log
 * into a human-readable path written to 'pathname' (at most 'len' bytes).
 * Falls back to "<0xdsobj>:<0xobj>" style strings when the names cannot
 * be resolved.  Never fails; always writes something to 'pathname'.
 */
void
zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
    char *pathname, size_t len)
{
	zfs_cmd_t zc = {"\0"};
	boolean_t mounted = B_FALSE;
	char *mntpnt = NULL;
	char dsname[ZFS_MAX_DATASET_NAME_LEN];

	if (dsobj == 0) {
		/* special case for the MOS */
		(void) snprintf(pathname, len, "<metadata>:<0x%llx>",
		    (longlong_t)obj);
		return;
	}

	/* get the dataset's name */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_obj = dsobj;
	if (ioctl(zhp->zpool_hdl->libzfs_fd,
	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
		/* just write out a path of two object numbers */
		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
		    (longlong_t)dsobj, (longlong_t)obj);
		return;
	}
	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));

	/* find out if the dataset is mounted */
	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);

	/* get the corrupted object's path */
	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
	zc.zc_obj = obj;
	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
	    &zc) == 0) {
		if (mounted) {
			/* absolute path: mountpoint + in-dataset path */
			(void) snprintf(pathname, len, "%s%s", mntpnt,
			    zc.zc_value);
		} else {
			/* not mounted: qualify with the dataset name */
			(void) snprintf(pathname, len, "%s:%s",
			    dsname, zc.zc_value);
		}
	} else {
		/* object path unknown: fall back to "<dataset>:<0xobj>" */
		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
		    (longlong_t)obj);
	}
	/* is_mounted() allocates mntpnt; free(NULL) is harmless otherwise */
	free(mntpnt);
}
4101
b128c09f
BB
4102/*
4103 * Read the EFI label from the config, if a label does not exist then
4104 * pass back the error to the caller. If the caller has passed a non-NULL
4105 * diskaddr argument then we set it to the starting address of the EFI
4106 * partition.
4107 */
4108static int
4109read_efi_label(nvlist_t *config, diskaddr_t *sb)
4110{
4111 char *path;
4112 int fd;
4113 char diskname[MAXPATHLEN];
4114 int err = -1;
4115
4116 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4117 return (err);
4118
eac47204 4119 (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
b128c09f 4120 strrchr(path, '/'));
d603ed6c 4121 if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
b128c09f
BB
4122 struct dk_gpt *vtoc;
4123
4124 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4125 if (sb != NULL)
4126 *sb = vtoc->efi_parts[0].p_start;
4127 efi_free(vtoc);
4128 }
4129 (void) close(fd);
4130 }
4131 return (err);
4132}
4133
34dc7c2f
BB
4134/*
4135 * determine where a partition starts on a disk in the current
4136 * configuration
4137 */
4138static diskaddr_t
4139find_start_block(nvlist_t *config)
4140{
4141 nvlist_t **child;
4142 uint_t c, children;
34dc7c2f 4143 diskaddr_t sb = MAXOFFSET_T;
34dc7c2f
BB
4144 uint64_t wholedisk;
4145
4146 if (nvlist_lookup_nvlist_array(config,
4147 ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4148 if (nvlist_lookup_uint64(config,
4149 ZPOOL_CONFIG_WHOLE_DISK,
4150 &wholedisk) != 0 || !wholedisk) {
4151 return (MAXOFFSET_T);
4152 }
b128c09f
BB
4153 if (read_efi_label(config, &sb) < 0)
4154 sb = MAXOFFSET_T;
34dc7c2f
BB
4155 return (sb);
4156 }
4157
4158 for (c = 0; c < children; c++) {
4159 sb = find_start_block(child[c]);
4160 if (sb != MAXOFFSET_T) {
4161 return (sb);
4162 }
4163 }
4164 return (MAXOFFSET_T);
4165}
4166
2d82ea8b 4167static int
d603ed6c
BB
4168zpool_label_disk_check(char *path)
4169{
4170 struct dk_gpt *vtoc;
4171 int fd, err;
4172
4173 if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
d1d7e268 4174 return (errno);
d603ed6c
BB
4175
4176 if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4177 (void) close(fd);
d1d7e268 4178 return (err);
d603ed6c
BB
4179 }
4180
4181 if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4182 efi_free(vtoc);
4183 (void) close(fd);
d1d7e268 4184 return (EIDRM);
d603ed6c
BB
4185 }
4186
4187 efi_free(vtoc);
4188 (void) close(fd);
d1d7e268 4189 return (0);
d603ed6c
BB
4190}
4191
5b4136bd
BB
4192/*
4193 * Generate a unique partition name for the ZFS member. Partitions must
4194 * have unique names to ensure udev will be able to create symlinks under
4195 * /dev/disk/by-partlabel/ for all pool members. The partition names are
4196 * of the form <pool>-<unique-id>.
4197 */
4198static void
4199zpool_label_name(char *label_name, int label_size)
4200{
4201 uint64_t id = 0;
4202 int fd;
4203
4204 fd = open("/dev/urandom", O_RDONLY);
06cf4d98 4205 if (fd >= 0) {
5b4136bd
BB
4206 if (read(fd, &id, sizeof (id)) != sizeof (id))
4207 id = 0;
4208
4209 close(fd);
4210 }
4211
4212 if (id == 0)
4213 id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
4214
02730c33 4215 snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
5b4136bd
BB
4216}
4217
34dc7c2f
BB
4218/*
4219 * Label an individual disk. The name provided is the short name,
4220 * stripped of any leading /dev path.
4221 */
4222int
4223zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4224{
4225 char path[MAXPATHLEN];
4226 struct dk_gpt *vtoc;
d603ed6c 4227 int rval, fd;
34dc7c2f
BB
4228 size_t resv = EFI_MIN_RESV_SIZE;
4229 uint64_t slice_size;
4230 diskaddr_t start_block;
4231 char errbuf[1024];
4232
4233 /* prepare an error message just in case */
4234 (void) snprintf(errbuf, sizeof (errbuf),
4235 dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4236
4237 if (zhp) {
4238 nvlist_t *nvroot;
4239
c372b36e 4240#if defined(__sun__) || defined(__sun)
1bd201e7 4241 if (zpool_is_bootable(zhp)) {
b128c09f
BB
4242 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4243 "EFI labeled devices are not supported on root "
4244 "pools."));
4245 return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
4246 }
c372b36e 4247#endif
b128c09f 4248
34dc7c2f
BB
4249 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4250 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4251
4252 if (zhp->zpool_start_block == 0)
4253 start_block = find_start_block(nvroot);
4254 else
4255 start_block = zhp->zpool_start_block;
4256 zhp->zpool_start_block = start_block;
4257 } else {
4258 /* new pool */
4259 start_block = NEW_START_BLOCK;
4260 }
4261
eac47204 4262 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
34dc7c2f 4263
d02ca379 4264 if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
34dc7c2f
BB
4265 /*
4266 * This shouldn't happen. We've long since verified that this
4267 * is a valid device.
4268 */
109491a8
RL
4269 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4270 "label '%s': unable to open device: %d"), path, errno);
34dc7c2f
BB
4271 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4272 }
4273
4274 if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4275 /*
4276 * The only way this can fail is if we run out of memory, or we
4277 * were unable to read the disk's capacity
4278 */
4279 if (errno == ENOMEM)
4280 (void) no_memory(hdl);
4281
4282 (void) close(fd);
109491a8
RL
4283 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4284 "label '%s': unable to read disk capacity"), path);
34dc7c2f
BB
4285
4286 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4287 }
4288
4289 slice_size = vtoc->efi_last_u_lba + 1;
4290 slice_size -= EFI_MIN_RESV_SIZE;
4291 if (start_block == MAXOFFSET_T)
4292 start_block = NEW_START_BLOCK;
4293 slice_size -= start_block;
613d88ed 4294 slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
34dc7c2f
BB
4295
4296 vtoc->efi_parts[0].p_start = start_block;
4297 vtoc->efi_parts[0].p_size = slice_size;
4298
4299 /*
4300 * Why we use V_USR: V_BACKUP confuses users, and is considered
4301 * disposable by some EFI utilities (since EFI doesn't have a backup
4302 * slice). V_UNASSIGNED is supposed to be used only for zero size
4303 * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT,
4304 * etc. were all pretty specific. V_USR is as close to reality as we
4305 * can get, in the absence of V_OTHER.
4306 */
4307 vtoc->efi_parts[0].p_tag = V_USR;
5b4136bd 4308 zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
34dc7c2f
BB
4309
4310 vtoc->efi_parts[8].p_start = slice_size + start_block;
4311 vtoc->efi_parts[8].p_size = resv;
4312 vtoc->efi_parts[8].p_tag = V_RESERVED;
4313
b5a28807 4314 if ((rval = efi_write(fd, vtoc)) != 0 || (rval = efi_rescan(fd)) != 0) {
34dc7c2f
BB
4315 /*
4316 * Some block drivers (like pcata) may not support EFI
4317 * GPT labels. Print out a helpful error message dir-
4318 * ecting the user to manually label the disk and give
4319 * a specific slice.
4320 */
4321 (void) close(fd);
4322 efi_free(vtoc);
4323
d603ed6c
BB
4324 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4325 "parted(8) and then provide a specific slice: %d"), rval);
34dc7c2f
BB
4326 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4327 }
4328
4329 (void) close(fd);
4330 efi_free(vtoc);
34dc7c2f 4331
eac47204
BB
4332 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4333 (void) zfs_append_partition(path, MAXPATHLEN);
4334
2d82ea8b
BB
4335 /* Wait to udev to signal use the device has settled. */
4336 rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
d603ed6c
BB
4337 if (rval) {
4338 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4339 "detect device partitions on '%s': %d"), path, rval);
4340 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
34dc7c2f
BB
4341 }
4342
d603ed6c
BB
4343 /* We can't be to paranoid. Read the label back and verify it. */
4344 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4345 rval = zpool_label_disk_check(path);
4346 if (rval) {
4347 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4348 "EFI label on '%s' is damaged. Ensure\nthis device "
4349 "is not in in use, and is functioning properly: %d"),
4350 path, rval);
4351 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
34dc7c2f 4352 }
34dc7c2f 4353
d1d7e268 4354 return (0);
34dc7c2f 4355}
6078881a 4356
6078881a
TH
/*
 * Allocate and return the underlying device name for a device mapper device.
 * If a device mapper device maps to multiple devices, return the first device.
 *
 * For example, dm_name = "/dev/dm-0" could return "/dev/sda".  Symlinks to a
 * DM device (like /dev/disk/by-vdev/A0) are also allowed.
 *
 * Returns device name, or NULL on error or no match.  If dm_name is not a DM
 * device then return NULL.
 *
 * NOTE: The returned name string must be *freed*.
 */
char *
dm_get_underlying_path(char *dm_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *realp;
	char *tmp = NULL;
	char *path = NULL;
	char *dev_str;
	int size;

	if (dm_name == NULL)
		return (NULL);

	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
	realp = realpath(dm_name, NULL);
	if (realp == NULL)
		return (NULL);

	/*
	 * If they preface 'dev' with a path (like "/dev") then strip it off.
	 * We just want the 'dm-N' part.
	 */
	tmp = strrchr(realp, '/');
	if (tmp != NULL)
		dev_str = tmp + 1;	/* +1 since we want the chr after '/' */
	else
		dev_str = realp;	/* no '/' at all: use the whole name */

	size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
	if (size == -1 || !tmp) {
		/* asprintf() leaves its output undefined on failure */
		tmp = NULL;
		goto end;
	}

	dp = opendir(tmp);
	if (dp == NULL)
		goto end;

	/* Return first sd* entry in /sys/block/dm-N/slaves/ */
	while ((ep = readdir(dp))) {
		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
			if (asprintf(&path, "/dev/%s", ep->d_name) == -1)
				path = NULL;	/* undefined on failure */
			break;
		}
	}

end:
	if (dp != NULL)
		closedir(dp);
	free(tmp);
	free(realp);
	return (path);
}
4421
/*
 * Return 1 if device is a device mapper or multipath device.
 * Return 0 if not.
 */
int
zfs_dev_is_dm(char *dev_name)
{
	char *underlying;

	/* A device is DM iff we can resolve an underlying device for it. */
	underlying = dm_get_underlying_path(dev_name);
	if (underlying == NULL)
		return (0);

	free(underlying);
	return (1);
}
4438
/*
 * Lookup the underlying device for a device name
 *
 * Often you'll have a symlink to a device, a partition device,
 * or a multipath device, and want to look up the underlying device.
 * This function returns the underlying device name.  If the device
 * name is already the underlying device, then just return the same
 * name.  If the device is a DM device with multiple underlying devices
 * then return the first one.
 *
 * For example:
 *
 * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
 * dev_name:	/dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
 * returns:	/dev/sda
 *
 * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
 * dev_name:	/dev/mapper/mpatha
 * returns:	/dev/sda (first device)
 *
 * 3. /dev/sda (already the underlying device)
 * dev_name:	/dev/sda
 * returns:	/dev/sda
 *
 * 4. /dev/dm-3 (mapped to /dev/sda)
 * dev_name:	/dev/dm-3
 * returns:	/dev/sda
 *
 * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
 * dev_name:	/dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
 * returns:	/dev/sdb
 *
 * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
 * dev_name:	/dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
 * returns:	/dev/sda
 *
 * Returns underlying device name, or NULL on error or no match.
 *
 * NOTE: The returned name string must be *freed*.
 */
char *
zfs_get_underlying_path(char *dev_name)
{
	char *resolved;
	char *stripped = NULL;

	if (dev_name == NULL)
		return (NULL);

	/* First try DM resolution; if not a DM device, just un-symlinkize */
	resolved = dm_get_underlying_path(dev_name);
	if (resolved == NULL)
		resolved = realpath(dev_name, NULL);

	/* Drop any trailing partition suffix to get the whole-disk name */
	if (resolved != NULL) {
		stripped = zfs_strip_partition_path(resolved);
		free(resolved);
	}

	return (stripped);
}
1bbd8770
TH
4501
/*
 * Given a dev name like "sda", return the full enclosure sysfs path to
 * the disk.  You can also pass in the name with "/dev" prepended
 * to it (like /dev/sda).
 *
 * For example, disk "sda" in enclosure slot 1:
 * dev:		"sda"
 * returns:	"/sys/class/enclosure/1:0:3:0/Slot 1"
 *
 * 'dev' must be a non-devicemapper device.
 *
 * Returned string must be freed.  Returns NULL on error or no match.
 */
char *
zfs_get_enclosure_sysfs_path(char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	char *tmp1 = NULL;
	char *tmp2 = NULL;
	char *tmp3 = NULL;
	char *path = NULL;
	ssize_t size;	/* readlink() returns ssize_t, not size_t */
	int tmpsize;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	tmp1 = strrchr(dev_name, '/');
	if (tmp1 != NULL)
		dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */

	tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
	if (tmpsize == -1 || tmp1 == NULL) {
		/* asprintf() leaves its output undefined on failure */
		tmp1 = NULL;
		goto end;
	}

	dp = opendir(tmp1);
	if (dp == NULL)
		goto end;	/* tmp1 is still freed at 'end' */

	/*
	 * Look though all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp))) {
		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1) {
			tmp2 = NULL;	/* undefined on asprintf() failure */
			break;
		}

		size = readlink(tmp2, buf, sizeof (buf));

		/* Did readlink fail or crop the link name? */
		if (size == -1 || (size_t)size >= sizeof (buf))
			break;

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[size] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		tmp3 = strstr(buf, "enclosure");
		if (tmp3 == NULL)
			break;

		if (asprintf(&path, "/sys/class/%s", tmp3) == -1)
			path = NULL;	/* undefined on asprintf() failure */

		break;
	}

end:
	free(tmp2);
	free(tmp1);

	if (dp != NULL)
		closedir(dp);

	return (path);
}