]> git.proxmox.com Git - mirror_zfs.git/blame - lib/libzfs/libzfs_pool.c
OpenZFS 5142 - libzfs support raidz root pool (loader project)
[mirror_zfs.git] / lib / libzfs / libzfs_pool.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
0fdd8d64 23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
428870ff 24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
a05dfd00 25 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
34dc7c2f
BB
26 */
27
34dc7c2f
BB
28#include <ctype.h>
29#include <errno.h>
30#include <devid.h>
34dc7c2f
BB
31#include <fcntl.h>
32#include <libintl.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <strings.h>
36#include <unistd.h>
6f1ffb06 37#include <libgen.h>
d603ed6c
BB
38#include <zone.h>
39#include <sys/stat.h>
34dc7c2f
BB
40#include <sys/efi_partition.h>
41#include <sys/vtoc.h>
42#include <sys/zfs_ioctl.h>
9babb374 43#include <dlfcn.h>
34dc7c2f
BB
44
45#include "zfs_namecheck.h"
46#include "zfs_prop.h"
47#include "libzfs_impl.h"
428870ff 48#include "zfs_comutil.h"
9ae529ec 49#include "zfeature_common.h"
34dc7c2f 50
b128c09f
BB
51static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
52
572e2857
BB
/*
 * Flags controlling which context-specific property checks
 * zpool_valid_proplist() applies.
 */
typedef struct prop_flags {
	int create:1;	/* Validate property on creation */
	int import:1;	/* Validate property on import */
} prop_flags_t;
57
34dc7c2f
BB
58/*
59 * ====================================================================
60 * zpool property functions
61 * ====================================================================
62 */
63
64static int
65zpool_get_all_props(zpool_handle_t *zhp)
66{
13fe0198 67 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
68 libzfs_handle_t *hdl = zhp->zpool_hdl;
69
70 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
71
72 if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
73 return (-1);
74
75 while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
76 if (errno == ENOMEM) {
77 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
78 zcmd_free_nvlists(&zc);
79 return (-1);
80 }
81 } else {
82 zcmd_free_nvlists(&zc);
83 return (-1);
84 }
85 }
86
87 if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
88 zcmd_free_nvlists(&zc);
89 return (-1);
90 }
91
92 zcmd_free_nvlists(&zc);
93
94 return (0);
95}
96
97static int
98zpool_props_refresh(zpool_handle_t *zhp)
99{
100 nvlist_t *old_props;
101
102 old_props = zhp->zpool_props;
103
104 if (zpool_get_all_props(zhp) != 0)
105 return (-1);
106
107 nvlist_free(old_props);
108 return (0);
109}
110
111static char *
112zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
113 zprop_source_t *src)
114{
115 nvlist_t *nv, *nvl;
116 uint64_t ival;
117 char *value;
118 zprop_source_t source;
119
120 nvl = zhp->zpool_props;
121 if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
122 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
123 source = ival;
124 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
125 } else {
126 source = ZPROP_SRC_DEFAULT;
127 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
128 value = "-";
129 }
130
131 if (src)
132 *src = source;
133
134 return (value);
135}
136
137uint64_t
138zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
139{
140 nvlist_t *nv, *nvl;
141 uint64_t value;
142 zprop_source_t source;
143
b128c09f
BB
144 if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
145 /*
146 * zpool_get_all_props() has most likely failed because
147 * the pool is faulted, but if all we need is the top level
148 * vdev's guid then get it from the zhp config nvlist.
149 */
150 if ((prop == ZPOOL_PROP_GUID) &&
151 (nvlist_lookup_nvlist(zhp->zpool_config,
152 ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
153 (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
154 == 0)) {
155 return (value);
156 }
34dc7c2f 157 return (zpool_prop_default_numeric(prop));
b128c09f 158 }
34dc7c2f
BB
159
160 nvl = zhp->zpool_props;
161 if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
162 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
163 source = value;
164 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
165 } else {
166 source = ZPROP_SRC_DEFAULT;
167 value = zpool_prop_default_numeric(prop);
168 }
169
170 if (src)
171 *src = source;
172
173 return (value);
174}
175
176/*
177 * Map VDEV STATE to printed strings.
178 */
179char *
180zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
181{
182 switch (state) {
e75c13c3
BB
183 default:
184 break;
34dc7c2f
BB
185 case VDEV_STATE_CLOSED:
186 case VDEV_STATE_OFFLINE:
187 return (gettext("OFFLINE"));
188 case VDEV_STATE_REMOVED:
189 return (gettext("REMOVED"));
190 case VDEV_STATE_CANT_OPEN:
b128c09f 191 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
34dc7c2f 192 return (gettext("FAULTED"));
428870ff
BB
193 else if (aux == VDEV_AUX_SPLIT_POOL)
194 return (gettext("SPLIT"));
34dc7c2f
BB
195 else
196 return (gettext("UNAVAIL"));
197 case VDEV_STATE_FAULTED:
198 return (gettext("FAULTED"));
199 case VDEV_STATE_DEGRADED:
200 return (gettext("DEGRADED"));
201 case VDEV_STATE_HEALTHY:
202 return (gettext("ONLINE"));
203 }
204
205 return (gettext("UNKNOWN"));
206}
207
131cc95c
DK
208/*
209 * Map POOL STATE to printed strings.
210 */
211const char *
212zpool_pool_state_to_name(pool_state_t state)
213{
214 switch (state) {
215 default:
216 break;
217 case POOL_STATE_ACTIVE:
218 return (gettext("ACTIVE"));
219 case POOL_STATE_EXPORTED:
220 return (gettext("EXPORTED"));
221 case POOL_STATE_DESTROYED:
222 return (gettext("DESTROYED"));
223 case POOL_STATE_SPARE:
224 return (gettext("SPARE"));
225 case POOL_STATE_L2CACHE:
226 return (gettext("L2CACHE"));
227 case POOL_STATE_UNINITIALIZED:
228 return (gettext("UNINITIALIZED"));
229 case POOL_STATE_UNAVAIL:
230 return (gettext("UNAVAIL"));
231 case POOL_STATE_POTENTIALLY_ACTIVE:
232 return (gettext("POTENTIALLY_ACTIVE"));
233 }
234
235 return (gettext("UNKNOWN"));
236}
237
8b921f66
RE
238/*
239 * Get a zpool property value for 'prop' and return the value in
240 * a pre-allocated buffer.
241 */
/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 *
 * 'literal' selects raw numeric output ("%llu") instead of the
 * human-readable rendering (zfs_nicenum(), percentages, "1.23x" ratios).
 * If 'srctype' is non-NULL it receives the property's source.
 * Returns 0 on success, -1 on failure.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
    size_t len, zprop_source_t *srctype, boolean_t literal)
{
	uint64_t intval;
	const char *strval;
	zprop_source_t src = ZPROP_SRC_NONE;
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	uint_t vsc;

	/*
	 * For an unavailable pool only a handful of properties can be
	 * answered (from the handle itself or the cached config); the
	 * rest report "-".
	 */
	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
		switch (prop) {
		case ZPOOL_PROP_NAME:
			(void) strlcpy(buf, zpool_get_name(zhp), len);
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, "FAULTED", len);
			break;

		case ZPOOL_PROP_GUID:
			intval = zpool_get_prop_int(zhp, prop, &src);
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_ALTROOT:
		case ZPOOL_PROP_CACHEFILE:
		case ZPOOL_PROP_COMMENT:
			/* these may still be retrievable from the cache */
			if (zhp->zpool_props != NULL ||
			    zpool_get_all_props(zhp) == 0) {
				(void) strlcpy(buf,
				    zpool_get_prop_string(zhp, prop, &src),
				    len);
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) strlcpy(buf, "-", len);
			break;
		}

		if (srctype != NULL)
			*srctype = src;
		return (0);
	}

	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
	    prop != ZPOOL_PROP_NAME)
		return (-1);

	switch (zpool_prop_get_type(prop)) {
	case PROP_TYPE_STRING:
		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
		    len);
		break;

	case PROP_TYPE_NUMBER:
		intval = zpool_get_prop_int(zhp, prop, &src);

		switch (prop) {
		case ZPOOL_PROP_SIZE:
		case ZPOOL_PROP_ALLOCATED:
		case ZPOOL_PROP_FREE:
		case ZPOOL_PROP_FREEING:
		case ZPOOL_PROP_LEAKED:
		case ZPOOL_PROP_ASHIFT:
			/* byte counts: raw or humanized */
			if (literal)
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			else
				(void) zfs_nicenum(intval, buf, len);
			break;

		case ZPOOL_PROP_EXPANDSZ:
			/* zero means "no expandable space": print "-" */
			if (intval == 0) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) zfs_nicenum(intval, buf, len);
			}
			break;

		case ZPOOL_PROP_CAPACITY:
			if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_FRAGMENTATION:
			/* UINT64_MAX is the "not computed" sentinel */
			if (intval == UINT64_MAX) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_DEDUPRATIO:
			/* stored as a fixed-point ratio scaled by 100 */
			if (literal)
				(void) snprintf(buf, len, "%llu.%02llu",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			else
				(void) snprintf(buf, len, "%llu.%02llux",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			break;

		case ZPOOL_PROP_HEALTH:
			/* derive health from the root vdev's stats */
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
			verify(nvlist_lookup_uint64_array(nvroot,
			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
			    == 0);

			(void) strlcpy(buf, zpool_state_to_name(intval,
			    vs->vs_aux), len);
			break;
		case ZPOOL_PROP_VERSION:
			/* feature-flag pools have no meaningful version */
			if (intval >= SPA_VERSION_FEATURES) {
				(void) snprintf(buf, len, "-");
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
		}
		break;

	case PROP_TYPE_INDEX:
		intval = zpool_get_prop_int(zhp, prop, &src);
		if (zpool_prop_index_to_string(prop, intval, &strval)
		    != 0)
			return (-1);
		(void) strlcpy(buf, strval, len);
		break;

	default:
		abort();
	}

	if (srctype)
		*srctype = src;

	return (0);
}
398
399/*
400 * Check if the bootfs name has the same pool name as it is set to.
401 * Assuming bootfs is a valid dataset name.
402 */
403static boolean_t
404bootfs_name_valid(const char *pool, char *bootfs)
405{
406 int len = strlen(pool);
407
b128c09f 408 if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
34dc7c2f
BB
409 return (B_FALSE);
410
411 if (strncmp(pool, bootfs, len) == 0 &&
412 (bootfs[len] == '/' || bootfs[len] == '\0'))
413 return (B_TRUE);
414
415 return (B_FALSE);
416}
417
c372b36e 418#if defined(__sun__) || defined(__sun)
b128c09f
BB
419/*
420 * Inspect the configuration to determine if any of the devices contain
421 * an EFI label.
422 */
423static boolean_t
424pool_uses_efi(nvlist_t *config)
425{
426 nvlist_t **child;
427 uint_t c, children;
428
429 if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
430 &child, &children) != 0)
431 return (read_efi_label(config, NULL) >= 0);
432
433 for (c = 0; c < children; c++) {
434 if (pool_uses_efi(child[c]))
435 return (B_TRUE);
436 }
437 return (B_FALSE);
438}
c372b36e 439#endif
b128c09f 440
1bd201e7
CS
441boolean_t
442zpool_is_bootable(zpool_handle_t *zhp)
b128c09f 443{
eca7b760 444 char bootfs[ZFS_MAX_DATASET_NAME_LEN];
b128c09f
BB
445
446 return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
2a8b84b7 447 sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
b128c09f
BB
448 sizeof (bootfs)) != 0);
449}
450
451
34dc7c2f
BB
452/*
453 * Given an nvlist of zpool properties to be set, validate that they are
454 * correct, and parse any numeric properties (index, boolean, etc) if they are
455 * specified as strings.
456 */
/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 *
 * 'flags' indicates whether this is a create- or import-time operation,
 * which gates several per-property checks.  Returns a newly allocated
 * nvlist of validated properties, or NULL on error (with the error
 * reported through 'hdl' using 'errbuf' as the prefix).  The caller owns
 * the returned nvlist.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
	nvpair_t *elem;
	nvlist_t *retprops;
	zpool_prop_t prop;
	char *strval;
	uint64_t intval;
	char *slash, *check;
	struct stat64 statbuf;
	zpool_handle_t *zhp;
	nvlist_t *nvroot;

	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
		(void) no_memory(hdl);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		const char *propname = nvpair_name(elem);

		prop = zpool_name_to_prop(propname);
		if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
			/*
			 * "feature@<name>" properties: the feature must
			 * exist and the value must be enabled/disabled.
			 */
			int err;
			char *fname = strchr(propname, '@') + 1;

			err = zfeature_lookup_name(fname, NULL);
			if (err != 0) {
				ASSERT3U(err, ==, ENOENT);
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid feature '%s'"), fname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (nvpair_type(elem) != DATA_TYPE_STRING) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be a string"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			(void) nvpair_value_string(elem, &strval);
			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
			    strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set to "
				    "'enabled' or 'disabled'"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
				(void) no_memory(hdl);
				goto error;
			}
			continue;
		}

		/*
		 * Make sure this property is valid and applies to this type.
		 */
		if (prop == ZPROP_INVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid property '%s'"), propname);
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
			goto error;
		}

		if (zpool_prop_readonly(prop)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
			    "is readonly"), propname);
			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
			goto error;
		}

		/* parse the value and add it to retprops */
		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
		    &strval, &intval, errbuf) != 0)
			goto error;

		/*
		 * Perform additional checking for specific properties.
		 */
		switch (prop) {
		default:
			break;
		case ZPOOL_PROP_VERSION:
			/* version may only move forward, and be supported */
			if (intval < version ||
			    !SPA_VERSION_IS_SUPPORTED(intval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %d is invalid."),
				    propname, intval);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_ASHIFT:
			if (!flags.create) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/* 0 means "auto-detect"; else 2^9..2^13 sectors */
			if (intval != 0 && (intval < 9 || intval > 13)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %d is invalid."),
				    propname, intval);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_BOOTFS:
			if (flags.create || flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' cannot be set at creation "
				    "or import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (version < SPA_VERSION_BOOTFS) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool must be upgraded to support "
				    "'%s' property"), propname);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}

			/*
			 * bootfs property value has to be a dataset name and
			 * the dataset has to be in the same pool as it sets to.
			 */
			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
			    strval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
				    "is an invalid name"), strval);
				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
				goto error;
			}

			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "could not open pool '%s'"), poolname);
				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
				goto error;
			}
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

#if defined(__sun__) || defined(__sun)
			/*
			 * bootfs property cannot be set on a disk which has
			 * been EFI labeled.
			 */
			if (pool_uses_efi(nvroot)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' not supported on "
				    "EFI labeled devices"), propname);
				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
				zpool_close(zhp);
				goto error;
			}
#endif
			zpool_close(zhp);
			break;

		case ZPOOL_PROP_ALTROOT:
			if (!flags.create && !flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set during pool "
				    "creation or import"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "bad alternate root '%s'"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			/* empty string and "none" are both accepted */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' must be empty, an "
				    "absolute path, or 'none'"), propname);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			slash = strrchr(strval, '/');

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid file"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			/*
			 * Temporarily truncate at the final slash so the
			 * parent directory can be stat()ed; restored below.
			 */
			*slash = '\0';

			if (strval[0] != '\0' &&
			    (stat64(strval, &statbuf) != 0 ||
			    !S_ISDIR(statbuf.st_mode))) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid directory"),
				    strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			*slash = '/';
			break;

		case ZPOOL_PROP_COMMENT:
			for (check = strval; *check != '\0'; check++) {
				if (!isprint(*check)) {
					zfs_error_aux(hdl,
					    dgettext(TEXT_DOMAIN,
					    "comment may only have printable "
					    "characters"));
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}
			}
			if (strlen(strval) > ZPROP_MAX_COMMENT) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "comment must not exceed %d characters"),
				    ZPROP_MAX_COMMENT);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_READONLY:
			if (!flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_TNAME:
			if (!flags.create) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		}
	}

	return (retprops);
error:
	nvlist_free(retprops);
	return (NULL);
}
732
733/*
734 * Set zpool property : propname=propval.
735 */
736int
737zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
738{
13fe0198 739 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
740 int ret = -1;
741 char errbuf[1024];
742 nvlist_t *nvl = NULL;
743 nvlist_t *realprops;
744 uint64_t version;
572e2857 745 prop_flags_t flags = { 0 };
34dc7c2f
BB
746
747 (void) snprintf(errbuf, sizeof (errbuf),
748 dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
749 zhp->zpool_name);
750
34dc7c2f
BB
751 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
752 return (no_memory(zhp->zpool_hdl));
753
754 if (nvlist_add_string(nvl, propname, propval) != 0) {
755 nvlist_free(nvl);
756 return (no_memory(zhp->zpool_hdl));
757 }
758
759 version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
b128c09f 760 if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
572e2857 761 zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
34dc7c2f
BB
762 nvlist_free(nvl);
763 return (-1);
764 }
765
766 nvlist_free(nvl);
767 nvl = realprops;
768
769 /*
770 * Execute the corresponding ioctl() to set this property.
771 */
772 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
773
774 if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
775 nvlist_free(nvl);
776 return (-1);
777 }
778
779 ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
780
781 zcmd_free_nvlists(&zc);
782 nvlist_free(nvl);
783
784 if (ret)
785 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
786 else
787 (void) zpool_props_refresh(zhp);
788
789 return (ret);
790}
791
792int
793zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
794{
795 libzfs_handle_t *hdl = zhp->zpool_hdl;
796 zprop_list_t *entry;
797 char buf[ZFS_MAXPROPLEN];
9ae529ec
CS
798 nvlist_t *features = NULL;
799 nvpair_t *nvp;
800 zprop_list_t **last;
801 boolean_t firstexpand = (NULL == *plp);
802 int i;
34dc7c2f
BB
803
804 if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
805 return (-1);
806
9ae529ec
CS
807 last = plp;
808 while (*last != NULL)
809 last = &(*last)->pl_next;
810
811 if ((*plp)->pl_all)
812 features = zpool_get_features(zhp);
813
814 if ((*plp)->pl_all && firstexpand) {
815 for (i = 0; i < SPA_FEATURES; i++) {
816 zprop_list_t *entry = zfs_alloc(hdl,
817 sizeof (zprop_list_t));
818 entry->pl_prop = ZPROP_INVAL;
819 entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
820 spa_feature_table[i].fi_uname);
821 entry->pl_width = strlen(entry->pl_user_prop);
822 entry->pl_all = B_TRUE;
823
824 *last = entry;
825 last = &entry->pl_next;
826 }
827 }
828
829 /* add any unsupported features */
830 for (nvp = nvlist_next_nvpair(features, NULL);
831 nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
832 char *propname;
833 boolean_t found;
834 zprop_list_t *entry;
835
836 if (zfeature_is_supported(nvpair_name(nvp)))
837 continue;
838
839 propname = zfs_asprintf(hdl, "unsupported@%s",
840 nvpair_name(nvp));
841
842 /*
843 * Before adding the property to the list make sure that no
844 * other pool already added the same property.
845 */
846 found = B_FALSE;
847 entry = *plp;
848 while (entry != NULL) {
849 if (entry->pl_user_prop != NULL &&
850 strcmp(propname, entry->pl_user_prop) == 0) {
851 found = B_TRUE;
852 break;
853 }
854 entry = entry->pl_next;
855 }
856 if (found) {
857 free(propname);
858 continue;
859 }
860
861 entry = zfs_alloc(hdl, sizeof (zprop_list_t));
862 entry->pl_prop = ZPROP_INVAL;
863 entry->pl_user_prop = propname;
864 entry->pl_width = strlen(entry->pl_user_prop);
865 entry->pl_all = B_TRUE;
866
867 *last = entry;
868 last = &entry->pl_next;
869 }
870
34dc7c2f
BB
871 for (entry = *plp; entry != NULL; entry = entry->pl_next) {
872
873 if (entry->pl_fixed)
874 continue;
875
876 if (entry->pl_prop != ZPROP_INVAL &&
877 zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
2a8b84b7 878 NULL, B_FALSE) == 0) {
34dc7c2f
BB
879 if (strlen(buf) > entry->pl_width)
880 entry->pl_width = strlen(buf);
881 }
882 }
883
884 return (0);
885}
886
9ae529ec
CS
887/*
888 * Get the state for the given feature on the given ZFS pool.
889 */
890int
891zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
892 size_t len)
893{
894 uint64_t refcount;
895 boolean_t found = B_FALSE;
896 nvlist_t *features = zpool_get_features(zhp);
897 boolean_t supported;
898 const char *feature = strchr(propname, '@') + 1;
899
900 supported = zpool_prop_feature(propname);
901 ASSERT(supported || zpool_prop_unsupported(propname));
902
903 /*
904 * Convert from feature name to feature guid. This conversion is
4e33ba4c 905 * unnecessary for unsupported@... properties because they already
9ae529ec
CS
906 * use guids.
907 */
908 if (supported) {
909 int ret;
fa86b5db 910 spa_feature_t fid;
9ae529ec 911
fa86b5db 912 ret = zfeature_lookup_name(feature, &fid);
9ae529ec
CS
913 if (ret != 0) {
914 (void) strlcpy(buf, "-", len);
915 return (ENOTSUP);
916 }
fa86b5db 917 feature = spa_feature_table[fid].fi_guid;
9ae529ec
CS
918 }
919
920 if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
921 found = B_TRUE;
922
923 if (supported) {
924 if (!found) {
925 (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
926 } else {
927 if (refcount == 0)
928 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
929 else
930 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
931 }
932 } else {
933 if (found) {
934 if (refcount == 0) {
935 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
936 } else {
937 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
938 }
939 } else {
940 (void) strlcpy(buf, "-", len);
941 return (ENOTSUP);
942 }
943 }
944
945 return (0);
946}
34dc7c2f 947
9babb374
BB
948/*
949 * Don't start the slice at the default block of 34; many storage
d603ed6c
BB
950 * devices will use a stripe width of 128k, other vendors prefer a 1m
951 * alignment. It is best to play it safe and ensure a 1m alignment
613d88ed
NB
952 * given 512B blocks. When the block size is larger by a power of 2
953 * we will still be 1m aligned. Some devices are sensitive to the
954 * partition ending alignment as well.
9babb374 955 */
613d88ed
NB
956#define NEW_START_BLOCK 2048
957#define PARTITION_END_ALIGNMENT 2048
9babb374 958
34dc7c2f
BB
959/*
960 * Validate the given pool name, optionally putting an extended error message in
961 * 'buf'.
962 */
963boolean_t
964zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
965{
966 namecheck_err_t why;
967 char what;
968 int ret;
969
970 ret = pool_namecheck(pool, &why, &what);
971
972 /*
973 * The rules for reserved pool names were extended at a later point.
974 * But we need to support users with existing pools that may now be
975 * invalid. So we only check for this expanded set of names during a
976 * create (or import), and only in userland.
977 */
978 if (ret == 0 && !isopen &&
979 (strncmp(pool, "mirror", 6) == 0 ||
980 strncmp(pool, "raidz", 5) == 0 ||
981 strncmp(pool, "spare", 5) == 0 ||
982 strcmp(pool, "log") == 0)) {
983 if (hdl != NULL)
984 zfs_error_aux(hdl,
985 dgettext(TEXT_DOMAIN, "name is reserved"));
986 return (B_FALSE);
987 }
988
989
990 if (ret != 0) {
991 if (hdl != NULL) {
992 switch (why) {
993 case NAME_ERR_TOOLONG:
994 zfs_error_aux(hdl,
995 dgettext(TEXT_DOMAIN, "name is too long"));
996 break;
997
998 case NAME_ERR_INVALCHAR:
999 zfs_error_aux(hdl,
1000 dgettext(TEXT_DOMAIN, "invalid character "
1001 "'%c' in pool name"), what);
1002 break;
1003
1004 case NAME_ERR_NOLETTER:
1005 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1006 "name must begin with a letter"));
1007 break;
1008
1009 case NAME_ERR_RESERVED:
1010 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1011 "name is reserved"));
1012 break;
1013
1014 case NAME_ERR_DISKLIKE:
1015 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1016 "pool name is reserved"));
1017 break;
1018
1019 case NAME_ERR_LEADING_SLASH:
1020 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1021 "leading slash in name"));
1022 break;
1023
1024 case NAME_ERR_EMPTY_COMPONENT:
1025 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1026 "empty component in name"));
1027 break;
1028
1029 case NAME_ERR_TRAILING_SLASH:
1030 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1031 "trailing slash in name"));
1032 break;
1033
1034 case NAME_ERR_MULTIPLE_AT:
1035 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1036 "multiple '@' delimiters in name"));
1037 break;
e75c13c3
BB
1038 case NAME_ERR_NO_AT:
1039 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1040 "permission set is missing '@'"));
1041 break;
34dc7c2f
BB
1042 }
1043 }
1044 return (B_FALSE);
1045 }
1046
1047 return (B_TRUE);
1048}
1049
1050/*
1051 * Open a handle to the given pool, even if the pool is currently in the FAULTED
1052 * state.
1053 */
1054zpool_handle_t *
1055zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1056{
1057 zpool_handle_t *zhp;
1058 boolean_t missing;
1059
1060 /*
1061 * Make sure the pool name is valid.
1062 */
1063 if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1064 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1065 dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1066 pool);
1067 return (NULL);
1068 }
1069
1070 if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1071 return (NULL);
1072
1073 zhp->zpool_hdl = hdl;
1074 (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1075
1076 if (zpool_refresh_stats(zhp, &missing) != 0) {
1077 zpool_close(zhp);
1078 return (NULL);
1079 }
1080
1081 if (missing) {
1082 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1083 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1084 dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1085 zpool_close(zhp);
1086 return (NULL);
1087 }
1088
1089 return (zhp);
1090}
1091
1092/*
1093 * Like the above, but silent on error. Used when iterating over pools (because
1094 * the configuration cache may be out of date).
1095 */
1096int
1097zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1098{
1099 zpool_handle_t *zhp;
1100 boolean_t missing;
1101
1102 if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1103 return (-1);
1104
1105 zhp->zpool_hdl = hdl;
1106 (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1107
1108 if (zpool_refresh_stats(zhp, &missing) != 0) {
1109 zpool_close(zhp);
1110 return (-1);
1111 }
1112
1113 if (missing) {
1114 zpool_close(zhp);
1115 *ret = NULL;
1116 return (0);
1117 }
1118
1119 *ret = zhp;
1120 return (0);
1121}
1122
1123/*
1124 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1125 * state.
1126 */
1127zpool_handle_t *
1128zpool_open(libzfs_handle_t *hdl, const char *pool)
1129{
1130 zpool_handle_t *zhp;
1131
1132 if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1133 return (NULL);
1134
1135 if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1136 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1137 dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1138 zpool_close(zhp);
1139 return (NULL);
1140 }
1141
1142 return (zhp);
1143}
1144
1145/*
1146 * Close the handle. Simply frees the memory associated with the handle.
1147 */
/*
 * Close the handle.  Simply frees the memory associated with the handle.
 * nvlist_free() is NULL-safe, so unpopulated caches need no guards.
 */
void
zpool_close(zpool_handle_t *zhp)
{
	nvlist_free(zhp->zpool_config);
	nvlist_free(zhp->zpool_old_config);
	nvlist_free(zhp->zpool_props);
	free(zhp);
}
1156
1157/*
1158 * Return the name of the pool.
1159 */
/*
 * Return the name of the pool.  The string is owned by the handle and
 * remains valid until zpool_close().
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	return (zhp->zpool_name);
}
1165
1166
/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE), as cached on the
 * handle by the most recent stats refresh.
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
	return (zhp->zpool_state);
}
1175
/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 *
 * 'props' are pool properties, 'fsprops' are properties for the pool's
 * root filesystem (packed under ZPOOL_ROOTFS_PROPS for the kernel).
 * Returns 0 on success; -1 or a zfs_error() code on failure.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = {"\0"};
	nvlist_t *zc_fsprops = NULL;
	nvlist_t *zc_props = NULL;
	char msg[1024];
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		/*
		 * Validate against SPA_VERSION_1 since the pool doesn't
		 * exist yet and has no version of its own to check.
		 */
		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		/* "zoned=on" changes which fs properties are permitted. */
		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
		    fsprops, zoned, NULL, NULL, msg)) == NULL) {
			goto create_failed;
		}
		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		/* Nest the root-fs properties inside the pool props. */
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

		/* Free before the switch: every case returns directly. */
		zcmd_free_nvlists(&zc);
		nvlist_free(zc_props);
		nvlist_free(zc_fsprops);

		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.  This can also happen if the device is
			 * part of an active md or lvm device.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device, or "
			    "one of\nthe devices is part of an active md or "
			    "lvm device"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ERANGE:
			/*
			 * This happens if the record size is smaller or larger
			 * than the allowed size range, or not a power of 2.
			 *
			 * NOTE: although zfs_valid_proplist is called earlier,
			 * this case may have slipped through since the
			 * pool does not exist yet and it is therefore
			 * impossible to read properties e.g. max blocksize
			 * from the pool.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "record size invalid"));
			return (zfs_error(hdl, EZFS_BADPROP, msg));

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		default:
			return (zpool_standard_error(hdl, errno, msg));
		}
	}

create_failed:
	/* Shared cleanup for the pre-ioctl failure paths (and success). */
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	return (ret);
}
1311
/*
 * Destroy the given pool.  It is up to the caller to ensure that there are no
 * datasets left in the pool.
 *
 * 'log_str' is recorded in the pool history by the kernel.  Returns 0 on
 * success, -1 on failure (with the error already reported via the handle).
 */
int
zpool_destroy(zpool_handle_t *zhp, const char *log_str)
{
	zfs_cmd_t zc = {"\0"};
	zfs_handle_t *zfp = NULL;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];

	/*
	 * For an active pool, open the root dataset first so its mountpoint
	 * can be cleaned up after a successful destroy.
	 */
	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
	    (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_history = (uint64_t)(uintptr_t)log_str;

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot destroy '%s'"), zhp->zpool_name);

		if (errno == EROFS) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is read only"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
		} else {
			(void) zpool_standard_error(hdl, errno, msg);
		}

		if (zfp)
			zfs_close(zfp);
		return (-1);
	}

	if (zfp) {
		/* Pool is gone; remove the stale root mountpoint. */
		remove_mountpoint(zfp);
		zfs_close(zfp);
	}

	return (0);
}
1355
/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 *
 * Rejects spares/cache devices if the pool version predates their support.
 * Returns 0 on success, -1 on failure.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
	zfs_cmd_t zc = {"\0"};
	int ret;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot add to '%s'"), zhp->zpool_name);

	/* Hot spares require at least SPA_VERSION_SPARES. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_SPARES &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add hot spares"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

#if defined(__sun__) || defined(__sun)
	/* Solaris only: EFI-labeled spares are not bootable. */
	if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
		uint64_t s;

		for (s = 0; s < nspares; s++) {
			char *path;

			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
			    &path) == 0 && pool_uses_efi(spares[s])) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device '%s' contains an EFI label and "
				    "cannot be used on root pools."),
				    zpool_vdev_name(hdl, NULL, spares[s], 0));
				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
			}
		}
	}
#endif

	/* L2ARC cache devices require at least SPA_VERSION_L2CACHE. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_L2CACHE &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add cache devices"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device is less than the minimum "
				    "size (%s)"), buf);
			}
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded to add these vdevs"));
			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
			break;

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		default:
			(void) zpool_standard_error(hdl, errno, msg);
		}

		ret = -1;
	} else {
		ret = 0;
	}

	zcmd_free_nvlists(&zc);

	return (ret);
}
1473
/*
 * Exports the pool from the system.  The caller must ensure that there are no
 * mounted datasets in the pool.
 *
 * 'force' maps to zc_cookie and 'hardforce' to zc_guid (kernel ABI); the
 * optional 'log_str' is recorded in pool history.
 */
static int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
    const char *log_str)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot export '%s'"), zhp->zpool_name);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = force;
	zc.zc_guid = hardforce;
	zc.zc_history = (uint64_t)(uintptr_t)log_str;

	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
		switch (errno) {
		case EXDEV:
			/* Shared spare still in use by another pool. */
			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
			    "use '-f' to override the following errors:\n"
			    "'%s' has an active shared spare which could be"
			    " used by other pools once '%s' is exported."),
			    zhp->zpool_name, zhp->zpool_name);
			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
			    msg));
		default:
			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
			    msg));
		}
	}

	return (0);
}
1511
/* Export the pool; 'force' overrides shared-spare checks (no hard-force). */
int
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
	return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
1517
/* Export the pool unconditionally (both force and hard-force set). */
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1523
/*
 * Print a human-readable summary of a (possibly hypothetical) pool rewind:
 * the txg time the pool was/would be returned to, and roughly how much
 * transaction history is discarded.  'dryrun' selects "would" phrasing.
 * Silently does nothing if error printing is off or rewind info is absent.
 */
static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	uint64_t rewindto;
	int64_t loss = -1;
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr || config == NULL)
		return;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
		return;
	}

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		return;
	/* 'loss' stays -1 if the kernel did not report a rewind time delta. */
	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);

	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		if (dryrun) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Would be able to return %s "
			    "to its state as of %s.\n"),
			    name, timestr);
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Pool %s returned to its state as of %s.\n"),
			    name, timestr);
		}
		/* Over two minutes of loss is reported in minutes, rounded. */
		if (loss > 120) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    ((longlong_t)loss + 30) / 60);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "minutes of transactions.\n"));
		} else if (loss > 0) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    (longlong_t)loss);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "seconds of transactions.\n"));
		}
	}
}
1575
/*
 * Explain to the user how a failed import/clear could be recovered by a
 * txg rewind ('zpool clear -F' / 'zpool import -F'), including the time
 * the pool would be rewound to, the amount of data lost, and any
 * persistent data errors that would remain.  'reason' >= 0 indicates the
 * "action:" status-style framing; negative means import-failure framing.
 */
void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	int64_t loss = -1;
	uint64_t edata = UINT64_MAX;
	uint64_t rewindto;
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr)
		return;

	if (reason >= 0)
		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
	else
		(void) printf(dgettext(TEXT_DOMAIN, "\t"));

	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		goto no_info;

	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
	    &edata);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery is possible, but will result in some data loss.\n"));

	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReturning the pool to its state as of %s\n"
		    "\tshould correct the problem.  "),
		    timestr);
	} else {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReverting the pool to an earlier state "
		    "should correct the problem.\n\t"));
	}

	/* Same minutes/seconds split as zpool_rewind_exclaim(). */
	if (loss > 120) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld minutes of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    ((longlong_t)loss + 30) / 60);
	} else if (loss > 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld seconds of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    (longlong_t)loss);
	}
	/* UINT64_MAX sentinel means the error count was never reported. */
	if (edata != 0 && edata != UINT64_MAX) {
		if (edata == 1) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, at least\n"
			    "\tone persistent user-data error will remain.  "));
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, several\n"
			    "\tpersistent user-data errors will remain.  "));
		}
	}
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
	    reason >= 0 ? "clear" : "import", name);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "A scrub of the pool\n"
	    "\tis strongly recommended after recovery.\n"));
	return;

no_info:
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Destroy and re-create the pool from\n\ta backup source.\n"));
}
1655
/*
 * zpool_import() is a contracted interface.  Should be kept the same
 * if possible.
 *
 * Applications should use zpool_import_props() to import a pool with
 * new properties value to be set.
 *
 * If 'altroot' is given, it is passed as the altroot property along with
 * cachefile=none so the import does not touch the default cache file.
 */
int
zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    char *altroot)
{
	nvlist_t *props = NULL;
	int ret;

	if (altroot != NULL) {
		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
			return (zfs_error_fmt(hdl, EZFS_NOMEM,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		}

		if (nvlist_add_string(props,
		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
		    nvlist_add_string(props,
		    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
			/* Partial population failed; free before erroring. */
			nvlist_free(props);
			return (zfs_error_fmt(hdl, EZFS_NOMEM,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		}
	}

	ret = zpool_import_props(hdl, config, newname, props,
	    ZFS_IMPORT_NORMAL);
	nvlist_free(props);
	return (ret);
}
1693
/*
 * Recursively print an indented vdev tree rooted at 'nv'.  'name' is the
 * label for this node (NULL suppresses the header line, useful for the
 * root); top-level log vdevs are tagged with " [log]".  Each level of
 * recursion adds two columns of indent.
 */
static void
print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
    int indent)
{
	nvlist_t **child;
	uint_t c, children;
	char *vname;
	uint64_t is_log = 0;

	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
	    &is_log);

	if (name != NULL)
		(void) printf("\t%*s%s%s\n", indent, "", name,
		    is_log ? " [log]" : "");

	/* Leaf vdev: nothing further to print. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return;

	for (c = 0; c < children; c++) {
		vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
		print_vdev_tree(hdl, vname, child[c], indent + 2);
		free(vname);
	}
}
1720
/*
 * Print the list of unsupported features found in 'config'
 * (ZPOOL_CONFIG_LOAD_INFO / ZPOOL_CONFIG_UNSUP_FEAT), one per line with
 * an optional description.  verify()s assume the caller has already
 * established that both nvlists are present.
 */
void
zpool_print_unsup_feat(nvlist_t *config)
{
	nvlist_t *nvinfo, *unsup_feat;
	nvpair_t *nvp;

	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
	    0);
	verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
	    &unsup_feat) == 0);

	for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
	    nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
		char *desc;

		verify(nvpair_type(nvp) == DATA_TYPE_STRING);
		verify(nvpair_value_string(nvp, &desc) == 0);

		if (strlen(desc) > 0)
			(void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
		else
			(void) printf("\t%s\n", nvpair_name(nvp));
	}
}
1745
/*
 * Import the given pool using the known configuration and a list of
 * properties to be set.  The configuration should have come from
 * zpool_find_import().  The 'newname' parameters control whether the pool
 * is imported with a different name.
 *
 * 'flags' is a ZFS_IMPORT_* bitmask passed to the kernel via zc_cookie.
 * On failure, per-errno diagnostics are emitted, including unsupported
 * features (ENOTSUP), missing devices (ENXIO), and rewind guidance.
 */
int
zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    nvlist_t *props, int flags)
{
	zfs_cmd_t zc = {"\0"};
	zpool_rewind_policy_t policy;
	nvlist_t *nv = NULL;
	nvlist_t *nvinfo = NULL;
	nvlist_t *missing = NULL;
	char *thename;
	char *origname;
	int ret;
	int error = 0;
	char errbuf[1024];

	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &origname) == 0);

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "cannot import pool '%s'"), origname);

	if (newname != NULL) {
		if (!zpool_name_valid(hdl, B_FALSE, newname))
			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		thename = (char *)newname;
	} else {
		thename = origname;
	}

	if (props != NULL) {
		uint64_t version;
		/* Shadows the function parameter 'flags' intentionally. */
		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
		    &version) == 0);

		/* 'props' is rebound to a newly-allocated validated list. */
		if ((props = zpool_valid_proplist(hdl, origname,
		    props, version, flags, errbuf)) == NULL)
			return (-1);
		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
			nvlist_free(props);
			return (-1);
		}
		nvlist_free(props);
	}

	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &zc.zc_guid) == 0);

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}
	if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}

	zc.zc_cookie = flags;
	/* Retry with a larger dst buffer as long as the kernel says ENOMEM. */
	while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
	    errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}
	if (ret != 0)
		error = errno;

	/* Best-effort: 'nv' carries load/rewind info even on failure. */
	(void) zcmd_read_dst_nvlist(hdl, &zc, &nv);

	zcmd_free_nvlists(&zc);

	zpool_get_rewind_policy(config, &policy);

	if (error) {
		char desc[1024];

		/*
		 * Dry-run failed, but we print out what success
		 * looks like if we found a best txg
		 */
		if (policy.zrp_request & ZPOOL_TRY_REWIND) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    B_TRUE, nv);
			nvlist_free(nv);
			return (-1);
		}

		if (newname == NULL)
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    thename);
		else
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
			    origname, thename);

		switch (error) {
		case ENOTSUP:
			if (nv != NULL && nvlist_lookup_nvlist(nv,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
			    nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
				(void) printf(dgettext(TEXT_DOMAIN, "This "
				    "pool uses the following feature(s) not "
				    "supported by this system:\n"));
				zpool_print_unsup_feat(nv);
				if (nvlist_exists(nvinfo,
				    ZPOOL_CONFIG_CAN_RDONLY)) {
					(void) printf(dgettext(TEXT_DOMAIN,
					    "All unsupported features are only "
					    "required for writing to the pool."
					    "\nThe pool can be imported using "
					    "'-o readonly=on'.\n"));
				}
			}
			/*
			 * Unsupported version.
			 */
			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
			break;

		case EINVAL:
			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
			break;

		case EROFS:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is read only"));
			(void) zfs_error(hdl, EZFS_BADDEV, desc);
			break;

		case ENXIO:
			if (nv && nvlist_lookup_nvlist(nv,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
			    nvlist_lookup_nvlist(nvinfo,
			    ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
				(void) printf(dgettext(TEXT_DOMAIN,
				    "The devices below are missing, use "
				    "'-m' to import the pool anyway:\n"));
				print_vdev_tree(hdl, NULL, missing, 2);
				(void) printf("\n");
			}
			(void) zpool_standard_error(hdl, error, desc);
			break;

		case EEXIST:
			(void) zpool_standard_error(hdl, error, desc);
			break;

		case EBUSY:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices are already in use\n"));
			(void) zfs_error(hdl, EZFS_BADDEV, desc);
			break;
		case ENAMETOOLONG:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "new name of at least one dataset is longer than "
			    "the maximum allowable length"));
			(void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
			break;
		default:
			(void) zpool_standard_error(hdl, error, desc);
			zpool_explain_recover(hdl,
			    newname ? origname : thename, -error, nv);
			break;
		}

		nvlist_free(nv);
		ret = -1;
	} else {
		zpool_handle_t *zhp;

		/*
		 * This should never fail, but play it safe anyway.
		 */
		if (zpool_open_silent(hdl, thename, &zhp) != 0)
			ret = -1;
		else if (zhp != NULL)
			zpool_close(zhp);
		if (policy.zrp_request &
		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
		}
		nvlist_free(nv);
		return (0);
	}

	return (ret);
}
1947
/*
 * Scan the pool: start a scrub (POOL_SCAN_SCRUB), cancel one
 * (POOL_SCAN_NONE), or other pool_scan_func_t values.
 *
 * ENOENT from a start request is treated as success here; only a cancel
 * with nothing to cancel reports EZFS_NO_SCRUB.  EBUSY is refined into
 * "already scrubbing" vs "resilvering" using the cached scan stats.
 */
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = func;

	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
	    (errno == ENOENT && func != POOL_SCAN_NONE))
		return (0);

	/*
	 * NOTE(review): errno is consulted below after the snprintf/dgettext
	 * calls; this presumably relies on those calls not clobbering a
	 * meaningful errno — confirm against the libc guarantees in use.
	 */
	if (func == POOL_SCAN_SCRUB) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
	} else if (func == POOL_SCAN_NONE) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
		    zc.zc_name);
	} else {
		assert(!"unexpected result");
	}

	if (errno == EBUSY) {
		nvlist_t *nvroot;
		pool_scan_stat_t *ps = NULL;
		uint_t psc;

		verify(nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
		(void) nvlist_lookup_uint64_array(nvroot,
		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
		if (ps && ps->pss_func == POOL_SCAN_SCRUB)
			return (zfs_error(hdl, EZFS_SCRUBBING, msg));
		else
			return (zfs_error(hdl, EZFS_RESILVERING, msg));
	} else if (errno == ENOENT) {
		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
	} else {
		return (zpool_standard_error(hdl, errno, msg));
	}
}
1995
/*
 * Find a vdev that matches the search criteria specified.  We use the
 * the nvpair name to determine how we should look for the device.
 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
 * spare; but FALSE if its an INUSE spare.
 *
 * 'search' holds exactly one criterion: a uint64 ZPOOL_CONFIG_GUID, or a
 * string keyed by ZPOOL_CONFIG_PATH / ZPOOL_CONFIG_TYPE / any other path
 * attribute.  Recurses through children, then spares, then l2cache;
 * *l2cache is set when the match came from the cache list, and *log when
 * the matching subtree is a top-level log vdev (root call only).
 */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	uint_t c, children;
	nvlist_t **child;
	nvlist_t *ret;
	uint64_t is_log;
	char *srchkey;
	nvpair_t *pair = nvlist_next_nvpair(search, NULL);

	/* Nothing to look for */
	if (search == NULL || pair == NULL)
		return (NULL);

	/* Obtain the key we will use to search */
	srchkey = nvpair_name(pair);

	switch (nvpair_type(pair)) {
	case DATA_TYPE_UINT64:
		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
			uint64_t srchval, theguid;

			verify(nvpair_value_uint64(pair, &srchval) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &theguid) == 0);
			if (theguid == srchval)
				return (nv);
		}
		break;

	case DATA_TYPE_STRING: {
		char *srchval, *val;

		verify(nvpair_value_string(pair, &srchval) == 0);
		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
			break;

		/*
		 * Search for the requested value. Special cases:
		 *
		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
		 *   "-part1", or "p1".  The suffix is hidden from the user,
		 *   but included in the string, so this matches around it.
		 * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
		 *   is used to check all possible expanded paths.
		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
		 *
		 * Otherwise, all other searches are simple string compares.
		 */
		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
			uint64_t wholedisk = 0;

			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
			    &wholedisk);
			if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
				return (nv);

		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
			char *type, *idx, *end, *p;
			uint64_t id, vdev_id;

			/*
			 * Determine our vdev type, keeping in mind
			 * that the srchval is composed of a type and
			 * vdev id pair (i.e. mirror-4).
			 */
			if ((type = strdup(srchval)) == NULL)
				return (NULL);

			if ((p = strrchr(type, '-')) == NULL) {
				free(type);
				break;
			}
			idx = p + 1;
			*p = '\0';

			/*
			 * If the types don't match then keep looking.
			 */
			if (strncmp(val, type, strlen(val)) != 0) {
				free(type);
				break;
			}

			verify(strncmp(type, VDEV_TYPE_RAIDZ,
			    strlen(VDEV_TYPE_RAIDZ)) == 0 ||
			    strncmp(type, VDEV_TYPE_MIRROR,
			    strlen(VDEV_TYPE_MIRROR)) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);

			/*
			 * NOTE(review): only errno is checked after
			 * strtoull(); '*end' is not, so trailing garbage
			 * after the index (e.g. "mirror-4x") would still
			 * parse — confirm whether callers can pass such
			 * input before tightening.
			 */
			errno = 0;
			vdev_id = strtoull(idx, &end, 10);

			free(type);
			if (errno != 0)
				return (NULL);

			/*
			 * Now verify that we have the correct vdev id.
			 */
			if (vdev_id == id)
				return (nv);
		}

		/*
		 * Common case
		 */
		if (strcmp(srchval, val) == 0)
			return (nv);
		break;
	}

	default:
		break;
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = vdev_to_nvlist_iter(child[c], search,
		    avail_spare, l2cache, NULL)) != NULL) {
			/*
			 * The 'is_log' value is only set for the toplevel
			 * vdev, not the leaf vdevs.  So we always lookup the
			 * log device from the root of the vdev tree (where
			 * 'log' is non-NULL).
			 */
			if (log != NULL &&
			    nvlist_lookup_uint64(child[c],
			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
			    is_log) {
				*log = B_TRUE;
			}
			return (ret);
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*avail_spare = B_TRUE;
				return (ret);
			}
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*l2cache = B_TRUE;
				return (ret);
			}
		}
	}

	return (NULL);
}
2167
/*
 * Given a physical path (minus the "/devices" prefix), find the
 * associated vdev.
 *
 * Wraps vdev_to_nvlist_iter() with a ZPOOL_CONFIG_PHYS_PATH search key.
 * The out-parameters are cleared before searching; 'log' may be NULL.
 */
nvlist_t *
zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
{
	nvlist_t *search, *nvroot, *ret;

	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);

	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	*avail_spare = B_FALSE;
	*l2cache = B_FALSE;
	if (log != NULL)
		*log = B_FALSE;
	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
	nvlist_free(search);

	return (ret);
}
2193
428870ff
BB
2194/*
2195 * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2196 */
2197boolean_t
2198zpool_vdev_is_interior(const char *name)
2199{
2200 if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2201 strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2202 return (B_TRUE);
2203 return (B_FALSE);
2204}
2205
34dc7c2f
BB
2206nvlist_t *
2207zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
b128c09f 2208 boolean_t *l2cache, boolean_t *log)
34dc7c2f 2209{
34dc7c2f 2210 char *end;
9babb374 2211 nvlist_t *nvroot, *search, *ret;
34dc7c2f
BB
2212 uint64_t guid;
2213
9babb374
BB
2214 verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2215
1a5c611a 2216 guid = strtoull(path, &end, 0);
34dc7c2f 2217 if (guid != 0 && *end == '\0') {
9babb374 2218 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
428870ff
BB
2219 } else if (zpool_vdev_is_interior(path)) {
2220 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
34dc7c2f 2221 } else {
9babb374 2222 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
34dc7c2f
BB
2223 }
2224
2225 verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2226 &nvroot) == 0);
2227
2228 *avail_spare = B_FALSE;
2229 *l2cache = B_FALSE;
b128c09f
BB
2230 if (log != NULL)
2231 *log = B_FALSE;
9babb374
BB
2232 ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2233 nvlist_free(search);
2234
2235 return (ret);
b128c09f
BB
2236}
2237
2238static int
2239vdev_online(nvlist_t *nv)
2240{
2241 uint64_t ival;
2242
2243 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2244 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2245 nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2246 return (0);
2247
2248 return (1);
2249}
2250
2251/*
9babb374 2252 * Helper function for zpool_get_physpaths().
b128c09f 2253 */
9babb374
BB
2254static int
2255vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2256 size_t *bytes_written)
2257{
2258 size_t bytes_left, pos, rsz;
2259 char *tmppath;
2260 const char *format;
2261
2262 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2263 &tmppath) != 0)
2264 return (EZFS_NODEVICE);
2265
2266 pos = *bytes_written;
2267 bytes_left = physpath_size - pos;
2268 format = (pos == 0) ? "%s" : " %s";
2269
2270 rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2271 *bytes_written += rsz;
2272
2273 if (rsz >= bytes_left) {
2274 /* if physpath was not copied properly, clear it */
2275 if (bytes_left != 0) {
2276 physpath[pos] = 0;
2277 }
2278 return (EZFS_NOSPC);
2279 }
2280 return (0);
2281}
2282
/*
 * Recursively collect the physical paths of the online disks under vdev
 * 'nv' into 'physpath', accumulating the total length in '*rsz'.
 *
 * Note: on complete traversal this function falls through to return
 * EZFS_POOL_INVALARG regardless of success; the caller
 * (zpool_get_config_physpath) ignores the return value and judges success
 * by whether '*rsz' grew, except for EZFS_NOSPC which aborts the walk.
 */
static int
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
    size_t *rsz, boolean_t is_spare)
{
	char *type;
	int ret;

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EZFS_INVALCONFIG);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		/*
		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
		 * For a spare vdev, we only want to boot from the active
		 * spare device.
		 */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare);
			if (!spare)
				return (EZFS_INVALCONFIG);
		}

		/* Only online disks contribute a physical path. */
		if (vdev_online(nv)) {
			if ((ret = vdev_get_one_physpath(nv, physpath,
			    phypath_size, rsz)) != 0)
				return (ret);
		}
	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		/*
		 * NOTE: the last clause deliberately assigns 'is_spare'
		 * inside the condition, so that children of a spare vdev
		 * are walked with is_spare == B_TRUE.
		 */
		nvlist_t **child;
		uint_t count;
		int i, ret;

		if (nvlist_lookup_nvlist_array(nv,
		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
			return (EZFS_INVALCONFIG);

		for (i = 0; i < count; i++) {
			ret = vdev_get_physpaths(child[i], physpath,
			    phypath_size, rsz, is_spare);
			/* Only buffer exhaustion aborts the traversal. */
			if (ret == EZFS_NOSPC)
				return (ret);
		}
	}

	return (EZFS_POOL_INVALARG);
}
2334
/*
 * Get phys_path for a root pool config.
 * Return 0 on success; non-zero on failure.
 */
static int
zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
{
	size_t rsz;		/* total bytes of physpath collected */
	nvlist_t *vdev_root;
	nvlist_t **child;
	uint_t count;
	char *type;

	rsz = 0;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &vdev_root) != 0)
		return (EZFS_INVALCONFIG);

	if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
	    &child, &count) != 0)
		return (EZFS_INVALCONFIG);

#if defined(__sun__) || defined(__sun)
	/*
	 * root pool can not have EFI labeled disks and can only have
	 * a single top-level vdev.
	 */
	if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
	    pool_uses_efi(vdev_root))
		return (EZFS_POOL_INVALARG);
#endif

	/*
	 * The return value is intentionally ignored: vdev_get_physpaths()
	 * always falls through to EZFS_POOL_INVALARG; success is judged
	 * by whether any path bytes were written into 'rsz'.
	 */
	(void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
	    B_FALSE);

	/* No online devices */
	if (rsz == 0)
		return (EZFS_NODEVICE);

	return (0);
}
2378
9babb374
BB
2379/*
2380 * Get phys_path for a root pool
2381 * Return 0 on success; non-zero on failure.
2382 */
2383int
2384zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2385{
2386 return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2387 phypath_size));
2388}
2389
9babb374
BB
2390/*
2391 * If the device has being dynamically expanded then we need to relabel
2392 * the disk to use the new unallocated space.
2393 */
2394static int
8adf4864 2395zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
9babb374 2396{
9babb374 2397 int fd, error;
9babb374 2398
d603ed6c 2399 if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
9babb374 2400 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
109491a8 2401 "relabel '%s': unable to open device: %d"), path, errno);
8adf4864 2402 return (zfs_error(hdl, EZFS_OPENFAILED, msg));
9babb374
BB
2403 }
2404
2405 /*
2406 * It's possible that we might encounter an error if the device
2407 * does not have any unallocated space left. If so, we simply
2408 * ignore that error and continue on.
b5a28807
ED
2409 *
2410 * Also, we don't call efi_rescan() - that would just return EBUSY.
2411 * The module will do it for us in vdev_disk_open().
9babb374 2412 */
d603ed6c 2413 error = efi_use_whole_disk(fd);
9babb374
BB
2414 (void) close(fd);
2415 if (error && error != VT_ENOSPC) {
2416 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
d603ed6c 2417 "relabel '%s': unable to read disk capacity"), path);
8adf4864 2418 return (zfs_error(hdl, EZFS_NOCAP, msg));
9babb374
BB
2419 }
2420 return (0);
2421}
2422
34dc7c2f
BB
/*
 * Bring the specified vdev online.  The 'flags' parameter is a set of the
 * ZFS_ONLINE_* flags.  On success '*newstate' receives the resulting
 * vdev state.  If expansion is requested (or autoexpand is enabled) and
 * the vdev is a whole disk, the disk is relabeled first to pick up any
 * newly available capacity.
 */
int
zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
    vdev_state_t *newstate)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	int error;

	/* Pick the error-message prefix to match the operation. */
	if (flags & ZFS_ONLINE_EXPAND) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
	} else {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
	}

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	/* Available (inactive) spares cannot be onlined. */
	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (flags & ZFS_ONLINE_EXPAND ||
	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
		uint64_t wholedisk = 0;

		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk);

		/*
		 * XXX - L2ARC 1.0 devices can't support expansion.
		 */
		if (l2cache) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cannot expand cache devices"));
			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
		}

		if (wholedisk) {
			const char *fullpath = path;
			char buf[MAXPATHLEN];

			/* Resolve a bare device name to an absolute path. */
			if (path[0] != '/') {
				error = zfs_resolve_shortname(path, buf,
				    sizeof (buf));
				if (error != 0)
					return (zfs_error(hdl, EZFS_NODEVICE,
					    msg));

				fullpath = buf;
			}

			error = zpool_relabel_disk(hdl, fullpath, msg);
			if (error != 0)
				return (error);
		}
	}

	zc.zc_cookie = VDEV_STATE_ONLINE;
	zc.zc_obj = flags;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
		/*
		 * EINVAL here indicates the device now belongs to a pool
		 * created by 'zpool split'.
		 */
		if (errno == EINVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
			    "from this pool into a new one. Use '%s' "
			    "instead"), "zpool detach");
			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
		}
		return (zpool_standard_error(hdl, errno, msg));
	}

	/* The kernel returns the resulting state in zc_cookie. */
	*newstate = zc.zc_cookie;
	return (0);
}
2508
2509/*
2510 * Take the specified vdev offline
2511 */
2512int
2513zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2514{
13fe0198 2515 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2516 char msg[1024];
2517 nvlist_t *tgt;
2518 boolean_t avail_spare, l2cache;
2519 libzfs_handle_t *hdl = zhp->zpool_hdl;
2520
2521 (void) snprintf(msg, sizeof (msg),
2522 dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2523
2524 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f
BB
2525 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2526 NULL)) == NULL)
34dc7c2f
BB
2527 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2528
2529 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2530
428870ff 2531 if (avail_spare)
34dc7c2f
BB
2532 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2533
34dc7c2f
BB
2534 zc.zc_cookie = VDEV_STATE_OFFLINE;
2535 zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2536
572e2857 2537 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
34dc7c2f
BB
2538 return (0);
2539
2540 switch (errno) {
2541 case EBUSY:
2542
2543 /*
2544 * There are no other replicas of this device.
2545 */
2546 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2547
9babb374
BB
2548 case EEXIST:
2549 /*
2550 * The log device has unplayed logs
2551 */
2552 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2553
34dc7c2f
BB
2554 default:
2555 return (zpool_standard_error(hdl, errno, msg));
2556 }
2557}
2558
2559/*
2560 * Mark the given vdev faulted.
2561 */
2562int
428870ff 2563zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
34dc7c2f 2564{
13fe0198 2565 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2566 char msg[1024];
2567 libzfs_handle_t *hdl = zhp->zpool_hdl;
2568
2569 (void) snprintf(msg, sizeof (msg),
d1d7e268 2570 dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
34dc7c2f
BB
2571
2572 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2573 zc.zc_guid = guid;
2574 zc.zc_cookie = VDEV_STATE_FAULTED;
428870ff 2575 zc.zc_obj = aux;
34dc7c2f 2576
572e2857 2577 if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
34dc7c2f
BB
2578 return (0);
2579
2580 switch (errno) {
2581 case EBUSY:
2582
2583 /*
2584 * There are no other replicas of this device.
2585 */
2586 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2587
2588 default:
2589 return (zpool_standard_error(hdl, errno, msg));
2590 }
2591
2592}
2593
2594/*
2595 * Mark the given vdev degraded.
2596 */
2597int
428870ff 2598zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
34dc7c2f 2599{
13fe0198 2600 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2601 char msg[1024];
2602 libzfs_handle_t *hdl = zhp->zpool_hdl;
2603
2604 (void) snprintf(msg, sizeof (msg),
d1d7e268 2605 dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
34dc7c2f
BB
2606
2607 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2608 zc.zc_guid = guid;
2609 zc.zc_cookie = VDEV_STATE_DEGRADED;
428870ff 2610 zc.zc_obj = aux;
34dc7c2f 2611
572e2857 2612 if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
34dc7c2f
BB
2613 return (0);
2614
2615 return (zpool_standard_error(hdl, errno, msg));
2616}
2617
2618/*
2619 * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2620 * a hot spare.
2621 */
2622static boolean_t
2623is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2624{
2625 nvlist_t **child;
2626 uint_t c, children;
2627 char *type;
2628
2629 if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2630 &children) == 0) {
2631 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2632 &type) == 0);
2633
2634 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2635 children == 2 && child[which] == tgt)
2636 return (B_TRUE);
2637
2638 for (c = 0; c < children; c++)
2639 if (is_replacing_spare(child[c], tgt, which))
2640 return (B_TRUE);
2641 }
2642
2643 return (B_FALSE);
2644}
2645
/*
 * Attach new_disk (fully described by nvroot) to old_disk.
 * If 'replacing' is specified, the new disk will replace the old one.
 *
 * Returns 0 on success, -1 on failure (with the libzfs error state set).
 */
int
zpool_vdev_attach(zpool_handle_t *zhp,
    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	int ret;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	uint64_t val;
	char *newname;
	nvlist_t **child;
	uint_t children;
	nvlist_t *config_root;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	boolean_t rootpool = zpool_is_bootable(zhp);

	if (replacing)
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot replace %s with %s"), old_disk, new_disk);
	else
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot attach %s to %s"), new_disk, old_disk);

#if defined(__sun__) || defined(__sun)
	/*
	 * If this is a root pool, make sure that we're not attaching an
	 * EFI labeled device.
	 */
	if (rootpool && pool_uses_efi(nvroot)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "EFI labeled devices are not supported on root pools."));
		return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
	}
#endif

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
	    &islog)) == 0)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	/* Attach/replace only applies to regular vdevs and logs. */
	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (l2cache)
		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
	/* zc_cookie carries the replacing flag to the kernel. */
	zc.zc_cookie = replacing;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0 || children != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
	}

	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);

	if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
		return (-1);

	/*
	 * If the target is a hot spare that has been swapped in, we can only
	 * replace it with another hot spare.
	 */
	if (replacing &&
	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
	    NULL) == NULL || !avail_spare) &&
	    is_replacing_spare(config_root, tgt, 1)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "can only be replaced by another hot spare"));
		free(newname);
		return (zfs_error(hdl, EZFS_BADTARGET, msg));
	}

	free(newname);

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);

	zcmd_free_nvlists(&zc);

	if (ret == 0) {
		if (rootpool) {
			/*
			 * XXX need a better way to prevent user from
			 * booting up a half-baked vdev.
			 */
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
			    "sure to wait until resilver is done "
			    "before rebooting.\n"));
		}
		return (0);
	}

	/* Map kernel errno values to user-facing diagnostics. */
	switch (errno) {
	case ENOTSUP:
		/*
		 * Can't attach to or replace this type of vdev.
		 */
		if (replacing) {
			uint64_t version = zpool_get_prop_int(zhp,
			    ZPOOL_PROP_VERSION, NULL);

			if (islog)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a log with a spare"));
			else if (version >= SPA_VERSION_MULTI_REPLACE)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "already in replacing/spare config; wait "
				    "for completion or use 'zpool detach'"));
			else
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a replacing device"));
		} else {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "can only attach to mirrors and top-level "
			    "disks"));
		}
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
		break;

	case EINVAL:
		/*
		 * The new device must be a single disk.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
		    new_disk);
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EOVERFLOW:
		/*
		 * The new device is too small.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "device is too small"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EDOM:
		/*
		 * The new device has a different optimal sector size.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device has a different optimal sector size; use the "
		    "option '-o ashift=N' to override the optimal size"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case ENAMETOOLONG:
		/*
		 * The resulting top-level vdev spec won't fit in the label.
		 */
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
		break;

	default:
		(void) zpool_standard_error(hdl, errno, msg);
	}

	return (-1);
}
2824
2825/*
2826 * Detach the specified device.
2827 */
2828int
2829zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2830{
13fe0198 2831 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
2832 char msg[1024];
2833 nvlist_t *tgt;
2834 boolean_t avail_spare, l2cache;
2835 libzfs_handle_t *hdl = zhp->zpool_hdl;
2836
2837 (void) snprintf(msg, sizeof (msg),
2838 dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2839
2840 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f
BB
2841 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2842 NULL)) == 0)
34dc7c2f
BB
2843 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2844
2845 if (avail_spare)
2846 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2847
2848 if (l2cache)
2849 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2850
2851 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2852
2853 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2854 return (0);
2855
2856 switch (errno) {
2857
2858 case ENOTSUP:
2859 /*
2860 * Can't detach from this type of vdev.
2861 */
2862 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2863 "applicable to mirror and replacing vdevs"));
572e2857 2864 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
34dc7c2f
BB
2865 break;
2866
2867 case EBUSY:
2868 /*
2869 * There are no other replicas of this device.
2870 */
2871 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2872 break;
2873
2874 default:
2875 (void) zpool_standard_error(hdl, errno, msg);
2876 }
2877
2878 return (-1);
2879}
2880
428870ff
BB
2881/*
2882 * Find a mirror vdev in the source nvlist.
2883 *
2884 * The mchild array contains a list of disks in one of the top-level mirrors
2885 * of the source pool. The schild array contains a list of disks that the
2886 * user specified on the command line. We loop over the mchild array to
2887 * see if any entry in the schild array matches.
2888 *
2889 * If a disk in the mchild array is found in the schild array, we return
2890 * the index of that entry. Otherwise we return -1.
2891 */
2892static int
2893find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2894 nvlist_t **schild, uint_t schildren)
2895{
2896 uint_t mc;
2897
2898 for (mc = 0; mc < mchildren; mc++) {
2899 uint_t sc;
2900 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
d2f3e292 2901 mchild[mc], 0);
428870ff
BB
2902
2903 for (sc = 0; sc < schildren; sc++) {
2904 char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
d2f3e292 2905 schild[sc], 0);
428870ff
BB
2906 boolean_t result = (strcmp(mpath, spath) == 0);
2907
2908 free(spath);
2909 if (result) {
2910 free(mpath);
2911 return (mc);
2912 }
2913 }
2914
2915 free(mpath);
2916 }
2917
2918 return (-1);
2919}
2920
/*
 * Split a mirror pool.  If newroot points to null, then a new nvlist
 * is generated and it is the responsibility of the caller to free it.
 *
 * Cleanup protocol: 'freelist' is set while *newroot is owned by this
 * function (freed at 'out' on failure); 'memory_err' stays B_TRUE until
 * every allocation has succeeded, so a bare 'goto out' reports ENOMEM
 * unless 'retval' was set to a more specific error first.
 */
int
zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
    nvlist_t *props, splitflags_t flags)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
	nvlist_t **varray = NULL, *zc_props = NULL;
	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	uint64_t vers;
	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
	int retval = 0;

	(void) snprintf(msg, sizeof (msg),
	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);

	if (!zpool_name_valid(hdl, B_FALSE, newname))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
		(void) fprintf(stderr, gettext("Internal error: unable to "
		    "retrieve pool configuration\n"));
		return (-1);
	}

	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
	    == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);

	if (props) {
		/* NOTE: this local deliberately shadows the 'flags' param. */
		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
		    props, vers, flags, msg)) == NULL)
			return (-1);
	}

	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) != 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "Source pool is missing vdev tree"));
		nvlist_free(zc_props);
		return (-1);
	}

	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
	vcount = 0;

	if (*newroot == NULL ||
	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
	    &newchild, &newchildren) != 0)
		newchildren = 0;

	for (c = 0; c < children; c++) {
		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
		char *type;
		nvlist_t **mchild, *vdev;
		uint_t mchildren;
		int entry;

		/*
		 * Unlike cache & spares, slogs are stored in the
		 * ZPOOL_CONFIG_CHILDREN array. We filter them out here.
		 */
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
		    &is_log);
		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
		    &is_hole);
		if (is_log || is_hole) {
			/*
			 * Create a hole vdev and put it in the config.
			 */
			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
				goto out;
			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
			    VDEV_TYPE_HOLE) != 0)
				goto out;
			if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
			    1) != 0)
				goto out;
			/* Remember where the trailing log section begins. */
			if (lastlog == 0)
				lastlog = vcount;
			varray[vcount++] = vdev;
			continue;
		}
		lastlog = 0;
		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
		    == 0);
		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "Source pool must be composed only of mirrors\n"));
			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
			goto out;
		}

		verify(nvlist_lookup_nvlist_array(child[c],
		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);

		/* find or add an entry for this top-level vdev */
		if (newchildren > 0 &&
		    (entry = find_vdev_entry(zhp, mchild, mchildren,
		    newchild, newchildren)) >= 0) {
			/* We found a disk that the user specified. */
			vdev = mchild[entry];
			++found;
		} else {
			/* User didn't specify a disk for this vdev. */
			vdev = mchild[mchildren - 1];
		}

		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
			goto out;
	}

	/* did we find every disk the user specified? */
	if (found != newchildren) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
		    "include at most one disk from each mirror"));
		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
		goto out;
	}

	/* Prepare the nvlist for populating. */
	if (*newroot == NULL) {
		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
			goto out;
		freelist = B_TRUE;
		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0)
			goto out;
	} else {
		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
	}

	/* Add all the children we found */
	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
	    lastlog == 0 ? vcount : lastlog) != 0)
		goto out;

	/*
	 * If we're just doing a dry run, exit now with success.
	 */
	if (flags.dryrun) {
		memory_err = B_FALSE;
		freelist = B_FALSE;
		goto out;
	}

	/* now build up the config list & call the ioctl */
	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
		goto out;

	if (nvlist_add_nvlist(newconfig,
	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
	    nvlist_add_string(newconfig,
	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
		goto out;

	/*
	 * The new pool is automatically part of the namespace unless we
	 * explicitly export it.
	 */
	if (!flags.import)
		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
		goto out;
	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto out;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
		retval = zpool_standard_error(hdl, errno, msg);
		goto out;
	}

	freelist = B_FALSE;
	memory_err = B_FALSE;

out:
	if (varray != NULL) {
		int v;

		for (v = 0; v < vcount; v++)
			nvlist_free(varray[v]);
		free(varray);
	}
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(newconfig);
	if (freelist) {
		nvlist_free(*newroot);
		*newroot = NULL;
	}

	if (retval != 0)
		return (retval);

	if (memory_err)
		return (no_memory(hdl));

	return (0);
}
3129
34dc7c2f 3130/*
d1502e9e
RL
3131 * Remove the given device. Currently, this is supported only for hot spares,
3132 * cache, and log devices.
34dc7c2f
BB
3133 */
3134int
3135zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3136{
13fe0198 3137 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3138 char msg[1024];
3139 nvlist_t *tgt;
428870ff 3140 boolean_t avail_spare, l2cache, islog;
34dc7c2f 3141 libzfs_handle_t *hdl = zhp->zpool_hdl;
428870ff 3142 uint64_t version;
34dc7c2f
BB
3143
3144 (void) snprintf(msg, sizeof (msg),
3145 dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3146
3147 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
b128c09f 3148 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
428870ff 3149 &islog)) == 0)
34dc7c2f 3150 return (zfs_error(hdl, EZFS_NODEVICE, msg));
428870ff
BB
3151 /*
3152 * XXX - this should just go away.
3153 */
3154 if (!avail_spare && !l2cache && !islog) {
34dc7c2f 3155 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
d1502e9e 3156 "only inactive hot spares, cache, "
428870ff 3157 "or log devices can be removed"));
34dc7c2f
BB
3158 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3159 }
3160
428870ff
BB
3161 version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3162 if (islog && version < SPA_VERSION_HOLES) {
3163 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3164 "pool must be upgrade to support log removal"));
3165 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3166 }
3167
34dc7c2f
BB
3168 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3169
3170 if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3171 return (0);
3172
3173 return (zpool_standard_error(hdl, errno, msg));
3174}
3175
3176/*
3177 * Clear the errors for the pool, or the particular device if specified.
3178 */
3179int
428870ff 3180zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
34dc7c2f 3181{
13fe0198 3182 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3183 char msg[1024];
3184 nvlist_t *tgt;
428870ff 3185 zpool_rewind_policy_t policy;
34dc7c2f
BB
3186 boolean_t avail_spare, l2cache;
3187 libzfs_handle_t *hdl = zhp->zpool_hdl;
428870ff 3188 nvlist_t *nvi = NULL;
572e2857 3189 int error;
34dc7c2f
BB
3190
3191 if (path)
3192 (void) snprintf(msg, sizeof (msg),
3193 dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3194 path);
3195 else
3196 (void) snprintf(msg, sizeof (msg),
3197 dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3198 zhp->zpool_name);
3199
3200 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3201 if (path) {
3202 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
b128c09f 3203 &l2cache, NULL)) == 0)
34dc7c2f
BB
3204 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3205
3206 /*
3207 * Don't allow error clearing for hot spares. Do allow
3208 * error clearing for l2cache devices.
3209 */
3210 if (avail_spare)
3211 return (zfs_error(hdl, EZFS_ISSPARE, msg));
3212
3213 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3214 &zc.zc_guid) == 0);
3215 }
3216
428870ff
BB
3217 zpool_get_rewind_policy(rewindnvl, &policy);
3218 zc.zc_cookie = policy.zrp_request;
3219
572e2857 3220 if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
428870ff
BB
3221 return (-1);
3222
572e2857 3223 if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
428870ff
BB
3224 return (-1);
3225
572e2857
BB
3226 while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3227 errno == ENOMEM) {
3228 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3229 zcmd_free_nvlists(&zc);
3230 return (-1);
3231 }
3232 }
3233
3234 if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
428870ff
BB
3235 errno != EPERM && errno != EACCES)) {
3236 if (policy.zrp_request &
3237 (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3238 (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3239 zpool_rewind_exclaim(hdl, zc.zc_name,
3240 ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
3241 nvi);
3242 nvlist_free(nvi);
3243 }
3244 zcmd_free_nvlists(&zc);
34dc7c2f 3245 return (0);
428870ff 3246 }
34dc7c2f 3247
428870ff 3248 zcmd_free_nvlists(&zc);
34dc7c2f
BB
3249 return (zpool_standard_error(hdl, errno, msg));
3250}
3251
3252/*
3253 * Similar to zpool_clear(), but takes a GUID (used by fmd).
3254 */
3255int
3256zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3257{
13fe0198 3258 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3259 char msg[1024];
3260 libzfs_handle_t *hdl = zhp->zpool_hdl;
3261
3262 (void) snprintf(msg, sizeof (msg),
3263 dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
d1d7e268 3264 (u_longlong_t)guid);
34dc7c2f
BB
3265
3266 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3267 zc.zc_guid = guid;
428870ff 3268 zc.zc_cookie = ZPOOL_NO_REWIND;
34dc7c2f
BB
3269
3270 if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3271 return (0);
3272
3273 return (zpool_standard_error(hdl, errno, msg));
3274}
3275
3541dc6d
GA
3276/*
3277 * Change the GUID for a pool.
3278 */
3279int
3280zpool_reguid(zpool_handle_t *zhp)
3281{
3282 char msg[1024];
3283 libzfs_handle_t *hdl = zhp->zpool_hdl;
13fe0198 3284 zfs_cmd_t zc = {"\0"};
3541dc6d
GA
3285
3286 (void) snprintf(msg, sizeof (msg),
3287 dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3288
3289 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3290 if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3291 return (0);
3292
3293 return (zpool_standard_error(hdl, errno, msg));
3294}
3295
1bd201e7
CS
3296/*
3297 * Reopen the pool.
3298 */
3299int
3300zpool_reopen(zpool_handle_t *zhp)
3301{
13fe0198 3302 zfs_cmd_t zc = {"\0"};
1bd201e7
CS
3303 char msg[1024];
3304 libzfs_handle_t *hdl = zhp->zpool_hdl;
3305
3306 (void) snprintf(msg, sizeof (msg),
3307 dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3308 zhp->zpool_name);
3309
3310 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3311 if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3312 return (0);
3313 return (zpool_standard_error(hdl, errno, msg));
3314}
3315
39fc0cb5 3316#if defined(__sun__) || defined(__sun)
34dc7c2f
BB
3317/*
3318 * Convert from a devid string to a path.
3319 */
3320static char *
3321devid_to_path(char *devid_str)
3322{
3323 ddi_devid_t devid;
3324 char *minor;
3325 char *path;
3326 devid_nmlist_t *list = NULL;
3327 int ret;
3328
3329 if (devid_str_decode(devid_str, &devid, &minor) != 0)
3330 return (NULL);
3331
3332 ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3333
3334 devid_str_free(minor);
3335 devid_free(devid);
3336
3337 if (ret != 0)
3338 return (NULL);
3339
0fdd8d64
MT
3340 /*
3341 * In a case the strdup() fails, we will just return NULL below.
3342 */
3343 path = strdup(list[0].devname);
34dc7c2f
BB
3344
3345 devid_free_nmlist(list);
3346
3347 return (path);
3348}
3349
3350/*
3351 * Convert from a path to a devid string.
3352 */
3353static char *
3354path_to_devid(const char *path)
3355{
3356 int fd;
3357 ddi_devid_t devid;
3358 char *minor, *ret;
3359
3360 if ((fd = open(path, O_RDONLY)) < 0)
3361 return (NULL);
3362
3363 minor = NULL;
3364 ret = NULL;
3365 if (devid_get(fd, &devid) == 0) {
3366 if (devid_get_minor_name(fd, &minor) == 0)
3367 ret = devid_str_encode(devid, minor);
3368 if (minor != NULL)
3369 devid_str_free(minor);
3370 devid_free(devid);
3371 }
3372 (void) close(fd);
3373
3374 return (ret);
3375}
3376
3377/*
3378 * Issue the necessary ioctl() to update the stored path value for the vdev. We
3379 * ignore any failure here, since a common case is for an unprivileged user to
3380 * type 'zpool status', and we'll display the correct information anyway.
3381 */
3382static void
3383set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3384{
13fe0198 3385 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3386
3387 (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3388 (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3389 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3390 &zc.zc_guid) == 0);
3391
3392 (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3393}
39fc0cb5 3394#endif /* sun */
34dc7c2f 3395
83c62c93
NB
3396/*
3397 * Remove partition suffix from a vdev path. Partition suffixes may take three
3398 * forms: "-partX", "pX", or "X", where X is a string of digits. The second
3399 * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
3400 * third case only occurs when preceded by a string matching the regular
541da993 3401 * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
d02ca379
DB
3402 *
3403 * caller must free the returned string
83c62c93 3404 */
d02ca379 3405char *
6078881a 3406zfs_strip_partition(char *path)
83c62c93 3407{
6078881a 3408 char *tmp = strdup(path);
83c62c93 3409 char *part = NULL, *d = NULL;
6078881a
TH
3410 if (!tmp)
3411 return (NULL);
83c62c93
NB
3412
3413 if ((part = strstr(tmp, "-part")) && part != tmp) {
3414 d = part + 5;
3415 } else if ((part = strrchr(tmp, 'p')) &&
3416 part > tmp + 1 && isdigit(*(part-1))) {
3417 d = part + 1;
541da993
RY
3418 } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
3419 tmp[1] == 'd') {
02730c33 3420 for (d = &tmp[2]; isalpha(*d); part = ++d) { }
541da993 3421 } else if (strncmp("xvd", tmp, 3) == 0) {
02730c33 3422 for (d = &tmp[3]; isalpha(*d); part = ++d) { }
83c62c93
NB
3423 }
3424 if (part && d && *d != '\0') {
02730c33 3425 for (; isdigit(*d); d++) { }
83c62c93
NB
3426 if (*d == '\0')
3427 *part = '\0';
3428 }
6078881a 3429
83c62c93
NB
3430 return (tmp);
3431}
3432
8720e9e7
TH
/*
 * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
 *
 * path: /dev/sda1
 * returns: /dev/sda
 *
 * Returned string must be freed.
 */
char *
zfs_strip_partition_path(char *path)
{
	char *newpath = strdup(path);
	char *sd_offset;
	char *new_sd;

	if (!newpath)
		return (NULL);

	/*
	 * Point to the "sda1" part of "/dev/sda1".  Guard against a path
	 * with no '/' at all: strrchr() returns NULL there and the old
	 * unconditional "+ 1" was undefined behavior.
	 */
	sd_offset = strrchr(newpath, '/');
	if (sd_offset == NULL)
		sd_offset = newpath;	/* bare device name, no directory */
	else
		sd_offset++;

	/* Get our new name "sda" */
	new_sd = zfs_strip_partition(sd_offset);
	if (!new_sd) {
		free(newpath);
		return (NULL);
	}

	/* Paste the "sda" where "sda1" was */
	strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);

	/* Free temporary "sda" */
	free(new_sd);

	return (newpath);
}
3469
858219cc
NB
3470#define PATH_BUF_LEN 64
3471
34dc7c2f
BB
3472/*
3473 * Given a vdev, return the name to display in iostat. If the vdev has a path,
3474 * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3475 * We also check if this is a whole disk, in which case we strip off the
3476 * trailing 's0' slice name.
3477 *
3478 * This routine is also responsible for identifying when disks have been
3479 * reconfigured in a new location. The kernel will have opened the device by
3480 * devid, but the path will still refer to the old location. To catch this, we
3481 * first do a path -> devid translation (which is fast for the common case). If
3482 * the devid matches, we're done. If not, we do a reverse devid -> path
3483 * translation and issue the appropriate ioctl() to update the path of the vdev.
3484 * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3485 * of these checks.
3486 */
3487char *
428870ff 3488zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
d2f3e292 3489 int name_flags)
34dc7c2f 3490{
39fc0cb5 3491 char *path, *type, *env;
34dc7c2f 3492 uint64_t value;
858219cc 3493 char buf[PATH_BUF_LEN];
fc24f7c8 3494 char tmpbuf[PATH_BUF_LEN];
34dc7c2f 3495
d2f3e292
RY
3496 env = getenv("ZPOOL_VDEV_NAME_PATH");
3497 if (env && (strtoul(env, NULL, 0) > 0 ||
3498 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3499 name_flags |= VDEV_NAME_PATH;
3500
3501 env = getenv("ZPOOL_VDEV_NAME_GUID");
3502 if (env && (strtoul(env, NULL, 0) > 0 ||
3503 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3504 name_flags |= VDEV_NAME_GUID;
3505
3506 env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
3507 if (env && (strtoul(env, NULL, 0) > 0 ||
3508 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3509 name_flags |= VDEV_NAME_FOLLOW_LINKS;
3510
3511 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
3512 name_flags & VDEV_NAME_GUID) {
aecdc706 3513 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
d2f3e292 3514 (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
34dc7c2f
BB
3515 path = buf;
3516 } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
39fc0cb5
DB
3517#if defined(__sun__) || defined(__sun)
3518 /*
3519 * Live VDEV path updates to a kernel VDEV during a
3520 * zpool_vdev_name lookup are not supported on Linux.
3521 */
3522 char *devid;
3523 vdev_stat_t *vs;
3524 uint_t vsc;
3525
34dc7c2f
BB
3526 /*
3527 * If the device is dead (faulted, offline, etc) then don't
3528 * bother opening it. Otherwise we may be forcing the user to
3529 * open a misbehaving device, which can have undesirable
3530 * effects.
3531 */
428870ff 3532 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
34dc7c2f
BB
3533 (uint64_t **)&vs, &vsc) != 0 ||
3534 vs->vs_state >= VDEV_STATE_DEGRADED) &&
3535 zhp != NULL &&
3536 nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3537 /*
3538 * Determine if the current path is correct.
3539 */
3540 char *newdevid = path_to_devid(path);
3541
3542 if (newdevid == NULL ||
3543 strcmp(devid, newdevid) != 0) {
3544 char *newpath;
3545
3546 if ((newpath = devid_to_path(devid)) != NULL) {
3547 /*
3548 * Update the path appropriately.
3549 */
3550 set_path(zhp, nv, newpath);
3551 if (nvlist_add_string(nv,
3552 ZPOOL_CONFIG_PATH, newpath) == 0)
3553 verify(nvlist_lookup_string(nv,
3554 ZPOOL_CONFIG_PATH,
3555 &path) == 0);
3556 free(newpath);
3557 }
3558 }
3559
3560 if (newdevid)
3561 devid_str_free(newdevid);
3562 }
39fc0cb5 3563#endif /* sun */
34dc7c2f 3564
d2f3e292
RY
3565 if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
3566 char *rp = realpath(path, NULL);
3567 if (rp) {
3568 strlcpy(buf, rp, sizeof (buf));
3569 path = buf;
3570 free(rp);
3571 }
3572 }
3573
d603ed6c
BB
3574 /*
3575 * For a block device only use the name.
3576 */
3577 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
d2f3e292
RY
3578 if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
3579 !(name_flags & VDEV_NAME_PATH)) {
d603ed6c
BB
3580 path = strrchr(path, '/');
3581 path++;
3582 }
34dc7c2f 3583
d603ed6c 3584 /*
83c62c93 3585 * Remove the partition from the path it this is a whole disk.
d603ed6c 3586 */
d2f3e292
RY
3587 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
3588 == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
6078881a 3589 return (zfs_strip_partition(path));
34dc7c2f
BB
3590 }
3591 } else {
3592 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
3593
3594 /*
3595 * If it's a raidz device, we need to stick in the parity level.
3596 */
3597 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3598 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3599 &value) == 0);
fc24f7c8 3600 (void) snprintf(buf, sizeof (buf), "%s%llu", path,
34dc7c2f 3601 (u_longlong_t)value);
fc24f7c8 3602 path = buf;
34dc7c2f 3603 }
428870ff
BB
3604
3605 /*
3606 * We identify each top-level vdev by using a <type-id>
3607 * naming convention.
3608 */
d2f3e292 3609 if (name_flags & VDEV_NAME_TYPE_ID) {
428870ff 3610 uint64_t id;
428870ff
BB
3611 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3612 &id) == 0);
fc24f7c8
MM
3613 (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
3614 path, (u_longlong_t)id);
3615 path = tmpbuf;
428870ff 3616 }
34dc7c2f
BB
3617 }
3618
3619 return (zfs_strdup(hdl, path));
3620}
3621
3622static int
fcff0f35 3623zbookmark_mem_compare(const void *a, const void *b)
34dc7c2f 3624{
5dbd68a3 3625 return (memcmp(a, b, sizeof (zbookmark_phys_t)));
34dc7c2f
BB
3626}
3627
3628/*
3629 * Retrieve the persistent error log, uniquify the members, and return to the
3630 * caller.
3631 */
3632int
3633zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3634{
13fe0198 3635 zfs_cmd_t zc = {"\0"};
34dc7c2f 3636 uint64_t count;
5dbd68a3 3637 zbookmark_phys_t *zb = NULL;
34dc7c2f
BB
3638 int i;
3639
3640 /*
3641 * Retrieve the raw error list from the kernel. If the number of errors
3642 * has increased, allocate more space and continue until we get the
3643 * entire list.
3644 */
3645 verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3646 &count) == 0);
3647 if (count == 0)
3648 return (0);
3649 if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
5dbd68a3 3650 count * sizeof (zbookmark_phys_t))) == (uintptr_t)NULL)
34dc7c2f
BB
3651 return (-1);
3652 zc.zc_nvlist_dst_size = count;
3653 (void) strcpy(zc.zc_name, zhp->zpool_name);
3654 for (;;) {
3655 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3656 &zc) != 0) {
3657 free((void *)(uintptr_t)zc.zc_nvlist_dst);
3658 if (errno == ENOMEM) {
5dbd68a3
MA
3659 void *dst;
3660
34dc7c2f 3661 count = zc.zc_nvlist_dst_size;
5dbd68a3
MA
3662 dst = zfs_alloc(zhp->zpool_hdl, count *
3663 sizeof (zbookmark_phys_t));
3664 if (dst == NULL)
34dc7c2f 3665 return (-1);
5dbd68a3 3666 zc.zc_nvlist_dst = (uintptr_t)dst;
34dc7c2f
BB
3667 } else {
3668 return (-1);
3669 }
3670 } else {
3671 break;
3672 }
3673 }
3674
3675 /*
3676 * Sort the resulting bookmarks. This is a little confusing due to the
3677 * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last
3678 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
3679 * _not_ copied as part of the process. So we point the start of our
3680 * array appropriate and decrement the total number of elements.
3681 */
5dbd68a3 3682 zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
34dc7c2f
BB
3683 zc.zc_nvlist_dst_size;
3684 count -= zc.zc_nvlist_dst_size;
3685
fcff0f35 3686 qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
34dc7c2f
BB
3687
3688 verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3689
3690 /*
3691 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3692 */
3693 for (i = 0; i < count; i++) {
3694 nvlist_t *nv;
3695
3696 /* ignoring zb_blkid and zb_level for now */
3697 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3698 zb[i-1].zb_object == zb[i].zb_object)
3699 continue;
3700
3701 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3702 goto nomem;
3703 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3704 zb[i].zb_objset) != 0) {
3705 nvlist_free(nv);
3706 goto nomem;
3707 }
3708 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3709 zb[i].zb_object) != 0) {
3710 nvlist_free(nv);
3711 goto nomem;
3712 }
3713 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3714 nvlist_free(nv);
3715 goto nomem;
3716 }
3717 nvlist_free(nv);
3718 }
3719
3720 free((void *)(uintptr_t)zc.zc_nvlist_dst);
3721 return (0);
3722
3723nomem:
3724 free((void *)(uintptr_t)zc.zc_nvlist_dst);
3725 return (no_memory(zhp->zpool_hdl));
3726}
3727
3728/*
3729 * Upgrade a ZFS pool to the latest on-disk version.
3730 */
3731int
3732zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3733{
13fe0198 3734 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3735 libzfs_handle_t *hdl = zhp->zpool_hdl;
3736
3737 (void) strcpy(zc.zc_name, zhp->zpool_name);
3738 zc.zc_cookie = new_version;
3739
3740 if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3741 return (zpool_standard_error_fmt(hdl, errno,
3742 dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3743 zhp->zpool_name));
3744 return (0);
3745}
3746
/*
 * Build a single space-separated command line ("progname arg1 arg2 ...")
 * into 'string' (at most 'len' bytes), for recording in the pool history.
 */
void
zfs_save_arguments(int argc, char **argv, char *string, int len)
{
	int argno;

	(void) strlcpy(string, basename(argv[0]), len);
	for (argno = 1; argno < argc; argno++) {
		(void) strlcat(string, " ", len);
		(void) strlcat(string, argv[argno], len);
	}
}
3758
34dc7c2f 3759int
6f1ffb06
MA
3760zpool_log_history(libzfs_handle_t *hdl, const char *message)
3761{
13fe0198 3762 zfs_cmd_t zc = {"\0"};
6f1ffb06
MA
3763 nvlist_t *args;
3764 int err;
3765
3766 args = fnvlist_alloc();
3767 fnvlist_add_string(args, "message", message);
3768 err = zcmd_write_src_nvlist(hdl, &zc, args);
3769 if (err == 0)
3770 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
3771 nvlist_free(args);
3772 zcmd_free_nvlists(&zc);
3773 return (err);
34dc7c2f
BB
3774}
3775
3776/*
3777 * Perform ioctl to get some command history of a pool.
3778 *
3779 * 'buf' is the buffer to fill up to 'len' bytes. 'off' is the
3780 * logical offset of the history buffer to start reading from.
3781 *
3782 * Upon return, 'off' is the next logical offset to read from and
3783 * 'len' is the actual amount of bytes read into 'buf'.
3784 */
3785static int
3786get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3787{
13fe0198 3788 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
3789 libzfs_handle_t *hdl = zhp->zpool_hdl;
3790
3791 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3792
3793 zc.zc_history = (uint64_t)(uintptr_t)buf;
3794 zc.zc_history_len = *len;
3795 zc.zc_history_offset = *off;
3796
3797 if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3798 switch (errno) {
3799 case EPERM:
3800 return (zfs_error_fmt(hdl, EZFS_PERM,
3801 dgettext(TEXT_DOMAIN,
3802 "cannot show history for pool '%s'"),
3803 zhp->zpool_name));
3804 case ENOENT:
3805 return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3806 dgettext(TEXT_DOMAIN, "cannot get history for pool "
3807 "'%s'"), zhp->zpool_name));
3808 case ENOTSUP:
3809 return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3810 dgettext(TEXT_DOMAIN, "cannot get history for pool "
3811 "'%s', pool must be upgraded"), zhp->zpool_name));
3812 default:
3813 return (zpool_standard_error_fmt(hdl, errno,
3814 dgettext(TEXT_DOMAIN,
3815 "cannot get history for '%s'"), zhp->zpool_name));
3816 }
3817 }
3818
3819 *len = zc.zc_history_len;
3820 *off = zc.zc_history_offset;
3821
3822 return (0);
3823}
3824
3825/*
3826 * Process the buffer of nvlists, unpacking and storing each nvlist record
3827 * into 'records'. 'leftover' is set to the number of bytes that weren't
3828 * processed as there wasn't a complete record.
3829 */
428870ff 3830int
34dc7c2f
BB
3831zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3832 nvlist_t ***records, uint_t *numrecords)
3833{
3834 uint64_t reclen;
3835 nvlist_t *nv;
3836 int i;
3837
3838 while (bytes_read > sizeof (reclen)) {
3839
3840 /* get length of packed record (stored as little endian) */
3841 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3842 reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3843
3844 if (bytes_read < sizeof (reclen) + reclen)
3845 break;
3846
3847 /* unpack record */
3848 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3849 return (ENOMEM);
3850 bytes_read -= sizeof (reclen) + reclen;
3851 buf += sizeof (reclen) + reclen;
3852
3853 /* add record to nvlist array */
3854 (*numrecords)++;
3855 if (ISP2(*numrecords + 1)) {
3856 *records = realloc(*records,
3857 *numrecords * 2 * sizeof (nvlist_t *));
3858 }
3859 (*records)[*numrecords - 1] = nv;
3860 }
3861
3862 *leftover = bytes_read;
3863 return (0);
3864}
3865
34dc7c2f
BB
3866/*
3867 * Retrieve the command history of a pool.
3868 */
3869int
3870zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
3871{
1f6f97f3
MA
3872 char *buf;
3873 int buflen = 128 * 1024;
34dc7c2f
BB
3874 uint64_t off = 0;
3875 nvlist_t **records = NULL;
3876 uint_t numrecords = 0;
3877 int err, i;
3878
1f6f97f3
MA
3879 buf = malloc(buflen);
3880 if (buf == NULL)
3881 return (ENOMEM);
34dc7c2f 3882 do {
1f6f97f3 3883 uint64_t bytes_read = buflen;
34dc7c2f
BB
3884 uint64_t leftover;
3885
3886 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
3887 break;
3888
3889 /* if nothing else was read in, we're at EOF, just return */
3890 if (!bytes_read)
3891 break;
3892
3893 if ((err = zpool_history_unpack(buf, bytes_read,
3894 &leftover, &records, &numrecords)) != 0)
3895 break;
3896 off -= leftover;
1f6f97f3
MA
3897 if (leftover == bytes_read) {
3898 /*
3899 * no progress made, because buffer is not big enough
3900 * to hold this record; resize and retry.
3901 */
3902 buflen *= 2;
3903 free(buf);
3904 buf = malloc(buflen);
3905 if (buf == NULL)
3906 return (ENOMEM);
3907 }
34dc7c2f
BB
3908
3909 /* CONSTCOND */
3910 } while (1);
3911
1f6f97f3
MA
3912 free(buf);
3913
34dc7c2f
BB
3914 if (!err) {
3915 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
3916 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
3917 records, numrecords) == 0);
3918 }
3919 for (i = 0; i < numrecords; i++)
3920 nvlist_free(records[i]);
3921 free(records);
3922
3923 return (err);
3924}
3925
26685276 3926/*
9b101a73
BB
3927 * Retrieve the next event given the passed 'zevent_fd' file descriptor.
3928 * If there is a new event available 'nvp' will contain a newly allocated
3929 * nvlist and 'dropped' will be set to the number of missed events since
3930 * the last call to this function. When 'nvp' is set to NULL it indicates
3931 * no new events are available. In either case the function returns 0 and
3932 * it is up to the caller to free 'nvp'. In the case of a fatal error the
3933 * function will return a non-zero value. When the function is called in
8c7aa0cf
CD
3934 * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
3935 * it will not return until a new event is available.
26685276
BB
3936 */
3937int
3938zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
8c7aa0cf 3939 int *dropped, unsigned flags, int zevent_fd)
26685276 3940{
13fe0198 3941 zfs_cmd_t zc = {"\0"};
26685276
BB
3942 int error = 0;
3943
3944 *nvp = NULL;
3945 *dropped = 0;
9b101a73 3946 zc.zc_cleanup_fd = zevent_fd;
26685276 3947
8c7aa0cf 3948 if (flags & ZEVENT_NONBLOCK)
26685276
BB
3949 zc.zc_guid = ZEVENT_NONBLOCK;
3950
3951 if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
3952 return (-1);
3953
3954retry:
3955 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
3956 switch (errno) {
3957 case ESHUTDOWN:
3958 error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
3959 dgettext(TEXT_DOMAIN, "zfs shutdown"));
3960 goto out;
3961 case ENOENT:
3962 /* Blocking error case should not occur */
8c7aa0cf 3963 if (!(flags & ZEVENT_NONBLOCK))
26685276
BB
3964 error = zpool_standard_error_fmt(hdl, errno,
3965 dgettext(TEXT_DOMAIN, "cannot get event"));
3966
3967 goto out;
3968 case ENOMEM:
3969 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3970 error = zfs_error_fmt(hdl, EZFS_NOMEM,
3971 dgettext(TEXT_DOMAIN, "cannot get event"));
3972 goto out;
3973 } else {
3974 goto retry;
3975 }
3976 default:
3977 error = zpool_standard_error_fmt(hdl, errno,
3978 dgettext(TEXT_DOMAIN, "cannot get event"));
3979 goto out;
3980 }
3981 }
3982
3983 error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
3984 if (error != 0)
3985 goto out;
3986
3987 *dropped = (int)zc.zc_cookie;
3988out:
3989 zcmd_free_nvlists(&zc);
3990
3991 return (error);
3992}
3993
3994/*
3995 * Clear all events.
3996 */
3997int
3998zpool_events_clear(libzfs_handle_t *hdl, int *count)
3999{
13fe0198 4000 zfs_cmd_t zc = {"\0"};
26685276
BB
4001 char msg[1024];
4002
4003 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
4004 "cannot clear events"));
4005
4006 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
4007 return (zpool_standard_error_fmt(hdl, errno, msg));
4008
4009 if (count != NULL)
4010 *count = (int)zc.zc_cookie; /* # of events cleared */
4011
4012 return (0);
4013}
4014
75e3ff58
BB
4015/*
4016 * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
4017 * the passed zevent_fd file handle. On success zero is returned,
4018 * otherwise -1 is returned and hdl->libzfs_error is set to the errno.
4019 */
4020int
4021zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
4022{
4023 zfs_cmd_t zc = {"\0"};
4024 int error = 0;
4025
4026 zc.zc_guid = eid;
4027 zc.zc_cleanup_fd = zevent_fd;
4028
4029 if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) {
4030 switch (errno) {
4031 case ENOENT:
4032 error = zfs_error_fmt(hdl, EZFS_NOENT,
4033 dgettext(TEXT_DOMAIN, "cannot get event"));
4034 break;
4035
4036 case ENOMEM:
4037 error = zfs_error_fmt(hdl, EZFS_NOMEM,
4038 dgettext(TEXT_DOMAIN, "cannot get event"));
4039 break;
4040
4041 default:
4042 error = zpool_standard_error_fmt(hdl, errno,
4043 dgettext(TEXT_DOMAIN, "cannot get event"));
4044 break;
4045 }
4046 }
4047
4048 return (error);
4049}
4050
34dc7c2f
BB
4051void
4052zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
4053 char *pathname, size_t len)
4054{
13fe0198 4055 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
4056 boolean_t mounted = B_FALSE;
4057 char *mntpnt = NULL;
eca7b760 4058 char dsname[ZFS_MAX_DATASET_NAME_LEN];
34dc7c2f
BB
4059
4060 if (dsobj == 0) {
4061 /* special case for the MOS */
d1d7e268
MK
4062 (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
4063 (longlong_t)obj);
34dc7c2f
BB
4064 return;
4065 }
4066
4067 /* get the dataset's name */
4068 (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4069 zc.zc_obj = dsobj;
4070 if (ioctl(zhp->zpool_hdl->libzfs_fd,
4071 ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
4072 /* just write out a path of two object numbers */
4073 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
b8864a23 4074 (longlong_t)dsobj, (longlong_t)obj);
34dc7c2f
BB
4075 return;
4076 }
4077 (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
4078
4079 /* find out if the dataset is mounted */
4080 mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
4081
4082 /* get the corrupted object's path */
4083 (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
4084 zc.zc_obj = obj;
4085 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
4086 &zc) == 0) {
4087 if (mounted) {
4088 (void) snprintf(pathname, len, "%s%s", mntpnt,
4089 zc.zc_value);
4090 } else {
4091 (void) snprintf(pathname, len, "%s:%s",
4092 dsname, zc.zc_value);
4093 }
4094 } else {
d1d7e268
MK
4095 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
4096 (longlong_t)obj);
34dc7c2f
BB
4097 }
4098 free(mntpnt);
4099}
4100
b128c09f
BB
4101/*
4102 * Read the EFI label from the config, if a label does not exist then
4103 * pass back the error to the caller. If the caller has passed a non-NULL
4104 * diskaddr argument then we set it to the starting address of the EFI
4105 * partition.
4106 */
4107static int
4108read_efi_label(nvlist_t *config, diskaddr_t *sb)
4109{
4110 char *path;
4111 int fd;
4112 char diskname[MAXPATHLEN];
4113 int err = -1;
4114
4115 if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4116 return (err);
4117
eac47204 4118 (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
b128c09f 4119 strrchr(path, '/'));
d603ed6c 4120 if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
b128c09f
BB
4121 struct dk_gpt *vtoc;
4122
4123 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4124 if (sb != NULL)
4125 *sb = vtoc->efi_parts[0].p_start;
4126 efi_free(vtoc);
4127 }
4128 (void) close(fd);
4129 }
4130 return (err);
4131}
4132
34dc7c2f
BB
4133/*
4134 * determine where a partition starts on a disk in the current
4135 * configuration
4136 */
4137static diskaddr_t
4138find_start_block(nvlist_t *config)
4139{
4140 nvlist_t **child;
4141 uint_t c, children;
34dc7c2f 4142 diskaddr_t sb = MAXOFFSET_T;
34dc7c2f
BB
4143 uint64_t wholedisk;
4144
4145 if (nvlist_lookup_nvlist_array(config,
4146 ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4147 if (nvlist_lookup_uint64(config,
4148 ZPOOL_CONFIG_WHOLE_DISK,
4149 &wholedisk) != 0 || !wholedisk) {
4150 return (MAXOFFSET_T);
4151 }
b128c09f
BB
4152 if (read_efi_label(config, &sb) < 0)
4153 sb = MAXOFFSET_T;
34dc7c2f
BB
4154 return (sb);
4155 }
4156
4157 for (c = 0; c < children; c++) {
4158 sb = find_start_block(child[c]);
4159 if (sb != MAXOFFSET_T) {
4160 return (sb);
4161 }
4162 }
4163 return (MAXOFFSET_T);
4164}
4165
2d82ea8b 4166static int
d603ed6c
BB
4167zpool_label_disk_check(char *path)
4168{
4169 struct dk_gpt *vtoc;
4170 int fd, err;
4171
4172 if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
d1d7e268 4173 return (errno);
d603ed6c
BB
4174
4175 if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4176 (void) close(fd);
d1d7e268 4177 return (err);
d603ed6c
BB
4178 }
4179
4180 if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4181 efi_free(vtoc);
4182 (void) close(fd);
d1d7e268 4183 return (EIDRM);
d603ed6c
BB
4184 }
4185
4186 efi_free(vtoc);
4187 (void) close(fd);
d1d7e268 4188 return (0);
d603ed6c
BB
4189}
4190
5b4136bd
BB
4191/*
4192 * Generate a unique partition name for the ZFS member. Partitions must
4193 * have unique names to ensure udev will be able to create symlinks under
4194 * /dev/disk/by-partlabel/ for all pool members. The partition names are
4195 * of the form <pool>-<unique-id>.
4196 */
4197static void
4198zpool_label_name(char *label_name, int label_size)
4199{
4200 uint64_t id = 0;
4201 int fd;
4202
4203 fd = open("/dev/urandom", O_RDONLY);
06cf4d98 4204 if (fd >= 0) {
5b4136bd
BB
4205 if (read(fd, &id, sizeof (id)) != sizeof (id))
4206 id = 0;
4207
4208 close(fd);
4209 }
4210
4211 if (id == 0)
4212 id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
4213
02730c33 4214 snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
5b4136bd
BB
4215}
4216
34dc7c2f
BB
4217/*
4218 * Label an individual disk. The name provided is the short name,
4219 * stripped of any leading /dev path.
4220 */
4221int
4222zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4223{
4224 char path[MAXPATHLEN];
4225 struct dk_gpt *vtoc;
d603ed6c 4226 int rval, fd;
34dc7c2f
BB
4227 size_t resv = EFI_MIN_RESV_SIZE;
4228 uint64_t slice_size;
4229 diskaddr_t start_block;
4230 char errbuf[1024];
4231
4232 /* prepare an error message just in case */
4233 (void) snprintf(errbuf, sizeof (errbuf),
4234 dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4235
4236 if (zhp) {
4237 nvlist_t *nvroot;
4238
c372b36e 4239#if defined(__sun__) || defined(__sun)
1bd201e7 4240 if (zpool_is_bootable(zhp)) {
b128c09f
BB
4241 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4242 "EFI labeled devices are not supported on root "
4243 "pools."));
4244 return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
4245 }
c372b36e 4246#endif
b128c09f 4247
34dc7c2f
BB
4248 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4249 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4250
4251 if (zhp->zpool_start_block == 0)
4252 start_block = find_start_block(nvroot);
4253 else
4254 start_block = zhp->zpool_start_block;
4255 zhp->zpool_start_block = start_block;
4256 } else {
4257 /* new pool */
4258 start_block = NEW_START_BLOCK;
4259 }
4260
eac47204 4261 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
34dc7c2f 4262
d02ca379 4263 if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
34dc7c2f
BB
4264 /*
4265 * This shouldn't happen. We've long since verified that this
4266 * is a valid device.
4267 */
109491a8
RL
4268 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4269 "label '%s': unable to open device: %d"), path, errno);
34dc7c2f
BB
4270 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4271 }
4272
4273 if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4274 /*
4275 * The only way this can fail is if we run out of memory, or we
4276 * were unable to read the disk's capacity
4277 */
4278 if (errno == ENOMEM)
4279 (void) no_memory(hdl);
4280
4281 (void) close(fd);
109491a8
RL
4282 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4283 "label '%s': unable to read disk capacity"), path);
34dc7c2f
BB
4284
4285 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4286 }
4287
4288 slice_size = vtoc->efi_last_u_lba + 1;
4289 slice_size -= EFI_MIN_RESV_SIZE;
4290 if (start_block == MAXOFFSET_T)
4291 start_block = NEW_START_BLOCK;
4292 slice_size -= start_block;
613d88ed 4293 slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
34dc7c2f
BB
4294
4295 vtoc->efi_parts[0].p_start = start_block;
4296 vtoc->efi_parts[0].p_size = slice_size;
4297
4298 /*
4299 * Why we use V_USR: V_BACKUP confuses users, and is considered
4300 * disposable by some EFI utilities (since EFI doesn't have a backup
4301 * slice). V_UNASSIGNED is supposed to be used only for zero size
4302 * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT,
4303 * etc. were all pretty specific. V_USR is as close to reality as we
4304 * can get, in the absence of V_OTHER.
4305 */
4306 vtoc->efi_parts[0].p_tag = V_USR;
5b4136bd 4307 zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
34dc7c2f
BB
4308
4309 vtoc->efi_parts[8].p_start = slice_size + start_block;
4310 vtoc->efi_parts[8].p_size = resv;
4311 vtoc->efi_parts[8].p_tag = V_RESERVED;
4312
b5a28807 4313 if ((rval = efi_write(fd, vtoc)) != 0 || (rval = efi_rescan(fd)) != 0) {
34dc7c2f
BB
4314 /*
4315 * Some block drivers (like pcata) may not support EFI
4316 * GPT labels. Print out a helpful error message dir-
4317 * ecting the user to manually label the disk and give
4318 * a specific slice.
4319 */
4320 (void) close(fd);
4321 efi_free(vtoc);
4322
d603ed6c
BB
4323 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4324 "parted(8) and then provide a specific slice: %d"), rval);
34dc7c2f
BB
4325 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4326 }
4327
4328 (void) close(fd);
4329 efi_free(vtoc);
34dc7c2f 4330
eac47204
BB
4331 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4332 (void) zfs_append_partition(path, MAXPATHLEN);
4333
2d82ea8b
BB
4334 /* Wait to udev to signal use the device has settled. */
4335 rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
d603ed6c
BB
4336 if (rval) {
4337 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4338 "detect device partitions on '%s': %d"), path, rval);
4339 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
34dc7c2f
BB
4340 }
4341
d603ed6c
BB
4342 /* We can't be to paranoid. Read the label back and verify it. */
4343 (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4344 rval = zpool_label_disk_check(path);
4345 if (rval) {
4346 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4347 "EFI label on '%s' is damaged. Ensure\nthis device "
4348 "is not in in use, and is functioning properly: %d"),
4349 path, rval);
4350 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
34dc7c2f 4351 }
34dc7c2f 4352
d1d7e268 4353 return (0);
34dc7c2f 4354}
6078881a 4355
6078881a
TH
4356/*
4357 * Allocate and return the underlying device name for a device mapper device.
4358 * If a device mapper device maps to multiple devices, return the first device.
4359 *
8720e9e7
TH
4360 * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
4361 * DM device (like /dev/disk/by-vdev/A0) are also allowed.
6078881a
TH
4362 *
4363 * Returns device name, or NULL on error or no match. If dm_name is not a DM
4364 * device then return NULL.
4365 *
4366 * NOTE: The returned name string must be *freed*.
4367 */
8720e9e7
TH
char *
dm_get_underlying_path(char *dm_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *realp;
	char *tmp = NULL;
	char *path = NULL;
	char *dev_str;
	int size;

	if (dm_name == NULL)
		return (NULL);

	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
	realp = realpath(dm_name, NULL);
	if (realp == NULL)
		return (NULL);

	/*
	 * If they preface 'dev' with a path (like "/dev") then strip it off.
	 * We just want the 'dm-N' part.
	 */
	tmp = strrchr(realp, '/');
	if (tmp != NULL)
		dev_str = tmp + 1;	/* +1 since we want the chr after '/' */
	else
		dev_str = realp;	/* no '/' present; use the whole name */

	size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
	if (size == -1 || tmp == NULL) {
		/* On failure asprintf() leaves 'tmp' undefined; don't free it. */
		tmp = NULL;
		goto end;
	}

	dp = opendir(tmp);
	if (dp == NULL)
		goto end;

	/* Return first sd* entry in /sys/block/dm-N/slaves/ */
	while ((ep = readdir(dp))) {
		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
			size = asprintf(&path, "/dev/%s", ep->d_name);
			break;
		}
	}

end:
	if (dp != NULL)
		closedir(dp);
	free(tmp);
	free(realp);
	return (path);
}
4420
4421/*
4422 * Return 1 if device is a device mapper or multipath device.
4423 * Return 0 if not.
4424 */
int
zfs_dev_is_dm(char *dev_name)
{
	char *slave;

	/* A device is DM iff it maps to at least one underlying device. */
	slave = dm_get_underlying_path(dev_name);
	if (slave == NULL)
		return (0);

	free(slave);
	return (1);
}
4437
4438/*
4439 * Lookup the underlying device for a device name
4440 *
4441 * Often you'll have a symlink to a device, a partition device,
4442 * or a multipath device, and want to look up the underlying device.
4443 * This function returns the underlying device name. If the device
4444 * name is already the underlying device, then just return the same
4445 * name. If the device is a DM device with multiple underlying devices
4446 * then return the first one.
4447 *
4448 * For example:
4449 *
4450 * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
4451 * dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
4452 * returns: /dev/sda
4453 *
4454 * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
4455 * dev_name: /dev/mapper/mpatha
4456 * returns: /dev/sda (first device)
4457 *
4458 * 3. /dev/sda (already the underlying device)
4459 * dev_name: /dev/sda
4460 * returns: /dev/sda
4461 *
4462 * 4. /dev/dm-3 (mapped to /dev/sda)
4463 * dev_name: /dev/dm-3
4464 * returns: /dev/sda
4465 *
4466 * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
4467 * dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
4468 * returns: /dev/sdb
4469 *
4470 * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
4471 * dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
4472 * returns: /dev/sda
4473 *
4474 * Returns underlying device name, or NULL on error or no match.
4475 *
4476 * NOTE: The returned name string must be *freed*.
4477 */
char *
zfs_get_underlying_path(char *dev_name)
{
	char *resolved;
	char *stripped = NULL;

	if (dev_name == NULL)
		return (NULL);

	/* Prefer the DM slave device; otherwise just resolve symlinks. */
	resolved = dm_get_underlying_path(dev_name);
	if (resolved == NULL)
		resolved = realpath(dev_name, NULL);

	if (resolved != NULL) {
		/* Drop any trailing partition suffix (e.g. sda2 -> sda). */
		stripped = zfs_strip_partition_path(resolved);
		free(resolved);
	}

	return (stripped);
}
1bbd8770
TH
4500
4501/*
4502 * Given a dev name like "sda", return the full enclosure sysfs path to
4503 * the disk. You can also pass in the name with "/dev" prepended
4504 * to it (like /dev/sda).
4505 *
4506 * For example, disk "sda" in enclosure slot 1:
4507 * dev: "sda"
4508 * returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
4509 *
4510 * 'dev' must be a non-devicemapper device.
4511 *
4512 * Returned string must be freed.
4513 */
char *
zfs_get_enclosure_sysfs_path(char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	char *tmp1 = NULL;
	char *tmp2 = NULL;
	char *tmp3 = NULL;
	char *path = NULL;
	ssize_t size;
	int tmpsize;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	tmp1 = strrchr(dev_name, '/');
	if (tmp1 != NULL)
		dev_name = tmp1 + 1;	/* +1 since we want the chr after '/' */

	tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
	if (tmpsize == -1 || tmp1 == NULL) {
		/* On failure asprintf() leaves 'tmp1' undefined; don't free it. */
		tmp1 = NULL;
		goto end;
	}

	dp = opendir(tmp1);
	if (dp == NULL)
		goto end;	/* 'tmp1' is still freed at 'end' */

	/*
	 * Look through all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp))) {
		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
		    tmp2 == NULL)
			break;

		size = readlink(tmp2, buf, sizeof (buf));

		/* Did readlink fail or crop the link name? */
		if (size == -1 || (size_t)size >= sizeof (buf)) {
			free(tmp2);
			tmp2 = NULL;	/* To make free() at the end a NOP */
			break;
		}

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[size] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		tmp3 = strstr(buf, "enclosure");
		if (tmp3 == NULL)
			break;

		if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
			/* If asprintf() fails, 'path' is undefined */
			path = NULL;
			break;
		}

		/* First enclosure_device link wins; stop scanning. */
		break;
	}

end:
	free(tmp2);
	free(tmp1);

	if (dp != NULL)
		closedir(dp);

	return (path);
}