git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/vdev_root.c
ddt: document the theory and the key data structures
[mirror_zfs.git] / module / zfs / vdev_root.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
28 */
29
30 #include <sys/zfs_context.h>
31 #include <sys/spa.h>
32 #include <sys/vdev_impl.h>
33 #include <sys/zio.h>
34 #include <sys/fs/zfs.h>
35
36 /*
37 * Virtual device vector for the pool's root vdev.
38 */
39
40 static uint64_t
41 vdev_root_core_tvds(vdev_t *vd)
42 {
43 uint64_t tvds = 0;
44
45 for (uint64_t c = 0; c < vd->vdev_children; c++) {
46 vdev_t *cvd = vd->vdev_child[c];
47
48 if (!cvd->vdev_ishole && !cvd->vdev_islog &&
49 cvd->vdev_ops != &vdev_indirect_ops) {
50 tvds++;
51 }
52 }
53
54 return (tvds);
55 }
56
57 /*
58 * We should be able to tolerate one failure with absolutely no damage
59 * to our metadata. Two failures will take out space maps, a bunch of
60 * indirect block trees, meta dnodes, dnodes, etc. Probably not a happy
61 * place to live. When we get smarter, we can liberalize this policy.
62 * e.g. If we haven't lost two consecutive top-level vdevs, then we are
63 * probably fine. Adding bean counters during alloc/free can make this
64 * future guesswork more accurate.
65 */
66 static boolean_t
67 too_many_errors(vdev_t *vd, uint64_t numerrors)
68 {
69 uint64_t tvds;
70
71 if (numerrors == 0)
72 return (B_FALSE);
73
74 tvds = vdev_root_core_tvds(vd);
75 ASSERT3U(numerrors, <=, tvds);
76
77 if (numerrors == tvds)
78 return (B_TRUE);
79
80 return (numerrors > spa_missing_tvds_allowed(vd->vdev_spa));
81 }
82
83 static int
84 vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
85 uint64_t *ashift, uint64_t *pshift)
86 {
87 spa_t *spa = vd->vdev_spa;
88 int lasterror = 0;
89 int numerrors = 0;
90
91 if (vd->vdev_children == 0) {
92 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
93 return (SET_ERROR(EINVAL));
94 }
95
96 vdev_open_children(vd);
97
98 for (int c = 0; c < vd->vdev_children; c++) {
99 vdev_t *cvd = vd->vdev_child[c];
100
101 if (cvd->vdev_open_error && !cvd->vdev_islog &&
102 cvd->vdev_ops != &vdev_indirect_ops) {
103 lasterror = cvd->vdev_open_error;
104 numerrors++;
105 }
106 }
107
108 if (spa_load_state(spa) != SPA_LOAD_NONE)
109 spa_set_missing_tvds(spa, numerrors);
110
111 if (too_many_errors(vd, numerrors)) {
112 vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
113 return (lasterror);
114 }
115
116 *asize = 0;
117 *max_asize = 0;
118 *ashift = 0;
119 *pshift = 0;
120
121 return (0);
122 }
123
124 static void
125 vdev_root_close(vdev_t *vd)
126 {
127 for (int c = 0; c < vd->vdev_children; c++)
128 vdev_close(vd->vdev_child[c]);
129 }
130
131 static void
132 vdev_root_state_change(vdev_t *vd, int faulted, int degraded)
133 {
134 if (too_many_errors(vd, faulted)) {
135 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
136 VDEV_AUX_NO_REPLICAS);
137 } else if (degraded || faulted) {
138 vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE);
139 } else {
140 vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
141 }
142 }
143
144 vdev_ops_t vdev_root_ops = {
145 .vdev_op_init = NULL,
146 .vdev_op_fini = NULL,
147 .vdev_op_open = vdev_root_open,
148 .vdev_op_close = vdev_root_close,
149 .vdev_op_asize = vdev_default_asize,
150 .vdev_op_min_asize = vdev_default_min_asize,
151 .vdev_op_min_alloc = NULL,
152 .vdev_op_io_start = NULL, /* not applicable to the root */
153 .vdev_op_io_done = NULL, /* not applicable to the root */
154 .vdev_op_state_change = vdev_root_state_change,
155 .vdev_op_need_resilver = NULL,
156 .vdev_op_hold = NULL,
157 .vdev_op_rele = NULL,
158 .vdev_op_remap = NULL,
159 .vdev_op_xlate = NULL,
160 .vdev_op_rebuild_asize = NULL,
161 .vdev_op_metaslab_init = NULL,
162 .vdev_op_config_generate = NULL,
163 .vdev_op_nparity = NULL,
164 .vdev_op_ndisks = NULL,
165 .vdev_op_type = VDEV_TYPE_ROOT, /* name of this vdev type */
166 .vdev_op_leaf = B_FALSE /* not a leaf vdev */
167 };