]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | ||
22 | /* | |
23 | * Copyright 2008 Sun Microsystems, Inc. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | */ | |
26 | ||
34dc7c2f BB |
27 | #include <sys/spa.h> |
28 | #include <sys/spa_impl.h> | |
29 | #include <sys/nvpair.h> | |
30 | #include <sys/uio.h> | |
31 | #include <sys/fs/zfs.h> | |
32 | #include <sys/vdev_impl.h> | |
33 | #include <sys/zfs_ioctl.h> | |
34 | #include <sys/utsname.h> | |
35 | #include <sys/systeminfo.h> | |
36 | #include <sys/sunddi.h> | |
37 | #ifdef _KERNEL | |
38 | #include <sys/kobj.h> | |
39 | #endif | |
40 | ||
41 | /* | |
42 | * Pool configuration repository. | |
43 | * | |
44 | * Pool configuration is stored as a packed nvlist on the filesystem. By | |
45 | * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot | |
46 | * (when the ZFS module is loaded). Pools can also have the 'cachefile' | |
47 | * property set that allows them to be stored in an alternate location under | |
48 | * the control of external software. | |
49 | * | |
50 | * For each cache file, we have a single nvlist which holds all the | |
51 | * configuration information. When the module loads, we read this information | |
52 | * from /etc/zfs/zpool.cache and populate the SPA namespace. This namespace is | |
53 | * maintained independently in spa.c. Whenever the namespace is modified, or | |
54 | * the configuration of a pool is changed, we call spa_config_sync(), which | |
55 | * walks through all the active pools and writes the configuration to disk. | |
56 | */ | |
57 | ||
58 | static uint64_t spa_config_generation = 1; | |
59 | ||
60 | /* | |
61 | * This can be overridden in userland to preserve an alternate namespace for | |
62 | * userland pools when doing testing. | |
63 | */ | |
b128c09f | 64 | const char *spa_config_path = ZPOOL_CACHE; |
34dc7c2f BB |
65 | |
66 | /* | |
67 | * Called when the module is first loaded, this routine loads the configuration | |
68 | * file into the SPA namespace. It does not actually open or load the pools; it | |
69 | * only populates the namespace. | |
70 | */ | |
71 | void | |
72 | spa_config_load(void) | |
73 | { | |
74 | void *buf = NULL; | |
75 | nvlist_t *nvlist, *child; | |
76 | nvpair_t *nvpair; | |
77 | spa_t *spa; | |
b128c09f | 78 | char *pathname; |
34dc7c2f BB |
79 | struct _buf *file; |
80 | uint64_t fsize; | |
81 | ||
82 | /* | |
83 | * Open the configuration file. | |
84 | */ | |
b128c09f BB |
85 | pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
86 | ||
87 | (void) snprintf(pathname, MAXPATHLEN, "%s%s", | |
88 | (rootdir != NULL) ? "./" : "", spa_config_path); | |
34dc7c2f BB |
89 | |
90 | file = kobj_open_file(pathname); | |
b128c09f BB |
91 | |
92 | kmem_free(pathname, MAXPATHLEN); | |
93 | ||
34dc7c2f BB |
94 | if (file == (struct _buf *)-1) |
95 | return; | |
96 | ||
97 | if (kobj_get_filesize(file, &fsize) != 0) | |
98 | goto out; | |
99 | ||
100 | buf = kmem_alloc(fsize, KM_SLEEP); | |
101 | ||
102 | /* | |
103 | * Read the nvlist from the file. | |
104 | */ | |
105 | if (kobj_read_file(file, buf, fsize, 0) < 0) | |
106 | goto out; | |
107 | ||
108 | /* | |
109 | * Unpack the nvlist. | |
110 | */ | |
111 | if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) | |
112 | goto out; | |
113 | ||
114 | /* | |
115 | * Iterate over all elements in the nvlist, creating a new spa_t for | |
116 | * each one with the specified configuration. | |
117 | */ | |
118 | mutex_enter(&spa_namespace_lock); | |
119 | nvpair = NULL; | |
120 | while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) { | |
121 | ||
122 | if (nvpair_type(nvpair) != DATA_TYPE_NVLIST) | |
123 | continue; | |
124 | ||
125 | VERIFY(nvpair_value_nvlist(nvpair, &child) == 0); | |
126 | ||
127 | if (spa_lookup(nvpair_name(nvpair)) != NULL) | |
128 | continue; | |
129 | spa = spa_add(nvpair_name(nvpair), NULL); | |
130 | ||
131 | /* | |
132 | * We blindly duplicate the configuration here. If it's | |
133 | * invalid, we will catch it when the pool is first opened. | |
134 | */ | |
135 | VERIFY(nvlist_dup(child, &spa->spa_config, 0) == 0); | |
136 | } | |
137 | mutex_exit(&spa_namespace_lock); | |
138 | ||
139 | nvlist_free(nvlist); | |
140 | ||
141 | out: | |
142 | if (buf != NULL) | |
143 | kmem_free(buf, fsize); | |
144 | ||
145 | kobj_close_file(file); | |
146 | } | |
147 | ||
34dc7c2f | 148 | static void |
b128c09f | 149 | spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) |
34dc7c2f | 150 | { |
34dc7c2f BB |
151 | size_t buflen; |
152 | char *buf; | |
153 | vnode_t *vp; | |
154 | int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; | |
b128c09f BB |
155 | char *temp; |
156 | ||
157 | /* | |
158 | * If the nvlist is empty (NULL), then remove the old cachefile. | |
159 | */ | |
160 | if (nvl == NULL) { | |
161 | (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); | |
162 | return; | |
163 | } | |
34dc7c2f BB |
164 | |
165 | /* | |
166 | * Pack the configuration into a buffer. | |
167 | */ | |
b128c09f | 168 | VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); |
34dc7c2f BB |
169 | |
170 | buf = kmem_alloc(buflen, KM_SLEEP); | |
b128c09f | 171 | temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); |
34dc7c2f | 172 | |
b128c09f | 173 | VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, |
34dc7c2f BB |
174 | KM_SLEEP) == 0); |
175 | ||
176 | /* | |
177 | * Write the configuration to disk. We need to do the traditional | |
178 | * 'write to temporary file, sync, move over original' to make sure we | |
179 | * always have a consistent view of the data. | |
180 | */ | |
b128c09f | 181 | (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); |
34dc7c2f | 182 | |
b128c09f BB |
183 | if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) { |
184 | if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, | |
185 | 0, RLIM64_INFINITY, kcred, NULL) == 0 && | |
186 | VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { | |
187 | (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); | |
188 | } | |
189 | (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); | |
190 | VN_RELE(vp); | |
34dc7c2f BB |
191 | } |
192 | ||
b128c09f | 193 | (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); |
34dc7c2f | 194 | |
34dc7c2f | 195 | kmem_free(buf, buflen); |
b128c09f | 196 | kmem_free(temp, MAXPATHLEN); |
34dc7c2f BB |
197 | } |
198 | ||
199 | /* | |
b128c09f BB |
200 | * Synchronize pool configuration to disk. This must be called with the |
201 | * namespace lock held. | |
34dc7c2f BB |
202 | */ |
203 | void | |
b128c09f | 204 | spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) |
34dc7c2f | 205 | { |
b128c09f BB |
206 | spa_config_dirent_t *dp, *tdp; |
207 | nvlist_t *nvl; | |
34dc7c2f BB |
208 | |
209 | ASSERT(MUTEX_HELD(&spa_namespace_lock)); | |
210 | ||
fb5f0bc8 BB |
211 | if (rootdir == NULL) |
212 | return; | |
213 | ||
b128c09f BB |
214 | /* |
215 | * Iterate over all cachefiles for the pool, past or present. When the | |
216 | * cachefile is changed, the new one is pushed onto this list, allowing | |
217 | * us to update previous cachefiles that no longer contain this pool. | |
218 | */ | |
219 | for (dp = list_head(&target->spa_config_list); dp != NULL; | |
220 | dp = list_next(&target->spa_config_list, dp)) { | |
221 | spa_t *spa = NULL; | |
222 | if (dp->scd_path == NULL) | |
223 | continue; | |
224 | ||
225 | /* | |
226 | * Iterate over all pools, adding any matching pools to 'nvl'. | |
227 | */ | |
228 | nvl = NULL; | |
229 | while ((spa = spa_next(spa)) != NULL) { | |
230 | if (spa == target && removing) | |
231 | continue; | |
232 | ||
233 | mutex_enter(&spa->spa_props_lock); | |
234 | tdp = list_head(&spa->spa_config_list); | |
235 | if (spa->spa_config == NULL || | |
236 | tdp->scd_path == NULL || | |
237 | strcmp(tdp->scd_path, dp->scd_path) != 0) { | |
238 | mutex_exit(&spa->spa_props_lock); | |
239 | continue; | |
240 | } | |
241 | ||
242 | if (nvl == NULL) | |
243 | VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, | |
244 | KM_SLEEP) == 0); | |
245 | ||
246 | VERIFY(nvlist_add_nvlist(nvl, spa->spa_name, | |
247 | spa->spa_config) == 0); | |
248 | mutex_exit(&spa->spa_props_lock); | |
249 | } | |
250 | ||
251 | spa_config_write(dp, nvl); | |
252 | nvlist_free(nvl); | |
253 | } | |
34dc7c2f BB |
254 | |
255 | /* | |
b128c09f | 256 | * Remove any config entries older than the current one. |
34dc7c2f | 257 | */ |
b128c09f BB |
258 | dp = list_head(&target->spa_config_list); |
259 | while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) { | |
260 | list_remove(&target->spa_config_list, tdp); | |
261 | if (tdp->scd_path != NULL) | |
262 | spa_strfree(tdp->scd_path); | |
263 | kmem_free(tdp, sizeof (spa_config_dirent_t)); | |
34dc7c2f BB |
264 | } |
265 | ||
266 | spa_config_generation++; | |
b128c09f BB |
267 | |
268 | if (postsysevent) | |
269 | spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC); | |
34dc7c2f BB |
270 | } |
271 | ||
272 | /* | |
273 | * Sigh. Inside a local zone, we don't have access to /etc/zfs/zpool.cache, | |
274 | * and we don't want to allow the local zone to see all the pools anyway. | |
275 | * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration | |
276 | * information for all pool visible within the zone. | |
277 | */ | |
278 | nvlist_t * | |
279 | spa_all_configs(uint64_t *generation) | |
280 | { | |
281 | nvlist_t *pools; | |
b128c09f | 282 | spa_t *spa = NULL; |
34dc7c2f BB |
283 | |
284 | if (*generation == spa_config_generation) | |
285 | return (NULL); | |
286 | ||
287 | VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
288 | ||
34dc7c2f BB |
289 | mutex_enter(&spa_namespace_lock); |
290 | while ((spa = spa_next(spa)) != NULL) { | |
291 | if (INGLOBALZONE(curproc) || | |
292 | zone_dataset_visible(spa_name(spa), NULL)) { | |
b128c09f | 293 | mutex_enter(&spa->spa_props_lock); |
34dc7c2f BB |
294 | VERIFY(nvlist_add_nvlist(pools, spa_name(spa), |
295 | spa->spa_config) == 0); | |
b128c09f | 296 | mutex_exit(&spa->spa_props_lock); |
34dc7c2f BB |
297 | } |
298 | } | |
34dc7c2f | 299 | *generation = spa_config_generation; |
b128c09f | 300 | mutex_exit(&spa_namespace_lock); |
34dc7c2f BB |
301 | |
302 | return (pools); | |
303 | } | |
304 | ||
305 | void | |
306 | spa_config_set(spa_t *spa, nvlist_t *config) | |
307 | { | |
b128c09f | 308 | mutex_enter(&spa->spa_props_lock); |
34dc7c2f BB |
309 | if (spa->spa_config != NULL) |
310 | nvlist_free(spa->spa_config); | |
311 | spa->spa_config = config; | |
b128c09f | 312 | mutex_exit(&spa->spa_props_lock); |
34dc7c2f BB |
313 | } |
314 | ||
315 | /* | |
316 | * Generate the pool's configuration based on the current in-core state. | |
317 | * We infer whether to generate a complete config or just one top-level config | |
318 | * based on whether vd is the root vdev. | |
319 | */ | |
320 | nvlist_t * | |
321 | spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) | |
322 | { | |
323 | nvlist_t *config, *nvroot; | |
324 | vdev_t *rvd = spa->spa_root_vdev; | |
325 | unsigned long hostid = 0; | |
b128c09f | 326 | boolean_t locked = B_FALSE; |
34dc7c2f | 327 | |
b128c09f | 328 | if (vd == NULL) { |
34dc7c2f | 329 | vd = rvd; |
b128c09f BB |
330 | locked = B_TRUE; |
331 | spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); | |
332 | } | |
333 | ||
334 | ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == | |
335 | (SCL_CONFIG | SCL_STATE)); | |
34dc7c2f BB |
336 | |
337 | /* | |
338 | * If txg is -1, report the current value of spa->spa_config_txg. | |
339 | */ | |
340 | if (txg == -1ULL) | |
341 | txg = spa->spa_config_txg; | |
342 | ||
343 | VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); | |
344 | ||
345 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, | |
346 | spa_version(spa)) == 0); | |
347 | VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, | |
348 | spa_name(spa)) == 0); | |
349 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, | |
350 | spa_state(spa)) == 0); | |
351 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, | |
352 | txg) == 0); | |
353 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, | |
354 | spa_guid(spa)) == 0); | |
355 | (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); | |
356 | if (hostid != 0) { | |
357 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, | |
358 | hostid) == 0); | |
359 | } | |
360 | VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, | |
361 | utsname.nodename) == 0); | |
362 | ||
363 | if (vd != rvd) { | |
364 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, | |
365 | vd->vdev_top->vdev_guid) == 0); | |
366 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, | |
367 | vd->vdev_guid) == 0); | |
368 | if (vd->vdev_isspare) | |
369 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, | |
370 | 1ULL) == 0); | |
371 | if (vd->vdev_islog) | |
372 | VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, | |
373 | 1ULL) == 0); | |
374 | vd = vd->vdev_top; /* label contains top config */ | |
375 | } | |
376 | ||
377 | nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE); | |
378 | VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); | |
379 | nvlist_free(nvroot); | |
380 | ||
b128c09f BB |
381 | if (locked) |
382 | spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); | |
383 | ||
34dc7c2f BB |
384 | return (config); |
385 | } | |
386 | ||
387 | /* | |
388 | * For a pool that's not currently a booting rootpool, update all disk labels, | |
389 | * generate a fresh config based on the current in-core state, and sync the | |
390 | * global config cache. | |
391 | */ | |
392 | void | |
393 | spa_config_update(spa_t *spa, int what) | |
394 | { | |
395 | spa_config_update_common(spa, what, FALSE); | |
396 | } | |
397 | ||
398 | /* | |
399 | * Update all disk labels, generate a fresh config based on the current | |
400 | * in-core state, and sync the global config cache (do not sync the config | |
401 | * cache if this is a booting rootpool). | |
402 | */ | |
403 | void | |
404 | spa_config_update_common(spa_t *spa, int what, boolean_t isroot) | |
405 | { | |
406 | vdev_t *rvd = spa->spa_root_vdev; | |
407 | uint64_t txg; | |
408 | int c; | |
409 | ||
410 | ASSERT(MUTEX_HELD(&spa_namespace_lock)); | |
411 | ||
b128c09f | 412 | spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); |
34dc7c2f BB |
413 | txg = spa_last_synced_txg(spa) + 1; |
414 | if (what == SPA_CONFIG_UPDATE_POOL) { | |
415 | vdev_config_dirty(rvd); | |
416 | } else { | |
417 | /* | |
418 | * If we have top-level vdevs that were added but have | |
419 | * not yet been prepared for allocation, do that now. | |
420 | * (It's safe now because the config cache is up to date, | |
421 | * so it will be able to translate the new DVAs.) | |
422 | * See comments in spa_vdev_add() for full details. | |
423 | */ | |
424 | for (c = 0; c < rvd->vdev_children; c++) { | |
425 | vdev_t *tvd = rvd->vdev_child[c]; | |
426 | if (tvd->vdev_ms_array == 0) { | |
427 | vdev_init(tvd, txg); | |
428 | vdev_config_dirty(tvd); | |
429 | } | |
430 | } | |
431 | } | |
b128c09f | 432 | spa_config_exit(spa, SCL_ALL, FTAG); |
34dc7c2f BB |
433 | |
434 | /* | |
435 | * Wait for the mosconfig to be regenerated and synced. | |
436 | */ | |
437 | txg_wait_synced(spa->spa_dsl_pool, txg); | |
438 | ||
439 | /* | |
440 | * Update the global config cache to reflect the new mosconfig. | |
441 | */ | |
442 | if (!isroot) | |
b128c09f | 443 | spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL); |
34dc7c2f BB |
444 | |
445 | if (what == SPA_CONFIG_UPDATE_POOL) | |
446 | spa_config_update_common(spa, SPA_CONFIG_UPDATE_VDEVS, isroot); | |
447 | } |