]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
34dc7c2f BB |
23 | */ |
24 | ||
34dc7c2f BB |
25 | /* |
26 | * ZFS fault injection | |
27 | * | |
28 | * To handle fault injection, we keep track of a series of zinject_record_t | |
29 | * structures which describe which logical block(s) should be injected with a | |
30 | * fault. These are kept in a global list. Each record corresponds to a given | |
31 | * spa_t and maintains a special hold on the spa_t so that it cannot be deleted | |
32 | * or exported while the injection record exists. | |
33 | * | |
34 | * Device level injection is done using the 'zi_guid' field. If this is set, it | |
35 | * means that the error is destined for a particular device, not a piece of | |
36 | * data. | |
37 | * | |
38 | * This is a rather poor data structure and algorithm, but we don't expect more | |
39 | * than a few faults at any one time, so it should be sufficient for our needs. | |
40 | */ | |
41 | ||
42 | #include <sys/arc.h> | |
43 | #include <sys/zio_impl.h> | |
44 | #include <sys/zfs_ioctl.h> | |
34dc7c2f | 45 | #include <sys/vdev_impl.h> |
428870ff | 46 | #include <sys/dmu_objset.h> |
b128c09f | 47 | #include <sys/fs/zfs.h> |
34dc7c2f BB |
48 | |
/*
 * Number of currently registered injection handlers.  It is bumped by one
 * for every handler added in zio_inject_fault() and dropped by one for every
 * handler removed in zio_clear_fault().
 */
uint32_t zio_injection_enabled;

/*
 * A single registered fault: the caller-supplied injection record together
 * with the pool it applies to and the identifier handed back to the caller.
 */
typedef struct inject_handler {
	/* identifier assigned from inject_next_id at registration time */
	int zi_id;
	/* pool this handler applies to (holds an injection reference) */
	spa_t *zi_spa;
	/* private copy of the record passed to zio_inject_fault() */
	zinject_record_t zi_record;
	/* linkage on the inject_handlers list */
	list_node_t zi_link;
} inject_handler_t;

/* All registered handlers, in registration order (inserted at the tail). */
static list_t inject_handlers;
/* Protects inject_handlers; readers walk the list, writers add/remove. */
static krwlock_t inject_lock;
/* Next handler identifier to hand out; advanced under inject_lock. */
static int inject_next_id = 1;
61 | ||
62 | /* | |
63 | * Returns true if the given record matches the I/O in progress. | |
64 | */ | |
65 | static boolean_t | |
66 | zio_match_handler(zbookmark_t *zb, uint64_t type, | |
67 | zinject_record_t *record, int error) | |
68 | { | |
69 | /* | |
70 | * Check for a match against the MOS, which is based on type | |
71 | */ | |
428870ff BB |
72 | if (zb->zb_objset == DMU_META_OBJSET && |
73 | record->zi_objset == DMU_META_OBJSET && | |
74 | record->zi_object == DMU_META_DNODE_OBJECT) { | |
34dc7c2f BB |
75 | if (record->zi_type == DMU_OT_NONE || |
76 | type == record->zi_type) | |
77 | return (record->zi_freq == 0 || | |
78 | spa_get_random(100) < record->zi_freq); | |
79 | else | |
80 | return (B_FALSE); | |
81 | } | |
82 | ||
83 | /* | |
84 | * Check for an exact match. | |
85 | */ | |
86 | if (zb->zb_objset == record->zi_objset && | |
87 | zb->zb_object == record->zi_object && | |
88 | zb->zb_level == record->zi_level && | |
89 | zb->zb_blkid >= record->zi_start && | |
90 | zb->zb_blkid <= record->zi_end && | |
91 | error == record->zi_error) | |
92 | return (record->zi_freq == 0 || | |
93 | spa_get_random(100) < record->zi_freq); | |
94 | ||
95 | return (B_FALSE); | |
96 | } | |
97 | ||
428870ff BB |
98 | /* |
99 | * Panic the system when a config change happens in the function | |
100 | * specified by tag. | |
101 | */ | |
102 | void | |
103 | zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type) | |
104 | { | |
105 | inject_handler_t *handler; | |
106 | ||
107 | rw_enter(&inject_lock, RW_READER); | |
108 | ||
109 | for (handler = list_head(&inject_handlers); handler != NULL; | |
110 | handler = list_next(&inject_handlers, handler)) { | |
111 | ||
112 | if (spa != handler->zi_spa) | |
113 | continue; | |
114 | ||
115 | if (handler->zi_record.zi_type == type && | |
116 | strcmp(tag, handler->zi_record.zi_func) == 0) | |
117 | panic("Panic requested in function %s\n", tag); | |
118 | } | |
119 | ||
120 | rw_exit(&inject_lock); | |
121 | } | |
122 | ||
34dc7c2f BB |
123 | /* |
124 | * Determine if the I/O in question should return failure. Returns the errno | |
125 | * to be returned to the caller. | |
126 | */ | |
127 | int | |
128 | zio_handle_fault_injection(zio_t *zio, int error) | |
129 | { | |
130 | int ret = 0; | |
131 | inject_handler_t *handler; | |
132 | ||
133 | /* | |
134 | * Ignore I/O not associated with any logical data. | |
135 | */ | |
136 | if (zio->io_logical == NULL) | |
137 | return (0); | |
138 | ||
139 | /* | |
140 | * Currently, we only support fault injection on reads. | |
141 | */ | |
142 | if (zio->io_type != ZIO_TYPE_READ) | |
143 | return (0); | |
144 | ||
145 | rw_enter(&inject_lock, RW_READER); | |
146 | ||
147 | for (handler = list_head(&inject_handlers); handler != NULL; | |
148 | handler = list_next(&inject_handlers, handler)) { | |
149 | ||
150 | /* Ignore errors not destined for this pool */ | |
151 | if (zio->io_spa != handler->zi_spa) | |
152 | continue; | |
153 | ||
428870ff BB |
154 | /* Ignore device errors and panic injection */ |
155 | if (handler->zi_record.zi_guid != 0 || | |
156 | handler->zi_record.zi_func[0] != '\0' || | |
157 | handler->zi_record.zi_duration != 0) | |
34dc7c2f BB |
158 | continue; |
159 | ||
160 | /* If this handler matches, return EIO */ | |
161 | if (zio_match_handler(&zio->io_logical->io_bookmark, | |
162 | zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, | |
163 | &handler->zi_record, error)) { | |
164 | ret = error; | |
165 | break; | |
166 | } | |
167 | } | |
168 | ||
169 | rw_exit(&inject_lock); | |
170 | ||
171 | return (ret); | |
172 | } | |
173 | ||
b128c09f BB |
174 | /* |
175 | * Determine if the zio is part of a label update and has an injection | |
176 | * handler associated with that portion of the label. Currently, we | |
177 | * allow error injection in either the nvlist or the uberblock region of | |
178 | * of the vdev label. | |
179 | */ | |
180 | int | |
181 | zio_handle_label_injection(zio_t *zio, int error) | |
182 | { | |
183 | inject_handler_t *handler; | |
184 | vdev_t *vd = zio->io_vd; | |
185 | uint64_t offset = zio->io_offset; | |
186 | int label; | |
187 | int ret = 0; | |
188 | ||
428870ff | 189 | if (offset >= VDEV_LABEL_START_SIZE && |
b128c09f BB |
190 | offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) |
191 | return (0); | |
192 | ||
193 | rw_enter(&inject_lock, RW_READER); | |
194 | ||
195 | for (handler = list_head(&inject_handlers); handler != NULL; | |
196 | handler = list_next(&inject_handlers, handler)) { | |
197 | uint64_t start = handler->zi_record.zi_start; | |
198 | uint64_t end = handler->zi_record.zi_end; | |
199 | ||
428870ff BB |
200 | /* Ignore device only faults or panic injection */ |
201 | if (handler->zi_record.zi_start == 0 || | |
202 | handler->zi_record.zi_func[0] != '\0' || | |
203 | handler->zi_record.zi_duration != 0) | |
b128c09f BB |
204 | continue; |
205 | ||
206 | /* | |
207 | * The injection region is the relative offsets within a | |
208 | * vdev label. We must determine the label which is being | |
209 | * updated and adjust our region accordingly. | |
210 | */ | |
211 | label = vdev_label_number(vd->vdev_psize, offset); | |
212 | start = vdev_label_offset(vd->vdev_psize, label, start); | |
213 | end = vdev_label_offset(vd->vdev_psize, label, end); | |
214 | ||
215 | if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && | |
216 | (offset >= start && offset <= end)) { | |
217 | ret = error; | |
218 | break; | |
219 | } | |
220 | } | |
221 | rw_exit(&inject_lock); | |
222 | return (ret); | |
223 | } | |
224 | ||
225 | ||
34dc7c2f | 226 | int |
9babb374 | 227 | zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) |
34dc7c2f BB |
228 | { |
229 | inject_handler_t *handler; | |
230 | int ret = 0; | |
231 | ||
428870ff BB |
232 | /* |
233 | * We skip over faults in the labels unless it's during | |
234 | * device open (i.e. zio == NULL). | |
235 | */ | |
236 | if (zio != NULL) { | |
237 | uint64_t offset = zio->io_offset; | |
238 | ||
239 | if (offset < VDEV_LABEL_START_SIZE || | |
240 | offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE) | |
241 | return (0); | |
242 | } | |
243 | ||
34dc7c2f BB |
244 | rw_enter(&inject_lock, RW_READER); |
245 | ||
246 | for (handler = list_head(&inject_handlers); handler != NULL; | |
247 | handler = list_next(&inject_handlers, handler)) { | |
248 | ||
428870ff BB |
249 | /* |
250 | * Ignore label specific faults, panic injection | |
251 | * or fake writes | |
252 | */ | |
253 | if (handler->zi_record.zi_start != 0 || | |
254 | handler->zi_record.zi_func[0] != '\0' || | |
255 | handler->zi_record.zi_duration != 0) | |
b128c09f BB |
256 | continue; |
257 | ||
34dc7c2f | 258 | if (vd->vdev_guid == handler->zi_record.zi_guid) { |
9babb374 BB |
259 | if (handler->zi_record.zi_failfast && |
260 | (zio == NULL || (zio->io_flags & | |
261 | (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { | |
262 | continue; | |
263 | } | |
264 | ||
428870ff BB |
265 | /* Handle type specific I/O failures */ |
266 | if (zio != NULL && | |
267 | handler->zi_record.zi_iotype != ZIO_TYPES && | |
268 | handler->zi_record.zi_iotype != zio->io_type) | |
269 | continue; | |
270 | ||
34dc7c2f BB |
271 | if (handler->zi_record.zi_error == error) { |
272 | /* | |
273 | * For a failed open, pretend like the device | |
274 | * has gone away. | |
275 | */ | |
276 | if (error == ENXIO) | |
277 | vd->vdev_stat.vs_aux = | |
278 | VDEV_AUX_OPEN_FAILED; | |
428870ff BB |
279 | |
280 | /* | |
281 | * Treat these errors as if they had been | |
282 | * retried so that all the appropriate stats | |
283 | * and FMA events are generated. | |
284 | */ | |
285 | if (!handler->zi_record.zi_failfast && | |
286 | zio != NULL) | |
287 | zio->io_flags |= ZIO_FLAG_IO_RETRY; | |
288 | ||
34dc7c2f BB |
289 | ret = error; |
290 | break; | |
291 | } | |
292 | if (handler->zi_record.zi_error == ENXIO) { | |
293 | ret = EIO; | |
294 | break; | |
295 | } | |
296 | } | |
297 | } | |
298 | ||
299 | rw_exit(&inject_lock); | |
300 | ||
301 | return (ret); | |
302 | } | |
303 | ||
428870ff BB |
304 | /* |
305 | * Simulate hardware that ignores cache flushes. For requested number | |
306 | * of seconds nix the actual writing to disk. | |
307 | */ | |
308 | void | |
309 | zio_handle_ignored_writes(zio_t *zio) | |
310 | { | |
311 | inject_handler_t *handler; | |
312 | ||
313 | rw_enter(&inject_lock, RW_READER); | |
314 | ||
315 | for (handler = list_head(&inject_handlers); handler != NULL; | |
316 | handler = list_next(&inject_handlers, handler)) { | |
317 | ||
318 | /* Ignore errors not destined for this pool */ | |
319 | if (zio->io_spa != handler->zi_spa) | |
320 | continue; | |
321 | ||
322 | if (handler->zi_record.zi_duration == 0) | |
323 | continue; | |
324 | ||
325 | /* | |
326 | * Positive duration implies # of seconds, negative | |
327 | * a number of txgs | |
328 | */ | |
329 | if (handler->zi_record.zi_timer == 0) { | |
330 | if (handler->zi_record.zi_duration > 0) | |
331 | handler->zi_record.zi_timer = ddi_get_lbolt64(); | |
332 | else | |
333 | handler->zi_record.zi_timer = zio->io_txg; | |
334 | } | |
335 | ||
336 | /* Have a "problem" writing 60% of the time */ | |
337 | if (spa_get_random(100) < 60) | |
338 | zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; | |
339 | break; | |
340 | } | |
341 | ||
342 | rw_exit(&inject_lock); | |
343 | } | |
344 | ||
345 | void | |
346 | spa_handle_ignored_writes(spa_t *spa) | |
347 | { | |
348 | inject_handler_t *handler; | |
349 | ||
350 | if (zio_injection_enabled == 0) | |
351 | return; | |
352 | ||
353 | rw_enter(&inject_lock, RW_READER); | |
354 | ||
355 | for (handler = list_head(&inject_handlers); handler != NULL; | |
356 | handler = list_next(&inject_handlers, handler)) { | |
357 | ||
358 | /* Ignore errors not destined for this pool */ | |
359 | if (spa != handler->zi_spa) | |
360 | continue; | |
361 | ||
362 | if (handler->zi_record.zi_duration == 0) | |
363 | continue; | |
364 | ||
365 | if (handler->zi_record.zi_duration > 0) { | |
366 | VERIFY(handler->zi_record.zi_timer == 0 || | |
367 | handler->zi_record.zi_timer + | |
368 | handler->zi_record.zi_duration * hz > | |
369 | ddi_get_lbolt64()); | |
370 | } else { | |
371 | /* duration is negative so the subtraction here adds */ | |
372 | VERIFY(handler->zi_record.zi_timer == 0 || | |
373 | handler->zi_record.zi_timer - | |
374 | handler->zi_record.zi_duration >= | |
375 | spa_syncing_txg(spa)); | |
376 | } | |
377 | } | |
378 | ||
379 | rw_exit(&inject_lock); | |
380 | } | |
381 | ||
34dc7c2f BB |
382 | /* |
383 | * Create a new handler for the given record. We add it to the list, adding | |
384 | * a reference to the spa_t in the process. We increment zio_injection_enabled, | |
385 | * which is the switch to trigger all fault injection. | |
386 | */ | |
387 | int | |
388 | zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) | |
389 | { | |
390 | inject_handler_t *handler; | |
391 | int error; | |
392 | spa_t *spa; | |
393 | ||
394 | /* | |
395 | * If this is pool-wide metadata, make sure we unload the corresponding | |
396 | * spa_t, so that the next attempt to load it will trigger the fault. | |
397 | * We call spa_reset() to unload the pool appropriately. | |
398 | */ | |
399 | if (flags & ZINJECT_UNLOAD_SPA) | |
400 | if ((error = spa_reset(name)) != 0) | |
401 | return (error); | |
402 | ||
403 | if (!(flags & ZINJECT_NULL)) { | |
404 | /* | |
405 | * spa_inject_ref() will add an injection reference, which will | |
406 | * prevent the pool from being removed from the namespace while | |
407 | * still allowing it to be unloaded. | |
408 | */ | |
409 | if ((spa = spa_inject_addref(name)) == NULL) | |
410 | return (ENOENT); | |
411 | ||
412 | handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); | |
413 | ||
414 | rw_enter(&inject_lock, RW_WRITER); | |
415 | ||
416 | *id = handler->zi_id = inject_next_id++; | |
417 | handler->zi_spa = spa; | |
418 | handler->zi_record = *record; | |
419 | list_insert_tail(&inject_handlers, handler); | |
420 | atomic_add_32(&zio_injection_enabled, 1); | |
421 | ||
422 | rw_exit(&inject_lock); | |
423 | } | |
424 | ||
425 | /* | |
426 | * Flush the ARC, so that any attempts to read this data will end up | |
427 | * going to the ZIO layer. Note that this is a little overkill, but | |
428 | * we don't have the necessary ARC interfaces to do anything else, and | |
429 | * fault injection isn't a performance critical path. | |
430 | */ | |
431 | if (flags & ZINJECT_FLUSH_ARC) | |
432 | arc_flush(NULL); | |
433 | ||
434 | return (0); | |
435 | } | |
436 | ||
437 | /* | |
438 | * Returns the next record with an ID greater than that supplied to the | |
439 | * function. Used to iterate over all handlers in the system. | |
440 | */ | |
441 | int | |
442 | zio_inject_list_next(int *id, char *name, size_t buflen, | |
443 | zinject_record_t *record) | |
444 | { | |
445 | inject_handler_t *handler; | |
446 | int ret; | |
447 | ||
448 | mutex_enter(&spa_namespace_lock); | |
449 | rw_enter(&inject_lock, RW_READER); | |
450 | ||
451 | for (handler = list_head(&inject_handlers); handler != NULL; | |
452 | handler = list_next(&inject_handlers, handler)) | |
453 | if (handler->zi_id > *id) | |
454 | break; | |
455 | ||
456 | if (handler) { | |
457 | *record = handler->zi_record; | |
458 | *id = handler->zi_id; | |
459 | (void) strncpy(name, spa_name(handler->zi_spa), buflen); | |
460 | ret = 0; | |
461 | } else { | |
462 | ret = ENOENT; | |
463 | } | |
464 | ||
465 | rw_exit(&inject_lock); | |
466 | mutex_exit(&spa_namespace_lock); | |
467 | ||
468 | return (ret); | |
469 | } | |
470 | ||
471 | /* | |
472 | * Clear the fault handler with the given identifier, or return ENOENT if none | |
473 | * exists. | |
474 | */ | |
475 | int | |
476 | zio_clear_fault(int id) | |
477 | { | |
478 | inject_handler_t *handler; | |
479 | int ret; | |
480 | ||
481 | rw_enter(&inject_lock, RW_WRITER); | |
482 | ||
483 | for (handler = list_head(&inject_handlers); handler != NULL; | |
484 | handler = list_next(&inject_handlers, handler)) | |
485 | if (handler->zi_id == id) | |
486 | break; | |
487 | ||
488 | if (handler == NULL) { | |
489 | ret = ENOENT; | |
490 | } else { | |
491 | list_remove(&inject_handlers, handler); | |
492 | spa_inject_delref(handler->zi_spa); | |
493 | kmem_free(handler, sizeof (inject_handler_t)); | |
494 | atomic_add_32(&zio_injection_enabled, -1); | |
495 | ret = 0; | |
496 | } | |
497 | ||
498 | rw_exit(&inject_lock); | |
499 | ||
500 | return (ret); | |
501 | } | |
502 | ||
503 | void | |
504 | zio_inject_init(void) | |
505 | { | |
b128c09f | 506 | rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); |
34dc7c2f BB |
507 | list_create(&inject_handlers, sizeof (inject_handler_t), |
508 | offsetof(inject_handler_t, zi_link)); | |
509 | } | |
510 | ||
511 | void | |
512 | zio_inject_fini(void) | |
513 | { | |
514 | list_destroy(&inject_handlers); | |
b128c09f | 515 | rw_destroy(&inject_lock); |
34dc7c2f | 516 | } |