]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
26ef0cc7 | 23 | * Copyright (c) 2012, 2015 by Delphix. All rights reserved. |
34dc7c2f BB |
24 | */ |
25 | ||
34dc7c2f BB |
26 | /* |
27 | * ZFS fault injection | |
28 | * | |
29 | * To handle fault injection, we keep track of a series of zinject_record_t | |
30 | * structures which describe which logical block(s) should be injected with a | |
31 | * fault. These are kept in a global list. Each record corresponds to a given | |
32 | * spa_t and maintains a special hold on the spa_t so that it cannot be deleted | |
33 | * or exported while the injection record exists. | |
34 | * | |
35 | * Device level injection is done using the 'zi_guid' field. If this is set, it | |
36 | * means that the error is destined for a particular device, not a piece of | |
37 | * data. | |
38 | * | |
39 | * This is a rather poor data structure and algorithm, but we don't expect more | |
40 | * than a few faults at any one time, so it should be sufficient for our needs. | |
41 | */ | |
42 | ||
43 | #include <sys/arc.h> | |
44 | #include <sys/zio_impl.h> | |
45 | #include <sys/zfs_ioctl.h> | |
34dc7c2f | 46 | #include <sys/vdev_impl.h> |
428870ff | 47 | #include <sys/dmu_objset.h> |
b128c09f | 48 | #include <sys/fs/zfs.h> |
34dc7c2f | 49 | |
c409e464 | 50 | uint32_t zio_injection_enabled = 0; |
34dc7c2f | 51 | |
26ef0cc7 TH |
52 | /* |
53 | * Data describing each zinject handler registered on the system, and | |
54 | * contains the list node linking the handler in the global zinject | |
55 | * handler list. | |
56 | */ | |
34dc7c2f BB |
57 | typedef struct inject_handler { |
58 | int zi_id; | |
59 | spa_t *zi_spa; | |
60 | zinject_record_t zi_record; | |
26ef0cc7 TH |
61 | uint64_t *zi_lanes; |
62 | int zi_next_lane; | |
34dc7c2f BB |
63 | list_node_t zi_link; |
64 | } inject_handler_t; | |
65 | ||
26ef0cc7 TH |
66 | /* |
67 | * List of all zinject handlers registered on the system, protected by | |
68 | * the inject_lock defined below. | |
69 | */ | |
34dc7c2f | 70 | static list_t inject_handlers; |
26ef0cc7 TH |
71 | |
72 | /* | |
73 | * This protects insertion into, and traversal of, the inject handler | |
74 | * list defined above; as well as the inject_delay_count. Any time a | |
75 | * handler is inserted or removed from the list, this lock should be | |
76 | * taken as a RW_WRITER; and any time traversal is done over the list | |
77 | * (without modification to it) this lock should be taken as a RW_READER. | |
78 | */ | |
34dc7c2f | 79 | static krwlock_t inject_lock; |
26ef0cc7 TH |
80 | |
81 | /* | |
82 | * This holds the number of zinject delay handlers that have been | |
83 | * registered on the system. It is protected by the inject_lock defined | |
84 | * above. Thus modifications to this count must be a RW_WRITER of the | |
85 | * inject_lock, and reads of this count must be (at least) a RW_READER | |
86 | * of the lock. | |
87 | */ | |
88 | static int inject_delay_count = 0; | |
89 | ||
90 | /* | |
91 | * This lock is used only in zio_handle_io_delay(), refer to the comment | |
92 | * in that function for more details. | |
93 | */ | |
94 | static kmutex_t inject_delay_mtx; | |
95 | ||
96 | /* | |
97 | * Used to assign unique identifying numbers to each new zinject handler. | |
98 | */ | |
34dc7c2f BB |
99 | static int inject_next_id = 1; |
100 | ||
101 | /* | |
102 | * Returns true if the given record matches the I/O in progress. | |
103 | */ | |
104 | static boolean_t | |
5dbd68a3 | 105 | zio_match_handler(zbookmark_phys_t *zb, uint64_t type, |
34dc7c2f BB |
106 | zinject_record_t *record, int error) |
107 | { | |
108 | /* | |
109 | * Check for a match against the MOS, which is based on type | |
110 | */ | |
428870ff BB |
111 | if (zb->zb_objset == DMU_META_OBJSET && |
112 | record->zi_objset == DMU_META_OBJSET && | |
113 | record->zi_object == DMU_META_DNODE_OBJECT) { | |
34dc7c2f BB |
114 | if (record->zi_type == DMU_OT_NONE || |
115 | type == record->zi_type) | |
116 | return (record->zi_freq == 0 || | |
117 | spa_get_random(100) < record->zi_freq); | |
118 | else | |
119 | return (B_FALSE); | |
120 | } | |
121 | ||
122 | /* | |
123 | * Check for an exact match. | |
124 | */ | |
125 | if (zb->zb_objset == record->zi_objset && | |
126 | zb->zb_object == record->zi_object && | |
127 | zb->zb_level == record->zi_level && | |
128 | zb->zb_blkid >= record->zi_start && | |
129 | zb->zb_blkid <= record->zi_end && | |
130 | error == record->zi_error) | |
131 | return (record->zi_freq == 0 || | |
132 | spa_get_random(100) < record->zi_freq); | |
133 | ||
134 | return (B_FALSE); | |
135 | } | |
136 | ||
428870ff BB |
137 | /* |
138 | * Panic the system when a config change happens in the function | |
139 | * specified by tag. | |
140 | */ | |
141 | void | |
142 | zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type) | |
143 | { | |
144 | inject_handler_t *handler; | |
145 | ||
146 | rw_enter(&inject_lock, RW_READER); | |
147 | ||
148 | for (handler = list_head(&inject_handlers); handler != NULL; | |
149 | handler = list_next(&inject_handlers, handler)) { | |
150 | ||
151 | if (spa != handler->zi_spa) | |
152 | continue; | |
153 | ||
154 | if (handler->zi_record.zi_type == type && | |
155 | strcmp(tag, handler->zi_record.zi_func) == 0) | |
156 | panic("Panic requested in function %s\n", tag); | |
157 | } | |
158 | ||
159 | rw_exit(&inject_lock); | |
160 | } | |
161 | ||
34dc7c2f BB |
162 | /* |
163 | * Determine if the I/O in question should return failure. Returns the errno | |
164 | * to be returned to the caller. | |
165 | */ | |
166 | int | |
167 | zio_handle_fault_injection(zio_t *zio, int error) | |
168 | { | |
169 | int ret = 0; | |
170 | inject_handler_t *handler; | |
171 | ||
172 | /* | |
173 | * Ignore I/O not associated with any logical data. | |
174 | */ | |
175 | if (zio->io_logical == NULL) | |
176 | return (0); | |
177 | ||
178 | /* | |
179 | * Currently, we only support fault injection on reads. | |
180 | */ | |
181 | if (zio->io_type != ZIO_TYPE_READ) | |
182 | return (0); | |
183 | ||
184 | rw_enter(&inject_lock, RW_READER); | |
185 | ||
186 | for (handler = list_head(&inject_handlers); handler != NULL; | |
187 | handler = list_next(&inject_handlers, handler)) { | |
188 | ||
cc92e9d0 GW |
189 | if (zio->io_spa != handler->zi_spa || |
190 | handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT) | |
34dc7c2f BB |
191 | continue; |
192 | ||
193 | /* If this handler matches, return EIO */ | |
194 | if (zio_match_handler(&zio->io_logical->io_bookmark, | |
195 | zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, | |
196 | &handler->zi_record, error)) { | |
197 | ret = error; | |
198 | break; | |
199 | } | |
200 | } | |
201 | ||
202 | rw_exit(&inject_lock); | |
203 | ||
204 | return (ret); | |
205 | } | |
206 | ||
b128c09f BB |
207 | /* |
208 | * Determine if the zio is part of a label update and has an injection | |
209 | * handler associated with that portion of the label. Currently, we | |
210 | * allow error injection in either the nvlist or the uberblock region | |
211 | * of the vdev label. | |
212 | */ | |
213 | int | |
214 | zio_handle_label_injection(zio_t *zio, int error) | |
215 | { | |
216 | inject_handler_t *handler; | |
217 | vdev_t *vd = zio->io_vd; | |
218 | uint64_t offset = zio->io_offset; | |
219 | int label; | |
220 | int ret = 0; | |
221 | ||
428870ff | 222 | if (offset >= VDEV_LABEL_START_SIZE && |
b128c09f BB |
223 | offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) |
224 | return (0); | |
225 | ||
226 | rw_enter(&inject_lock, RW_READER); | |
227 | ||
228 | for (handler = list_head(&inject_handlers); handler != NULL; | |
229 | handler = list_next(&inject_handlers, handler)) { | |
230 | uint64_t start = handler->zi_record.zi_start; | |
231 | uint64_t end = handler->zi_record.zi_end; | |
232 | ||
cc92e9d0 | 233 | if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT) |
b128c09f BB |
234 | continue; |
235 | ||
236 | /* | |
237 | * The injection region is the relative offsets within a | |
238 | * vdev label. We must determine the label which is being | |
239 | * updated and adjust our region accordingly. | |
240 | */ | |
241 | label = vdev_label_number(vd->vdev_psize, offset); | |
242 | start = vdev_label_offset(vd->vdev_psize, label, start); | |
243 | end = vdev_label_offset(vd->vdev_psize, label, end); | |
244 | ||
245 | if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && | |
246 | (offset >= start && offset <= end)) { | |
247 | ret = error; | |
248 | break; | |
249 | } | |
250 | } | |
251 | rw_exit(&inject_lock); | |
252 | return (ret); | |
253 | } | |
254 | ||
255 | ||
34dc7c2f | 256 | int |
9babb374 | 257 | zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) |
34dc7c2f BB |
258 | { |
259 | inject_handler_t *handler; | |
260 | int ret = 0; | |
261 | ||
428870ff BB |
262 | /* |
263 | * We skip over faults in the labels unless it's during | |
264 | * device open (i.e. zio == NULL). | |
265 | */ | |
266 | if (zio != NULL) { | |
267 | uint64_t offset = zio->io_offset; | |
268 | ||
269 | if (offset < VDEV_LABEL_START_SIZE || | |
270 | offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE) | |
271 | return (0); | |
272 | } | |
273 | ||
34dc7c2f BB |
274 | rw_enter(&inject_lock, RW_READER); |
275 | ||
276 | for (handler = list_head(&inject_handlers); handler != NULL; | |
277 | handler = list_next(&inject_handlers, handler)) { | |
278 | ||
cc92e9d0 | 279 | if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT) |
b128c09f BB |
280 | continue; |
281 | ||
34dc7c2f | 282 | if (vd->vdev_guid == handler->zi_record.zi_guid) { |
9babb374 BB |
283 | if (handler->zi_record.zi_failfast && |
284 | (zio == NULL || (zio->io_flags & | |
285 | (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { | |
286 | continue; | |
287 | } | |
288 | ||
428870ff BB |
289 | /* Handle type specific I/O failures */ |
290 | if (zio != NULL && | |
291 | handler->zi_record.zi_iotype != ZIO_TYPES && | |
292 | handler->zi_record.zi_iotype != zio->io_type) | |
293 | continue; | |
294 | ||
34dc7c2f BB |
295 | if (handler->zi_record.zi_error == error) { |
296 | /* | |
297 | * For a failed open, pretend like the device | |
298 | * has gone away. | |
299 | */ | |
300 | if (error == ENXIO) | |
301 | vd->vdev_stat.vs_aux = | |
302 | VDEV_AUX_OPEN_FAILED; | |
428870ff BB |
303 | |
304 | /* | |
305 | * Treat these errors as if they had been | |
306 | * retried so that all the appropriate stats | |
307 | * and FMA events are generated. | |
308 | */ | |
309 | if (!handler->zi_record.zi_failfast && | |
310 | zio != NULL) | |
311 | zio->io_flags |= ZIO_FLAG_IO_RETRY; | |
312 | ||
34dc7c2f BB |
313 | ret = error; |
314 | break; | |
315 | } | |
316 | if (handler->zi_record.zi_error == ENXIO) { | |
2e528b49 | 317 | ret = SET_ERROR(EIO); |
34dc7c2f BB |
318 | break; |
319 | } | |
320 | } | |
321 | } | |
322 | ||
323 | rw_exit(&inject_lock); | |
324 | ||
325 | return (ret); | |
326 | } | |
327 | ||
428870ff BB |
328 | /* |
329 | * Simulate hardware that ignores cache flushes. For requested number | |
330 | * of seconds nix the actual writing to disk. | |
331 | */ | |
332 | void | |
333 | zio_handle_ignored_writes(zio_t *zio) | |
334 | { | |
335 | inject_handler_t *handler; | |
336 | ||
337 | rw_enter(&inject_lock, RW_READER); | |
338 | ||
339 | for (handler = list_head(&inject_handlers); handler != NULL; | |
340 | handler = list_next(&inject_handlers, handler)) { | |
341 | ||
342 | /* Ignore errors not destined for this pool */ | |
cc92e9d0 GW |
343 | if (zio->io_spa != handler->zi_spa || |
344 | handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) | |
428870ff BB |
345 | continue; |
346 | ||
347 | /* | |
348 | * Positive duration implies # of seconds, negative | |
349 | * a number of txgs | |
350 | */ | |
351 | if (handler->zi_record.zi_timer == 0) { | |
352 | if (handler->zi_record.zi_duration > 0) | |
353 | handler->zi_record.zi_timer = ddi_get_lbolt64(); | |
354 | else | |
355 | handler->zi_record.zi_timer = zio->io_txg; | |
356 | } | |
357 | ||
358 | /* Have a "problem" writing 60% of the time */ | |
359 | if (spa_get_random(100) < 60) | |
360 | zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; | |
361 | break; | |
362 | } | |
363 | ||
364 | rw_exit(&inject_lock); | |
365 | } | |
366 | ||
367 | void | |
368 | spa_handle_ignored_writes(spa_t *spa) | |
369 | { | |
370 | inject_handler_t *handler; | |
371 | ||
372 | if (zio_injection_enabled == 0) | |
373 | return; | |
374 | ||
375 | rw_enter(&inject_lock, RW_READER); | |
376 | ||
377 | for (handler = list_head(&inject_handlers); handler != NULL; | |
378 | handler = list_next(&inject_handlers, handler)) { | |
379 | ||
cc92e9d0 GW |
380 | if (spa != handler->zi_spa || |
381 | handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) | |
428870ff BB |
382 | continue; |
383 | ||
384 | if (handler->zi_record.zi_duration > 0) { | |
385 | VERIFY(handler->zi_record.zi_timer == 0 || | |
0b75bdb3 CC |
386 | ddi_time_after64( |
387 | (int64_t)handler->zi_record.zi_timer + | |
388 | handler->zi_record.zi_duration * hz, | |
389 | ddi_get_lbolt64())); | |
428870ff BB |
390 | } else { |
391 | /* duration is negative so the subtraction here adds */ | |
392 | VERIFY(handler->zi_record.zi_timer == 0 || | |
393 | handler->zi_record.zi_timer - | |
394 | handler->zi_record.zi_duration >= | |
395 | spa_syncing_txg(spa)); | |
396 | } | |
397 | } | |
398 | ||
399 | rw_exit(&inject_lock); | |
400 | } | |
401 | ||
26ef0cc7 | 402 | hrtime_t |
cc92e9d0 GW |
403 | zio_handle_io_delay(zio_t *zio) |
404 | { | |
405 | vdev_t *vd = zio->io_vd; | |
26ef0cc7 TH |
406 | inject_handler_t *min_handler = NULL; |
407 | hrtime_t min_target = 0; | |
cc92e9d0 | 408 | inject_handler_t *handler; |
26ef0cc7 TH |
409 | hrtime_t idle; |
410 | hrtime_t busy; | |
411 | hrtime_t target; | |
cc92e9d0 GW |
412 | |
413 | rw_enter(&inject_lock, RW_READER); | |
414 | ||
26ef0cc7 TH |
415 | /* |
416 | * inject_delay_count is a subset of zio_injection_enabled that | |
417 | * is only incremented for delay handlers. These checks are | |
418 | * mainly added to remind the reader why we're not explicitly | |
419 | * checking zio_injection_enabled like the other functions. | |
420 | */ | |
421 | IMPLY(inject_delay_count > 0, zio_injection_enabled > 0); | |
422 | IMPLY(zio_injection_enabled == 0, inject_delay_count == 0); | |
423 | ||
424 | /* | |
425 | * If there aren't any inject delay handlers registered, then we | |
426 | * can short circuit and simply return 0 here. A value of zero | |
427 | * informs zio_delay_interrupt() that this request should not be | |
428 | * delayed. This short circuit keeps us from acquiring the | |
429 | * inject_delay_mutex unnecessarily. | |
430 | */ | |
431 | if (inject_delay_count == 0) { | |
432 | rw_exit(&inject_lock); | |
433 | return (0); | |
434 | } | |
435 | ||
436 | /* | |
437 | * Each inject handler has a number of "lanes" associated with | |
438 | * it. Each lane is able to handle requests independently of one | |
439 | * another, and at a latency defined by the inject handler | |
440 | * record's zi_timer field. Thus if a handler in configured with | |
441 | * a single lane with a 10ms latency, it will delay requests | |
442 | * such that only a single request is completed every 10ms. So, | |
443 | * if more than one request is attempted per each 10ms interval, | |
444 | * the average latency of the requests will be greater than | |
445 | * 10ms; but if only a single request is submitted each 10ms | |
446 | * interval the average latency will be 10ms. | |
447 | * | |
448 | * We need to acquire this mutex to prevent multiple concurrent | |
449 | * threads being assigned to the same lane of a given inject | |
450 | * handler. The mutex allows us to perform the following two | |
451 | * operations atomically: | |
452 | * | |
453 | * 1. determine the minimum handler and minimum target | |
454 | * value of all the possible handlers | |
455 | * 2. update that minimum handler's lane array | |
456 | * | |
457 | * Without atomicity, two (or more) threads could pick the same | |
458 | * lane in step (1), and then conflict with each other in step | |
459 | * (2). This could allow a single lane handler to process | |
460 | * multiple requests simultaneously, which shouldn't be possible. | |
461 | */ | |
462 | mutex_enter(&inject_delay_mtx); | |
cc92e9d0 | 463 | |
26ef0cc7 TH |
464 | for (handler = list_head(&inject_handlers); |
465 | handler != NULL; handler = list_next(&inject_handlers, handler)) { | |
cc92e9d0 GW |
466 | if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO) |
467 | continue; | |
468 | ||
c35b1882 | 469 | if (handler->zi_record.zi_freq != 0 && |
2587cd8f | 470 | spa_get_random(100) >= handler->zi_record.zi_freq) { |
c35b1882 | 471 | continue; |
2587cd8f | 472 | } |
c35b1882 | 473 | |
26ef0cc7 TH |
474 | if (vd->vdev_guid != handler->zi_record.zi_guid) |
475 | continue; | |
476 | ||
477 | /* | |
478 | * Defensive; should never happen as the array allocation | |
479 | * occurs prior to inserting this handler on the list. | |
480 | */ | |
481 | ASSERT3P(handler->zi_lanes, !=, NULL); | |
482 | ||
483 | /* | |
484 | * This should never happen, the zinject command should | |
485 | * prevent a user from setting an IO delay with zero lanes. | |
486 | */ | |
487 | ASSERT3U(handler->zi_record.zi_nlanes, !=, 0); | |
488 | ||
489 | ASSERT3U(handler->zi_record.zi_nlanes, >, | |
490 | handler->zi_next_lane); | |
491 | ||
492 | /* | |
493 | * We want to issue this IO to the lane that will become | |
494 | * idle the soonest, so we compare the soonest this | |
495 | * specific handler can complete the IO with all other | |
496 | * handlers, to find the lowest value of all possible | |
497 | * lanes. We then use this lane to submit the request. | |
498 | * | |
499 | * Since each handler has a constant value for its | |
500 | * delay, we can just use the "next" lane for that | |
501 | * handler; as it will always be the lane with the | |
502 | * lowest value for that particular handler (i.e. the | |
503 | * lane that will become idle the soonest). This saves a | |
504 | * scan of each handler's lanes array. | |
505 | * | |
506 | * There's two cases to consider when determining when | |
507 | * this specific IO request should complete. If this | |
508 | * lane is idle, we want to "submit" the request now so | |
509 | * it will complete after zi_timer milliseconds. Thus, | |
510 | * we set the target to now + zi_timer. | |
511 | * | |
512 | * If the lane is busy, we want this request to complete | |
513 | * zi_timer milliseconds after the lane becomes idle. | |
514 | * Since the 'zi_lanes' array holds the time at which | |
515 | * each lane will become idle, we use that value to | |
516 | * determine when this request should complete. | |
517 | */ | |
518 | idle = handler->zi_record.zi_timer + gethrtime(); | |
519 | busy = handler->zi_record.zi_timer + | |
520 | handler->zi_lanes[handler->zi_next_lane]; | |
521 | target = MAX(idle, busy); | |
522 | ||
523 | if (min_handler == NULL) { | |
524 | min_handler = handler; | |
525 | min_target = target; | |
526 | continue; | |
cc92e9d0 GW |
527 | } |
528 | ||
26ef0cc7 TH |
529 | ASSERT3P(min_handler, !=, NULL); |
530 | ASSERT3U(min_target, !=, 0); | |
531 | ||
532 | /* | |
533 | * We don't yet increment the "next lane" variable since | |
534 | * we still might find a lower value lane in another | |
535 | * handler during any remaining iterations. Once we're | |
536 | * sure we've selected the absolute minimum, we'll claim | |
537 | * the lane and increment the handler's "next lane" | |
538 | * field below. | |
539 | */ | |
540 | ||
541 | if (target < min_target) { | |
542 | min_handler = handler; | |
543 | min_target = target; | |
544 | } | |
cc92e9d0 | 545 | } |
26ef0cc7 TH |
546 | |
547 | /* | |
548 | * 'min_handler' will be NULL if no IO delays are registered for | |
549 | * this vdev, otherwise it will point to the handler containing | |
550 | * the lane that will become idle the soonest. | |
551 | */ | |
552 | if (min_handler != NULL) { | |
553 | ASSERT3U(min_target, !=, 0); | |
554 | min_handler->zi_lanes[min_handler->zi_next_lane] = min_target; | |
555 | ||
556 | /* | |
557 | * If we've used all possible lanes for this handler, | |
558 | * loop back and start using the first lane again; | |
559 | * otherwise, just increment the lane index. | |
560 | */ | |
561 | min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) % | |
562 | min_handler->zi_record.zi_nlanes; | |
563 | } | |
564 | ||
565 | mutex_exit(&inject_delay_mtx); | |
cc92e9d0 | 566 | rw_exit(&inject_lock); |
26ef0cc7 TH |
567 | |
568 | return (min_target); | |
cc92e9d0 GW |
569 | } |
570 | ||
34dc7c2f BB |
571 | /* |
572 | * Create a new handler for the given record. We add it to the list, adding | |
573 | * a reference to the spa_t in the process. We increment zio_injection_enabled, | |
574 | * which is the switch to trigger all fault injection. | |
575 | */ | |
576 | int | |
577 | zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) | |
578 | { | |
579 | inject_handler_t *handler; | |
580 | int error; | |
581 | spa_t *spa; | |
582 | ||
583 | /* | |
584 | * If this is pool-wide metadata, make sure we unload the corresponding | |
585 | * spa_t, so that the next attempt to load it will trigger the fault. | |
586 | * We call spa_reset() to unload the pool appropriately. | |
587 | */ | |
588 | if (flags & ZINJECT_UNLOAD_SPA) | |
589 | if ((error = spa_reset(name)) != 0) | |
590 | return (error); | |
591 | ||
26ef0cc7 TH |
592 | if (record->zi_cmd == ZINJECT_DELAY_IO) { |
593 | /* | |
594 | * A value of zero for the number of lanes or for the | |
595 | * delay time doesn't make sense. | |
596 | */ | |
597 | if (record->zi_timer == 0 || record->zi_nlanes == 0) | |
598 | return (SET_ERROR(EINVAL)); | |
599 | ||
600 | /* | |
601 | * The number of lanes is directly mapped to the size of | |
602 | * an array used by the handler. Thus, to ensure the | |
603 | * user doesn't trigger an allocation that's "too large" | |
604 | * we cap the number of lanes here. | |
605 | */ | |
606 | if (record->zi_nlanes >= UINT16_MAX) | |
607 | return (SET_ERROR(EINVAL)); | |
608 | } | |
609 | ||
34dc7c2f BB |
610 | if (!(flags & ZINJECT_NULL)) { |
611 | /* | |
612 | * spa_inject_ref() will add an injection reference, which will | |
613 | * prevent the pool from being removed from the namespace while | |
614 | * still allowing it to be unloaded. | |
615 | */ | |
616 | if ((spa = spa_inject_addref(name)) == NULL) | |
2e528b49 | 617 | return (SET_ERROR(ENOENT)); |
34dc7c2f BB |
618 | |
619 | handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); | |
620 | ||
26ef0cc7 TH |
621 | handler->zi_spa = spa; |
622 | handler->zi_record = *record; | |
623 | ||
624 | if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { | |
625 | handler->zi_lanes = kmem_zalloc( | |
626 | sizeof (*handler->zi_lanes) * | |
627 | handler->zi_record.zi_nlanes, KM_SLEEP); | |
628 | handler->zi_next_lane = 0; | |
629 | } else { | |
630 | handler->zi_lanes = NULL; | |
631 | handler->zi_next_lane = 0; | |
632 | } | |
633 | ||
34dc7c2f BB |
634 | rw_enter(&inject_lock, RW_WRITER); |
635 | ||
26ef0cc7 TH |
636 | /* |
637 | * We can't move this increment into the conditional | |
638 | * above because we need to hold the RW_WRITER lock of | |
639 | * inject_lock, and we don't want to hold that while | |
640 | * allocating the handler's zi_lanes array. | |
641 | */ | |
642 | if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { | |
643 | ASSERT3S(inject_delay_count, >=, 0); | |
644 | inject_delay_count++; | |
645 | ASSERT3S(inject_delay_count, >, 0); | |
646 | } | |
647 | ||
34dc7c2f | 648 | *id = handler->zi_id = inject_next_id++; |
34dc7c2f | 649 | list_insert_tail(&inject_handlers, handler); |
bc89ac84 | 650 | atomic_inc_32(&zio_injection_enabled); |
34dc7c2f BB |
651 | |
652 | rw_exit(&inject_lock); | |
653 | } | |
654 | ||
655 | /* | |
656 | * Flush the ARC, so that any attempts to read this data will end up | |
657 | * going to the ZIO layer. Note that this is a little overkill, but | |
658 | * we don't have the necessary ARC interfaces to do anything else, and | |
659 | * fault injection isn't a performance critical path. | |
660 | */ | |
661 | if (flags & ZINJECT_FLUSH_ARC) | |
ca0bf58d PS |
662 | /* |
663 | * We must use FALSE to ensure arc_flush returns, since | |
664 | * we're not preventing concurrent ARC insertions. | |
665 | */ | |
666 | arc_flush(NULL, FALSE); | |
34dc7c2f BB |
667 | |
668 | return (0); | |
669 | } | |
670 | ||
671 | /* | |
672 | * Returns the next record with an ID greater than that supplied to the | |
673 | * function. Used to iterate over all handlers in the system. | |
674 | */ | |
675 | int | |
676 | zio_inject_list_next(int *id, char *name, size_t buflen, | |
677 | zinject_record_t *record) | |
678 | { | |
679 | inject_handler_t *handler; | |
680 | int ret; | |
681 | ||
682 | mutex_enter(&spa_namespace_lock); | |
683 | rw_enter(&inject_lock, RW_READER); | |
684 | ||
685 | for (handler = list_head(&inject_handlers); handler != NULL; | |
686 | handler = list_next(&inject_handlers, handler)) | |
687 | if (handler->zi_id > *id) | |
688 | break; | |
689 | ||
690 | if (handler) { | |
691 | *record = handler->zi_record; | |
692 | *id = handler->zi_id; | |
693 | (void) strncpy(name, spa_name(handler->zi_spa), buflen); | |
694 | ret = 0; | |
695 | } else { | |
2e528b49 | 696 | ret = SET_ERROR(ENOENT); |
34dc7c2f BB |
697 | } |
698 | ||
699 | rw_exit(&inject_lock); | |
700 | mutex_exit(&spa_namespace_lock); | |
701 | ||
702 | return (ret); | |
703 | } | |
704 | ||
705 | /* | |
706 | * Clear the fault handler with the given identifier, or return ENOENT if none | |
707 | * exists. | |
708 | */ | |
709 | int | |
710 | zio_clear_fault(int id) | |
711 | { | |
712 | inject_handler_t *handler; | |
34dc7c2f BB |
713 | |
714 | rw_enter(&inject_lock, RW_WRITER); | |
715 | ||
716 | for (handler = list_head(&inject_handlers); handler != NULL; | |
717 | handler = list_next(&inject_handlers, handler)) | |
718 | if (handler->zi_id == id) | |
719 | break; | |
720 | ||
721 | if (handler == NULL) { | |
572e2857 | 722 | rw_exit(&inject_lock); |
2e528b49 | 723 | return (SET_ERROR(ENOENT)); |
34dc7c2f BB |
724 | } |
725 | ||
26ef0cc7 TH |
726 | if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { |
727 | ASSERT3S(inject_delay_count, >, 0); | |
728 | inject_delay_count--; | |
729 | ASSERT3S(inject_delay_count, >=, 0); | |
730 | } | |
731 | ||
572e2857 | 732 | list_remove(&inject_handlers, handler); |
34dc7c2f BB |
733 | rw_exit(&inject_lock); |
734 | ||
26ef0cc7 TH |
735 | if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { |
736 | ASSERT3P(handler->zi_lanes, !=, NULL); | |
737 | kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) * | |
738 | handler->zi_record.zi_nlanes); | |
739 | } else { | |
740 | ASSERT3P(handler->zi_lanes, ==, NULL); | |
741 | } | |
742 | ||
572e2857 BB |
743 | spa_inject_delref(handler->zi_spa); |
744 | kmem_free(handler, sizeof (inject_handler_t)); | |
bc89ac84 | 745 | atomic_dec_32(&zio_injection_enabled); |
572e2857 BB |
746 | |
747 | return (0); | |
34dc7c2f BB |
748 | } |
749 | ||
750 | void | |
751 | zio_inject_init(void) | |
752 | { | |
b128c09f | 753 | rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); |
26ef0cc7 | 754 | mutex_init(&inject_delay_mtx, NULL, MUTEX_DEFAULT, NULL); |
34dc7c2f BB |
755 | list_create(&inject_handlers, sizeof (inject_handler_t), |
756 | offsetof(inject_handler_t, zi_link)); | |
757 | } | |
758 | ||
759 | void | |
760 | zio_inject_fini(void) | |
761 | { | |
762 | list_destroy(&inject_handlers); | |
26ef0cc7 | 763 | mutex_destroy(&inject_delay_mtx); |
b128c09f | 764 | rw_destroy(&inject_lock); |
34dc7c2f | 765 | } |
c409e464 BB |
766 | |
767 | #if defined(_KERNEL) && defined(HAVE_SPL) | |
e89bd697 IH |
768 | EXPORT_SYMBOL(zio_injection_enabled); |
769 | EXPORT_SYMBOL(zio_inject_fault); | |
770 | EXPORT_SYMBOL(zio_inject_list_next); | |
771 | EXPORT_SYMBOL(zio_clear_fault); | |
772 | EXPORT_SYMBOL(zio_handle_fault_injection); | |
773 | EXPORT_SYMBOL(zio_handle_device_injection); | |
774 | EXPORT_SYMBOL(zio_handle_label_injection); | |
c409e464 | 775 | #endif |