]>
Commit | Line | Data |
---|---|---|
70e083d2 TG |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
86e3c28a CIK |
23 | * Copyright (c) 2012, 2015 by Delphix. All rights reserved. |
24 | * Copyright (c) 2017, Intel Corporation. | |
70e083d2 TG |
25 | */ |
26 | ||
27 | /* | |
28 | * ZFS fault injection | |
29 | * | |
30 | * To handle fault injection, we keep track of a series of zinject_record_t | |
31 | * structures which describe which logical block(s) should be injected with a | |
32 | * fault. These are kept in a global list. Each record corresponds to a given | |
33 | * spa_t and maintains a special hold on the spa_t so that it cannot be deleted | |
34 | * or exported while the injection record exists. | |
35 | * | |
36 | * Device level injection is done using the 'zi_guid' field. If this is set, it | |
37 | * means that the error is destined for a particular device, not a piece of | |
38 | * data. | |
39 | * | |
40 | * This is a rather poor data structure and algorithm, but we don't expect more | |
41 | * than a few faults at any one time, so it should be sufficient for our needs. | |
42 | */ | |
43 | ||
44 | #include <sys/arc.h> | |
86e3c28a | 45 | #include <sys/zio.h> |
70e083d2 TG |
46 | #include <sys/zfs_ioctl.h> |
47 | #include <sys/vdev_impl.h> | |
48 | #include <sys/dmu_objset.h> | |
49 | #include <sys/fs/zfs.h> | |
50 | ||
/*
 * Global switch for the injection framework: incremented for every
 * registered handler, decremented on removal.  Hot paths test this
 * before taking any locks.
 */
uint32_t zio_injection_enabled = 0;

/*
 * Data describing each zinject handler registered on the system, and
 * contains the list node linking the handler in the global zinject
 * handler list.
 */
typedef struct inject_handler {
	int zi_id;			/* unique id, assigned from inject_next_id */
	spa_t *zi_spa;			/* pool this handler targets (inject ref held) */
	zinject_record_t zi_record;	/* user-supplied injection parameters */
	uint64_t *zi_lanes;		/* per-lane idle times; ZINJECT_DELAY_IO only */
	int zi_next_lane;		/* next lane to assign; ZINJECT_DELAY_IO only */
	list_node_t zi_link;		/* linkage into inject_handlers */
} inject_handler_t;

/*
 * List of all zinject handlers registered on the system, protected by
 * the inject_lock defined below.
 */
static list_t inject_handlers;

/*
 * This protects insertion into, and traversal of, the inject handler
 * list defined above; as well as the inject_delay_count. Any time a
 * handler is inserted or removed from the list, this lock should be
 * taken as a RW_WRITER; and any time traversal is done over the list
 * (without modification to it) this lock should be taken as a RW_READER.
 */
static krwlock_t inject_lock;

/*
 * This holds the number of zinject delay handlers that have been
 * registered on the system. It is protected by the inject_lock defined
 * above. Thus modifications to this count must be a RW_WRITER of the
 * inject_lock, and reads of this count must be (at least) a RW_READER
 * of the lock.
 */
static int inject_delay_count = 0;

/*
 * This lock is used only in zio_handle_io_delay(), refer to the comment
 * in that function for more details.
 */
static kmutex_t inject_delay_mtx;

/*
 * Used to assign unique identifying numbers to each new zinject handler.
 */
static int inject_next_id = 1;
101 | ||
86e3c28a CIK |
102 | /* |
103 | * Test if the requested frequency was triggered | |
104 | */ | |
105 | static boolean_t | |
106 | freq_triggered(uint32_t frequency) | |
107 | { | |
108 | /* | |
109 | * zero implies always (100%) | |
110 | */ | |
111 | if (frequency == 0) | |
112 | return (B_TRUE); | |
113 | ||
114 | /* | |
115 | * Note: we still handle legacy (unscaled) frequecy values | |
116 | */ | |
117 | uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX; | |
118 | ||
119 | return (spa_get_random(maximum) < frequency); | |
120 | } | |
121 | ||
70e083d2 TG |
122 | /* |
123 | * Returns true if the given record matches the I/O in progress. | |
124 | */ | |
125 | static boolean_t | |
126 | zio_match_handler(zbookmark_phys_t *zb, uint64_t type, | |
127 | zinject_record_t *record, int error) | |
128 | { | |
129 | /* | |
130 | * Check for a match against the MOS, which is based on type | |
131 | */ | |
132 | if (zb->zb_objset == DMU_META_OBJSET && | |
133 | record->zi_objset == DMU_META_OBJSET && | |
134 | record->zi_object == DMU_META_DNODE_OBJECT) { | |
135 | if (record->zi_type == DMU_OT_NONE || | |
136 | type == record->zi_type) | |
86e3c28a | 137 | return (freq_triggered(record->zi_freq)); |
70e083d2 TG |
138 | else |
139 | return (B_FALSE); | |
140 | } | |
141 | ||
142 | /* | |
143 | * Check for an exact match. | |
144 | */ | |
145 | if (zb->zb_objset == record->zi_objset && | |
146 | zb->zb_object == record->zi_object && | |
147 | zb->zb_level == record->zi_level && | |
148 | zb->zb_blkid >= record->zi_start && | |
149 | zb->zb_blkid <= record->zi_end && | |
150 | error == record->zi_error) | |
86e3c28a | 151 | return (freq_triggered(record->zi_freq)); |
70e083d2 TG |
152 | |
153 | return (B_FALSE); | |
154 | } | |
155 | ||
156 | /* | |
157 | * Panic the system when a config change happens in the function | |
158 | * specified by tag. | |
159 | */ | |
160 | void | |
161 | zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type) | |
162 | { | |
163 | inject_handler_t *handler; | |
164 | ||
165 | rw_enter(&inject_lock, RW_READER); | |
166 | ||
167 | for (handler = list_head(&inject_handlers); handler != NULL; | |
168 | handler = list_next(&inject_handlers, handler)) { | |
169 | ||
170 | if (spa != handler->zi_spa) | |
171 | continue; | |
172 | ||
173 | if (handler->zi_record.zi_type == type && | |
174 | strcmp(tag, handler->zi_record.zi_func) == 0) | |
175 | panic("Panic requested in function %s\n", tag); | |
176 | } | |
177 | ||
178 | rw_exit(&inject_lock); | |
179 | } | |
180 | ||
181 | /* | |
182 | * Determine if the I/O in question should return failure. Returns the errno | |
183 | * to be returned to the caller. | |
184 | */ | |
185 | int | |
186 | zio_handle_fault_injection(zio_t *zio, int error) | |
187 | { | |
188 | int ret = 0; | |
189 | inject_handler_t *handler; | |
190 | ||
191 | /* | |
192 | * Ignore I/O not associated with any logical data. | |
193 | */ | |
194 | if (zio->io_logical == NULL) | |
195 | return (0); | |
196 | ||
197 | /* | |
198 | * Currently, we only support fault injection on reads. | |
199 | */ | |
200 | if (zio->io_type != ZIO_TYPE_READ) | |
201 | return (0); | |
202 | ||
203 | rw_enter(&inject_lock, RW_READER); | |
204 | ||
205 | for (handler = list_head(&inject_handlers); handler != NULL; | |
206 | handler = list_next(&inject_handlers, handler)) { | |
207 | ||
208 | if (zio->io_spa != handler->zi_spa || | |
209 | handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT) | |
210 | continue; | |
211 | ||
212 | /* If this handler matches, return EIO */ | |
213 | if (zio_match_handler(&zio->io_logical->io_bookmark, | |
214 | zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, | |
215 | &handler->zi_record, error)) { | |
216 | ret = error; | |
217 | break; | |
218 | } | |
219 | } | |
220 | ||
221 | rw_exit(&inject_lock); | |
222 | ||
223 | return (ret); | |
224 | } | |
225 | ||
/*
 * Determine if the zio is part of a label update and has an injection
 * handler associated with that portion of the label. Currently, we
 * allow error injection in either the nvlist or the uberblock region
 * of the vdev label.
 *
 * Returns the errno to inject, or 0 if no handler matches.
 */
int
zio_handle_label_injection(zio_t *zio, int error)
{
	inject_handler_t *handler;
	vdev_t *vd = zio->io_vd;
	uint64_t offset = zio->io_offset;
	int label;
	int ret = 0;

	/* I/O outside the front and back label regions is never affected. */
	if (offset >= VDEV_LABEL_START_SIZE &&
	    offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
		return (0);

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {
		uint64_t start = handler->zi_record.zi_start;
		uint64_t end = handler->zi_record.zi_end;

		if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT)
			continue;

		/*
		 * The injection region is the relative offsets within a
		 * vdev label. We must determine the label which is being
		 * updated and adjust our region accordingly.
		 */
		label = vdev_label_number(vd->vdev_psize, offset);
		start = vdev_label_offset(vd->vdev_psize, label, start);
		end = vdev_label_offset(vd->vdev_psize, label, end);

		if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
		    (offset >= start && offset <= end)) {
			ret = error;
			break;
		}
	}
	rw_exit(&inject_lock);
	return (ret);
}
273 | ||
274 | ||
/*
 * Determine if a device-level fault handler should inject an error for
 * this zio on the given vdev (zio may be NULL during device open).
 * Returns the errno to inject, or 0 if no handler applies.
 */
int
zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
{
	inject_handler_t *handler;
	int ret = 0;

	/*
	 * We skip over faults in the labels unless it's during
	 * device open (i.e. zio == NULL).
	 */
	if (zio != NULL) {
		uint64_t offset = zio->io_offset;

		if (offset < VDEV_LABEL_START_SIZE ||
		    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE)
			return (0);
	}

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT)
			continue;

		if (vd->vdev_guid == handler->zi_record.zi_guid) {
			/*
			 * Failfast handlers only fire on the first attempt:
			 * skip retries and "try hard" I/O (and device opens).
			 */
			if (handler->zi_record.zi_failfast &&
			    (zio == NULL || (zio->io_flags &
			    (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
				continue;
			}

			/* Handle type specific I/O failures */
			if (zio != NULL &&
			    handler->zi_record.zi_iotype != ZIO_TYPES &&
			    handler->zi_record.zi_iotype != zio->io_type)
				continue;

			if (handler->zi_record.zi_error == error) {
				/*
				 * limit error injection if requested
				 */
				if (!freq_triggered(handler->zi_record.zi_freq))
					continue;

				/*
				 * For a failed open, pretend like the device
				 * has gone away.
				 */
				if (error == ENXIO)
					vd->vdev_stat.vs_aux =
					    VDEV_AUX_OPEN_FAILED;

				/*
				 * Treat these errors as if they had been
				 * retried so that all the appropriate stats
				 * and FMA events are generated.
				 */
				if (!handler->zi_record.zi_failfast &&
				    zio != NULL)
					zio->io_flags |= ZIO_FLAG_IO_RETRY;

				ret = error;
				break;
			}
			/*
			 * An ENXIO handler downgrades any other error on
			 * this vdev to EIO.
			 */
			if (handler->zi_record.zi_error == ENXIO) {
				ret = SET_ERROR(EIO);
				break;
			}
		}
	}

	rw_exit(&inject_lock);

	return (ret);
}
352 | ||
/*
 * Simulate hardware that ignores cache flushes. For requested number
 * of seconds nix the actual writing to disk.
 */
void
zio_handle_ignored_writes(zio_t *zio)
{
	inject_handler_t *handler;

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		/* Ignore errors not destined for this pool */
		if (zio->io_spa != handler->zi_spa ||
		    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
			continue;

		/*
		 * Positive duration implies # of seconds, negative
		 * a number of txgs.  On first use, latch the starting
		 * point (lbolt or this zio's txg) into zi_timer.
		 */
		if (handler->zi_record.zi_timer == 0) {
			if (handler->zi_record.zi_duration > 0)
				handler->zi_record.zi_timer = ddi_get_lbolt64();
			else
				handler->zi_record.zi_timer = zio->io_txg;
		}

		/* Have a "problem" writing 60% of the time */
		if (spa_get_random(100) < 60)
			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
		break;
	}

	rw_exit(&inject_lock);
}
391 | ||
/*
 * Verify that any ignored-writes handler registered against this pool
 * has not exceeded its requested duration (in seconds or txgs).  Called
 * from the sync path; trips a VERIFY if the window has expired.
 */
void
spa_handle_ignored_writes(spa_t *spa)
{
	inject_handler_t *handler;

	/* Fast path: no handlers registered at all. */
	if (zio_injection_enabled == 0)
		return;

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		if (spa != handler->zi_spa ||
		    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
			continue;

		if (handler->zi_record.zi_duration > 0) {
			/* Positive duration: window measured in seconds. */
			VERIFY(handler->zi_record.zi_timer == 0 ||
			    ddi_time_after64(
			    (int64_t)handler->zi_record.zi_timer +
			    handler->zi_record.zi_duration * hz,
			    ddi_get_lbolt64()));
		} else {
			/* duration is negative so the subtraction here adds */
			VERIFY(handler->zi_record.zi_timer == 0 ||
			    handler->zi_record.zi_timer -
			    handler->zi_record.zi_duration >=
			    spa_syncing_txg(spa));
		}
	}

	rw_exit(&inject_lock);
}
426 | ||
/*
 * Compute the absolute time (hrtime) at which this zio should complete
 * in order to honor any registered ZINJECT_DELAY_IO handlers for its
 * vdev.  Returns 0 when the I/O should not be delayed.
 */
hrtime_t
zio_handle_io_delay(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	inject_handler_t *min_handler = NULL;
	hrtime_t min_target = 0;
	inject_handler_t *handler;
	hrtime_t idle;
	hrtime_t busy;
	hrtime_t target;

	rw_enter(&inject_lock, RW_READER);

	/*
	 * inject_delay_count is a subset of zio_injection_enabled that
	 * is only incremented for delay handlers. These checks are
	 * mainly added to remind the reader why we're not explicitly
	 * checking zio_injection_enabled like the other functions.
	 */
	IMPLY(inject_delay_count > 0, zio_injection_enabled > 0);
	IMPLY(zio_injection_enabled == 0, inject_delay_count == 0);

	/*
	 * If there aren't any inject delay handlers registered, then we
	 * can short circuit and simply return 0 here. A value of zero
	 * informs zio_delay_interrupt() that this request should not be
	 * delayed. This short circuit keeps us from acquiring the
	 * inject_delay_mutex unnecessarily.
	 */
	if (inject_delay_count == 0) {
		rw_exit(&inject_lock);
		return (0);
	}

	/*
	 * Each inject handler has a number of "lanes" associated with
	 * it. Each lane is able to handle requests independently of one
	 * another, and at a latency defined by the inject handler
	 * record's zi_timer field. Thus if a handler in configured with
	 * a single lane with a 10ms latency, it will delay requests
	 * such that only a single request is completed every 10ms. So,
	 * if more than one request is attempted per each 10ms interval,
	 * the average latency of the requests will be greater than
	 * 10ms; but if only a single request is submitted each 10ms
	 * interval the average latency will be 10ms.
	 *
	 * We need to acquire this mutex to prevent multiple concurrent
	 * threads being assigned to the same lane of a given inject
	 * handler. The mutex allows us to perform the following two
	 * operations atomically:
	 *
	 *	1. determine the minimum handler and minimum target
	 *	   value of all the possible handlers
	 *	2. update that minimum handler's lane array
	 *
	 * Without atomicity, two (or more) threads could pick the same
	 * lane in step (1), and then conflict with each other in step
	 * (2). This could allow a single lane handler to process
	 * multiple requests simultaneously, which shouldn't be possible.
	 */
	mutex_enter(&inject_delay_mtx);

	for (handler = list_head(&inject_handlers);
	    handler != NULL; handler = list_next(&inject_handlers, handler)) {
		if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
			continue;

		if (!freq_triggered(handler->zi_record.zi_freq))
			continue;

		if (vd->vdev_guid != handler->zi_record.zi_guid)
			continue;

		/*
		 * Defensive; should never happen as the array allocation
		 * occurs prior to inserting this handler on the list.
		 */
		ASSERT3P(handler->zi_lanes, !=, NULL);

		/*
		 * This should never happen, the zinject command should
		 * prevent a user from setting an IO delay with zero lanes.
		 */
		ASSERT3U(handler->zi_record.zi_nlanes, !=, 0);

		ASSERT3U(handler->zi_record.zi_nlanes, >,
		    handler->zi_next_lane);

		/*
		 * We want to issue this IO to the lane that will become
		 * idle the soonest, so we compare the soonest this
		 * specific handler can complete the IO with all other
		 * handlers, to find the lowest value of all possible
		 * lanes. We then use this lane to submit the request.
		 *
		 * Since each handler has a constant value for its
		 * delay, we can just use the "next" lane for that
		 * handler; as it will always be the lane with the
		 * lowest value for that particular handler (i.e. the
		 * lane that will become idle the soonest). This saves a
		 * scan of each handler's lanes array.
		 *
		 * There's two cases to consider when determining when
		 * this specific IO request should complete. If this
		 * lane is idle, we want to "submit" the request now so
		 * it will complete after zi_timer milliseconds. Thus,
		 * we set the target to now + zi_timer.
		 *
		 * If the lane is busy, we want this request to complete
		 * zi_timer milliseconds after the lane becomes idle.
		 * Since the 'zi_lanes' array holds the time at which
		 * each lane will become idle, we use that value to
		 * determine when this request should complete.
		 */
		idle = handler->zi_record.zi_timer + gethrtime();
		busy = handler->zi_record.zi_timer +
		    handler->zi_lanes[handler->zi_next_lane];
		target = MAX(idle, busy);

		if (min_handler == NULL) {
			/* First matching handler becomes the candidate. */
			min_handler = handler;
			min_target = target;
			continue;
		}

		ASSERT3P(min_handler, !=, NULL);
		ASSERT3U(min_target, !=, 0);

		/*
		 * We don't yet increment the "next lane" variable since
		 * we still might find a lower value lane in another
		 * handler during any remaining iterations. Once we're
		 * sure we've selected the absolute minimum, we'll claim
		 * the lane and increment the handler's "next lane"
		 * field below.
		 */

		if (target < min_target) {
			min_handler = handler;
			min_target = target;
		}
	}

	/*
	 * 'min_handler' will be NULL if no IO delays are registered for
	 * this vdev, otherwise it will point to the handler containing
	 * the lane that will become idle the soonest.
	 */
	if (min_handler != NULL) {
		ASSERT3U(min_target, !=, 0);
		min_handler->zi_lanes[min_handler->zi_next_lane] = min_target;

		/*
		 * If we've used all possible lanes for this handler,
		 * loop back and start using the first lane again;
		 * otherwise, just increment the lane index.
		 */
		min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) %
		    min_handler->zi_record.zi_nlanes;
	}

	mutex_exit(&inject_delay_mtx);
	rw_exit(&inject_lock);

	return (min_target);
}
593 | ||
/*
 * Create a new handler for the given record. We add it to the list, adding
 * a reference to the spa_t in the process. We increment zio_injection_enabled,
 * which is the switch to trigger all fault injection.
 *
 * On success, the new handler's id is returned through *id.  Returns 0,
 * or an errno on failure (EINVAL for bad delay parameters, ENOENT if the
 * pool cannot be referenced, or the error from spa_reset()).
 */
int
zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
{
	inject_handler_t *handler;
	int error;
	spa_t *spa;

	/*
	 * If this is pool-wide metadata, make sure we unload the corresponding
	 * spa_t, so that the next attempt to load it will trigger the fault.
	 * We call spa_reset() to unload the pool appropriately.
	 */
	if (flags & ZINJECT_UNLOAD_SPA)
		if ((error = spa_reset(name)) != 0)
			return (error);

	if (record->zi_cmd == ZINJECT_DELAY_IO) {
		/*
		 * A value of zero for the number of lanes or for the
		 * delay time doesn't make sense.
		 */
		if (record->zi_timer == 0 || record->zi_nlanes == 0)
			return (SET_ERROR(EINVAL));

		/*
		 * The number of lanes is directly mapped to the size of
		 * an array used by the handler. Thus, to ensure the
		 * user doesn't trigger an allocation that's "too large"
		 * we cap the number of lanes here.
		 */
		if (record->zi_nlanes >= UINT16_MAX)
			return (SET_ERROR(EINVAL));
	}

	if (!(flags & ZINJECT_NULL)) {
		/*
		 * spa_inject_ref() will add an injection reference, which will
		 * prevent the pool from being removed from the namespace while
		 * still allowing it to be unloaded.
		 */
		if ((spa = spa_inject_addref(name)) == NULL)
			return (SET_ERROR(ENOENT));

		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);

		handler->zi_spa = spa;
		handler->zi_record = *record;

		/* Delay handlers need their per-lane idle-time array. */
		if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
			handler->zi_lanes = kmem_zalloc(
			    sizeof (*handler->zi_lanes) *
			    handler->zi_record.zi_nlanes, KM_SLEEP);
			handler->zi_next_lane = 0;
		} else {
			handler->zi_lanes = NULL;
			handler->zi_next_lane = 0;
		}

		rw_enter(&inject_lock, RW_WRITER);

		/*
		 * We can't move this increment into the conditional
		 * above because we need to hold the RW_WRITER lock of
		 * inject_lock, and we don't want to hold that while
		 * allocating the handler's zi_lanes array.
		 */
		if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
			ASSERT3S(inject_delay_count, >=, 0);
			inject_delay_count++;
			ASSERT3S(inject_delay_count, >, 0);
		}

		*id = handler->zi_id = inject_next_id++;
		list_insert_tail(&inject_handlers, handler);
		atomic_inc_32(&zio_injection_enabled);

		rw_exit(&inject_lock);
	}

	/*
	 * Flush the ARC, so that any attempts to read this data will end up
	 * going to the ZIO layer. Note that this is a little overkill, but
	 * we don't have the necessary ARC interfaces to do anything else, and
	 * fault injection isn't a performance critical path.
	 */
	if (flags & ZINJECT_FLUSH_ARC)
		/*
		 * We must use FALSE to ensure arc_flush returns, since
		 * we're not preventing concurrent ARC insertions.
		 */
		arc_flush(NULL, FALSE);

	return (0);
}
693 | ||
694 | /* | |
695 | * Returns the next record with an ID greater than that supplied to the | |
696 | * function. Used to iterate over all handlers in the system. | |
697 | */ | |
698 | int | |
699 | zio_inject_list_next(int *id, char *name, size_t buflen, | |
700 | zinject_record_t *record) | |
701 | { | |
702 | inject_handler_t *handler; | |
703 | int ret; | |
704 | ||
705 | mutex_enter(&spa_namespace_lock); | |
706 | rw_enter(&inject_lock, RW_READER); | |
707 | ||
708 | for (handler = list_head(&inject_handlers); handler != NULL; | |
709 | handler = list_next(&inject_handlers, handler)) | |
710 | if (handler->zi_id > *id) | |
711 | break; | |
712 | ||
713 | if (handler) { | |
714 | *record = handler->zi_record; | |
715 | *id = handler->zi_id; | |
716 | (void) strncpy(name, spa_name(handler->zi_spa), buflen); | |
717 | ret = 0; | |
718 | } else { | |
719 | ret = SET_ERROR(ENOENT); | |
720 | } | |
721 | ||
722 | rw_exit(&inject_lock); | |
723 | mutex_exit(&spa_namespace_lock); | |
724 | ||
725 | return (ret); | |
726 | } | |
727 | ||
/*
 * Clear the fault handler with the given identifier, or return ENOENT if none
 * exists.
 */
int
zio_clear_fault(int id)
{
	inject_handler_t *handler;

	rw_enter(&inject_lock, RW_WRITER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler))
		if (handler->zi_id == id)
			break;

	if (handler == NULL) {
		rw_exit(&inject_lock);
		return (SET_ERROR(ENOENT));
	}

	/* Keep the delay-handler count in sync while still a writer. */
	if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
		ASSERT3S(inject_delay_count, >, 0);
		inject_delay_count--;
		ASSERT3S(inject_delay_count, >=, 0);
	}

	list_remove(&inject_handlers, handler);
	rw_exit(&inject_lock);

	/* Free the lanes array allocated by zio_inject_fault(). */
	if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
		ASSERT3P(handler->zi_lanes, !=, NULL);
		kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) *
		    handler->zi_record.zi_nlanes);
	} else {
		ASSERT3P(handler->zi_lanes, ==, NULL);
	}

	spa_inject_delref(handler->zi_spa);
	kmem_free(handler, sizeof (inject_handler_t));
	atomic_dec_32(&zio_injection_enabled);

	return (0);
}
772 | ||
/*
 * One-time initialization of the injection framework: set up the locks
 * and the global handler list.
 */
void
zio_inject_init(void)
{
	rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&inject_delay_mtx, NULL, MUTEX_DEFAULT, NULL);
	list_create(&inject_handlers, sizeof (inject_handler_t),
	    offsetof(inject_handler_t, zi_link));
}
781 | ||
/*
 * Tear down the injection framework; mirrors zio_inject_init().  All
 * handlers are expected to have been cleared before this is called.
 */
void
zio_inject_fini(void)
{
	list_destroy(&inject_handlers);
	mutex_destroy(&inject_delay_mtx);
	rw_destroy(&inject_lock);
}
789 | ||
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Export the injection entry points to the rest of the kernel module. */
EXPORT_SYMBOL(zio_injection_enabled);
EXPORT_SYMBOL(zio_inject_fault);
EXPORT_SYMBOL(zio_inject_list_next);
EXPORT_SYMBOL(zio_clear_fault);
EXPORT_SYMBOL(zio_handle_fault_injection);
EXPORT_SYMBOL(zio_handle_device_injection);
EXPORT_SYMBOL(zio_handle_label_injection);
#endif