]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/vdev_mirror.c
OpenZFS 9102 - zfs should be able to initialize storage devices
[mirror_zfs.git] / module / zfs / vdev_mirror.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
28 */
29
30 #include <sys/zfs_context.h>
31 #include <sys/spa.h>
32 #include <sys/vdev_impl.h>
33 #include <sys/zio.h>
34 #include <sys/abd.h>
35 #include <sys/fs/zfs.h>
36
37 /*
38 * Vdev mirror kstats
39 */
40 static kstat_t *mirror_ksp = NULL;
41
42 typedef struct mirror_stats {
43 kstat_named_t vdev_mirror_stat_rotating_linear;
44 kstat_named_t vdev_mirror_stat_rotating_offset;
45 kstat_named_t vdev_mirror_stat_rotating_seek;
46 kstat_named_t vdev_mirror_stat_non_rotating_linear;
47 kstat_named_t vdev_mirror_stat_non_rotating_seek;
48
49 kstat_named_t vdev_mirror_stat_preferred_found;
50 kstat_named_t vdev_mirror_stat_preferred_not_found;
51 } mirror_stats_t;
52
53 static mirror_stats_t mirror_stats = {
54 /* New I/O follows directly the last I/O */
55 { "rotating_linear", KSTAT_DATA_UINT64 },
56 /* New I/O is within zfs_vdev_mirror_rotating_seek_offset of the last */
57 { "rotating_offset", KSTAT_DATA_UINT64 },
58 /* New I/O requires random seek */
59 { "rotating_seek", KSTAT_DATA_UINT64 },
60 /* New I/O follows directly the last I/O (nonrot) */
61 { "non_rotating_linear", KSTAT_DATA_UINT64 },
62 /* New I/O requires random seek (nonrot) */
63 { "non_rotating_seek", KSTAT_DATA_UINT64 },
64 /* Preferred child vdev found */
65 { "preferred_found", KSTAT_DATA_UINT64 },
66 /* Preferred child vdev not found or equal load */
67 { "preferred_not_found", KSTAT_DATA_UINT64 },
68
69 };
70
/*
 * Counter helpers.  "stat" is the name of a mirror_stats_t member:
 *   MIRROR_STAT - the member's 64-bit value, usable as an lvalue
 *   MIRROR_INCR - atomically add "val" to the counter
 *   MIRROR_BUMP - atomically add one to the counter
 */
#define	MIRROR_STAT(stat)	(mirror_stats.stat.value.ui64)
#define	MIRROR_INCR(stat, val)	atomic_add_64(&MIRROR_STAT(stat), (val))
#define	MIRROR_BUMP(stat)	atomic_add_64(&MIRROR_STAT(stat), 1)
74
75 void
76 vdev_mirror_stat_init(void)
77 {
78 mirror_ksp = kstat_create("zfs", 0, "vdev_mirror_stats",
79 "misc", KSTAT_TYPE_NAMED,
80 sizeof (mirror_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
81 if (mirror_ksp != NULL) {
82 mirror_ksp->ks_data = &mirror_stats;
83 kstat_install(mirror_ksp);
84 }
85 }
86
87 void
88 vdev_mirror_stat_fini(void)
89 {
90 if (mirror_ksp != NULL) {
91 kstat_delete(mirror_ksp);
92 mirror_ksp = NULL;
93 }
94 }
95
96 /*
97 * Virtual device vector for mirroring.
98 */
99
100 typedef struct mirror_child {
101 vdev_t *mc_vd;
102 uint64_t mc_offset;
103 int mc_error;
104 int mc_load;
105 uint8_t mc_tried;
106 uint8_t mc_skipped;
107 uint8_t mc_speculative;
108 } mirror_child_t;
109
110 typedef struct mirror_map {
111 int *mm_preferred;
112 int mm_preferred_cnt;
113 int mm_children;
114 boolean_t mm_replacing;
115 boolean_t mm_root;
116 mirror_child_t mm_child[];
117 } mirror_map_t;
118
/*
 * Right shift applied to the I/O offset before it is used to choose
 * between equally loaded children of a non-root mirror.
 */
static int vdev_mirror_shift = 21;

/*
 * The load configuration settings below are tuned by default for
 * the case where all devices are of the same rotational type.
 *
 * If there is a mixture of rotating and non-rotating media, setting
 * zfs_vdev_mirror_non_rotating_seek_inc to 0 may well provide better results
 * as it will direct more reads to the non-rotating vdevs which are more likely
 * to have a higher performance.
 */

/* Rotating media load calculation configuration. */
static int zfs_vdev_mirror_rotating_inc = 0;
static int zfs_vdev_mirror_rotating_seek_inc = 5;
static int zfs_vdev_mirror_rotating_seek_offset = 1 << 20;	/* 1 MiB */

/* Non-rotating media load calculation configuration. */
static int zfs_vdev_mirror_non_rotating_inc = 0;
static int zfs_vdev_mirror_non_rotating_seek_inc = 1;
139
140 static inline size_t
141 vdev_mirror_map_size(int children)
142 {
143 return (offsetof(mirror_map_t, mm_child[children]) +
144 sizeof (int) * children);
145 }
146
147 static inline mirror_map_t *
148 vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root)
149 {
150 mirror_map_t *mm;
151
152 mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP);
153 mm->mm_children = children;
154 mm->mm_replacing = replacing;
155 mm->mm_root = root;
156 mm->mm_preferred = (int *)((uintptr_t)mm +
157 offsetof(mirror_map_t, mm_child[children]));
158
159 return (mm);
160 }
161
162 static void
163 vdev_mirror_map_free(zio_t *zio)
164 {
165 mirror_map_t *mm = zio->io_vsd;
166
167 kmem_free(mm, vdev_mirror_map_size(mm->mm_children));
168 }
169
170 static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
171 .vsd_free = vdev_mirror_map_free,
172 .vsd_cksum_report = zio_vsd_default_cksum_report
173 };
174
175 static int
176 vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
177 {
178 uint64_t last_offset;
179 int64_t offset_diff;
180 int load;
181
182 /* All DVAs have equal weight at the root. */
183 if (mm->mm_root)
184 return (INT_MAX);
185
186 /*
187 * We don't return INT_MAX if the device is resilvering i.e.
188 * vdev_resilver_txg != 0 as when tested performance was slightly
189 * worse overall when resilvering with compared to without.
190 */
191
192 /* Fix zio_offset for leaf vdevs */
193 if (vd->vdev_ops->vdev_op_leaf)
194 zio_offset += VDEV_LABEL_START_SIZE;
195
196 /* Standard load based on pending queue length. */
197 load = vdev_queue_length(vd);
198 last_offset = vdev_queue_last_offset(vd);
199
200 if (vd->vdev_nonrot) {
201 /* Non-rotating media. */
202 if (last_offset == zio_offset) {
203 MIRROR_BUMP(vdev_mirror_stat_non_rotating_linear);
204 return (load + zfs_vdev_mirror_non_rotating_inc);
205 }
206
207 /*
208 * Apply a seek penalty even for non-rotating devices as
209 * sequential I/O's can be aggregated into fewer operations on
210 * the device, thus avoiding unnecessary per-command overhead
211 * and boosting performance.
212 */
213 MIRROR_BUMP(vdev_mirror_stat_non_rotating_seek);
214 return (load + zfs_vdev_mirror_non_rotating_seek_inc);
215 }
216
217 /* Rotating media I/O's which directly follow the last I/O. */
218 if (last_offset == zio_offset) {
219 MIRROR_BUMP(vdev_mirror_stat_rotating_linear);
220 return (load + zfs_vdev_mirror_rotating_inc);
221 }
222
223 /*
224 * Apply half the seek increment to I/O's within seek offset
225 * of the last I/O issued to this vdev as they should incur less
226 * of a seek increment.
227 */
228 offset_diff = (int64_t)(last_offset - zio_offset);
229 if (ABS(offset_diff) < zfs_vdev_mirror_rotating_seek_offset) {
230 MIRROR_BUMP(vdev_mirror_stat_rotating_offset);
231 return (load + (zfs_vdev_mirror_rotating_seek_inc / 2));
232 }
233
234 /* Apply the full seek increment to all other I/O's. */
235 MIRROR_BUMP(vdev_mirror_stat_rotating_seek);
236 return (load + zfs_vdev_mirror_rotating_seek_inc);
237 }
238
239 /*
240 * Avoid inlining the function to keep vdev_mirror_io_start(), which
241 * is this functions only caller, as small as possible on the stack.
242 */
243 noinline static mirror_map_t *
244 vdev_mirror_map_init(zio_t *zio)
245 {
246 mirror_map_t *mm = NULL;
247 mirror_child_t *mc;
248 vdev_t *vd = zio->io_vd;
249 int c;
250
251 if (vd == NULL) {
252 dva_t *dva = zio->io_bp->blk_dva;
253 spa_t *spa = zio->io_spa;
254 dva_t dva_copy[SPA_DVAS_PER_BP];
255
256 c = BP_GET_NDVAS(zio->io_bp);
257
258 /*
259 * If we do not trust the pool config, some DVAs might be
260 * invalid or point to vdevs that do not exist. We skip them.
261 */
262 if (!spa_trust_config(spa)) {
263 ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
264 int j = 0;
265 for (int i = 0; i < c; i++) {
266 if (zfs_dva_valid(spa, &dva[i], zio->io_bp))
267 dva_copy[j++] = dva[i];
268 }
269 if (j == 0) {
270 zio->io_vsd = NULL;
271 zio->io_error = ENXIO;
272 return (NULL);
273 }
274 if (j < c) {
275 dva = dva_copy;
276 c = j;
277 }
278 }
279
280 mm = vdev_mirror_map_alloc(c, B_FALSE, B_TRUE);
281 for (c = 0; c < mm->mm_children; c++) {
282 mc = &mm->mm_child[c];
283
284 mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c]));
285 mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
286 }
287 } else {
288 mm = vdev_mirror_map_alloc(vd->vdev_children,
289 (vd->vdev_ops == &vdev_replacing_ops ||
290 vd->vdev_ops == &vdev_spare_ops), B_FALSE);
291 for (c = 0; c < mm->mm_children; c++) {
292 mc = &mm->mm_child[c];
293 mc->mc_vd = vd->vdev_child[c];
294 mc->mc_offset = zio->io_offset;
295 }
296 }
297
298 zio->io_vsd = mm;
299 zio->io_vsd_ops = &vdev_mirror_vsd_ops;
300 return (mm);
301 }
302
303 static int
304 vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
305 uint64_t *ashift)
306 {
307 int numerrors = 0;
308 int lasterror = 0;
309
310 if (vd->vdev_children == 0) {
311 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
312 return (SET_ERROR(EINVAL));
313 }
314
315 vdev_open_children(vd);
316
317 for (int c = 0; c < vd->vdev_children; c++) {
318 vdev_t *cvd = vd->vdev_child[c];
319
320 if (cvd->vdev_open_error) {
321 lasterror = cvd->vdev_open_error;
322 numerrors++;
323 continue;
324 }
325
326 *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
327 *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
328 *ashift = MAX(*ashift, cvd->vdev_ashift);
329 }
330
331 if (numerrors == vd->vdev_children) {
332 if (vdev_children_are_offline(vd))
333 vd->vdev_stat.vs_aux = VDEV_AUX_CHILDREN_OFFLINE;
334 else
335 vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
336 return (lasterror);
337 }
338
339 return (0);
340 }
341
342 static void
343 vdev_mirror_close(vdev_t *vd)
344 {
345 for (int c = 0; c < vd->vdev_children; c++)
346 vdev_close(vd->vdev_child[c]);
347 }
348
349 static void
350 vdev_mirror_child_done(zio_t *zio)
351 {
352 mirror_child_t *mc = zio->io_private;
353
354 mc->mc_error = zio->io_error;
355 mc->mc_tried = 1;
356 mc->mc_skipped = 0;
357 }
358
359 static void
360 vdev_mirror_scrub_done(zio_t *zio)
361 {
362 mirror_child_t *mc = zio->io_private;
363
364 if (zio->io_error == 0) {
365 zio_t *pio;
366 zio_link_t *zl = NULL;
367
368 mutex_enter(&zio->io_lock);
369 while ((pio = zio_walk_parents(zio, &zl)) != NULL) {
370 mutex_enter(&pio->io_lock);
371 ASSERT3U(zio->io_size, >=, pio->io_size);
372 abd_copy(pio->io_abd, zio->io_abd, pio->io_size);
373 mutex_exit(&pio->io_lock);
374 }
375 mutex_exit(&zio->io_lock);
376 }
377
378 abd_free(zio->io_abd);
379
380 mc->mc_error = zio->io_error;
381 mc->mc_tried = 1;
382 mc->mc_skipped = 0;
383 }
384
385 /*
386 * Check the other, lower-index DVAs to see if they're on the same
387 * vdev as the child we picked. If they are, use them since they
388 * are likely to have been allocated from the primary metaslab in
389 * use at the time, and hence are more likely to have locality with
390 * single-copy data.
391 */
392 static int
393 vdev_mirror_dva_select(zio_t *zio, int p)
394 {
395 dva_t *dva = zio->io_bp->blk_dva;
396 mirror_map_t *mm = zio->io_vsd;
397 int preferred;
398 int c;
399
400 preferred = mm->mm_preferred[p];
401 for (p--; p >= 0; p--) {
402 c = mm->mm_preferred[p];
403 if (DVA_GET_VDEV(&dva[c]) == DVA_GET_VDEV(&dva[preferred]))
404 preferred = c;
405 }
406 return (preferred);
407 }
408
409 static int
410 vdev_mirror_preferred_child_randomize(zio_t *zio)
411 {
412 mirror_map_t *mm = zio->io_vsd;
413 int p;
414
415 if (mm->mm_root) {
416 p = spa_get_random(mm->mm_preferred_cnt);
417 return (vdev_mirror_dva_select(zio, p));
418 }
419
420 /*
421 * To ensure we don't always favour the first matching vdev,
422 * which could lead to wear leveling issues on SSD's, we
423 * use the I/O offset as a pseudo random seed into the vdevs
424 * which have the lowest load.
425 */
426 p = (zio->io_offset >> vdev_mirror_shift) % mm->mm_preferred_cnt;
427 return (mm->mm_preferred[p]);
428 }
429
430 /*
431 * Try to find a vdev whose DTL doesn't contain the block we want to read
432 * prefering vdevs based on determined load.
433 *
434 * Try to find a child whose DTL doesn't contain the block we want to read.
435 * If we can't, try the read on any vdev we haven't already tried.
436 */
437 static int
438 vdev_mirror_child_select(zio_t *zio)
439 {
440 mirror_map_t *mm = zio->io_vsd;
441 uint64_t txg = zio->io_txg;
442 int c, lowest_load;
443
444 ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg);
445
446 lowest_load = INT_MAX;
447 mm->mm_preferred_cnt = 0;
448 for (c = 0; c < mm->mm_children; c++) {
449 mirror_child_t *mc;
450
451 mc = &mm->mm_child[c];
452 if (mc->mc_tried || mc->mc_skipped)
453 continue;
454
455 if (mc->mc_vd == NULL || !vdev_readable(mc->mc_vd)) {
456 mc->mc_error = SET_ERROR(ENXIO);
457 mc->mc_tried = 1; /* don't even try */
458 mc->mc_skipped = 1;
459 continue;
460 }
461
462 if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) {
463 mc->mc_error = SET_ERROR(ESTALE);
464 mc->mc_skipped = 1;
465 mc->mc_speculative = 1;
466 continue;
467 }
468
469 mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset);
470 if (mc->mc_load > lowest_load)
471 continue;
472
473 if (mc->mc_load < lowest_load) {
474 lowest_load = mc->mc_load;
475 mm->mm_preferred_cnt = 0;
476 }
477 mm->mm_preferred[mm->mm_preferred_cnt] = c;
478 mm->mm_preferred_cnt++;
479 }
480
481 if (mm->mm_preferred_cnt == 1) {
482 MIRROR_BUMP(vdev_mirror_stat_preferred_found);
483 return (mm->mm_preferred[0]);
484 }
485
486 if (mm->mm_preferred_cnt > 1) {
487 MIRROR_BUMP(vdev_mirror_stat_preferred_not_found);
488 return (vdev_mirror_preferred_child_randomize(zio));
489 }
490
491 /*
492 * Every device is either missing or has this txg in its DTL.
493 * Look for any child we haven't already tried before giving up.
494 */
495 for (c = 0; c < mm->mm_children; c++) {
496 if (!mm->mm_child[c].mc_tried)
497 return (c);
498 }
499
500 /*
501 * Every child failed. There's no place left to look.
502 */
503 return (-1);
504 }
505
506 static void
507 vdev_mirror_io_start(zio_t *zio)
508 {
509 mirror_map_t *mm;
510 mirror_child_t *mc;
511 int c, children;
512
513 mm = vdev_mirror_map_init(zio);
514
515 if (mm == NULL) {
516 ASSERT(!spa_trust_config(zio->io_spa));
517 ASSERT(zio->io_type == ZIO_TYPE_READ);
518 zio_execute(zio);
519 return;
520 }
521
522 if (zio->io_type == ZIO_TYPE_READ) {
523 if (zio->io_bp != NULL &&
524 (zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) {
525 /*
526 * For scrubbing reads (if we can verify the
527 * checksum here, as indicated by io_bp being
528 * non-NULL) we need to allocate a read buffer for
529 * each child and issue reads to all children. If
530 * any child succeeds, it will copy its data into
531 * zio->io_data in vdev_mirror_scrub_done.
532 */
533 for (c = 0; c < mm->mm_children; c++) {
534 mc = &mm->mm_child[c];
535 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
536 mc->mc_vd, mc->mc_offset,
537 abd_alloc_sametype(zio->io_abd,
538 zio->io_size), zio->io_size,
539 zio->io_type, zio->io_priority, 0,
540 vdev_mirror_scrub_done, mc));
541 }
542 zio_execute(zio);
543 return;
544 }
545 /*
546 * For normal reads just pick one child.
547 */
548 c = vdev_mirror_child_select(zio);
549 children = (c >= 0);
550 } else {
551 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
552
553 /*
554 * Writes go to all children.
555 */
556 c = 0;
557 children = mm->mm_children;
558 }
559
560 while (children--) {
561 mc = &mm->mm_child[c];
562 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
563 mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size,
564 zio->io_type, zio->io_priority, 0,
565 vdev_mirror_child_done, mc));
566 c++;
567 }
568
569 zio_execute(zio);
570 }
571
572 static int
573 vdev_mirror_worst_error(mirror_map_t *mm)
574 {
575 int error[2] = { 0, 0 };
576
577 for (int c = 0; c < mm->mm_children; c++) {
578 mirror_child_t *mc = &mm->mm_child[c];
579 int s = mc->mc_speculative;
580 error[s] = zio_worst_error(error[s], mc->mc_error);
581 }
582
583 return (error[0] ? error[0] : error[1]);
584 }
585
586 static void
587 vdev_mirror_io_done(zio_t *zio)
588 {
589 mirror_map_t *mm = zio->io_vsd;
590 mirror_child_t *mc;
591 int c;
592 int good_copies = 0;
593 int unexpected_errors = 0;
594
595 if (mm == NULL)
596 return;
597
598 for (c = 0; c < mm->mm_children; c++) {
599 mc = &mm->mm_child[c];
600
601 if (mc->mc_error) {
602 if (!mc->mc_skipped)
603 unexpected_errors++;
604 } else if (mc->mc_tried) {
605 good_copies++;
606 }
607 }
608
609 if (zio->io_type == ZIO_TYPE_WRITE) {
610 /*
611 * XXX -- for now, treat partial writes as success.
612 *
613 * Now that we support write reallocation, it would be better
614 * to treat partial failure as real failure unless there are
615 * no non-degraded top-level vdevs left, and not update DTLs
616 * if we intend to reallocate.
617 */
618 /* XXPOLICY */
619 if (good_copies != mm->mm_children) {
620 /*
621 * Always require at least one good copy.
622 *
623 * For ditto blocks (io_vd == NULL), require
624 * all copies to be good.
625 *
626 * XXX -- for replacing vdevs, there's no great answer.
627 * If the old device is really dead, we may not even
628 * be able to access it -- so we only want to
629 * require good writes to the new device. But if
630 * the new device turns out to be flaky, we want
631 * to be able to detach it -- which requires all
632 * writes to the old device to have succeeded.
633 */
634 if (good_copies == 0 || zio->io_vd == NULL)
635 zio->io_error = vdev_mirror_worst_error(mm);
636 }
637 return;
638 }
639
640 ASSERT(zio->io_type == ZIO_TYPE_READ);
641
642 /*
643 * If we don't have a good copy yet, keep trying other children.
644 */
645 /* XXPOLICY */
646 if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) {
647 ASSERT(c >= 0 && c < mm->mm_children);
648 mc = &mm->mm_child[c];
649 zio_vdev_io_redone(zio);
650 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
651 mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size,
652 ZIO_TYPE_READ, zio->io_priority, 0,
653 vdev_mirror_child_done, mc));
654 return;
655 }
656
657 /* XXPOLICY */
658 if (good_copies == 0) {
659 zio->io_error = vdev_mirror_worst_error(mm);
660 ASSERT(zio->io_error != 0);
661 }
662
663 if (good_copies && spa_writeable(zio->io_spa) &&
664 (unexpected_errors ||
665 (zio->io_flags & ZIO_FLAG_RESILVER) ||
666 ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) {
667 /*
668 * Use the good data we have in hand to repair damaged children.
669 */
670 for (c = 0; c < mm->mm_children; c++) {
671 /*
672 * Don't rewrite known good children.
673 * Not only is it unnecessary, it could
674 * actually be harmful: if the system lost
675 * power while rewriting the only good copy,
676 * there would be no good copies left!
677 */
678 mc = &mm->mm_child[c];
679
680 if (mc->mc_error == 0) {
681 if (mc->mc_tried)
682 continue;
683 /*
684 * We didn't try this child. We need to
685 * repair it if:
686 * 1. it's a scrub (in which case we have
687 * tried everything that was healthy)
688 * - or -
689 * 2. it's an indirect vdev (in which case
690 * it could point to any other vdev, which
691 * might have a bad DTL)
692 * - or -
693 * 3. the DTL indicates that this data is
694 * missing from this vdev
695 */
696 if (!(zio->io_flags & ZIO_FLAG_SCRUB) &&
697 mc->mc_vd->vdev_ops != &vdev_indirect_ops &&
698 !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL,
699 zio->io_txg, 1))
700 continue;
701 mc->mc_error = SET_ERROR(ESTALE);
702 }
703
704 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
705 mc->mc_vd, mc->mc_offset,
706 zio->io_abd, zio->io_size,
707 ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
708 ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
709 ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
710 }
711 }
712 }
713
714 static void
715 vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded)
716 {
717 if (faulted == vd->vdev_children) {
718 if (vdev_children_are_offline(vd)) {
719 vdev_set_state(vd, B_FALSE, VDEV_STATE_OFFLINE,
720 VDEV_AUX_CHILDREN_OFFLINE);
721 } else {
722 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
723 VDEV_AUX_NO_REPLICAS);
724 }
725 } else if (degraded + faulted != 0) {
726 vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE);
727 } else {
728 vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
729 }
730 }
731
732 vdev_ops_t vdev_mirror_ops = {
733 vdev_mirror_open,
734 vdev_mirror_close,
735 vdev_default_asize,
736 vdev_mirror_io_start,
737 vdev_mirror_io_done,
738 vdev_mirror_state_change,
739 NULL,
740 NULL,
741 NULL,
742 NULL,
743 vdev_default_xlate,
744 VDEV_TYPE_MIRROR, /* name of this vdev type */
745 B_FALSE /* not a leaf vdev */
746 };
747
748 vdev_ops_t vdev_replacing_ops = {
749 vdev_mirror_open,
750 vdev_mirror_close,
751 vdev_default_asize,
752 vdev_mirror_io_start,
753 vdev_mirror_io_done,
754 vdev_mirror_state_change,
755 NULL,
756 NULL,
757 NULL,
758 NULL,
759 vdev_default_xlate,
760 VDEV_TYPE_REPLACING, /* name of this vdev type */
761 B_FALSE /* not a leaf vdev */
762 };
763
764 vdev_ops_t vdev_spare_ops = {
765 vdev_mirror_open,
766 vdev_mirror_close,
767 vdev_default_asize,
768 vdev_mirror_io_start,
769 vdev_mirror_io_done,
770 vdev_mirror_state_change,
771 NULL,
772 NULL,
773 NULL,
774 NULL,
775 vdev_default_xlate,
776 VDEV_TYPE_SPARE, /* name of this vdev type */
777 B_FALSE /* not a leaf vdev */
778 };
779
#if defined(_KERNEL)
/*
 * Expose the mirror load tunables as integer module parameters with
 * mode 0644.
 */
/* BEGIN CSTYLED */
module_param(zfs_vdev_mirror_rotating_inc, int, 0644);
MODULE_PARM_DESC(zfs_vdev_mirror_rotating_inc,
	"Rotating media load increment for non-seeking I/O's");

module_param(zfs_vdev_mirror_rotating_seek_inc, int, 0644);
MODULE_PARM_DESC(zfs_vdev_mirror_rotating_seek_inc,
	"Rotating media load increment for seeking I/O's");

module_param(zfs_vdev_mirror_rotating_seek_offset, int, 0644);
MODULE_PARM_DESC(zfs_vdev_mirror_rotating_seek_offset,
	"Offset in bytes from the last I/O which triggers a reduced rotating media seek increment");

module_param(zfs_vdev_mirror_non_rotating_inc, int, 0644);
MODULE_PARM_DESC(zfs_vdev_mirror_non_rotating_inc,
	"Non-rotating media load increment for non-seeking I/O's");

module_param(zfs_vdev_mirror_non_rotating_seek_inc, int, 0644);
MODULE_PARM_DESC(zfs_vdev_mirror_non_rotating_seek_inc,
	"Non-rotating media load increment for seeking I/O's");
/* END CSTYLED */
#endif