uint64_t zc_ena;
uint64_t zc_pool_guid;
uint64_t zc_vdev_guid;
+ uint64_t zc_parent_guid;
int zc_pool_state;
char zc_serd_checksum[MAX_SERDLEN];
char zc_serd_io[MAX_SERDLEN];
}
/*
- * count other unique slow-io cases in a pool
+ * Return count of other unique SERD cases under same vdev parent
*/
static uint_t
-zfs_other_slow_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
+zfs_other_serd_cases(fmd_hdl_t *hdl, const zfs_case_data_t *zfs_case)
{
zfs_case_t *zcp;
uint_t cases = 0;
for (zcp = uu_list_first(zfs_cases); zcp != NULL;
zcp = uu_list_next(zfs_cases, zcp)) {
- if (zcp->zc_data.zc_pool_guid == zfs_case->zc_pool_guid &&
- zcp->zc_data.zc_vdev_guid != zfs_case->zc_vdev_guid &&
- zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
- fmd_serd_active(hdl, zcp->zc_data.zc_serd_slow_io)) {
+ zfs_case_data_t *zcd = &zcp->zc_data;
+
+ /*
+ * Must be the same pool and parent vdev but a different leaf vdev.
+ */
+ if (zcd->zc_pool_guid != zfs_case->zc_pool_guid ||
+ zcd->zc_parent_guid != zfs_case->zc_parent_guid ||
+ zcd->zc_vdev_guid == zfs_case->zc_vdev_guid) {
+ continue;
+ }
+
+ /*
+ * Check if there is another active SERD case besides zfs_case.
+ *
+ * Only one SERD engine will be assigned to the case.
+ */
+ if (zcd->zc_serd_checksum[0] == zfs_case->zc_serd_checksum[0] &&
+ fmd_serd_active(hdl, zcd->zc_serd_checksum)) {
+ cases++;
+ }
+ if (zcd->zc_serd_io[0] == zfs_case->zc_serd_io[0] &&
+ fmd_serd_active(hdl, zcd->zc_serd_io)) {
+ cases++;
+ }
+ if (zcd->zc_serd_slow_io[0] == zfs_case->zc_serd_slow_io[0] &&
+ fmd_serd_active(hdl, zcd->zc_serd_slow_io)) {
cases++;
}
}
return (cases);
}
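
The first-character comparisons above lean on the SERD engine naming convention: an engine slot that was never assigned stays an empty string, while every assigned engine name begins with the same "zfs_" prefix. Comparing first characters is therefore enough to tell whether both cases carry the same kind of engine (an empty name simply fails the engine lookup, so fmd_serd_active() reports it inactive). As a minimal sketch, assuming the snprintf-based helper used elsewhere in zfs_diagnosis.c:

static void
zfs_serd_name(char *buf, uint64_t pool_guid, uint64_t vdev_guid,
    const char *type)
{
	/* e.g. "zfs_<pool-guid>_<vdev-guid>_io"; empty until assigned */
	(void) snprintf(buf, MAX_SERDLEN, "zfs_%llx_%llx_%s",
	    (u_longlong_t)pool_guid, (u_longlong_t)vdev_guid, type);
}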
+/*
+ * Record the specified event in the SERD engine and return a
+ * boolean value indicating whether or not the engine fired as
+ * the result of inserting this event.
+ *
+ * When the pool has similar active cases on other vdevs, the
+ * fired state is disregarded and the case is retired instead.
+ */
+static int
+zfs_fm_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep,
+ zfs_case_t *zcp, const char *err_type)
+{
+ int fired = fmd_serd_record(hdl, name, ep);
+ int peers = 0;
+
+ if (fired && (peers = zfs_other_serd_cases(hdl, &zcp->zc_data)) > 0) {
+ fmd_hdl_debug(hdl, "pool %llu is tracking %d other %s cases "
+ "-- skip faulting the vdev %llu",
+ (u_longlong_t)zcp->zc_data.zc_pool_guid,
+ peers, err_type,
+ (u_longlong_t)zcp->zc_data.zc_vdev_guid);
+ zfs_case_retire(hdl, zcp);
+ fired = 0;
+ }
+
+ return (fired);
+}
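
For reference, the "fired" state consumed here comes from a SERD engine created earlier in zfs_fm() with per-vdev tunables: an engine fires once N events are recorded within a window of T nanoseconds. A hedged sketch of the io engine's creation path, mirroring the fmd_serd_create() call visible in the context below (exact guard and names per the full file):

	if (zcp->zc_data.zc_serd_io[0] == '\0') {
		zfs_serd_name(zcp->zc_data.zc_serd_io,
		    pool_guid, vdev_guid, "io");
		fmd_serd_create(hdl, zcp->zc_data.zc_serd_io,
		    io_n, SEC2NSEC(io_t));
		zfs_case_serialize(zcp);
	}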
+
/*
* Main fmd entry point.
*/
{
zfs_case_t *zcp, *dcp;
int32_t pool_state;
- uint64_t ena, pool_guid, vdev_guid;
+ uint64_t ena, pool_guid, vdev_guid, parent_guid;
uint64_t checksum_n, checksum_t;
uint64_t io_n, io_t;
er_timeval_t pool_load;
if (nvlist_lookup_uint64(nvl,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
vdev_guid = 0;
+ if (nvlist_lookup_uint64(nvl,
+ FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, &parent_guid) != 0)
+ parent_guid = 0;
if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) != 0)
ena = 0;
data.zc_ena = ena;
data.zc_pool_guid = pool_guid;
data.zc_vdev_guid = vdev_guid;
+ data.zc_parent_guid = parent_guid;
data.zc_pool_state = (int)pool_state;
fmd_buf_write(hdl, cs, CASE_DATA, &data, sizeof (data));
SEC2NSEC(io_t));
zfs_case_serialize(zcp);
}
- if (fmd_serd_record(hdl, zcp->zc_data.zc_serd_io, ep))
+ if (zfs_fm_serd_record(hdl, zcp->zc_data.zc_serd_io,
+ ep, zcp, "io error")) {
checkremove = B_TRUE;
+ }
} else if (fmd_nvl_class_match(hdl, nvl,
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_DELAY))) {
uint64_t slow_io_n, slow_io_t;
}
/* Pass event to SERD engine and see if this triggers */
if (zcp->zc_data.zc_serd_slow_io[0] != '\0' &&
- fmd_serd_record(hdl, zcp->zc_data.zc_serd_slow_io,
- ep)) {
- /*
- * Ignore a slow io diagnosis when other
- * VDEVs in the pool show signs of being slow.
- */
- if (zfs_other_slow_cases(hdl, &zcp->zc_data)) {
- zfs_case_retire(hdl, zcp);
- fmd_hdl_debug(hdl, "pool %llu has "
- "multiple slow io cases -- skip "
- "degrading vdev %llu",
- (u_longlong_t)
- zcp->zc_data.zc_pool_guid,
- (u_longlong_t)
- zcp->zc_data.zc_vdev_guid);
- } else {
- zfs_case_solve(hdl, zcp,
- "fault.fs.zfs.vdev.slow_io");
- }
+ zfs_fm_serd_record(hdl,
+ zcp->zc_data.zc_serd_slow_io, ep, zcp, "slow io")) {
+ zfs_case_solve(hdl, zcp,
+ "fault.fs.zfs.vdev.slow_io");
}
} else if (fmd_nvl_class_match(hdl, nvl,
ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
SEC2NSEC(checksum_t));
zfs_case_serialize(zcp);
}
- if (fmd_serd_record(hdl,
- zcp->zc_data.zc_serd_checksum, ep)) {
+ if (zfs_fm_serd_record(hdl,
+ zcp->zc_data.zc_serd_checksum, ep, zcp,
+ "checksum")) {
zfs_case_solve(hdl, zcp,
"fault.fs.zfs.vdev.checksum");
}
--- /dev/null
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024, Klara Inc.
+#
+
+# DESCRIPTION:
+# Verify that simultaneous io error events from multiple vdevs
+# do not generate a fault
+#
+# STRATEGY:
+# 1. Create a pool with a 4-disk raidz vdev
+# 2. Inject io errors
+# 3. Verify that ZED detects the errors but doesn't fault any vdevs
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+TESTDIR="$TEST_BASE_DIR/zed_error_multiple"
+VDEV1="$TEST_BASE_DIR/vdevfile1.$$"
+VDEV2="$TEST_BASE_DIR/vdevfile2.$$"
+VDEV3="$TEST_BASE_DIR/vdevfile3.$$"
+VDEV4="$TEST_BASE_DIR/vdevfile4.$$"
+VDEVS="$VDEV1 $VDEV2 $VDEV3 $VDEV4"
+TESTPOOL="zed_test_pool"
+FILEPATH="$TESTDIR/zed.testfile"
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+
+ # if the pool still exists, something failed, so log additional info
+ if poolexists $TESTPOOL ; then
+ log_note "$(zpool status -s $TESTPOOL)"
+ echo "=================== zed log search ==================="
+ grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
+ destroy_pool $TESTPOOL
+ fi
+ log_must zed_stop
+
+ log_must rm -f $VDEVS
+}
+
+function start_io_errors
+{
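+ # io_n/io_t are the per-vdev SERD thresholds: the io engine
+ # fires after io_n errors occur within a span of io_t seconds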
+ for vdev in $VDEVS
+ do
+ log_must zpool set io_n=4 $TESTPOOL $vdev
+ log_must zpool set io_t=60 $TESTPOOL $vdev
+ done
+ zpool sync
+
+ for vdev in $VDEVS
+ do
+ log_must zinject -d $vdev -e io $TESTPOOL
+ done
+ zpool sync
+}
+
+function multiple_slow_vdevs_test
+{
+ log_must truncate -s 1G $VDEVS
+ default_raidz_setup_noexit $VDEVS
+
+ log_must zpool events -c
+ log_must zfs set compression=off $TESTPOOL
+ log_must zfs set primarycache=none $TESTPOOL
+ log_must zfs set recordsize=4K $TESTPOOL
+ log_must zfs set mountpoint=$TESTDIR $TESTPOOL
+
+ log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=4
+ zpool sync
+
+ #
+ # Read the file with io errors injected on the disks
+ # This will cause multiple errors on each disk to trip the ZED SERD engines
+ #
+ # pool: zed_test_pool
+ # state: ONLINE
+ # status: One or more devices has experienced an unrecoverable error. An
+ # attempt was made to correct the error. Applications are unaffected.
+ # action: Determine if the device needs to be replaced, and clear the errors
+ # using 'zpool clear' or replace the device with 'zpool replace'.
+ # see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-9P
+ # config:
+ #
+ # NAME STATE READ WRITE CKSUM
+ # zed_test_pool ONLINE 0 0 0
+ # raidz1-0 ONLINE 0 0 0
+ # /var/tmp/vdevfile1.1547063 ONLINE 532 561 0
+ # /var/tmp/vdevfile2.1547063 ONLINE 547 594 0
+ # /var/tmp/vdevfile3.1547063 ONLINE 1.05K 1.10K 0
+ # /var/tmp/vdevfile4.1547063 ONLINE 1.05K 1.00K 0
+ #
+
+ start_io_errors
+ dd if=$FILEPATH of=/dev/null bs=1M count=4 2>/dev/null
+ log_must zinject -c all
+
+ # count io error events available for processing
+ typeset -i i=0
+ typeset -i events=0
+ while [[ $i -lt 60 ]]; do
+ events=$(zpool events | grep "ereport\.fs\.zfs\.io" | wc -l)
+ [[ $events -ge "50" ]] && break
+ i=$((i+1))
+ sleep 1
+ done
+ log_note "$events io error events found"
+ if [[ $events -lt "50" ]]; then
+ log_note "bailing: not enough events to complete the test"
+ destroy_pool $TESTPOOL
+ return
+ fi
+
+ #
+ # give a slow ZED a chance to process the io error events
+ #
+ typeset -i i=0
+ typeset -i skips=0
+ while [[ $i -lt 75 ]]; do
+ skips=$(grep "retiring case" \
+ $ZEDLET_DIR/zed.log | wc -l)
+ [[ $skips -gt "0" ]] && break
+ i=$((i+1))
+ sleep 1
+ done
+
+ log_note $skips fault skips in ZED log after $i seconds
+ [ $skips -gt "0" ] || log_fail "expecting to see skips"
+
+ fault=$(grep "zpool_vdev_fault" $ZEDLET_DIR/zed.log | wc -l)
+ log_note $fault vdev faults in ZED log
+ [ $fault -eq "0" ] || \
+ log_fail "expecting no fault events, found $fault"
+
+ destroy_pool $TESTPOOL
+}
+
+log_assert "Test ZED io errors across multiple vdevs"
+log_onexit cleanup
+
+log_must zed_events_drain
+log_must zed_start
+multiple_slow_vdevs_test
+
+log_pass "Test ZED io errors across multiple vdevs"