]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Allow to control failfast
author Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
Thu, 10 Nov 2022 21:37:12 +0000 (22:37 +0100)
committer GitHub <noreply@github.com>
Thu, 10 Nov 2022 21:37:12 +0000 (13:37 -0800)
Linux defaults to setting "failfast" on BIOs, so that the OS will not
retry IOs that fail, and instead report the error to ZFS.

In some cases, such as errors reported by the HBA driver, not
the device itself, we would wish to retry rather than generating
vdev errors in ZFS. This new property allows that.

This introduces a per vdev option to disable the failfast option.
This also introduces a global module parameter to define the failfast
mask value.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
Sponsored-by: Seagate Technology LLC
Submitted-by: Klara, Inc.
Closes #14056

include/os/linux/kernel/linux/blkdev_compat.h
include/sys/fs/zfs.h
include/sys/vdev_impl.h
lib/libzfs/libzfs.abi
man/man4/zfs.4
man/man7/vdevprops.7
module/os/linux/zfs/vdev_disk.c
module/zcommon/zpool_prop.c
module/zfs/vdev.c

index 3276796537a4e20679aa960146d3960c2db6bcc3..45de1f4993f14f3a718e288f2eb8a896eb64a5a4 100644 (file)
@@ -126,7 +126,8 @@ typedef int bvec_iterator_t;
 #endif
 
 static inline void
-bio_set_flags_failfast(struct block_device *bdev, int *flags)
+bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev,
+    bool transport, bool driver)
 {
 #ifdef CONFIG_BUG
        /*
@@ -148,7 +149,12 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
 #endif /* BLOCK_EXT_MAJOR */
 #endif /* CONFIG_BUG */
 
-       *flags |= REQ_FAILFAST_MASK;
+       if (dev)
+               *flags |= REQ_FAILFAST_DEV;
+       if (transport)
+               *flags |= REQ_FAILFAST_TRANSPORT;
+       if (driver)
+               *flags |= REQ_FAILFAST_DRIVER;
 }
 
 /*
index 10a5ec3172a2ec82aa14b48fc55b2114c347def4..1124604e8c6827bd98bfaf18d8c1f9b6ad7d7061 100644 (file)
@@ -355,6 +355,7 @@ typedef enum {
        VDEV_PROP_BYTES_TRIM,
        VDEV_PROP_REMOVING,
        VDEV_PROP_ALLOCATING,
+       VDEV_PROP_FAILFAST,
        VDEV_NUM_PROPS
 } vdev_prop_t;
 
index bfa8fe093de21805b5480ed13e03e5179a3a07a4..3f4b78b947a3b0720d631a9db3ff8a73fecbc2d3 100644 (file)
@@ -299,6 +299,7 @@ struct vdev {
        uint64_t        vdev_islog;     /* is an intent log device      */
        uint64_t        vdev_noalloc;   /* device is passivated?        */
        uint64_t        vdev_removing;  /* device is being removed?     */
+       uint64_t        vdev_failfast;  /* device failfast setting      */
        boolean_t       vdev_ishole;    /* is a hole in the namespace   */
        uint64_t        vdev_top_zap;
        vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias  */
index 061a060b669709a271d1aba51e10f6b3df6e5588..98873784e7dc96db1ecdd332f4812f9e805c5178 100644 (file)
       <enumerator name='VDEV_PROP_BYTES_TRIM' value='38'/>
       <enumerator name='VDEV_PROP_REMOVING' value='39'/>
       <enumerator name='VDEV_PROP_ALLOCATING' value='40'/>
-      <enumerator name='VDEV_NUM_PROPS' value='41'/>
+      <enumerator name='VDEV_PROP_FAILFAST' value='41'/>
+      <enumerator name='VDEV_NUM_PROPS' value='42'/>
     </enum-decl>
     <typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
     <enum-decl name='vdev_state' id='21566197'>
index ad3d8810e925415b33cb81bc35293cdb369b6cde..98539a6369e775bb7d7be96fed6da3365b660688 100644 (file)
@@ -15,7 +15,7 @@
 .\" own identifying information:
 .\" Portions Copyright [yyyy] [name of copyright owner]
 .\"
-.Dd November 7, 2022
+.Dd November 9, 2022
 .Dt ZFS 4
 .Os
 .
@@ -1345,6 +1345,19 @@ as fuller devices will tend to be slower than empty devices.
 Also see
 .Sy zio_dva_throttle_enabled .
 .
+.It Sy zfs_vdev_failfast_mask Ns = Ns Sy 1 Pq uint
+Defines if the driver should retry on a given error type.
+The following options may be bitwise-ored together:
+.TS
+box;
+lbz r l l .
+       Value   Name    Description
+_
+       1       Device  No driver retries on device errors.
+       2       Transport       No driver retries on transport errors.
+       4       Driver  No driver retries on driver errors.
+.TE
+.
 .It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
 Time before expiring
 .Pa .zfs/snapshot .
@@ -1364,7 +1377,7 @@ The following flags may be bitwise-ored together:
 .TS
 box;
 lbz r l l .
-       Value   Symbolic Name   Description
+       Value   Name    Description
 _
        1       ZFS_DEBUG_DPRINTF       Enable dprintf entries in the debug log.
 *      2       ZFS_DEBUG_DBUF_VERIFY   Enable extra dbuf verifications.
index b98bda064c70ddd8c76c0a417e1338d19b9661f1..af5d26f6b486c414a7a1418c76c63674cfea5de1 100644 (file)
@@ -20,7 +20,7 @@
 .\"
 .\" Copyright (c) 2021 Klara, Inc.
 .\"
-.Dd November 27, 2021
+.Dd October 30, 2022
 .Dt VDEVPROPS 7
 .Os
 .
@@ -121,6 +121,9 @@ dataset.
 A text comment up to 8192 characters long
 .It Sy bootsize
 The amount of space to reserve for the EFI system partition
+.It Sy failfast
+If this device should propagate BIO errors back to ZFS, used to disable
+failfast.
 .It Sy path
 The path to the device for this vdev
 .It Sy allocating
index 84d191abb90b000fdc698d5bd0451c4e9e4910a2..4f33009f14d42f1e22472e27df65199467d8d686 100644 (file)
@@ -74,6 +74,12 @@ typedef struct dio_request {
        struct bio              *dr_bio[0];     /* Attached bio's */
 } dio_request_t;
 
+/*
+ * BIO request failfast mask.
+ */
+
+static unsigned int zfs_vdev_failfast_mask = 1;
+
 static fmode_t
 vdev_bdev_mode(spa_mode_t spa_mode)
 {
@@ -659,8 +665,11 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
 retry:
        dr = vdev_disk_dio_alloc(bio_count);
 
-       if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
-               bio_set_flags_failfast(bdev, &flags);
+       if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
+           zio->io_vd->vdev_failfast == B_TRUE) {
+               bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
+                   zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
+       }
 
        dr->dr_zio = zio;
 
@@ -1045,3 +1054,6 @@ param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
 
 ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
        "Timeout before determining that a device is missing");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
+       "Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
index 4737bd628ddf9474803a4dc624c0e8ba7ae15232..285b979096312ee112b1249554613f3f648b9e6f 100644 (file)
@@ -420,6 +420,9 @@ vdev_prop_init(void)
            boolean_na_table, sfeatures);
 
        /* default index properties */
+       zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
+           PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
+           sfeatures);
 
        /* hidden properties */
        zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
index 8c62112de71bb9f96110d87c3633f2beec0b5f68..4520ca31b7d738afe8dadab4198ab12635721d84 100644 (file)
@@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd)
                }
        }
 
+       if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
+               spa_t *spa = vd->vdev_spa;
+               uint64_t failfast;
+
+               error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
+                   vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
+                   1, &failfast);
+               if (error == 0) {
+                       vd->vdev_failfast = failfast & 1;
+               } else if (error == ENOENT) {
+                       vd->vdev_failfast = vdev_prop_default_numeric(
+                           VDEV_PROP_FAILFAST);
+               } else {
+                       vdev_dbgmsg(vd,
+                           "vdev_load: zap_lookup(top_zap=%llu) "
+                           "failed [error=%d]",
+                           (u_longlong_t)vd->vdev_top_zap, error);
+               }
+       }
+
        /*
         * Load any rebuild state from the top-level vdev zap.
         */
@@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                        else
                                error = spa_vdev_alloc(spa, vdev_guid);
                        break;
+               case VDEV_PROP_FAILFAST:
+                       if (nvpair_value_uint64(elem, &intval) != 0) {
+                               error = EINVAL;
+                               break;
+                       }
+                       vd->vdev_failfast = intval & 1;
+                       break;
                default:
                        /* Most processing is done in vdev_props_set_sync */
                        break;
@@ -6019,6 +6046,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
                                        intval = ZPROP_BOOLEAN_NA;
                                }
 
+                               vdev_prop_add_list(outnvl, propname, strval,
+                                   intval, src);
+                               break;
+                       case VDEV_PROP_FAILFAST:
+                               src = ZPROP_SRC_LOCAL;
+                               strval = NULL;
+
+                               err = zap_lookup(mos, objid, nvpair_name(elem),
+                                   sizeof (uint64_t), 1, &intval);
+                               if (err == ENOENT) {
+                                       intval = vdev_prop_default_numeric(
+                                           prop);
+                                       err = 0;
+                               } else if (err) {
+                                       break;
+                               }
+                               if (intval == vdev_prop_default_numeric(prop))
+                                       src = ZPROP_SRC_DEFAULT;
+
                                vdev_prop_add_list(outnvl, propname, strval,
                                    intval, src);
                                break;