g_zfs_hdl = NULL;
}
-
-/*
- * In ZED context, all the FMA agents run in the same thread
- * and do not require a unique libzfs instance. Modules should
- * use these stubs.
- */
-libzfs_handle_t *
-__libzfs_init(void)
-{
- return (g_zfs_hdl);
-}
-
-void
-__libzfs_fini(libzfs_handle_t *hdl)
-{
-}
extern void zfs_slm_fini(void);
extern void zfs_slm_event(const char *, const char *, nvlist_t *);
-/*
- * In ZED context, all the FMA agents run in the same thread
- * and do not require a unique libzfs instance.
- */
-extern libzfs_handle_t *__libzfs_init(void);
-extern void __libzfs_fini(libzfs_handle_t *);
-
#ifdef __cplusplus
}
#endif
{
libzfs_handle_t *zhdl;
- if ((zhdl = __libzfs_init()) == NULL)
+ if ((zhdl = libzfs_init()) == NULL)
return;
if ((zfs_case_pool = uu_list_pool_create("zfs_case_pool",
sizeof (zfs_case_t), offsetof(zfs_case_t, zc_node),
NULL, UU_LIST_POOL_DEBUG)) == NULL) {
- __libzfs_fini(zhdl);
+ libzfs_fini(zhdl);
return;
}
if ((zfs_cases = uu_list_create(zfs_case_pool, NULL,
UU_LIST_DEBUG)) == NULL) {
uu_list_pool_destroy(zfs_case_pool);
- __libzfs_fini(zhdl);
+ libzfs_fini(zhdl);
return;
}
if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
uu_list_destroy(zfs_cases);
uu_list_pool_destroy(zfs_case_pool);
- __libzfs_fini(zhdl);
+ libzfs_fini(zhdl);
return;
}
uu_list_pool_destroy(zfs_case_pool);
zhdl = fmd_hdl_getspecific(hdl);
- __libzfs_fini(zhdl);
+ libzfs_fini(zhdl);
}
* trigger the FMA fault that we skipped earlier.
*
* ZFS on Linux porting notes:
- * In lieu of a thread pool, just spawn a thread on demmand.
* Linux udev provides a disk insert for both the disk and the partition
*
*/
#include <sys/sunddi.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
+#include <thread_pool.h>
#include <pthread.h>
#include <unistd.h>
#include "zfs_agents.h"
libzfs_handle_t *g_zfshdl;
list_t g_pool_list; /* list of unavailable pools at initialization */
list_t g_device_list; /* list of disks with asynchronous label request */
+tpool_t *g_tpool;
boolean_t g_enumeration_done;
-pthread_t g_zfs_tid;
+pthread_t g_zfs_tid; /* zfs_enum_pools() thread */
typedef struct unavailpool {
zpool_handle_t *uap_zhp;
- pthread_t uap_enable_tid; /* dataset enable thread if activated */
list_node_t uap_node;
} unavailpool_t;
unavailpool_t *uap;
uap = malloc(sizeof (unavailpool_t));
uap->uap_zhp = zhp;
- uap->uap_enable_tid = 0;
list_insert_tail((list_t *)data, uap);
} else {
zpool_close(zhp);
(dp->dd_func)(zhp, nvl, dp->dd_islabeled);
}
-static void *
+static void
zfs_enable_ds(void *arg)
{
unavailpool_t *pool = (unavailpool_t *)arg;
- assert(pool->uap_enable_tid = pthread_self());
-
(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
zpool_close(pool->uap_zhp);
- pool->uap_zhp = NULL;
-
- /* Note: zfs_slm_fini() will cleanup this pool entry on exit */
- return (NULL);
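+ /* the dispatched job owns this entry: it was removed from g_pool_list */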
+ free(pool);
}
static int
for (pool = list_head(&g_pool_list); pool != NULL;
pool = list_next(&g_pool_list, pool)) {
- if (pool->uap_enable_tid != 0)
- continue; /* entry already processed */
if (strcmp(zpool_get_name(zhp),
zpool_get_name(pool->uap_zhp)))
continue;
if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
- /* send to a background thread; keep on list */
- (void) pthread_create(&pool->uap_enable_tid,
- NULL, zfs_enable_ds, pool);
+ list_remove(&g_pool_list, pool);
+ (void) tpool_dispatch(g_tpool, zfs_enable_ds,
+ pool);
break;
}
}
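+/*
+ * For reference, the shared pool follows the usual thread_pool.h lifecycle.
+ * A sketch (tpool_create() and its arguments here are illustrative; the
+ * actual call is outside this hunk):
+ *
+ *	if (g_tpool == NULL)
+ *		g_tpool = tpool_create(1, 4, 0, NULL);
+ *	(void) tpool_dispatch(g_tpool, zfs_enable_ds, pool);
+ *	...
+ *	tpool_wait(g_tpool);		wait for queued and active jobs
+ *	tpool_destroy(g_tpool);
+ */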
int
zfs_slm_init()
{
- if ((g_zfshdl = __libzfs_init()) == NULL)
+ if ((g_zfshdl = libzfs_init()) == NULL)
return (-1);
/*
if (pthread_create(&g_zfs_tid, NULL, zfs_enum_pools, NULL) != 0) {
list_destroy(&g_pool_list);
- __libzfs_fini(g_zfshdl);
+ libzfs_fini(g_zfshdl);
return (-1);
}
/* wait for zfs_enum_pools thread to complete */
(void) pthread_join(g_zfs_tid, NULL);
+ /* destroy the thread pool */
+ if (g_tpool != NULL) {
+ tpool_wait(g_tpool);
+ tpool_destroy(g_tpool);
+ }
while ((pool = (list_head(&g_pool_list))) != NULL) {
- /*
- * each pool entry has two possibilities
- * 1. was made available (so wait for zfs_enable_ds thread)
- * 2. still unavailable (just close the pool)
- */
- if (pool->uap_enable_tid)
- (void) pthread_join(pool->uap_enable_tid, NULL);
- else if (pool->uap_zhp != NULL)
- zpool_close(pool->uap_zhp);
-
list_remove(&g_pool_list, pool);
+ zpool_close(pool->uap_zhp);
free(pool);
}
list_destroy(&g_pool_list);
}
list_destroy(&g_device_list);
- __libzfs_fini(g_zfshdl);
+ libzfs_fini(g_zfshdl);
}
void
nvlist_t **spares;
uint_t s, nspares;
char *dev_name;
+ zprop_source_t source;
+ uint64_t ashift;
config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&spares, &nspares) != 0)
return;
+ /*
+ * Look up the "ashift" pool property; we may need it for the replacement.
+ */
+ ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, &source);
+
replacement = fmd_nvl_alloc(hdl, FMD_SLEEP);
(void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
&spare_name) != 0)
continue;
+ /* if explicitly set (not default), add "ashift" to the spare nvlist */
+ if (source != ZPROP_SRC_DEFAULT)
+ (void) nvlist_add_uint64(spares[s],
+ ZPOOL_CONFIG_ASHIFT, ashift);
+
(void) nvlist_add_nvlist_array(replacement,
ZPOOL_CONFIG_CHILDREN, &spares[s], 1);
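+ /*
+ * The populated "replacement" root nvlist is then handed to
+ * zpool_vdev_attach() to swap the spare in. A sketch (the actual call
+ * site is outside this hunk):
+ *
+ *	(void) zpool_vdev_attach(zhp, dev_name, spare_name,
+ *	    replacement, B_TRUE);
+ */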
zfs_retire_data_t *zdp;
libzfs_handle_t *zhdl;
- if ((zhdl = __libzfs_init()) == NULL)
+ if ((zhdl = libzfs_init()) == NULL)
return;
if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
if (zdp != NULL) {
zfs_retire_clear_data(hdl, zdp);
- __libzfs_fini(zdp->zrd_hdl);
+ libzfs_fini(zdp->zrd_hdl);
fmd_hdl_free(hdl, zdp, sizeof (zfs_retire_data_t));
}
}
[tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
- 'auto_spare_002_pos.ksh']
+ 'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple']
tags = ['functional', 'fault']
[tests/functional/features/async_destroy]
#
# Load scsi_debug module with specified parameters
+# $blksz must be one of: < 512b | 512e | 4Kn >
#
-function load_scsi_debug # dev_size_mb add_host num_tgts max_luns
+function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
typeset devsize=$1
typeset hosts=$2
typeset tgts=$3
typeset luns=$4
+ typeset blksz=$5
[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
- [[ -z $luns ]] && log_fail "Arguments invalid or missing"
+ [[ -z $luns ]] || [[ -z $blksz ]] && \
+ log_fail "Arguments invalid or missing"
+
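+ # map blksz to scsi_debug parameters: physical block size is
+ # sector_size * 2^physblk_exp, so 512e = 512b logical on 4k physical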
+ case "$5" in
+ '512b')
+ typeset sector=512
+ typeset blkexp=0
+ ;;
+ '512e')
+ typeset sector=512
+ typeset blkexp=3
+ ;;
+ '4Kn')
+ typeset sector=4096
+ typeset blkexp=0
+ ;;
+ *) log_fail "Unsupported blksz value: $5" ;;
+ esac
if is_linux; then
modprobe -n scsi_debug
log_fail "scsi_debug module already installed"
else
log_must modprobe scsi_debug dev_size_mb=$devsize \
- add_host=$hosts num_tgts=$tgts max_luns=$luns
+ add_host=$hosts num_tgts=$tgts max_luns=$luns \
+ sector_size=$sector physblk_exp=$blkexp
block_device_wait
lsscsi | egrep scsi_debug > /dev/null
if (($? == 1)); then
fi
}
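+
+#
+# Example (illustrative values): a 256m scsi_debug device emulating a 512e
+# disk on a single host/target/lun:
+#
+#	load_scsi_debug 256 1 1 1 '512e'
+#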
+#
+# Unload scsi_debug module, if needed.
+#
+function unload_scsi_debug
+{
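+ # only attempt removal when the module is actually loaded, so that
+ # cleanup paths may call this function unconditionally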
+ if lsmod | grep scsi_debug >/dev/null; then
+ log_must modprobe -r scsi_debug
+ fi
+}
+
#
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then
zedpid=$(cat ${ZEDLET_DIR}/zed.pid)
kill $zedpid
- wait $zedpid
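+ # "wait" can only reap children of the current shell, so poll
+ # until the zed process has actually exited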
+ while ps -p $zedpid > /dev/null; do
+ sleep 1
+ done
rm -f ${ZEDLET_DIR}/zed.pid
fi
-
return 0
}
+#
+# Drain all zevents
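+# NOTE: "zpool events -c" clears the queue, but new events may arrive while
+# draining, so loop until the count stays at zero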
+#
+function zed_events_drain
+{
+ while [ $(zpool events -H | wc -l) -ne 0 ]; do
+ sleep 1
+ zpool events -c >/dev/null
+ done
+}
+
#
# Check if the provided device is currently being used as a swap device.
#
for SDDEVICE in $(get_debug_device); do
unplug $SDDEVICE
done
- modprobe -r scsi_debug
+ unload_scsi_debug
fi
log_pass
# Create scsi_debug devices for the reopen tests
if is_linux; then
- load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
+ load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
else
log_unsupported "scsi debug module unsupported"
fi
auto_online_001_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
- auto_spare_002_pos.ksh
+ auto_spare_002_pos.ksh \
+ auto_spare_ashift.ksh \
+ auto_spare_multiple.ksh
function cleanup
{
- #online last disk before fail
- insert_disk $offline_disk $host
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
+ unload_scsi_debug
}
log_assert "Testing automated auto-online FMA test"
# If using the default loop devices, need a scsi_debug device for auto-online
if is_loop_device $DISK1; then
- SD=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}')
- SDDEVICE=$(echo $SD | nawk -F / '{print $3}')
+ load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
+ SDDEVICE=$(get_debug_device)
SDDEVICE_ID=$(get_persistent_disk_name $SDDEVICE)
autoonline_disks="$SDDEVICE"
else
function setup
{
- lsmod | egrep scsi_debug > /dev/null
- if (($? == 1)); then
- load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
- fi
+ load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
+ SD=$(get_debug_device)
+ SDDEVICE_ID=$(get_persistent_disk_name $SD)
# Register vdev_id alias rule for scsi_debug device to create a
# persistent path
- SD=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' \
- | nawk -F / '{print $3}')
- SDDEVICE_ID=$(get_persistent_disk_name $SD)
log_must eval "echo "alias scsidebug /dev/disk/by-id/$SDDEVICE_ID" \
>> $VDEVID_CONF"
block_device_wait
-
- SDDEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD | egrep ID_VDEV \
- | nawk '{print $2; exit}' | nawk -F = '{print $2; exit}')
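+ # udevadm prints a line like "E: ID_VDEV=scsidebug"; keep only the value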
+ SDDEVICE=$(udevadm info -q all -n $DEV_DSKDIR/$SD \
+ | awk -F'=' '/ID_VDEV=/{print $2; exit}')
[[ -z $SDDEVICE ]] && log_fail "vdev rule was not registered properly"
}
function cleanup
{
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
+ unload_scsi_debug
}
log_assert "Testing automated auto-replace FMA test"
# Offline disk
remove_disk $SD
block_device_wait
-log_must modprobe -r scsi_debug
+unload_scsi_debug
# Reimport pool with drive missing
log_must zpool import $TESTPOOL
function cleanup
{
log_must zinject -c all
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
rm -f $VDEV_FILES $SPARE_FILE
}
log_onexit cleanup
+# Clear events from previous runs
+zed_events_drain
+
TESTFILE="/$TESTPOOL/$TESTFS/testfile"
for type in "mirror" "raidz" "raidz2"; do
function cleanup
{
log_must zinject -c all
- poolexists $TESTPOOL && destroy_pool $TESTPOOL
+ destroy_pool $TESTPOOL
rm -f $VDEV_FILES $SPARE_FILE
}
log_onexit cleanup
+# Clear events from previous runs
+zed_events_drain
+
TESTFILE="/$TESTPOOL/$TESTFS/testfile"
for type in "mirror" "raidz" "raidz2"; do
log_must dd if=/dev/urandom of=$TESTFILE bs=1M count=16
# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
+ # NOTE: checksum events are ratelimited to a max of 5 per second; the
+ # ZED needs 10 of them to kick in a spare
log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
log_must cp $TESTFILE /dev/null
+ log_must sleep 1
+ log_must cp $TESTFILE /dev/null
+ log_must sleep 1
+ log_must cp $TESTFILE /dev/null
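+ # (three reads spaced one second apart accumulate the 10 events
+ # required despite the 5-per-second ratelimit)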
# 5. Verify the ZED kicks in a hot spare and expected pool/device status
log_note "Wait for ZED to auto-spare"
--- /dev/null
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+# Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/math.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# a drive is faulted and a custom ashift value needs to be provided to
+# replace it.
+#
+# STRATEGY:
+# 1. Create a pool from 512b devices and set "ashift" pool property accordingly
+# 2. Add one 512e spare device (4Kn would generate IO errors on replace)
+# 3. Inject IO errors with a zinject error handler
+# 4. Start a scrub
+# 5. Verify the ZED kicks in the hot spare and expected pool/device status
+# 6. Clear the fault
+# 7. Verify the hot spare is available and expected pool/device status
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+ destroy_pool $TESTPOOL
+ unload_scsi_debug
+ rm -f $SAFE_DEVICE $FAIL_DEVICE
+}
+
+log_assert "ZED should replace a device using the configured ashift property"
+log_onexit cleanup
+
+# Clear events from previous runs
+zed_events_drain
+
+SAFE_DEVICE="$TEST_BASE_DIR/safe-dev"
+FAIL_DEVICE="$TEST_BASE_DIR/fail-dev"
+
+# 1. Create a pool from 512b devices and set "ashift" pool property accordingly
+for vdev in $SAFE_DEVICE $FAIL_DEVICE; do
+ truncate -s $SPA_MINDEVSIZE $vdev
+done
+log_must zpool create -f $TESTPOOL mirror $SAFE_DEVICE $FAIL_DEVICE
+# NOTE: file VDEVs should be added as 512b devices; verify this just in case
+for vdev in $SAFE_DEVICE $FAIL_DEVICE; do
+ verify_eq "9" "$(zdb -e -l $vdev | awk '/ashift: /{print $2}')" "ashift"
+done
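+# (ashift is the base-2 logarithm of the sector size: 2^9 = 512 bytes)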
+log_must zpool set ashift=9 $TESTPOOL
+
+# 2. Add one 512e spare device (4Kn would generate IO errors on replace)
+# NOTE: must be larger than the existing 512b devices; add 32m of fudge
+load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) $SDHOSTS $SDTGTS $SDLUNS '512e'
+SPARE_DEVICE=$(get_debug_device)
+log_must_busy zpool add $TESTPOOL spare $SPARE_DEVICE
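+# NOTE: an ashift=9 pool issues 512-byte IO; a 512e spare still exposes
+# 512-byte logical sectors, while a 4Kn one could not service them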
+
+# 3. Inject IO errors with a zinject error handler
+log_must zinject -d $FAIL_DEVICE -e io -T all -f 100 $TESTPOOL
+
+# 4. Start a scrub
+log_must zpool scrub $TESTPOOL
+
+# 5. Verify the ZED kicks in a hot spare and expected pool/device status
+log_note "Wait for ZED to auto-spare"
+log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "FAULTED" 60
+log_must wait_vdev_state $TESTPOOL $SPARE_DEVICE "ONLINE" 60
+log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "INUSE"
+log_must check_state $TESTPOOL "" "DEGRADED"
+
+# 6. Clear the fault
+log_must zinject -c all
+log_must zpool clear $TESTPOOL $FAIL_DEVICE
+
+# 7. Verify the hot spare is available and expected pool/device status
+log_must wait_vdev_state $TESTPOOL $FAIL_DEVICE "ONLINE" 60
+log_must wait_hotspare_state $TESTPOOL $SPARE_DEVICE "AVAIL"
+log_must is_pool_resilvered $TESTPOOL
+log_must check_state $TESTPOOL "" "ONLINE"
+
+log_pass "ZED successfully replaces a device using the configured ashift property"
--- /dev/null
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Intel Corporation. All rights reserved.
+# Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Automated Auto-Spare Test when
+# multiple drives are faulted.
+#
+# STRATEGY:
+# 1. Create a pool with two hot spares
+# 2. Inject IO ERRORS with a zinject error handler on the first device
+# 3. Start a scrub
+# 4. Verify the ZED kicks in a hot spare and expected pool/device status
+# 5. Inject IO ERRORS on a second device
+# 6. Start a scrub
+# 7. Verify the ZED kicks in a second hot spare
+# 8. Clear the fault on both devices
+# 9. Verify the hot spares are available and expected pool/device status
+# 10. Rinse and repeat, this time faulting both devices at the same time
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must zinject -c all
+ destroy_pool $TESTPOOL
+ rm -f $DATA_DEVS $SPARE_DEVS
+}
+
+log_assert "ZED should be able to handle multiple faulted devices"
+log_onexit cleanup
+
+# Clear events from previous runs
+zed_events_drain
+
+FAULT_DEV1="$TEST_BASE_DIR/fault-dev1"
+FAULT_DEV2="$TEST_BASE_DIR/fault-dev2"
+SAFE_DEV1="$TEST_BASE_DIR/safe-dev1"
+SAFE_DEV2="$TEST_BASE_DIR/safe-dev2"
+DATA_DEVS="$FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV1 $SAFE_DEV2"
+SPARE_DEV1="$TEST_BASE_DIR/spare-dev1"
+SPARE_DEV2="$TEST_BASE_DIR/spare-dev2"
+SPARE_DEVS="$SPARE_DEV1 $SPARE_DEV2"
+
+for type in "mirror" "raidz" "raidz2" "raidz3"; do
+ # 1. Create a pool with two hot spares
+ truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
+ log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS
+
+ # 2. Inject IO ERRORS with a zinject error handler on the first device
+ log_must zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL
+
+ # 3. Start a scrub
+ log_must zpool scrub $TESTPOOL
+
+ # 4. Verify the ZED kicks in a hot spare and expected pool/device status
+ log_note "Wait for ZED to auto-spare"
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
+ log_must check_state $TESTPOOL "" "DEGRADED"
+
+ # 5. Inject IO ERRORS on a second device
+ log_must zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL
+
+ # 6. Start a scrub
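+ # NOTE: a scrub cannot start while a previous scrub (or the resilver
+ # triggered by the auto-spare above) is still in progress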
+ while is_pool_scrubbing $TESTPOOL || is_pool_resilvering $TESTPOOL; do
+ sleep 1
+ done
+ log_must zpool scrub $TESTPOOL
+
+ # 7. Verify the ZED kicks in a second hot spare
+ log_note "Wait for ZED to auto-spare"
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
+ log_must check_state $TESTPOOL "" "DEGRADED"
+
+ # 8. Clear the fault on both devices
+ log_must zinject -c all
+ log_must zpool clear $TESTPOOL $FAULT_DEV1
+ log_must zpool clear $TESTPOOL $FAULT_DEV2
+
+ # 9. Verify the hot spares are available and expected pool/device status
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "ONLINE" 60
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "AVAIL"
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL"
+ log_must check_state $TESTPOOL "" "ONLINE"
+
+ # Cleanup
+ cleanup
+done
+
+# Rinse and repeat, this time faulting both devices at the same time
+# NOTE: "raidz" is exluded since it cannot survive 2 faulted devices
+# NOTE: "mirror" is a 4-way mirror here and should survive this test
+for type in "mirror" "raidz2" "raidz3"; do
+ # 1. Create a pool with two hot spares
+ truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
+ log_must zpool create -f $TESTPOOL $type $DATA_DEVS spare $SPARE_DEVS
+
+ # 2. Inject IO ERRORS with a zinject error handler on two devices
+ log_must eval "zinject -d $FAULT_DEV1 -e io -T all -f 100 $TESTPOOL &"
+ log_must eval "zinject -d $FAULT_DEV2 -e io -T all -f 100 $TESTPOOL &"
+
+ # 3. Start a scrub
+ log_must zpool scrub $TESTPOOL
+
+ # 4. Verify the ZED kicks in two hot spares and expected pool/device status
+ log_note "Wait for ZED to auto-spare"
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV1 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $FAULT_DEV2 "FAULTED" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
+ log_must wait_vdev_state $TESTPOOL $SPARE_DEV2 "ONLINE" 60
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
+ log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "INUSE"
+ log_must check_state $TESTPOOL "" "DEGRADED"
+
+ # 5. Clear the fault on both devices
+ log_must zinject -c all
+ log_must zpool clear $TESTPOOL $FAULT_DEV1
+ log_must zpool clear $TESTPOOL $FAULT_DEV2
+
+ # Cleanup
+ cleanup
+done
+
+log_pass "ZED successfully handles multiple faulted devices"
zed_stop
zed_cleanup
-SDDEVICE=$(get_debug_device)
-
-# Offline disk and remove scsi_debug module
-if is_linux; then
- if [ -n "$SDDEVICE" ]; then
- remove_disk $SDDEVICE
- fi
- modprobe -r scsi_debug
-fi
-
log_pass
zed_setup
zed_start
-# Create a scsi_debug device to be used with auto-online (if using loop devices)
-# and auto-replace regardless of other devices
-load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS
-
log_pass