/*
* Expiration time in milliseconds. This value has two meanings. First it is
* used to determine when the spa_deadman() logic should fire. By default the
- * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
+ * spa_deadman() will fire if spa_sync() has not completed in 600 seconds.
* Secondly, the value determines if an I/O is considered "hung". Any I/O that
* has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
- * in a system panic.
+ * in one of three behaviors controlled by zfs_deadman_failmode.
*/
-unsigned long zfs_deadman_synctime_ms = 1000000ULL;
+unsigned long zfs_deadman_synctime_ms = 600000ULL;
+
+/*
+ * This value controls the maximum amount of time zio_wait() will block for an
+ * outstanding IO. By default this is 300 seconds at which point the "hung"
+ * behavior will be applied as described for zfs_deadman_synctime_ms.
+ */
+unsigned long zfs_deadman_ziotime_ms = 300000ULL;
/*
* Check time in milliseconds. This defines the frequency at which we check
* for hung I/O.
*/
-unsigned long zfs_deadman_checktime_ms = 5000ULL;
+unsigned long zfs_deadman_checktime_ms = 60000ULL;
/*
* By default the deadman is enabled.
*/
int zfs_deadman_enabled = 1;
+/*
+ * Controls the behavior of the deadman when it detects a "hung" I/O.
+ * Valid values are zfs_deadman_failmode=<wait|continue|panic>.
+ *
+ * wait - Wait for the "hung" I/O (default)
+ * continue - Attempt to recover from a "hung" I/O
+ * panic - Panic the system
+ */
+char *zfs_deadman_failmode = "wait";
+
/*
* The worst case is single-sector max-parity RAID-Z blocks, in which
* case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
(gethrtime() - spa->spa_sync_starttime) / NANOSEC,
++spa->spa_deadman_calls);
if (zfs_deadman_enabled)
- vdev_deadman(spa->spa_root_vdev);
+ vdev_deadman(spa->spa_root_vdev, FTAG);
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
spa->spa_proc_state = SPA_PROC_NONE;
spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
+ spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms);
+ spa_set_deadman_failmode(spa, zfs_deadman_failmode);
refcount_create(&spa->spa_refcount);
spa_config_lock_init(spa);
* Return the failure mode that has been set to this pool. The default
* behavior will be to block all I/Os when a complete failure occurs.
*/
-uint8_t
+uint64_t
spa_get_failmode(spa_t *spa)
{
return (spa->spa_failmode);
return (spa->spa_deadman_synctime);
}
+uint64_t
+spa_deadman_ziotime(spa_t *spa)
+{
+ return (spa->spa_deadman_ziotime);
+}
+
+uint64_t
+spa_get_deadman_failmode(spa_t *spa)
+{
+ return (spa->spa_deadman_failmode);
+}
+
+void
+spa_set_deadman_failmode(spa_t *spa, const char *failmode)
+{
+ if (strcmp(failmode, "wait") == 0)
+ spa->spa_deadman_failmode = ZIO_FAILURE_MODE_WAIT;
+ else if (strcmp(failmode, "continue") == 0)
+ spa->spa_deadman_failmode = ZIO_FAILURE_MODE_CONTINUE;
+ else if (strcmp(failmode, "panic") == 0)
+ spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC;
+ else
+ spa->spa_deadman_failmode = ZIO_FAILURE_MODE_WAIT;
+}
+
uint64_t
dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
{
}
#if defined(_KERNEL) && defined(HAVE_SPL)
+
+#include <linux/mod_compat.h>
+
+static int
+param_set_deadman_failmode(const char *val, zfs_kernel_param_t *kp)
+{
+ spa_t *spa = NULL;
+ char *p;
+
+ if (val == NULL)
+ return (SET_ERROR(-EINVAL));
+
+ if ((p = strchr(val, '\n')) != NULL)
+ *p = '\0';
+
+ if (strcmp(val, "wait") != 0 && strcmp(val, "continue") != 0 &&
+ strcmp(val, "panic"))
+ return (SET_ERROR(-EINVAL));
+
+ mutex_enter(&spa_namespace_lock);
+ while ((spa = spa_next(spa)) != NULL)
+ spa_set_deadman_failmode(spa, val);
+ mutex_exit(&spa_namespace_lock);
+
+ return (param_set_charp(val, kp));
+}
+
/* Namespace manipulation */
EXPORT_SYMBOL(spa_lookup);
EXPORT_SYMBOL(spa_add);
"Set to ignore IO errors during free and permanently leak the space");
module_param(zfs_deadman_synctime_ms, ulong, 0644);
-MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds");
+MODULE_PARM_DESC(zfs_deadman_synctime_ms,
+ "Pool sync expiration time in milliseconds");
+
+module_param(zfs_deadman_ziotime_ms, ulong, 0644);
+MODULE_PARM_DESC(zfs_deadman_ziotime_ms,
+ "IO expiration time in milliseconds");
module_param(zfs_deadman_checktime_ms, ulong, 0644);
MODULE_PARM_DESC(zfs_deadman_checktime_ms,
module_param(zfs_deadman_enabled, int, 0644);
MODULE_PARM_DESC(zfs_deadman_enabled, "Enable deadman timer");
+module_param_call(zfs_deadman_failmode, param_set_deadman_failmode,
+ param_get_charp, &zfs_deadman_failmode, 0644);
+MODULE_PARM_DESC(zfs_deadman_failmode, "Failmode for deadman timer");
+
module_param(spa_asize_inflation, int, 0644);
MODULE_PARM_DESC(spa_asize_inflation,
"SPA size estimate multiplication factor");