1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Tony Hutter <hutter2@llnl.gov>
3 Date: Fri, 23 Feb 2018 11:38:05 -0800
4 Subject: [PATCH] Add scrub after resilver zed script
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
9 * Add a zed script to kick off a scrub after a resilver. The script is
12 * Add an optional $PATH (-P) option to zed to allow it to use a custom
13 $PATH for its zedlets. This is needed when you're running zed under
14 the ZTS in a local workspace.
16 * Update test scripts to not copy in all-debug.sh and all-syslog.sh by
17 default. They can be optionally copied in as part of zed_setup().
18 These scripts slow down zed considerably under heavy event loads and
19 can cause events to be dropped or their delivery delayed. This was
20 causing some sporadic failures in the 'fault' tests.
22 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
23 Reviewed-by: Richard Laager <rlaager@wiktel.com>
24 Signed-off-by: Tony Hutter <hutter2@llnl.gov>
27 (cherry picked from commit 99920d823e8e1510a0ad133c985bd2aa11a02834)
28 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
30 cmd/zed/Makefile.am | 6 +-
31 tests/zfs-tests/tests/functional/fault/Makefile.am | 3 +-
32 cmd/zed/zed_conf.h | 1 +
33 man/man8/zed.8.in | 10 +++-
34 cmd/zed/zed_conf.c | 7 ++-
35 cmd/zed/zed_event.c | 34 +++++++++--
36 cmd/zed/zed.d/resilver_finish-start-scrub.sh | 17 ++++++
37 cmd/zed/zed.d/zed.rc | 3 +
38 tests/runfiles/linux.run | 2 +-
39 tests/zfs-tests/include/commands.cfg | 1 +
40 tests/zfs-tests/include/libtest.shlib | 50 ++++++++++++++---
41 .../zfs-tests/tests/functional/events/cleanup.ksh | 2 +-
42 tests/zfs-tests/tests/functional/events/setup.ksh | 2 +-
43 tests/zfs-tests/tests/functional/fault/cleanup.ksh | 2 +-
44 .../functional/fault/scrub_after_resilver.ksh | 65 ++++++++++++++++++++++
45 tests/zfs-tests/tests/functional/fault/setup.ksh | 2 +-
46 16 files changed, 186 insertions(+), 21 deletions(-)
47 create mode 100755 cmd/zed/zed.d/resilver_finish-start-scrub.sh
48 create mode 100755 tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh
50 diff --git a/cmd/zed/Makefile.am b/cmd/zed/Makefile.am
51 index 97733a512..ee44898cd 100644
52 --- a/cmd/zed/Makefile.am
53 +++ b/cmd/zed/Makefile.am
54 @@ -69,7 +69,8 @@ dist_zedexec_SCRIPTS = \
55 zed.d/statechange-notify.sh \
56 zed.d/vdev_clear-led.sh \
57 zed.d/vdev_attach-led.sh \
58 - zed.d/pool_import-led.sh
59 + zed.d/pool_import-led.sh \
60 + zed.d/resilver_finish-start-scrub.sh
64 @@ -80,7 +81,8 @@ zedconfdefaults = \
65 statechange-notify.sh \
69 + pool_import-led.sh \
70 + resilver_finish-start-scrub.sh
73 $(MKDIR_P) "$(DESTDIR)$(zedconfdir)"
74 diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am
75 index eeff31261..abe28501d 100644
76 --- a/tests/zfs-tests/tests/functional/fault/Makefile.am
77 +++ b/tests/zfs-tests/tests/functional/fault/Makefile.am
78 @@ -4,4 +4,5 @@ dist_pkgdata_SCRIPTS = \
81 auto_online_001_pos.ksh \
82 - auto_replace_001_pos.ksh
83 + auto_replace_001_pos.ksh \
84 + scrub_after_resilver.ksh
85 diff --git a/cmd/zed/zed_conf.h b/cmd/zed/zed_conf.h
86 index 2bc634134..7d6b63b1d 100644
87 --- a/cmd/zed/zed_conf.h
88 +++ b/cmd/zed/zed_conf.h
89 @@ -37,6 +37,7 @@ struct zed_conf {
90 int state_fd; /* fd to state file */
91 libzfs_handle_t *zfs_hdl; /* handle to libzfs */
92 int zevent_fd; /* fd for access to zevents */
93 + char *path; /* custom $PATH for zedlets to use */
96 struct zed_conf *zed_conf_create(void);
97 diff --git a/man/man8/zed.8.in b/man/man8/zed.8.in
98 index 2ab088d98..645e91795 100644
99 --- a/man/man8/zed.8.in
100 +++ b/man/man8/zed.8.in
101 @@ -27,6 +27,7 @@ ZED \- ZFS Event Daemon
104 [\fB\-p\fR \fIpidfile\fR]
105 +[\fB\-P\fR \fIpath\fR]
106 [\fB\-s\fR \fIstatefile\fR]
109 @@ -78,9 +79,16 @@ Read the enabled ZEDLETs from the specified directory.
111 Write the daemon's process ID to the specified file.
114 +Custom $PATH for zedlets to use. Normally zedlets run in a locked-down
115 +environment, with hardcoded paths to the ZFS commands ($ZFS, $ZPOOL, $ZED, ...),
116 +and a hardcoded $PATH. This is done for security reasons. However, the
117 +ZFS test suite uses a custom PATH for its ZFS commands, and passes it to zed
118 +with -P. In short, -P is only to be used by the ZFS test suite; never use
122 Write the daemon's state to the specified file.
126 A zevent is comprised of a list of nvpairs (name/value pairs). Each zevent
127 diff --git a/cmd/zed/zed_conf.c b/cmd/zed/zed_conf.c
128 index 5b27f1e4f..86671369c 100644
129 --- a/cmd/zed/zed_conf.c
130 +++ b/cmd/zed/zed_conf.c
131 @@ -155,6 +155,8 @@ _zed_conf_display_help(const char *prog, int got_err)
132 "Run daemon in the foreground.");
133 fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-M",
134 "Lock all pages in memory.");
135 + fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-P",
136 + "$PATH for ZED to use (only used by ZTS).");
137 fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-Z",
140 @@ -247,7 +249,7 @@ _zed_conf_parse_path(char **resultp, const char *path)
142 zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
144 - const char * const opts = ":hLVc:d:p:s:vfFMZ";
145 + const char * const opts = ":hLVc:d:p:P:s:vfFMZ";
148 if (!zcp || !argv || !argv[0])
149 @@ -275,6 +277,9 @@ zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
151 _zed_conf_parse_path(&zcp->pid_file, optarg);
154 + _zed_conf_parse_path(&zcp->path, optarg);
157 _zed_conf_parse_path(&zcp->state_file, optarg);
159 diff --git a/cmd/zed/zed_event.c b/cmd/zed/zed_event.c
160 index 390235019..2a7ff16fd 100644
161 --- a/cmd/zed/zed_event.c
162 +++ b/cmd/zed/zed_event.c
163 @@ -733,12 +733,14 @@ _zed_event_add_nvpair(uint64_t eid, zed_strings_t *zsp, nvpair_t *nvp)
166 * Restrict various environment variables to safe and sane values
167 - * when constructing the environment for the child process.
168 + * when constructing the environment for the child process, unless
169 + * we're running with a custom $PATH (like under the ZFS test suite).
171 * Reference: Secure Programming Cookbook by Viega & Messier, Section 1.1.
174 -_zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp)
175 +_zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp,
178 const char *env_restrict[][2] = {
180 @@ -753,11 +755,35 @@ _zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp)
181 { "ZFS_RELEASE", ZFS_META_RELEASE },
186 + * If we have a custom $PATH, use the default ZFS binary locations
187 + * instead of the hard-coded ones.
189 + const char *env_path[][2] = {
190 + { "IFS", " \t\n" },
191 + { "PATH", NULL }, /* $PATH copied in later on */
195 + { "ZINJECT", "zinject" },
196 + { "ZPOOL", "zpool" },
197 + { "ZFS_ALIAS", ZFS_META_ALIAS },
198 + { "ZFS_VERSION", ZFS_META_VERSION },
199 + { "ZFS_RELEASE", ZFS_META_RELEASE },
202 const char *(*pa)[2];
206 - for (pa = env_restrict; *(*pa); pa++) {
207 + pa = path != NULL ? env_path : env_restrict;
209 + for (; *(*pa); pa++) {
210 + /* Use our custom $PATH if we have one */
211 + if (path != NULL && strcmp((*pa)[0], "PATH") == 0)
214 _zed_event_add_var(eid, zsp, NULL, (*pa)[0], "%s", (*pa)[1]);
217 @@ -902,7 +928,7 @@ zed_event_service(struct zed_conf *zcp)
218 while ((nvp = nvlist_next_nvpair(nvl, nvp)))
219 _zed_event_add_nvpair(eid, zsp, nvp);
221 - _zed_event_add_env_restrict(eid, zsp);
222 + _zed_event_add_env_restrict(eid, zsp, zcp->path);
223 _zed_event_add_env_preserve(eid, zsp);
225 _zed_event_add_var(eid, zsp, ZED_VAR_PREFIX, "PID",
226 diff --git a/cmd/zed/zed.d/resilver_finish-start-scrub.sh b/cmd/zed/zed.d/resilver_finish-start-scrub.sh
228 index 000000000..6f9c0b309
230 +++ b/cmd/zed/zed.d/resilver_finish-start-scrub.sh
233 +# resilver_finish-start-scrub.sh
234 +# Run a scrub after a resilver
238 +# 2: Script wasn't enabled in zed.rc
239 +[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
240 +. "${ZED_ZEDLET_DIR}/zed-functions.sh"
242 +[ "${ZED_SCRUB_AFTER_RESILVER}" = "1" ] || exit 2
243 +[ -n "${ZEVENT_POOL}" ] || exit 1
244 +[ -n "${ZEVENT_SUBCLASS}" ] || exit 1
245 +zed_check_cmd "${ZPOOL}" || exit 1
247 +zed_log_msg "Starting scrub after resilver on ${ZEVENT_POOL}"
248 +"${ZPOOL}" scrub "${ZEVENT_POOL}"
249 diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
250 index a1dd33704..8b0e476d5 100644
251 --- a/cmd/zed/zed.d/zed.rc
252 +++ b/cmd/zed/zed.d/zed.rc
255 ZED_USE_ENCLOSURE_LEDS=1
258 +# Run a scrub after every resilver
259 +#ZED_SCRUB_AFTER_RESILVER=1
262 # The syslog priority (e.g., specified as a "facility.level" pair).
263 diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
264 index 8be3e1c62..89c923db1 100644
265 --- a/tests/runfiles/linux.run
266 +++ b/tests/runfiles/linux.run
267 @@ -421,7 +421,7 @@ tests = ['exec_001_pos', 'exec_002_neg']
268 tags = ['functional', 'exec']
270 [tests/functional/fault]
271 -tests = ['auto_online_001_pos', 'auto_replace_001_pos']
272 +tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'scrub_after_resilver']
273 tags = ['functional', 'fault']
275 [tests/functional/features/async_destroy]
276 diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
277 index f6fd239de..936e54c1a 100644
278 --- a/tests/zfs-tests/include/commands.cfg
279 +++ b/tests/zfs-tests/include/commands.cfg
280 @@ -83,6 +83,7 @@ export SYSTEM_FILES='arp
288 diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
289 index 86f172a6d..48fb5e7c5 100644
290 --- a/tests/zfs-tests/include/libtest.shlib
291 +++ b/tests/zfs-tests/include/libtest.shlib
292 @@ -3339,9 +3339,32 @@ function wait_replacing #pool
297 +# Wait for a pool to be scrubbed
300 +# $2 number of seconds to wait (optional)
302 +# Returns true when pool has been scrubbed, or false if there's a timeout or if
303 +# no scrub was done.
305 +function wait_scrubbed
307 + typeset pool=${1:-$TESTPOOL}
308 + typeset iter=${2:-10}
309 + for i in {1..$iter} ; do
310 + if is_pool_scrubbed $pool ; then
319 # Setup custom environment for the ZED.
321 +# $@ Optional list of zedlets to run under zed.
325 @@ -3359,6 +3382,7 @@ function zed_setup
326 if [[ -e $VDEVID_CONF_ETC ]]; then
327 log_fail "Must not have $VDEVID_CONF_ETC file present on system"
331 # Create a symlink for /etc/zfs/vdev_id.conf file.
332 log_must ln -s $VDEVID_CONF $VDEVID_CONF_ETC
333 @@ -3368,32 +3392,44 @@ function zed_setup
334 log_must cp ${ZEDLET_ETC_DIR}/zed.rc $ZEDLET_DIR
335 log_must cp ${ZEDLET_ETC_DIR}/zed-functions.sh $ZEDLET_DIR
337 + # Scripts must only be user writable.
338 + if [[ -n "$EXTRA_ZEDLETS" ]] ; then
339 + saved_umask=$(umask)
340 + log_must umask 0022
341 + for i in $EXTRA_ZEDLETS ; do
342 + log_must cp ${ZEDLET_LIBEXEC_DIR}/$i $ZEDLET_DIR
344 + log_must umask $saved_umask
347 # Customize the zed.rc file to enable the full debug log.
348 log_must sed -i '/\#ZED_DEBUG_LOG=.*/d' $ZEDLET_DIR/zed.rc
349 echo "ZED_DEBUG_LOG=$ZED_DEBUG_LOG" >>$ZEDLET_DIR/zed.rc
351 - # Scripts must only be user writable.
352 - saved_umask=$(umask)
353 - log_must umask 0022
354 - log_must cp ${ZEDLET_LIBEXEC_DIR}/all-syslog.sh $ZEDLET_DIR
355 - log_must cp ${ZEDLET_LIBEXEC_DIR}/all-debug.sh $ZEDLET_DIR
356 - log_must umask $saved_umask
360 # Cleanup custom ZED environment.
362 +# $@ Optional list of zedlets to remove from our test zed.d directory.
370 log_must rm -f ${ZEDLET_DIR}/zed.rc
371 log_must rm -f ${ZEDLET_DIR}/zed-functions.sh
372 log_must rm -f ${ZEDLET_DIR}/all-syslog.sh
373 log_must rm -f ${ZEDLET_DIR}/all-debug.sh
374 log_must rm -f ${ZEDLET_DIR}/state
376 + if [[ -n "$EXTRA_ZEDLETS" ]] ; then
377 + for i in $EXTRA_ZEDLETS ; do
378 + log_must rm -f ${ZEDLET_DIR}/$i
381 log_must rm -f $ZED_LOG
382 log_must rm -f $ZED_DEBUG_LOG
383 log_must rm -f $VDEVID_CONF_ETC
384 @@ -3425,7 +3461,7 @@ function zed_start
385 # run ZED in the background and redirect foreground logging
386 # output to $ZED_LOG.
387 log_must truncate -s 0 $ZED_DEBUG_LOG
388 - log_must eval "zed -vF -d $ZEDLET_DIR -p $ZEDLET_DIR/zed.pid" \
389 + log_must eval "zed -vF -d $ZEDLET_DIR -p $ZEDLET_DIR/zed.pid -P $PATH" \
390 "-s $ZEDLET_DIR/state 2>$ZED_LOG &"
393 diff --git a/tests/zfs-tests/tests/functional/events/cleanup.ksh b/tests/zfs-tests/tests/functional/events/cleanup.ksh
394 index bc536e260..4905342b7 100755
395 --- a/tests/zfs-tests/tests/functional/events/cleanup.ksh
396 +++ b/tests/zfs-tests/tests/functional/events/cleanup.ksh
399 . $STF_SUITE/include/libtest.shlib
402 +zed_cleanup all-debug.sh all-syslog.sh
405 diff --git a/tests/zfs-tests/tests/functional/events/setup.ksh b/tests/zfs-tests/tests/functional/events/setup.ksh
406 index 7113c1f39..2f81d16b1 100755
407 --- a/tests/zfs-tests/tests/functional/events/setup.ksh
408 +++ b/tests/zfs-tests/tests/functional/events/setup.ksh
414 +zed_setup all-debug.sh all-syslog.sh
417 diff --git a/tests/zfs-tests/tests/functional/fault/cleanup.ksh b/tests/zfs-tests/tests/functional/fault/cleanup.ksh
418 index f39f05d6f..d3de742b3 100755
419 --- a/tests/zfs-tests/tests/functional/fault/cleanup.ksh
420 +++ b/tests/zfs-tests/tests/functional/fault/cleanup.ksh
421 @@ -31,7 +31,7 @@ verify_runnable "global"
422 cleanup_devices $DISKS
426 +zed_cleanup resilver_finish-start-scrub.sh
428 SD=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}')
429 SDDEVICE=$(echo $SD | nawk -F / '{print $3}')
430 diff --git a/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh b/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh
432 index 000000000..558cb065f
434 +++ b/tests/zfs-tests/tests/functional/fault/scrub_after_resilver.ksh
438 +# This file and its contents are supplied under the terms of the
439 +# Common Development and Distribution License ("CDDL"), version 1.0.
440 +# You may only use this file in accordance with the terms of version
443 +# A full copy of the text of the CDDL should have accompanied this
444 +# source. A copy of the CDDL is also available via the Internet at
445 +# http://www.illumos.org/license/CDDL.
449 +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
450 +# All rights reserved.
453 +. $STF_SUITE/include/libtest.shlib
454 +. $STF_SUITE/tests/functional/fault/fault.cfg
458 +# Test the scrub after resilver zedlet
461 +# 1. Create a mirrored pool
463 +# 3. Replace the disk, starting a resilver
464 +# 4. Verify that a scrub happens after the resilver finishes
467 +log_assert "Testing the scrub after resilver zedlet"
470 +zedrc_backup="$(mktemp)"
471 +log_must cp $ZEDLET_DIR/zed.rc $zedrc_backup
473 +# Enable ZED_SCRUB_AFTER_RESILVER
474 +eval "sed -i 's/\#ZED_SCRUB_AFTER_RESILVER/ZED_SCRUB_AFTER_RESILVER/g' $ZEDLET_DIR/zed.rc"
478 + # Restore our zed.rc
479 + log_must mv $zedrc_backup $ZEDLET_DIR/zed.rc
480 + default_cleanup_noexit
485 +verify_disk_count "$DISKS" 3
486 +default_mirror_setup_noexit $DISK1 $DISK2
488 +log_must zpool offline -f $TESTPOOL $DISK1
490 +# Write to our degraded pool so we have some data to resilver
491 +log_must mkfile 16M $TESTDIR/file1
493 +# Replace the failed disk, forcing a resilver
494 +log_must zpool replace $TESTPOOL $DISK1 $DISK3
496 +# Wait for the resilver to finish, and then the subsequent scrub to finish.
497 +# Waiting for the scrub has the effect of waiting for both. Timeout after 10
498 +# seconds if nothing is happening.
499 +log_must wait_scrubbed $TESTPOOL 10
500 +log_pass "Successfully ran the scrub after resilver zedlet"
501 diff --git a/tests/zfs-tests/tests/functional/fault/setup.ksh b/tests/zfs-tests/tests/functional/fault/setup.ksh
502 index 3d54d4f21..484bc4587 100755
503 --- a/tests/zfs-tests/tests/functional/fault/setup.ksh
504 +++ b/tests/zfs-tests/tests/functional/fault/setup.ksh
507 verify_runnable "global"
510 +zed_setup resilver_finish-start-scrub.sh
513 # Create a scsi_debug device to be used with auto-online (if using loop devices)