]> git.proxmox.com Git - mirror_zfs.git/blobdiff - scripts/zloop.sh
zloop.sh should call ZDB with pool name
[mirror_zfs.git] / scripts / zloop.sh
index 26ad1780c3f4dc4eb658351dc68798147289b471..3d9baaf0e2b8acb60934bc20a19bed5594e50e16 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
 
 #
 # CDDL HEADER START
 # Copyright (C) 2016 Lawrence Livermore National Security, LLC.
 #
 
-basedir="$(dirname $0)"
-
+# Source the shared helper script from the directory this script lives in.
+# NOTE(review): presumably common.sh is what defines ZTEST and ZDB, which
+# are used below but never assigned in this file - confirm.
+BASE_DIR=$(dirname "$0")
 SCRIPT_COMMON=common.sh
-if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then
-       . "${basedir}/${SCRIPT_COMMON}"
+if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
+       . "${BASE_DIR}/${SCRIPT_COMMON}"
 else
         echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
 fi
 
+# shellcheck disable=SC2034
 PROG=zloop.sh
+# Debugger used to examine core files; a GDB value already present in the
+# environment takes precedence over the default "gdb".
+GDB=${GDB:-gdb}
 
+# Defaults for the vdev working directory (-f) and the core dump
+# directory (-c).
 DEFAULTWORKDIR=/var/tmp
 DEFAULTCOREDIR=/var/tmp/zloop
 
 function usage
 {
-       echo -e "\n$0 [-t <timeout>] [-c <dump directory>]" \
+       echo -e "\n$0 [-t <timeout>] [ -s <vdev size> ] [-c <dump directory>]" \
            "[ -- [extra ztest parameters]]\n" \
            "\n" \
            "  This script runs ztest repeatedly with randomized arguments.\n" \
@@ -48,42 +49,103 @@ function usage
            "  Options:\n" \
            "    -t  Total time to loop for, in seconds. If not provided,\n" \
            "        zloop runs forever.\n" \
+           "    -s  Size of vdev devices.\n" \
            "    -f  Specify working directory for ztest vdev files.\n" \
            "    -c  Specify a core dump directory to use.\n" \
+           "    -m  Max number of core dumps to allow before exiting.\n" \
+           "    -l  Create 'ztest.core.N' symlink to core directory.\n" \
            "    -h  Print this help message.\n" \
            "" >&2
 }
 
 function or_die
 {
+       # shellcheck disable=SC2068
        $@
+       # shellcheck disable=SC2181
        if [[ $? -ne 0 ]]; then
+               # shellcheck disable=SC2145
                echo "Command failed: $@"
                exit 1
        fi
 }
 
+# Work out the glob (coreglob) that matches core files on this platform.
+case $(uname) in
+FreeBSD)
+       coreglob="z*.core"
+       ;;
+Linux)
+       # Remember the current core_pattern; it is written back once the
+       # loop has finished.
+       origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
+       # The glob is the leading part of core_pattern up to the first
+       # '|', '%' or whitespace character, followed by '*'.
+       coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"
+
+       # A bare "*" means core_pattern has no usable literal prefix (for
+       # example it pipes to a helper), so install a plain "core" pattern.
+       if [[ $coreglob = "*" ]]; then
+               echo "Setting core file pattern..."
+               # Without this check a failed write would leave coreglob
+               # as "*", and every file in the current directory would
+               # then be mistaken for a core dump.
+               if ! echo "core" > /proc/sys/kernel/core_pattern; then
+                       echo "Unable to set /proc/sys/kernel/core_pattern" >&2
+                       exit 1
+               fi
+               coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
+                   /proc/sys/kernel/core_pattern)*"
+       fi
+       ;;
+*)
+       echo "Unsupported operating system: $(uname)" >&2
+       exit 1
+       ;;
+esac
+
+function core_file
+{
+       # Print the name of the oldest regular file matching $coreglob, or
+       # nothing when no such file exists. No trailing newline is written.
+       local candidate oldest=""
+
+       # $coreglob must stay unquoted so the shell expands the pattern.
+       # shellcheck disable=SC2086
+       for candidate in $coreglob; do
+               # Skips directories and the literal pattern left behind
+               # when the glob matches nothing.
+               [[ -f "$candidate" ]] || continue
+               if [[ -z "$oldest" || "$candidate" -ot "$oldest" ]]; then
+                       oldest=$candidate
+               fi
+       done
+       printf "%s" "$oldest"
+}
+
+function core_prog
+{
+       # Print the program that produced the core file "$1": $ZDB when gdb
+       # reports that the core was generated by zdb, otherwise $ZTEST.
+       # No trailing newline is written.
+       local prog=$ZTEST
+       local core_id
+
+       # tr replaces single quotes with spaces, so a closing quote placed
+       # directly after the program name also satisfies the "zdb " test.
+       core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
+           tr  \' ' ')
+       if [[ "$core_id" == *"zdb "* ]]; then
+               prog=$ZDB
+       fi
+       printf "%s" "$prog"
+}
+
 function store_core
 {
-       if [[ $ztrc -ne 0 ]] || [[ -f core ]]; then
+       core="$(core_file)"
+       if [[ $ztrc -ne 0 ]] || [[ -f "$core" ]]; then
+               df -h "$workdir" >>ztest.out
                coreid=$(date "+zloop-%y%m%d-%H%M%S")
-               foundcrashes=$(($foundcrashes + 1))
+               foundcrashes=$((foundcrashes + 1))
+
+               # zdb debugging
+               zdbcmd="$ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest"
+               zdbdebug=$($zdbcmd 2>&1)
+               echo -e "$zdbcmd\n" >>ztest.zdb
+               echo "$zdbdebug" >>ztest.zdb
 
                dest=$coredir/$coreid
-               or_die mkdir -p $dest
-               or_die mkdir -p $dest/vdev
+               or_die mkdir -p "$dest"
+               or_die mkdir -p "$dest/vdev"
+
+               if [[ $symlink -ne 0 ]]; then
+                       or_die ln -sf "$dest" ztest.core.$foundcrashes
+               fi
 
                echo "*** ztest crash found - moving logs to $dest"
 
-               or_die mv ztest.history $dest/
-               or_die mv ztest.ddt $dest/
-               or_die mv ztest.out $dest/
-               or_die mv $workdir/ztest* $dest/vdev/
-               or_die mv $workdir/zpool.cache $dest/vdev/
+               or_die mv ztest.history "$dest/"
+               or_die mv ztest.zdb "$dest/"
+               or_die mv ztest.out "$dest/"
+               or_die mv "$workdir/ztest*" "$dest/vdev/"
+
+               if [[ -e "$workdir/zpool.cache" ]]; then
+                       or_die mv "$workdir/zpool.cache" "$dest/vdev/"
+               fi
 
                # check for core
-               if [[ -f core ]]; then
-                       corestatus=$($GDB --batch --quiet \
+               if [[ -f "$core" ]]; then
+                       coreprog=$(core_prog "$core")
+                       coredebug=$($GDB --batch --quiet \
                            -ex "set print thread-events off" \
                            -ex "printf \"*\n* Backtrace \n*\n\"" \
                            -ex "bt" \
@@ -95,32 +157,45 @@ function store_core
                            -ex "thread apply all bt" \
                            -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
                            -ex "thread apply all bt full" \
-                           -ex "quit" $ZTEST core | grep -v "New LWP")
+                           -ex "quit" "$coreprog" "$core" 2>&1 | \
+                           grep -v "New LWP")
 
                        # Dump core + logs to stored directory
-                       echo "$corestatus" >>$dest/status
-                       or_die mv core $dest/
+                       echo "$coredebug" >>"$dest/ztest.gdb"
+                       or_die mv "$core" "$dest/"
 
                        # Record info in cores logfile
-                       echo "*** core @ $coredir/$coreid/core:" | \
+                       echo "*** core @ $coredir/$coreid/$core:" | \
                            tee -a ztest.cores
-                       echo "$corestatus" | tee -a ztest.cores
-                       echo "" | tee -a ztest.cores
                fi
-               echo "continuing..."
+
+               if [[ $coremax -gt 0 ]] &&
+                  [[ $foundcrashes -ge $coremax ]]; then
+                       echo "exiting... max $coremax allowed cores"
+                       exit 1
+               else
+                       echo "continuing..."
+               fi
        fi
 }
 
 # parse arguments
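-# expected format: zloop [-t timeout] [-c coredir] [-- extra ztest args]
+# expected format: zloop [-t timeout] [-m coremax] [-s size] [-c coredir]
+#                        [-f workdir] [-l] [-h] [-- extra ztest args]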
 coredir=$DEFAULTCOREDIR
-workdir=$DEFAULTWORKDIR
+basedir=$DEFAULTWORKDIR
+rundir="zloop-run"
 timeout=0
-while getopts ":ht:c:f:" opt; do
+size="512m"
+coremax=0
+symlink=0
+while getopts ":ht:m:s:c:f:l" opt; do
        case $opt in
                t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
+               m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
+               s ) [[ $OPTARG ]] && size=$OPTARG ;;
                c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
-               f ) [[ $OPTARG ]] && workdir=$(readlink -f $OPTARG) ;;
+               f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
+               l ) symlink=1 ;;
                h ) usage
                    exit 2
                    ;;
@@ -134,15 +209,18 @@ shift $((OPTIND - 1))
 
 # enable core dumps
 ulimit -c unlimited
+export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0
 
-if [[ -f core ]]; then
-       echo "There's a core dump here you might want to look at first."
+if [[ -f "$(core_file)" ]]; then
+       echo -n "There's a core dump here you might want to look at first... "
+       core_file
+       echo
        exit 1
 fi
 
 if [[ ! -d $coredir ]]; then
        echo "core dump directory ($coredir) does not exist, creating it."
-       or_die mkdir -p $coredir
+       or_die mkdir -p "$coredir"
 fi
 
 if [[ ! -w $coredir ]]; then
@@ -151,7 +229,7 @@ if [[ ! -w $coredir ]]; then
 fi
 
 or_die rm -f ztest.history
-or_die rm -f ztest.ddt
+or_die rm -f ztest.zdb
 or_die rm -f ztest.cores
 
 ztrc=0         # ztest return value
@@ -160,8 +238,13 @@ starttime=$(date +%s)
 curtime=$starttime
 
 # if no timeout was specified, loop forever.
-while [[ $timeout -eq 0 ]] || [[ $curtime -le $(($starttime + $timeout)) ]]; do
-       zopt="-VVVVV"
+while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
+       zopt="-G -VVVVV"
+
+       # start each run with an empty directory
+       workdir="$basedir/$rundir"
+       or_die rm -rf "$workdir"
+       or_die mkdir "$workdir"
 
        # switch between common arrangements & fully randomized
        if [[ $((RANDOM % 2)) -eq 0 ]]; then
@@ -178,7 +261,6 @@ while [[ $timeout -eq 0 ]] || [[ $curtime -le $(($starttime + $timeout)) ]]; do
        align=$(((RANDOM % 2) * 3 + 9))
        runtime=$((RANDOM % 100))
        passtime=$((RANDOM % (runtime / 3 + 1) + 10))
-       size=128m
 
        zopt="$zopt -m $mirrors"
        zopt="$zopt -r $raidz"
@@ -190,14 +272,14 @@ while [[ $timeout -eq 0 ]] || [[ $curtime -le $(($starttime + $timeout)) ]]; do
        zopt="$zopt -s $size"
        zopt="$zopt -f $workdir"
 
+       # shellcheck disable=SC2124
        cmd="$ZTEST $zopt $@"
        desc="$(date '+%m/%d %T') $cmd"
        echo "$desc" | tee -a ztest.history
        echo "$desc" >>ztest.out
        $cmd >>ztest.out 2>&1
        ztrc=$?
-       egrep '===|WARNING' ztest.out >>ztest.history
-       $ZDB -U $workdir/zpool.cache -DD ztest >>ztest.ddt
+       grep -E '===|WARNING' ztest.out >>ztest.history
 
        store_core
 
@@ -206,6 +288,15 @@ done
 
 echo "zloop finished, $foundcrashes crashes found"
 
+# restore core pattern.
+case $(uname) in
+Linux)
+       echo "$origcorepattern" > /proc/sys/kernel/core_pattern
+       ;;
+*)
+       ;;
+esac
+
 uptime >>ztest.out
 
 if [[ $foundcrashes -gt 0 ]]; then