]> git.proxmox.com Git - mirror_zfs.git/blobdiff - scripts/zloop.sh
zloop.sh should call ZDB with pool name
[mirror_zfs.git] / scripts / zloop.sh
index a45c450247f99655e0a82ee49350fba64525c514..3d9baaf0e2b8acb60934bc20a19bed5594e50e16 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 #
 # CDDL HEADER START
@@ -30,6 +30,7 @@ fi
 
 # shellcheck disable=SC2034
 PROG=zloop.sh
+GDB=${GDB:-gdb}
 
 DEFAULTWORKDIR=/var/tmp
 DEFAULTCOREDIR=/var/tmp/zloop
@@ -51,6 +52,8 @@ function usage
            "    -s  Size of vdev devices.\n" \
            "    -f  Specify working directory for ztest vdev files.\n" \
            "    -c  Specify a core dump directory to use.\n" \
+           "    -m  Max number of core dumps to allow before exiting.\n" \
+           "    -l  Create 'ztest.core.N' symlink to core directory.\n" \
            "    -h  Print this help message.\n" \
            "" >&2
 }
@@ -67,16 +70,26 @@ function or_die
        fi
 }
 
-# core file helpers
-origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
-coreglob="$(egrep -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"
-
-if [[ $coreglob = "*" ]]; then
-        echo "Setting core file pattern..."
-        echo "core" > /proc/sys/kernel/core_pattern
-        coreglob="$(egrep -o '^([^|%[:space:]]*)' \
-            /proc/sys/kernel/core_pattern)*"
-fi
+case $(uname) in
+FreeBSD)
+       coreglob="z*.core"
+       ;;
+Linux)
+       # core file helpers
+       origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
+       coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"
+
+       if [[ $coreglob = "*" ]]; then
+               echo "Setting core file pattern..."
+               echo "core" > /proc/sys/kernel/core_pattern
+               coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
+                   /proc/sys/kernel/core_pattern)*"
+       fi
+       ;;
+*)
+       exit 1
+       ;;
+esac
 
 function core_file
 {
@@ -104,22 +117,35 @@ function store_core
                coreid=$(date "+zloop-%y%m%d-%H%M%S")
                foundcrashes=$((foundcrashes + 1))
 
+               # zdb debugging
+               zdbcmd="$ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest"
+               zdbdebug=$($zdbcmd 2>&1)
+               echo -e "$zdbcmd\n" >>ztest.zdb
+               echo "$zdbdebug" >>ztest.zdb
+
                dest=$coredir/$coreid
                or_die mkdir -p "$dest"
                or_die mkdir -p "$dest/vdev"
 
+               if [[ $symlink -ne 0 ]]; then
+                       or_die ln -sf "$dest" ztest.core.$foundcrashes
+               fi
+
                echo "*** ztest crash found - moving logs to $dest"
 
                or_die mv ztest.history "$dest/"
-               or_die mv ztest.ddt "$dest/"
+               or_die mv ztest.zdb "$dest/"
                or_die mv ztest.out "$dest/"
                or_die mv "$workdir/ztest*" "$dest/vdev/"
-               or_die mv "$workdir/zpool.cache" "$dest/vdev/"
+
+               if [[ -e "$workdir/zpool.cache" ]]; then
+                       or_die mv "$workdir/zpool.cache" "$dest/vdev/"
+               fi
 
                # check for core
                if [[ -f "$core" ]]; then
                        coreprog=$(core_prog "$core")
-                       corestatus=$($GDB --batch --quiet \
+                       coredebug=$($GDB --batch --quiet \
                            -ex "set print thread-events off" \
                            -ex "printf \"*\n* Backtrace \n*\n\"" \
                            -ex "bt" \
@@ -131,30 +157,27 @@ function store_core
                            -ex "thread apply all bt" \
                            -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
                            -ex "thread apply all bt full" \
-                           -ex "quit" "$coreprog" "$core" | grep -v "New LWP")
+                           -ex "quit" "$coreprog" "$core" 2>&1 | \
+                           grep -v "New LWP")
 
                        # Dump core + logs to stored directory
-                       echo "$corestatus" >>"$dest/status"
+                       echo "$coredebug" >>"$dest/ztest.gdb"
                        or_die mv "$core" "$dest/"
 
                        # Record info in cores logfile
                        echo "*** core @ $coredir/$coreid/$core:" | \
                            tee -a ztest.cores
-                       echo "$corestatus" | tee -a ztest.cores
-                       echo "" | tee -a ztest.cores
                fi
-               echo "continuing..."
-       fi
-}
 
-rngdpid=""
-function on_exit
-{
-       if [ -n "$rngdpid" ]; then
-               kill -9 "$rngdpid"
+               if [[ $coremax -gt 0 ]] &&
+                  [[ $foundcrashes -ge $coremax ]]; then
+                       echo "exiting... max $coremax allowed cores"
+                       exit 1
+               else
+                       echo "continuing..."
+               fi
        fi
 }
-trap on_exit EXIT
 
 # parse arguments
 # expected format: zloop [-t timeout] [-c coredir] [-- extra ztest args]
@@ -163,12 +186,16 @@ basedir=$DEFAULTWORKDIR
 rundir="zloop-run"
 timeout=0
 size="512m"
-while getopts ":ht:s:c:f:" opt; do
+coremax=0
+symlink=0
+while getopts ":ht:m:s:c:f:l" opt; do
        case $opt in
                t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
+               m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
                s ) [[ $OPTARG ]] && size=$OPTARG ;;
                c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
                f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
+               l ) symlink=1 ;;
                h ) usage
                    exit 2
                    ;;
@@ -182,10 +209,12 @@ shift $((OPTIND - 1))
 
 # enable core dumps
 ulimit -c unlimited
+export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0
 
 if [[ -f "$(core_file)" ]]; then
        echo -n "There's a core dump here you might want to look at first... "
        core_file
+       echo
        exit 1
 fi
 
@@ -200,12 +229,9 @@ if [[ ! -w $coredir ]]; then
 fi
 
 or_die rm -f ztest.history
-or_die rm -f ztest.ddt
+or_die rm -f ztest.zdb
 or_die rm -f ztest.cores
 
-# start rngd in the background so we don't run out of entropy
-or_die read -r rngdpid < <(rngd -f -r /dev/urandom & echo $!)
-
 ztrc=0         # ztest return value
 foundcrashes=0 # number of crashes found so far
 starttime=$(date +%s)
@@ -213,7 +239,7 @@ curtime=$starttime
 
 # if no timeout was specified, loop forever.
 while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
-       zopt="-VVVVV"
+       zopt="-G -VVVVV"
 
        # start each run with an empty directory
        workdir="$basedir/$rundir"
@@ -253,8 +279,7 @@ while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
        echo "$desc" >>ztest.out
        $cmd >>ztest.out 2>&1
        ztrc=$?
-       egrep '===|WARNING' ztest.out >>ztest.history
-       $ZDB -U "$workdir/zpool.cache" -DD ztest >>ztest.ddt
+       grep -E '===|WARNING' ztest.out >>ztest.history
 
        store_core
 
@@ -263,8 +288,14 @@ done
 
 echo "zloop finished, $foundcrashes crashes found"
 
-#restore core pattern
-echo "$origcorepattern" > /proc/sys/kernel/core_pattern
+# restore core pattern.
+case $(uname) in
+Linux)
+       echo "$origcorepattern" > /proc/sys/kernel/core_pattern
+       ;;
+*)
+       ;;
+esac
 
 uptime >>ztest.out