git.proxmox.com Git - mirror_zfs.git/commitdiff
Extend zloop.sh for automated testing
author: Brian Behlendorf <behlendorf1@llnl.gov>
Mon, 22 Jan 2018 20:48:39 +0000 (12:48 -0800)
committer: Tony Hutter <hutter2@llnl.gov>
Tue, 30 Jan 2018 16:27:31 +0000 (10:27 -0600)
In order to debug issues encountered by ztest during automated
testing it's important that as much debugging information as
possible be dumped at the time of the failure.  The following
changes extend the zloop.sh script in order to make it easier
to integrate with buildbot.

* Add the `-m <maximum cores>` option to zloop.sh to place a
  limit on the number of core dumps generated.  By default, the
  existing behavior is maintained and no limit is set.

* Add the `-l` option to create a 'ztest.core.N' symlink in the
  current directory to the core directory. This functionality
  is provided primarily for buildbot which expects log files to
  have well known names.

* Rename 'ztest.ddt' to 'ztest.zdb' and extend it to dump
  additional basic information on failure for later analysis.

Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed-by: Thomas Caputi <tcaputi@datto.com>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #6999

Conflicts:
scripts/zloop.sh

scripts/zloop.sh

index f0af875536f26d7d3745f2bd8722b8ea07cb2409..f39e91ef9e955710e2d25b1acfd61c4f789da02a 100755 (executable)
@@ -52,6 +52,8 @@ function usage
            "    -s  Size of vdev devices.\n" \
            "    -f  Specify working directory for ztest vdev files.\n" \
            "    -c  Specify a core dump directory to use.\n" \
+           "    -m  Max number of core dumps to allow before exiting.\n" \
+           "    -l  Create 'ztest.core.N' symlink to core directory.\n" \
            "    -h  Print this help message.\n" \
            "" >&2
 }
@@ -105,14 +107,24 @@ function store_core
                coreid=$(date "+zloop-%y%m%d-%H%M%S")
                foundcrashes=$((foundcrashes + 1))
 
+               # zdb debugging
+               zdbcmd="$ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest"
+               zdbdebug=$($zdbcmd 2>&1)
+               echo -e "$zdbcmd\n" >>ztest.zdb
+               echo "$zdbdebug" >>ztest.zdb
+
                dest=$coredir/$coreid
                or_die mkdir -p "$dest"
                or_die mkdir -p "$dest/vdev"
 
+               if [[ $symlink -ne 0 ]]; then
+                       or_die ln -sf "$dest" ztest.core.$foundcrashes
+               fi
+
                echo "*** ztest crash found - moving logs to $dest"
 
                or_die mv ztest.history "$dest/"
-               or_die mv ztest.ddt "$dest/"
+               or_die mv ztest.zdb "$dest/"
                or_die mv ztest.out "$dest/"
                or_die mv "$workdir/ztest*" "$dest/vdev/"
                or_die mv "$workdir/zpool.cache" "$dest/vdev/"
@@ -120,7 +132,7 @@ function store_core
                # check for core
                if [[ -f "$core" ]]; then
                        coreprog=$(core_prog "$core")
-                       corestatus=$($GDB --batch --quiet \
+                       coredebug=$($GDB --batch --quiet \
                            -ex "set print thread-events off" \
                            -ex "printf \"*\n* Backtrace \n*\n\"" \
                            -ex "bt" \
@@ -132,19 +144,25 @@ function store_core
                            -ex "thread apply all bt" \
                            -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
                            -ex "thread apply all bt full" \
-                           -ex "quit" "$coreprog" "$core" | grep -v "New LWP")
+                           -ex "quit" "$coreprog" "$core" 2>&1 | \
+                           grep -v "New LWP")
 
                        # Dump core + logs to stored directory
-                       echo "$corestatus" >>"$dest/status"
+                       echo "$coredebug" >>"$dest/ztest.gdb"
                        or_die mv "$core" "$dest/"
 
                        # Record info in cores logfile
                        echo "*** core @ $coredir/$coreid/$core:" | \
                            tee -a ztest.cores
-                       echo "$corestatus" | tee -a ztest.cores
-                       echo "" | tee -a ztest.cores
                fi
-               echo "continuing..."
+
+               if [[ $coremax -gt 0 ]] &&
+                  [[ $foundcrashes -ge $coremax ]]; then
+                       echo "exiting... max $coremax allowed cores"
+                       exit 1
+               else
+                       echo "continuing..."
+               fi
        fi
 }
 
@@ -155,12 +173,16 @@ basedir=$DEFAULTWORKDIR
 rundir="zloop-run"
 timeout=0
 size="512m"
-while getopts ":ht:s:c:f:" opt; do
+coremax=0
+symlink=0
+while getopts ":ht:m:s:c:f:l" opt; do
        case $opt in
                t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
+               m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
                s ) [[ $OPTARG ]] && size=$OPTARG ;;
                c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
                f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
+               l ) symlink=1 ;;
                h ) usage
                    exit 2
                    ;;
@@ -178,6 +200,7 @@ ulimit -c unlimited
 if [[ -f "$(core_file)" ]]; then
        echo -n "There's a core dump here you might want to look at first... "
        core_file
+       echo
        exit 1
 fi
 
@@ -192,7 +215,7 @@ if [[ ! -w $coredir ]]; then
 fi
 
 or_die rm -f ztest.history
-or_die rm -f ztest.ddt
+or_die rm -f ztest.zdb
 or_die rm -f ztest.cores
 
 ztrc=0         # ztest return value
@@ -243,7 +266,6 @@ while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
        $cmd >>ztest.out 2>&1
        ztrc=$?
        grep -E '===|WARNING' ztest.out >>ztest.history
-       $ZDB -U "$workdir/zpool.cache" -DD ztest >>ztest.ddt
 
        store_core