]> git.proxmox.com Git - mirror_frr.git/blobdiff - tests/topotests/lib/topotest.py
lib: collect shutdown errors, add assert on single router shutdown
[mirror_frr.git] / tests / topotests / lib / topotest.py
index 8063598ebaf83bcf89b6e30b1dd47f32161192cb..655a05cbd40fd09e8aed380fbae2a92b8a8e1af6 100644 (file)
@@ -22,6 +22,7 @@
 # OF THIS SOFTWARE.
 #
 
+import json
 import os
 import errno
 import re
@@ -51,12 +52,42 @@ class json_cmp_result(object):
 
     def add_error(self, error):
         "Append error message to the result"
-        self.errors.append(error)
+        for line in error.splitlines():
+            self.errors.append(line)
 
     def has_errors(self):
         "Returns True if there were errors, otherwise False."
         return len(self.errors) > 0
 
+def get_test_logdir(node=None, init=False):
+    """
+    Return the current test log directory based on PYTEST_CURRENT_TEST
+    environment variable.
+    Optional paramters:
+    node:  when set, adds the node specific log directory to the init dir
+    init:  when set, initializes the log directory and fixes path permissions
+    """
+    cur_test = os.environ['PYTEST_CURRENT_TEST']
+
+    ret = '/tmp/topotests/' + cur_test[0:cur_test.find(".py")].replace('/','.')
+    if node != None:
+        dir = ret + "/" + node
+    if init:
+        os.system('mkdir -p ' + dir)
+        os.system('chmod -R go+rw /tmp/topotests')
+    return ret
+
+def json_diff(d1, d2):
+    """
+    Returns a string with the difference between JSON data.
+    """
+    json_format_opts = {
+        'indent': 4,
+        'sort_keys': True,
+    }
+    dstr1 = json.dumps(d1, **json_format_opts)
+    dstr2 = json.dumps(d2, **json_format_opts)
+    return difflines(dstr2, dstr1, title1='Expected value', title2='Current value', n=0)
 
 def json_cmp(d1, d2):
     """
@@ -79,21 +110,22 @@ def json_cmp(d1, d2):
         s2_req = set([key for key in nd2 if nd2[key] is not None])
         diff = s2_req - s1
         if diff != set({}):
-            result.add_error('expected key(s) {} in {} (have {})'.format(
-                str(list(diff)), parent, str(list(s1))))
+            result.add_error('expected key(s) {} in {} (have {}):\n{}'.format(
+                str(list(diff)), parent, str(list(s1)), json_diff(nd1, nd2)))
 
         for key in s2.intersection(s1):
             # Test for non existence of key in d2
             if nd2[key] is None:
-                result.add_error('"{}" should not exist in {} (have {})'.format(
-                    key, parent, str(s1)))
+                result.add_error('"{}" should not exist in {} (have {}):\n{}'.format(
+                    key, parent, str(s1), json_diff(nd1[key], nd2[key])))
                 continue
             # If nd1 key is a dict, we have to recurse in it later.
             if isinstance(nd2[key], type({})):
                 if not isinstance(nd1[key], type({})):
                     result.add_error(
                         '{}["{}"] has different type than expected '.format(parent, key) +
-                        '(have {}, expected {})'.format(type(nd1[key]), type(nd2[key])))
+                        '(have {}, expected {}):\n{}'.format(
+                            type(nd1[key]), type(nd2[key]), json_diff(nd1[key], nd2[key])))
                     continue
                 nparent = '{}["{}"]'.format(parent, key)
                 squeue.append((nd1[key], nd2[key], nparent))
@@ -103,14 +135,16 @@ def json_cmp(d1, d2):
                 if not isinstance(nd1[key], type([])):
                     result.add_error(
                         '{}["{}"] has different type than expected '.format(parent, key) +
-                        '(have {}, expected {})'.format(type(nd1[key]), type(nd2[key])))
+                        '(have {}, expected {}):\n{}'.format(
+                            type(nd1[key]), type(nd2[key]), json_diff(nd1[key], nd2[key])))
                     continue
                 # Check list size
                 if len(nd2[key]) > len(nd1[key]):
                     result.add_error(
                         '{}["{}"] too few items '.format(parent, key) +
-                        '(have ({}) "{}", expected ({}) "{}")'.format(
-                            len(nd1[key]), str(nd1[key]), len(nd2[key]), str(nd2[key])))
+                        '(have {}, expected {}:\n {})'.format(
+                            len(nd1[key]), len(nd2[key]),
+                            json_diff(nd1[key], nd2[key])))
                     continue
 
                 # List all unmatched items errors
@@ -130,15 +164,15 @@ def json_cmp(d1, d2):
                 # If there are unmatched items, error out.
                 if unmatched:
                     result.add_error(
-                        '{}["{}"] value is different (have "{}", expected "{}")'.format(
-                            parent, key, str(nd1[key]), str(nd2[key])))
+                        '{}["{}"] value is different (\n{})'.format(
+                            parent, key, json_diff(nd1[key], nd2[key])))
                 continue
 
             # Compare JSON values
             if nd1[key] != nd2[key]:
                 result.add_error(
-                    '{}["{}"] value is different (have "{}", expected "{}")'.format(
-                        parent, key, str(nd1[key]), str(nd2[key])))
+                    '{}["{}"] value is different (\n{})'.format(
+                        parent, key, json_diff(nd1[key], nd2[key])))
                 continue
 
     if result.has_errors():
@@ -277,7 +311,7 @@ def version_cmp(v1, v2):
             while v1g:
                 v1n = int(v1g.pop())
                 if v1n > 0:
-                    return -1
+                    return 1
             break
 
         if v1n > v2n:
@@ -446,12 +480,15 @@ class Router(Node):
 
     def __init__(self, name, **params):
         super(Router, self).__init__(name, **params)
-        self.logdir = params.get('logdir', '/tmp')
+        self.logdir = params.get('logdir', get_test_logdir(name, True))
         self.daemondir = None
+        self.hasmpls = False
         self.routertype = 'frr'
         self.daemons = {'zebra': 0, 'ripd': 0, 'ripngd': 0, 'ospfd': 0,
                         'ospf6d': 0, 'isisd': 0, 'bgpd': 0, 'pimd': 0,
                         'ldpd': 0, 'eigrpd': 0, 'nhrpd': 0}
+        self.daemons_options = {'zebra': ''}
+        self.reportCores = True
 
     def _config_frr(self, **params):
         "Configure FRR binaries"
@@ -500,8 +537,11 @@ class Router(Node):
         assert_sysctl(self, 'net.ipv6.conf.all.forwarding', 1)
         # Enable coredumps
         assert_sysctl(self, 'kernel.core_uses_pid', 1)
-        assert_sysctl(self, 'fs.suid_dumpable', 2)
-        corefile = '{}/{}_%e_core-sig_%s-pid_%p.dmp'.format(self.logdir, self.name)
+        assert_sysctl(self, 'fs.suid_dumpable', 1)
+        #this applies to the kernel not the namespace...
+        #original on ubuntu 17.x, but apport won't save as in namespace
+        # |/usr/share/apport/apport %p %s %c %d %P
+        corefile = '%e_core-sig_%s-pid_%p.dmp'
         assert_sysctl(self, 'kernel.core_pattern', corefile)
         self.cmd('ulimit -c unlimited')
         # Set ownership of config files
@@ -518,29 +558,66 @@ class Router(Node):
         set_sysctl(self, 'net.ipv4.ip_forward', 0)
         set_sysctl(self, 'net.ipv6.conf.all.forwarding', 0)
         super(Router, self).terminate()
-    def stopRouter(self):
+        os.system('chmod -R go+rw /tmp/topotests')
+
+    def stopRouter(self, wait=True, assertOnError=True):
         # Stop Running Quagga or FRR Daemons
         rundaemons = self.cmd('ls -1 /var/run/%s/*.pid' % self.routertype)
+        errors = ""
+        if re.search(r"No such file or directory", rundaemons):
+            return errors
         if rundaemons is not None:
+            numRunning = 0
             for d in StringIO.StringIO(rundaemons):
                 daemonpid = self.cmd('cat %s' % d.rstrip()).rstrip()
                 if (daemonpid.isdigit() and pid_exists(int(daemonpid))):
+                    logger.info('{}: stopping {}'.format(
+                        self.name,
+                        os.path.basename(d.rstrip().rsplit(".", 1)[0])
+                    ))
                     self.cmd('kill -TERM %s' % daemonpid)
                     self.waitOutput()
-            sleep(2)
-            # 2nd round of kill if daemons didn't exist
-            for d in StringIO.StringIO(rundaemons):
-                daemonpid = self.cmd('cat %s' % d.rstrip()).rstrip()
-                if (daemonpid.isdigit() and pid_exists(int(daemonpid))):
-                    self.cmd('kill -7 %s' % daemonpid)
-                    self.waitOutput()
+                    if pid_exists(int(daemonpid)):
+                        numRunning += 1
+            if wait and numRunning > 0:
+                sleep(2, '{}: waiting for daemons stopping'.format(self.name))
+                # 2nd round of kill if daemons didn't exit
+                for d in StringIO.StringIO(rundaemons):
+                    daemonpid = self.cmd('cat %s' % d.rstrip()).rstrip()
+                    if (daemonpid.isdigit() and pid_exists(int(daemonpid))):
+                        logger.info('{}: killing {}'.format(
+                            self.name,
+                            os.path.basename(d.rstrip().rsplit(".", 1)[0])
+                        ))
+                        self.cmd('kill -7 %s' % daemonpid)
+                        self.waitOutput()
+                    self.cmd('rm -- {}'.format(d.rstrip()))
+        if wait:
+                errors = self.checkRouterCores(reportOnce=True)
+                if assertOnError and len(errors) > 0:
+                    assert "Errors found - details follow:" == 0, errors
+        return errors
+
     def removeIPs(self):
         for interface in self.intfNames():
             self.cmd('ip address flush', interface)
-    def loadConf(self, daemon, source=None):
+
+    def checkCapability(self, daemon, param):
+        if param is not None:
+            daemon_path = os.path.join(self.daemondir, daemon)
+            daemon_search_option = param.replace('-','')
+            output = self.cmd('{0} -h | grep {1}'.format(
+                daemon_path, daemon_search_option))
+            if daemon_search_option not in output:
+                return False
+        return True
+
+    def loadConf(self, daemon, source=None, param=None):
         # print "Daemons before:", self.daemons
         if daemon in self.daemons.keys():
             self.daemons[daemon] = 1
+            if param is not None:
+                self.daemons_options[daemon] = param
             if source is None:
                 self.cmd('touch /etc/%s/%s.conf' % (self.routertype, daemon))
                 self.waitOutput()
@@ -552,17 +629,18 @@ class Router(Node):
             self.cmd('chown %s:%s /etc/%s/%s.conf' % (self.routertype, self.routertype, self.routertype, daemon))
             self.waitOutput()
         else:
-            logger.warning('No daemon {} known'.format(daemon))
+            logger.info('No daemon {} known'.format(daemon))
         # print "Daemons after:", self.daemons
-    def startRouter(self):
+
+    def startRouter(self, tgen=None):
         # Disable integrated-vtysh-config
         self.cmd('echo "no service integrated-vtysh-config" >> /etc/%s/vtysh.conf' % self.routertype)
         self.cmd('chown %s:%svty /etc/%s/vtysh.conf' % (self.routertype, self.routertype, self.routertype))
         # TODO remove the following lines after all tests are migrated to Topogen.
         # Try to find relevant old logfiles in /tmp and delete them
-        map(os.remove, glob.glob("/tmp/*%s*.log" % self.name))
+        map(os.remove, glob.glob('{}/{}/*.log'.format(self.logdir, self.name)))
         # Remove old core files
-        map(os.remove, glob.glob("/tmp/%s*.dmp" % self.name))
+        map(os.remove, glob.glob('{}/{}/*.dmp'.format(self.logdir, self.name)))
         # Remove IP addresses from OS first - we have them in zebra.conf
         self.removeIPs()
         # If ldp is used, check for LDP to be compiled and Linux Kernel to be 4.5 or higher
@@ -570,46 +648,56 @@ class Router(Node):
         if self.daemons['ldpd'] == 1:
             ldpd_path = os.path.join(self.daemondir, 'ldpd')
             if not os.path.isfile(ldpd_path):
-                logger.warning("LDP Test, but no ldpd compiled or installed")
+                logger.info("LDP Test, but no ldpd compiled or installed")
                 return "LDP Test, but no ldpd compiled or installed"
 
             if version_cmp(platform.release(), '4.5') < 0:
-                logger.warning("LDP Test need Linux Kernel 4.5 minimum")
+                logger.info("LDP Test need Linux Kernel 4.5 minimum")
                 return "LDP Test need Linux Kernel 4.5 minimum"
-
-            # Check if required kernel modules are available with a dryrun modprobe
-            # Silent accept of modprobe command assumes ok status
-            if self.cmd('/sbin/modprobe -n mpls-router' ) != "":
-                logger.warning("LDP Test needs mpls-router kernel module")
-                return "LDP Test needs mpls-router kernel module"
-            if self.cmd('/sbin/modprobe -n mpls-iptunnel') != "":
-                logger.warning("LDP Test needs mpls-iptunnel kernel module")
-                return "LDP Test needs mpls-router kernel module"
-
-            self.cmd('/sbin/modprobe mpls-router')
-            self.cmd('/sbin/modprobe mpls-iptunnel')
+            # Check if have mpls
+            if tgen != None:
+                self.hasmpls = tgen.hasmpls
+                if self.hasmpls != True:
+                    logger.info("LDP/MPLS Tests will be skipped, platform missing module(s)")
+            else:
+                # Test for MPLS Kernel modules available
+                self.hasmpls = False
+                if os.system('/sbin/modprobe mpls-router') != 0:
+                    logger.info('MPLS tests will not run (missing mpls-router kernel module)')
+                elif os.system('/sbin/modprobe mpls-iptunnel') != 0:
+                    logger.info('MPLS tests will not run (missing mpls-iptunnel kernel module)')
+                else:
+                    self.hasmpls = True
+            if self.hasmpls != True:
+                return "LDP/MPLS Tests need mpls kernel modules"
             self.cmd('echo 100000 > /proc/sys/net/mpls/platform_labels')
 
         if self.daemons['eigrpd'] == 1:
             eigrpd_path = os.path.join(self.daemondir, 'eigrpd')
             if not os.path.isfile(eigrpd_path):
-                logger.warning("EIGRP Test, but no eigrpd compiled or installed")
+                logger.info("EIGRP Test, but no eigrpd compiled or installed")
                 return "EIGRP Test, but no eigrpd compiled or installed"
 
-        # Init done - now restarting daemons
         self.restartRouter()
         return ""
+
     def restartRouter(self):
-        # Starts actuall daemons without init (ie restart)
+        # Starts actual daemons without init (ie restart)
+        # cd to per node directory
+        self.cmd('cd {}/{}'.format(self.logdir, self.name))
+        self.cmd('umask 000')
+        #Re-enable to allow for report per run
+        self.reportCores = True
         # Start Zebra first
         if self.daemons['zebra'] == 1:
             zebra_path = os.path.join(self.daemondir, 'zebra')
-            self.cmd('{0} > {1}/{2}-zebra.out 2> {1}/{2}-zebra.err &'.format(
-                zebra_path, self.logdir, self.name
+            zebra_option = self.daemons_options['zebra']
+            self.cmd('{0} {1} > zebra.out 2> zebra.err &'.format(
+                 zebra_path, zebra_option, self.logdir, self.name
             ))
             self.waitOutput()
             logger.debug('{}: {} zebra started'.format(self, self.routertype))
-            time.sleep(1)
+            sleep(1, '{}: waiting for zebra to start'.format(self.name))
         # Fix Link-Local Addresses
         # Somehow (on Mininet only), Zebra removes the IPv6 Link-Local addresses on start. Fix this
         self.cmd('for i in `ls /sys/class/net/` ; do mac=`cat /sys/class/net/$i/address`; IFS=\':\'; set $mac; unset IFS; ip address add dev $i scope link fe80::$(printf %02x $((0x$1 ^ 2)))$2:${3}ff:fe$4:$5$6/64; done')
@@ -620,7 +708,7 @@ class Router(Node):
                 continue
 
             daemon_path = os.path.join(self.daemondir, daemon)
-            self.cmd('{0} > {1}/{2}-{3}.out 2> {1}/{2}-{3}.err &'.format(
+            self.cmd('{0} > {3}.out 2> {3}.err &'.format(
                 daemon_path, self.logdir, self.name, daemon
             ))
             self.waitOutput()
@@ -630,7 +718,46 @@ class Router(Node):
     def getStdOut(self, daemon):
         return self.getLog('out', daemon)
     def getLog(self, log, daemon):
-        return self.cmd('cat {}/{}-{}.{}'.format(self.logdir, self.name, daemon, log))
+        return self.cmd('cat {}/{}/{}.{}'.format(self.logdir, self.name, daemon, log))
+
+    def checkRouterCores(self, reportLeaks=True, reportOnce=False):
+        if reportOnce and not self.reportCores:
+            return
+        reportMade = False
+        traces = ""
+        for daemon in self.daemons:
+            if (self.daemons[daemon] == 1):
+                # Look for core file
+                corefiles = glob.glob('{}/{}/{}_core*.dmp'.format(
+                    self.logdir, self.name, daemon))
+                if (len(corefiles) > 0):
+                    daemon_path = os.path.join(self.daemondir, daemon)
+                    backtrace = subprocess.check_output([
+                        "gdb {} {} --batch -ex bt 2> /dev/null".format(daemon_path, corefiles[0])
+                    ], shell=True)
+                    sys.stderr.write("\n%s: %s crashed. Core file found - Backtrace follows:\n" % (self.name, daemon))
+                    sys.stderr.write("%s" % backtrace)
+                    traces = traces + "\n%s: %s crashed. Core file found - Backtrace follows:\n%s" % (self.name, daemon, backtrace)
+                    reportMade = True
+                elif reportLeaks:
+                    log = self.getStdErr(daemon)
+                    if "memstats" in log:
+                        sys.stderr.write("%s: %s has memory leaks:\n" % (self.name, daemon))
+                        traces = traces + "\n%s: %s has memory leaks:\n" % (self.name, daemon)
+                        log = re.sub("core_handler: ", "", log)
+                        log = re.sub(r"(showing active allocations in memory group [a-zA-Z0-9]+)", r"\n  ## \1", log)
+                        log = re.sub("memstats:  ", "    ", log)
+                        sys.stderr.write(log)
+                        reportMade = True
+                # Look for AddressSanitizer Errors and append to /tmp/AddressSanitzer.txt if found
+                if checkAddressSanitizerError(self.getStdErr(daemon), self.name, daemon):
+                    sys.stderr.write("%s: Daemon %s killed by AddressSanitizer" % (self.name, daemon))
+                    traces = traces + "\n%s: Daemon %s killed by AddressSanitizer" % (self.name, daemon)
+                    reportMade = True
+        if reportMade:
+            self.reportCores = False
+        return traces
+
     def checkRouterRunning(self):
         "Check if router daemons are running and collect crashinfo they don't run"
 
@@ -645,7 +772,7 @@ class Router(Node):
             if (self.daemons[daemon] == 1) and not (daemon in daemonsRunning):
                 sys.stderr.write("%s: Daemon %s not running\n" % (self.name, daemon))
                 # Look for core file
-                corefiles = glob.glob('{}/{}_{}_core*.dmp'.format(
+                corefiles = glob.glob('{}/{}/{}_core*.dmp'.format(
                     self.logdir, self.name, daemon))
                 if (len(corefiles) > 0):
                     daemon_path = os.path.join(self.daemondir, daemon)
@@ -656,9 +783,9 @@ class Router(Node):
                     sys.stderr.write("%s\n" % backtrace)
                 else:
                     # No core found - If we find matching logfile in /tmp, then print last 20 lines from it.
-                    if os.path.isfile('{}/{}-{}.log'.format(self.logdir, self.name, daemon)):
+                    if os.path.isfile('{}/{}/{}.log'.format(self.logdir, self.name, daemon)):
                         log_tail = subprocess.check_output([
-                            "tail -n20 {}/{}-{}.log 2> /dev/null".format(
+                            "tail -n20 {}/{}/{}.log 2> /dev/null".format(
                                 self.logdir, self.name, daemon)
                             ], shell=True)
                         sys.stderr.write("\nFrom %s %s %s log file:\n" % (self.routertype, self.name, daemon))