Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' into adrien
[simgrid.git] / tools / tesh / tesh.py
index 48ddd51..4996630 100755 (executable)
@@ -5,12 +5,11 @@
 tesh -- testing shell
 ========================
 
-Copyright (c) 2012-2019. The SimGrid Team. All rights reserved.
+Copyright (c) 2012-2020. The SimGrid Team. All rights reserved.
 
 This program is free software; you can redistribute it and/or modify it
 under the terms of the license (GNU LGPL) which comes with this package.
 
-
 #TODO: child of child of child that printfs. Does it work?
 #TODO: a child dies after its parent. What happen?
 
@@ -25,7 +24,6 @@ under the terms of the license (GNU LGPL) which comes with this package.
 
 """
 
-
 import sys
 import os
 import shlex
@@ -33,6 +31,7 @@ import re
 import difflib
 import signal
 import argparse
+import time
 
 if sys.version_info[0] == 3:
     import subprocess
@@ -46,14 +45,12 @@ else:
 #
 #
 
-
 def isWindows():
     return sys.platform.startswith('win')
 
 # Singleton metaclass that works in Python 2 & 3
 # http://stackoverflow.com/questions/6760685/creating-a-singleton-in-python
 
-
 class _Singleton(type):
     """ A metaclass that creates a Singleton base class when called. """
     _instances = {}
@@ -63,14 +60,13 @@ class _Singleton(type):
             cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs)
         return cls._instances[cls]
 
-
 class Singleton(_Singleton('SingletonMeta', (object,), {})):
     pass
 
-
 SIGNALS_TO_NAMES_DICT = dict((getattr(signal, n), n)
                              for n in dir(signal) if n.startswith('SIG') and '_' not in n)
 
+return_code = 0
 
 # exit correctly
 def tesh_exit(errcode):
@@ -114,27 +110,47 @@ except NameError:
 #
 
 # Global variable. Stores which process group should be killed (or None otherwise)
-running_pgids = list()
+running_pids = list()
+
+# Tests whether the process is dead already
+def process_is_dead(pid):
+    try:
+        os.kill(pid, 0)
+    except ProcessLookupError:
+        return True
+    except OSError as err:
+        if err.errno == errno.ESRCH: # ESRCH == No such process. The process is now dead
+            return True
+    return False
 
-def kill_process_group(pgid):
-    if pgid is None:  # Nobody to kill. We don't know who to kill on windows, or we don't have anyone to kill on signal handler
+# This function send TERM signal + KILL signal after 0.2s to the group of the specified process
+def kill_process_group(pid):
+    if pid is None:  # Nobody to kill. We don't know who to kill on windows, or we don't have anyone to kill on signal handler
         return
 
-    # print("Kill process group {}".format(pgid))
+    try:
+        pgid = os.getpgid(pid)
+    except:
+        # os.getpgid failed. Ok, don't cleanup.
+        return
+    
     try:
         os.killpg(pgid, signal.SIGTERM)
+        if process_is_dead(pid):
+            return
+        time.sleep(0.2)
+        os.killpg(pgid, signal.SIGKILL)
     except OSError:
         # os.killpg failed. OK. Some subprocesses may still be running.
         pass
 
-
 def signal_handler(signal, frame):
     print("Caught signal {}".format(SIGNALS_TO_NAMES_DICT[signal]))
-    global running_pgids
-    running_pgids_copy = running_pgids # Just in case of interthread conflicts.
-    for pgid in running_pgids_copy:
-        kill_process_group(pgid)
-    running_pgids.clear()
+    global running_pids
+    running_pids_copy = running_pids # Just in case of interthread conflicts.
+    for pid in running_pids_copy:
+        kill_process_group(pid)
+    running_pids.clear()
     tesh_exit(5)
 
 
@@ -216,7 +232,7 @@ class Cmd(object):
         self.cwd = os.getcwd()
 
         self.ignore_output = False
-        self.expect_return = 0
+        self.expect_return = [0]
 
         self.output_display = False
 
@@ -292,8 +308,8 @@ class Cmd(object):
             vdefault = m.group(2)
             if vname in os.environ:
                 return "$" + vname
-            else:
-                return vdefault
+            return vdefault
+
         self.args = re.sub(r"\${(\w+):=([^}]*)}", replace_perl_variables, self.args)
 
         # replace bash environment variables ($THINGS) to their values
@@ -321,13 +337,15 @@ class Cmd(object):
 
         self.args += TeshState().args_suffix
 
-        print("[" + FileReader().filename + ":" + str(self.linenumber) + "] " + self.args)
+        logs = list()
+        logs.append("[{file}:{number}] {args}".format(file=FileReader().filename,
+            number=self.linenumber, args=self.args))
 
         args = shlex.split(self.args)
-        #print (args)
 
-        global running_pgids
-        local_pgid = None
+        global running_pids
+        local_pid = None
+        global return_code
 
         try:
             preexec_function = None
@@ -341,27 +359,31 @@ class Cmd(object):
                 stderr=subprocess.STDOUT,
                 universal_newlines=True,
                 preexec_fn=preexec_function)
-            try:
-                if not isWindows():
-                    local_pgid = os.getpgid(proc.pid)
-                    running_pgids.append(local_pgid)
-            except OSError:
-                # os.getpgid failed. OK. No cleanup.
-                pass
+            if not isWindows():
+                local_pid = proc.pid
+                running_pids.append(local_pid)
         except PermissionError:
-            print("[" + FileReader().filename + ":" + str(self.linenumber) +
-                  "] Cannot start '" + args[0] + "': The binary is not executable.")
-            print("[" + FileReader().filename + ":" + str(self.linenumber) + "] Current dir: " + os.getcwd())
-            tesh_exit(3)
+            logs.append("[{file}:{number}] Cannot start '{cmd}': The binary is not executable.".format(
+                file=FileReader().filename, number=self.linenumber, cmd=args[0]))
+            logs.append("[{file}:{number}] Current dir: {dir}".format(file=FileReader().filename,
+                number=self.linenumber, dir=os.getcwd()))
+            return_code = max(3, return_code)
+            print('\n'.join(logs))
+            return
         except NotADirectoryError:
-            print("[" + FileReader().filename + ":" + str(self.linenumber) + "] Cannot start '" +
-                  args[0] + "': The path to binary does not exist.")
-            print("[" + FileReader().filename + ":" + str(self.linenumber) + "] Current dir: " + os.getcwd())
-            tesh_exit(3)
+            logs.append("[{file}:{number}] Cannot start '{cmd}': The path to binary does not exist.".format(
+                file=FileReader().filename, number=self.linenumber, cmd=args[0]))
+            logs.append("[{file}:{number}] Current dir: {dir}".format(file=FileReader().filename,
+                number=self.linenumber, dir=os.getcwd()))
+            return_code = max(3, return_code)
+            print('\n'.join(logs))
+            return
         except FileNotFoundError:
-            print("[" + FileReader().filename + ":" + str(self.linenumber) +
-                  "] Cannot start '" + args[0] + "': File not found")
-            tesh_exit(3)
+            logs.append("[{file}:{number}] Cannot start '{cmd}': File not found.".format(
+                file=FileReader().filename, number=self.linenumber, cmd=args[0]))
+            return_code = max(3, return_code)
+            print('\n'.join(logs))
+            return
         except OSError as osE:
             if osE.errno == 8:
                 osE.strerror += "\nOSError: [Errno 8] Executed scripts should start with shebang line (like #!/usr/bin/env sh)"
@@ -370,36 +392,36 @@ class Cmd(object):
         cmdName = FileReader().filename + ":" + str(self.linenumber)
         try:
             (stdout_data, stderr_data) = proc.communicate("\n".join(self.input_pipe), self.timeout)
-            local_pgid = None
+            local_pid = None
             timeout_reached = False
         except subprocess.TimeoutExpired:
             timeout_reached = True
-            print("Test suite `" + FileReader().filename + "': NOK (<" +
-                  cmdName + "> timeout after " + str(self.timeout) + " sec)")
-            running_pgids.remove(local_pgid)
-            kill_process_group(local_pgid)
+            logs.append("Test suite `{file}': NOK (<{cmd}> timeout after {timeout} sec)".format(
+                file=FileReader().filename, cmd=cmdName, timeout=self.timeout))
+            running_pids.remove(local_pid)
+            kill_process_group(local_pid)
             # Try to get the output of the timeout process, to help in debugging.
             try:
                 (stdout_data, stderr_data) = proc.communicate(timeout=1)
             except subprocess.TimeoutExpired:
-                print("[{file}:{number}] Could not retrieve output. Killing the process group failed?".format(
+                logs.append("[{file}:{number}] Could not retrieve output. Killing the process group failed?".format(
                     file=FileReader().filename, number=self.linenumber))
-                tesh_exit(3)
+                return_code = max(3, return_code)
+                print('\n'.join(logs))
+                return
 
         if self.output_display:
-            print(stdout_data)
+            logs.append(str(stdout_data))
 
         # remove text colors
         ansi_escape = re.compile(r'\x1b[^m]*m')
         stdout_data = ansi_escape.sub('', stdout_data)
 
-        #print ((stdout_data, stderr_data))
-
         if self.ignore_output:
-            print("(ignoring the output of <" + cmdName + "> as requested)")
+            logs.append("(ignoring the output of <{cmd}> as requested)".format(cmd=cmdName))
         else:
             stdouta = stdout_data.split("\n")
-            while len(stdouta) > 0 and stdouta[-1] == "":
+            while stdouta and stdouta[-1] == "":
                 del stdouta[-1]
             stdouta = self.remove_ignored_lines(stdouta)
             stdcpy = stdouta[:]
@@ -419,8 +441,8 @@ class Cmd(object):
                     lineterm="",
                     fromfile='expected',
                     tofile='obtained'))
-            if len(diff) > 0:
-                print("Output of <" + cmdName + "> mismatch:")
+            if diff:
+                logs.append("Output of <{cmd}> mismatch:".format(cmd=cmdName))
                 if self.sort >= 0:  # If sorted, truncate the diff output and show the unsorted version
                     difflen = 0
                     for line in diff:
@@ -428,62 +450,70 @@ class Cmd(object):
                             print(line)
                         difflen += 1
                     if difflen > 50:
-                        print("(diff truncated after 50 lines)")
-                    print("Unsorted observed output:\n")
+                        logs.append("(diff truncated after 50 lines)")
+                    logs.append("Unsorted observed output:\n")
                     for line in stdcpy:
-                        print(line)
+                        logs.append(line)
                 else:  # If not sorted, just display the diff
                     for line in diff:
-                        print(line)
+                        logs.append(line)
 
-                print("Test suite `" + FileReader().filename + "': NOK (<" + cmdName + "> output mismatch)")
+                logs.append("Test suite `{file}': NOK (<{cmd}> output mismatch)".format(
+                    file=FileReader().filename, cmd=cmdName))
                 if lock is not None:
                     lock.release()
                 if TeshState().keep:
                     f = open('obtained', 'w')
                     obtained = stdout_data.split("\n")
-                    while len(obtained) > 0 and obtained[-1] == "":
+                    while obtained and obtained[-1] == "":
                         del obtained[-1]
                     obtained = self.remove_ignored_lines(obtained)
                     for line in obtained:
                         f.write("> " + line + "\n")
                     f.close()
-                    print("Obtained output kept as requested: " + os.path.abspath("obtained"))
-                tesh_exit(2)
+                    logs.append("Obtained output kept as requested: {path}".format(path=os.path.abspath("obtained")))
+                return_code = max(2, return_code)
+                print('\n'.join(logs))
+                return
 
         if timeout_reached:
-            tesh_exit(3)
-
-        #print ((proc.returncode, self.expect_return))
+            return_code = max(3, return_code)
+            print('\n'.join(logs))
+            return
 
-        if proc.returncode != self.expect_return:
+        if not proc.returncode in self.expect_return:
             if proc.returncode >= 0:
-                print("Test suite `" + FileReader().filename + "': NOK (<" +
-                      cmdName + "> returned code " + str(proc.returncode) + ")")
+                logs.append("Test suite `{file}': NOK (<{cmd}> returned code {code})".format(
+                    file=FileReader().filename, cmd=cmdName, code=proc.returncode))
                 if lock is not None:
                     lock.release()
-                tesh_exit(2)
+                return_code = max(2, return_code)
+                print('\n'.join(logs))
+                return
             else:
-                print("Test suite `" + FileReader().filename + "': NOK (<" + cmdName +
-                      "> got signal " + SIGNALS_TO_NAMES_DICT[-proc.returncode] + ")")
+                logs.append("Test suite `{file}': NOK (<{cmd}> got signal {sig})".format(
+                    file=FileReader().filename, cmd=cmdName,
+                    sig=SIGNALS_TO_NAMES_DICT[-proc.returncode]))
                 if lock is not None:
                     lock.release()
-                tesh_exit(-proc.returncode)
+                return_code = max(max(-proc.returncode, 1), return_code)
+                print('\n'.join(logs))
+                return
 
         if lock is not None:
             lock.release()
 
+        print('\n'.join(logs))
+
     def can_run(self):
         return self.args is not None
 
-
 ##############
 #
 # Main
 #
 #
 
-
 if __name__ == '__main__':
     signal.signal(signal.SIGINT, signal_handler)
     signal.signal(signal.SIGTERM, signal_handler)
@@ -501,7 +531,7 @@ if __name__ == '__main__':
     group1.add_argument(
         '--ignore-jenkins',
         action='store_true',
-        help='ignore all cruft generated on SimGrid continous integration servers')
+        help='ignore all cruft generated on SimGrid continuous integration servers')
     group1.add_argument('--wrapper', metavar='arg', help='Run each command in the provided wrapper (eg valgrind)')
     group1.add_argument(
         '--keep',
@@ -515,7 +545,7 @@ if __name__ == '__main__':
         os.chdir(options.cd)
 
     if options.ignore_jenkins:
-        print("Ignore all cruft seen on SimGrid's continous integration servers")
+        print("Ignore all cruft seen on SimGrid's continuous integration servers")
         # Note: regexps should match at the beginning of lines
         TeshState().ignore_regexps_common = [
             re.compile(r"profiling:"),
@@ -571,7 +601,7 @@ if __name__ == '__main__':
     line = f.readfullline()
     while line is not None:
         # print(">>============="+line+"==<<")
-        if len(line) == 0:
+        if not line:
             #print ("END CMD block")
             if cmd.run_if_possible():
                 cmd = Cmd()
@@ -610,16 +640,17 @@ if __name__ == '__main__':
             cmd.output_display = True
             cmd.ignore_output = True
         elif line[0:15] == "! expect return":
-            cmd.expect_return = int(line[16:])
+            cmd.expect_return = [int(line[16:])]
             #print("expect return "+str(int(line[16:])))
         elif line[0:15] == "! expect signal":
-            sig = line[16:]
-            # get the signal integer value from the signal module
-            if sig not in signal.__dict__:
-                fatal_error("unrecognized signal '" + sig + "'")
-            sig = int(signal.__dict__[sig])
-            # popen return -signal when a process ends with a signal
-            cmd.expect_return = -sig
+            cmd.expect_return = []
+            for sig in (line[16:]).split("|"):
+                # get the signal integer value from the signal module
+                if sig not in signal.__dict__:
+                    fatal_error("unrecognized signal '" + sig + "'")
+                sig = int(signal.__dict__[sig])
+                # popen return -signal when a process ends with a signal
+                cmd.expect_return.append(-sig)
         elif line[0:len("! timeout ")] == "! timeout ":
             if "no" in line[len("! timeout "):]:
                 cmd.timeout = None
@@ -647,7 +678,10 @@ if __name__ == '__main__':
 
     TeshState().join_all_threads()
 
-    if f.filename == "(stdin)":
-        print("Test suite from stdin OK")
+    if return_code == 0:
+        if f.filename == "(stdin)":
+            print("Test suite from stdin OK")
+        else:
+            print("Test suite `" + f.filename + "' OK")
     else:
-        print("Test suite `" + f.filename + "' OK")
+        tesh_exit(return_code)