AND (Algorithmique Numérique Distribuée)

Public GIT Repository
[tesh] bad cmd result: store the return code instead of aborting
[simgrid.git] / tools / tesh / tesh.py
index aa1f65e..8972bbc 100755 (executable)
@@ -71,6 +71,7 @@ class Singleton(_Singleton('SingletonMeta', (object,), {})):
 SIGNALS_TO_NAMES_DICT = dict((getattr(signal, n), n)
                              for n in dir(signal) if n.startswith('SIG') and '_' not in n)
 
+return_code = 0
 
 # exit correctly
 def tesh_exit(errcode):
@@ -114,8 +115,7 @@ except NameError:
 #
 
 # Global variable. Stores which process group should be killed (or None otherwise)
-pgtokill = None
-
+running_pgids = list()
 
 def kill_process_group(pgid):
     if pgid is None:  # Nobody to kill. We don't know who to kill on windows, or we don't have anyone to kill on signal handler
@@ -131,8 +131,11 @@ def kill_process_group(pgid):
 
 def signal_handler(signal, frame):
     print("Caught signal {}".format(SIGNALS_TO_NAMES_DICT[signal]))
-    if pgtokill is not None:
-        kill_process_group(pgtokill)
+    global running_pgids
+    running_pgids_copy = running_pgids # Just in case of interthread conflicts.
+    for pgid in running_pgids_copy:
+        kill_process_group(pgid)
+    running_pgids.clear()
     tesh_exit(5)
 
 
@@ -324,9 +327,14 @@ class Cmd(object):
         args = shlex.split(self.args)
         #print (args)
 
-        global pgtokill
+        global running_pgids
+        local_pgid = None
+        global return_code
 
         try:
+            preexec_function = None
+            if not isWindows():
+                preexec_function = lambda: os.setpgid(0, 0)
             proc = subprocess.Popen(
                 args,
                 bufsize=1,
@@ -334,10 +342,11 @@ class Cmd(object):
                 stdout=subprocess.PIPE,
                 stderr=subprocess.STDOUT,
                 universal_newlines=True,
-                start_new_session=True)
+                preexec_fn=preexec_function)
             try:
                 if not isWindows():
-                    pgtokill = os.getpgid(proc.pid)
+                    local_pgid = os.getpgid(proc.pid)
+                    running_pgids.append(local_pgid)
             except OSError:
                 # os.getpgid failed. OK. No cleanup.
                 pass
@@ -345,16 +354,19 @@ class Cmd(object):
             print("[" + FileReader().filename + ":" + str(self.linenumber) +
                   "] Cannot start '" + args[0] + "': The binary is not executable.")
             print("[" + FileReader().filename + ":" + str(self.linenumber) + "] Current dir: " + os.getcwd())
-            tesh_exit(3)
+            return_code = max(3, return_code)
+            return
         except NotADirectoryError:
             print("[" + FileReader().filename + ":" + str(self.linenumber) + "] Cannot start '" +
                   args[0] + "': The path to binary does not exist.")
             print("[" + FileReader().filename + ":" + str(self.linenumber) + "] Current dir: " + os.getcwd())
-            tesh_exit(3)
+            return_code = max(3, return_code)
+            return
         except FileNotFoundError:
             print("[" + FileReader().filename + ":" + str(self.linenumber) +
                   "] Cannot start '" + args[0] + "': File not found")
-            tesh_exit(3)
+            return_code = max(3, return_code)
+            return
         except OSError as osE:
             if osE.errno == 8:
                 osE.strerror += "\nOSError: [Errno 8] Executed scripts should start with shebang line (like #!/usr/bin/env sh)"
@@ -363,12 +375,22 @@ class Cmd(object):
         cmdName = FileReader().filename + ":" + str(self.linenumber)
         try:
             (stdout_data, stderr_data) = proc.communicate("\n".join(self.input_pipe), self.timeout)
-            pgtokill = None
+            local_pgid = None
+            timeout_reached = False
         except subprocess.TimeoutExpired:
+            timeout_reached = True
             print("Test suite `" + FileReader().filename + "': NOK (<" +
                   cmdName + "> timeout after " + str(self.timeout) + " sec)")
-            kill_process_group(pgtokill)
-            tesh_exit(3)
+            running_pgids.remove(local_pgid)
+            kill_process_group(local_pgid)
+            # Try to get the output of the timeout process, to help in debugging.
+            try:
+                (stdout_data, stderr_data) = proc.communicate(timeout=1)
+            except subprocess.TimeoutExpired:
+                print("[{file}:{number}] Could not retrieve output. Killing the process group failed?".format(
+                    file=FileReader().filename, number=self.linenumber))
+                return_code = max(3, return_code)
+                return
 
         if self.output_display:
             print(stdout_data)
@@ -433,7 +455,12 @@ class Cmd(object):
                         f.write("> " + line + "\n")
                     f.close()
                     print("Obtained output kept as requested: " + os.path.abspath("obtained"))
-                tesh_exit(2)
+                return_code = max(2, return_code)
+                return
+
+        if timeout_reached:
+            return_code = max(3, return_code)
+            return
 
         #print ((proc.returncode, self.expect_return))
 
@@ -443,13 +470,15 @@ class Cmd(object):
                       cmdName + "> returned code " + str(proc.returncode) + ")")
                 if lock is not None:
                     lock.release()
-                tesh_exit(2)
+                return_code = max(2, return_code)
+                return
             else:
                 print("Test suite `" + FileReader().filename + "': NOK (<" + cmdName +
                       "> got signal " + SIGNALS_TO_NAMES_DICT[-proc.returncode] + ")")
                 if lock is not None:
                     lock.release()
-                tesh_exit(-proc.returncode)
+                return_code = max(max(-proc.returncode, 1), return_code)
+                return
 
         if lock is not None:
             lock.release()
@@ -628,7 +657,10 @@ if __name__ == '__main__':
 
     TeshState().join_all_threads()
 
-    if f.filename == "(stdin)":
-        print("Test suite from stdin OK")
+    if return_code == 0:
+        if f.filename == "(stdin)":
+            print("Test suite from stdin OK")
+        else:
+            print("Test suite `" + f.filename + "' OK")
     else:
-        print("Test suite `" + f.filename + "' OK")
+        tesh_exit(return_code)