Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
tesh: try harder to not lose the cwd between threads
[simgrid.git] / tools / tesh / tesh.py
index a8d52eb..5ba29b8 100755 (executable)
@@ -5,7 +5,7 @@
 tesh -- testing shell
 ========================
 
-Copyright (c) 2012-2017. The SimGrid Team. All rights reserved.
+Copyright (c) 2012-2018. The SimGrid Team. All rights reserved.
 
 This program is free software; you can redistribute it and/or modify it
 under the terms of the license (GNU LGPL) which comes with this package.
@@ -39,14 +39,14 @@ if sys.version_info[0] == 3:
 else:
     raise "This program is expected to run with Python3 only"
 
-
-
 ##############
 #
 # Utilities
 #
 #
 
+def isWindows():
+    return sys.platform.startswith('win')
 
 # Singleton metaclass that works in Python 2 & 3
 # http://stackoverflow.com/questions/6760685/creating-a-singleton-in-python
@@ -98,6 +98,33 @@ except NameError:
     #py2
     FileNotFoundError = OSError
 
+##############
+#
+# Cleanup on signal
+#
+#
+
+# Global variable. Stores which process group should be killed (or None otherwise)
+pgtokill = None
+
+def kill_process_group(pgid):
+    if pgid is None: # Nobody to kill. We don't know who to kill on windows, or we don't have anyone to kill on signal handler
+        return
+
+    # print("Kill process group {}".format(pgid))
+    try:
+        os.killpg(pgid, signal.SIGTERM)
+    except OSError:
+        # os.killpg failed. OK. Some subprocesses may still be running.
+        pass
+
+def signal_handler(signal, frame):
+    print("Caught signal {}".format(SIGNALS_TO_NAMES_DICT[signal]))
+    if pgtokill is not None:
+        kill_process_group(pgtokill)
+    tesh_exit(5)
+
+
 
 ##############
 #
@@ -161,7 +188,7 @@ class TeshState(Singleton):
             t.acquire()
             t.release()
 
-#Command line object
+# Command line object
 class Cmd(object):
     def __init__(self):
         self.input_pipe = []
@@ -172,7 +199,8 @@ class Cmd(object):
         self.linenumber = -1
 
         self.background = False
-        self.cwd = None
+        # Python threads lose the cwd
+        self.cwd = os.getcwd()
 
         self.ignore_output = False
         self.expect_return = 0
@@ -232,8 +260,6 @@ class Cmd(object):
     def run_if_possible(self):
         if self.can_run():
             if self.background:
-                #Python threads loose the cwd
-                self.cwd = os.getcwd()
                 lock = _thread.allocate_lock()
                 lock.acquire()
                 TeshState().add_thread(lock)
@@ -246,10 +272,8 @@ class Cmd(object):
 
 
     def _run(self, lock=None):
-        #Python threads loose the cwd
-        if self.cwd is not None:
-            os.chdir(self.cwd)
-            self.cwd = None
+        # Python threads lose the cwd
+        os.chdir(self.cwd)
 
         #retrocompatibility: support ${aaa:=.} variable format
         def replace_perl_variables(m):
@@ -289,8 +313,24 @@ class Cmd(object):
         args = shlex.split(self.args)
         #print (args)
 
+        global pgtokill
+
         try:
-            proc = subprocess.Popen(args, bufsize=1, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, preexec_fn=os.setsid)
+            proc = subprocess.Popen(args, bufsize=1, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, start_new_session=True)
+            try:
+                if not isWindows():
+                    pgtokill = os.getpgid(proc.pid)
+            except OSError:
+                # os.getpgid failed. OK. No cleanup.
+                pass
+        except PermissionError:
+            print("["+FileReader().filename+":"+str(self.linenumber)+"] Cannot start '"+args[0]+"': The binary is not executable.")
+            print("["+FileReader().filename+":"+str(self.linenumber)+"] Current dir: "+os.getcwd())
+            tesh_exit(3)            
+        except NotADirectoryError:
+            print("["+FileReader().filename+":"+str(self.linenumber)+"] Cannot start '"+args[0]+"': The path to binary does not exist.")
+            print("["+FileReader().filename+":"+str(self.linenumber)+"] Current dir: "+os.getcwd())
+            tesh_exit(3)
         except FileNotFoundError:
             print("["+FileReader().filename+":"+str(self.linenumber)+"] Cannot start '"+args[0]+"': File not found")
             tesh_exit(3)
@@ -302,9 +342,10 @@ class Cmd(object):
         cmdName = FileReader().filename+":"+str(self.linenumber)
         try:
             (stdout_data, stderr_data) = proc.communicate("\n".join(self.input_pipe), self.timeout)
+            pgtokill = None
         except subprocess.TimeoutExpired:
             print("Test suite `"+FileReader().filename+"': NOK (<"+cmdName+"> timeout after "+str(self.timeout)+" sec)")
-            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+            kill_process_group(pgtokill)
             tesh_exit(3)
 
         if self.output_display:
@@ -396,6 +437,8 @@ class Cmd(object):
 
 
 if __name__ == '__main__':
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
 
     parser = argparse.ArgumentParser(description='tesh -- testing shell', add_help=True)
     group1 = parser.add_argument_group('Options')
@@ -421,18 +464,20 @@ if __name__ == '__main__':
         print("Ignore all cruft seen on SimGrid's continous integration servers")
         # Note: regexps should match at the beginning of lines
         TeshState().ignore_regexps_common = [
-           re.compile("profiling:"),
-           re.compile("Unable to clean temporary file C:"),
-           re.compile(".*Configuration change: Set \'contexts/"),
-           re.compile("Picked up JAVA_TOOL_OPTIONS: "),
-           re.compile("Picked up _JAVA_OPTIONS: "),
-           re.compile("==[0-9]+== ?WARNING: ASan doesn\'t fully support"),
-           re.compile("==[0-9]+== ?WARNING: ASan is ignoring requested __asan_handle_no_return: stack top:"),
-           re.compile("False positive error reports may follow"),
-           re.compile("For details see http://code.google.com/p/address-sanitizer/issues/detail\\?id=189"),
-           re.compile("For details see https://github.com/google/sanitizers/issues/189"),
-           re.compile("Python runtime initialized with LC_CTYPE=C .*"),
-           re.compile("cmake: /usr/local/lib/libcurl.so.4: no version information available (required by cmake)"), # Seen on CircleCI
+           re.compile(r"profiling:"),
+           re.compile(r"Unable to clean temporary file C:"),
+           re.compile(r".*Configuration change: Set 'contexts/"),
+           re.compile(r"Picked up JAVA_TOOL_OPTIONS: "),
+           re.compile(r"Picked up _JAVA_OPTIONS: "),
+           re.compile(r"==[0-9]+== ?WARNING: ASan doesn't fully support"),
+           re.compile(r"==[0-9]+== ?WARNING: ASan is ignoring requested __asan_handle_no_return: stack top:"),
+           re.compile(r"False positive error reports may follow"),
+           re.compile(r"For details see http://code.google.com/p/address-sanitizer/issues/detail\?id=189"),
+           re.compile(r"For details see https://github.com/google/sanitizers/issues/189"),
+           re.compile(r"Python runtime initialized with LC_CTYPE=C .*"),
+           re.compile(r"cmake: /usr/local/lib/libcurl\.so\.4: no version information available \(required by cmake\)"), # Seen on CircleCI
+           re.compile(r".*mmap broken on FreeBSD, but dlopen\+thread broken too. Switching to dlopen\+raw contexts\."),
+           re.compile(r".*dlopen\+thread broken on Apple and BSD\. Switching to raw contexts\."),
            ]
         TeshState().jenkins = True # This is a Jenkins build