Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
smpirun: cleanup files and children before dying
author Gabriel Corona <gabriel.corona@loria.fr>
Mon, 24 Aug 2015 11:57:53 +0000 (13:57 +0200)
committer Gabriel Corona <gabriel.corona@loria.fr>
Mon, 24 Aug 2015 12:52:40 +0000 (14:52 +0200)
This avoids leaving a lot of temp files and/or running processes behind
after smpirun dies.

This used to happen a lot when the smpirun process was killed by
tesh on timeout (filling the CI server with a lot of garbage).

src/smpi/smpirun.in
tools/tesh/tesh.pl

index 812acdb..e9e2b43 100755 (executable)
@@ -65,6 +65,44 @@ fi
 EXTOPT=""
 WRAPPER=""
 HOSTFILE=""
 EXTOPT=""
 WRAPPER=""
 HOSTFILE=""
+HOSTFILETMP=0
+
+unset pid
+
+smpirun_cleanup()  # Remove generated temporary files unless KEEP is set; safe to call repeatedly.
+{
+  if [ -z "${KEEP}" ] ; then
+      if [ -z "${PLATFORM}" ] && [ -n "${PLATFORMTMP}" ]; then
+        rm -f "${PLATFORMTMP}"
+        PLATFORMTMP=""
+      fi
+      if [ "${HOSTFILETMP}" = 1 ] && [ -n "${HOSTFILE}" ] ; then
+          rm -f "${HOSTFILE}"
+          HOSTFILE=""
+      fi
+      if [ "${UNROLLEDHOSTFILETMP}" = 1 ] && [ -n "${UNROLLEDHOSTFILE}" ] ; then  # quoted: an unset flag must not break the test
+          rm -f "${UNROLLEDHOSTFILE}"
+          UNROLLEDHOSTFILE=""
+      fi
+      if [ -n "${APPLICATIONTMP}" ]; then  # quoted: unquoted, an empty value made `[ -n ]` always true
+        rm -f "${APPLICATIONTMP}"
+        APPLICATIONTMP=""
+      fi
+  fi
+}
+
+smpirun_trap() {
+  smpirun_cleanup
+  if ! [ -z "$pid" ]; then
+    kill -TERM $pid
+  fi
+  unset pid
+  trap - HUP INT QUIT ILL ABRT SEGV FPE ALRM TERM USR1 USR2 BUS
+  kill -TERM $$
+  exit 128
+}
+
+trap smpirun_trap HUP INT QUIT ILL ABRT SEGV FPE ALRM TERM USR1 USR2 BUS
 
 while true; do
     case "$1" in
 
 while true; do
     case "$1" in
@@ -229,7 +267,6 @@ if [ -z "${HOSTFILE}" ] && [ -z "${PLATFORM}" ] ; then
     exit 1
 fi
 
     exit 1
 fi
 
-HOSTFILETMP=0
 if [ -z "${HOSTFILE}" ] ; then
     HOSTFILETMP=1
     HOSTFILE="$(mktemp tmphostXXXXXX)"
 if [ -z "${HOSTFILE}" ] ; then
     HOSTFILETMP=1
     HOSTFILE="$(mktemp tmphostXXXXXX)"
@@ -458,20 +495,12 @@ if [ -n "${KEEP}" ] ; then
         echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} keeped." 
     fi
 fi
         echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} keeped." 
     fi
 fi
-${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PLATFORMTMP} ${APPLICATIONTMP}
+${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PLATFORMTMP} ${APPLICATIONTMP} &
+pid=$!
+wait $pid
 status=$?
 status=$?
+pid=""
 
 
-if [ -z "${KEEP}" ] ; then
-    if [ -z "${PLATFORM}" ]; then
-       rm ${PLATFORMTMP}
-    fi
-    if [ ${HOSTFILETMP} = 1 ] ; then
-        rm ${HOSTFILE}
-    fi
-    if [ ${UNROLLEDHOSTFILETMP} = 1 ] ; then
-        rm ${UNROLLEDHOSTFILE}
-    fi
-    rm ${APPLICATIONTMP}
-fi
+smpirun_cleanup
 
 exit $status
 
 exit $status
index 6acee9a..2dd685b 100755 (executable)
@@ -365,7 +365,7 @@ sub parse_out {
 
   # Did we timeout ? If yes, handle it. If not, kill the forked process.
 
 
   # Did we timeout ? If yes, handle it. If not, kill the forked process.
 
-  if($timeout==-1 and $gotret eq "got signal SIGKILL"){
+  if($timeout==-1 and ($gotret eq "got signal SIGTERM" or $gotret eq "got signal SIGKILL")){
     $gotret="return code 0";
     $timeout=1;
     $gotret= "timeout after $time_to_wait sec";
     $gotret="return code 0";
     $timeout=1;
     $gotret= "timeout after $time_to_wait sec";