From 82a824eaf401ea21fa8f4e111ae0e6f4879ef10c Mon Sep 17 00:00:00 2001
From: Gabriel Corona
Date: Mon, 24 Aug 2015 13:57:53 +0200
Subject: [PATCH] smpirun: cleanup files and children before dying

This avoids leaving a lot of temp files and/or running processes behind
after smpirun dies. This used to happen a lot when the smpirun process
was killed by tesh on timeout (filling the CI server with a lot of
garbage).
---
 Review notes (this section is not part of the commit message):

 * smpirun_cleanup: the APPLICATIONTMP guard is now quoted. Unquoted,
   an empty value turned the test into `[ -n ]`, which is always true.
 * smpirun_cleanup: the UNROLLEDHOSTFILETMP / HOSTFILETMP tests default
   to 0 and no longer use `-a`, so a signal that arrives before the flag
   is assigned does not produce a malformed `[` expression.
 * Operands of rm / kill / wait are quoted.
 * Only existing added lines were modified, so hunk sizes are unchanged.
   The resulting blob no longer matches the hash on the "index" line.
 * NOTE(review): the leading whitespace and blank-line placement of the
   context and removed lines were reconstructed from a
   whitespace-collapsed copy of this patch. Confirm them against the
   target tree, or apply with `git apply --ignore-whitespace`.

 src/smpi/smpirun.in | 57 ++++++++++++++++++++++++++++++++++-----------
 tools/tesh/tesh.pl  |  2 +-
 2 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/src/smpi/smpirun.in b/src/smpi/smpirun.in
index 812acdbdfc..e9e2b431ec 100755
--- a/src/smpi/smpirun.in
+++ b/src/smpi/smpirun.in
@@ -65,6 +65,44 @@ fi
 EXTOPT=""
 WRAPPER=""
 HOSTFILE=""
+HOSTFILETMP=0
+
+unset pid
+
+smpirun_cleanup()
+{
+    if [ -z "${KEEP}" ] ; then
+        if [ -z "${PLATFORM}" ] && [ -n "${PLATFORMTMP}" ] ; then
+            rm -f "${PLATFORMTMP}"
+            PLATFORMTMP=""
+        fi
+        if [ "${HOSTFILETMP:-0}" = 1 ] && [ -n "${HOSTFILE}" ] ; then
+            rm -f "${HOSTFILE}"
+            HOSTFILE=""
+        fi
+        if [ "${UNROLLEDHOSTFILETMP:-0}" = 1 ] && [ -n "${UNROLLEDHOSTFILE}" ] ; then
+            rm -f "${UNROLLEDHOSTFILE}"
+            UNROLLEDHOSTFILE=""
+        fi
+        if [ -n "${APPLICATIONTMP}" ] ; then
+            rm -f "${APPLICATIONTMP}"
+            APPLICATIONTMP=""
+        fi
+    fi
+}
+
+smpirun_trap() {
+    smpirun_cleanup
+    if [ -n "${pid}" ] ; then
+        kill -TERM "${pid}"
+    fi
+    unset pid
+    trap - HUP INT QUIT ILL ABRT SEGV FPE ALRM TERM USR1 USR2 BUS
+    kill -TERM $$
+    exit 128
+}
+
+trap smpirun_trap HUP INT QUIT ILL ABRT SEGV FPE ALRM TERM USR1 USR2 BUS
 
 while true; do
     case "$1" in
@@ -229,7 +267,6 @@ if [ -z "${HOSTFILE}" ] && [ -z "${PLATFORM}" ] ; then
     exit 1
 fi
 
-HOSTFILETMP=0
 if [ -z "${HOSTFILE}" ] ; then
     HOSTFILETMP=1
     HOSTFILE="$(mktemp tmphostXXXXXX)"
@@ -458,20 +495,12 @@ if [ -n "${KEEP}" ] ; then
         echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} keeped."
     fi
 fi
-${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PLATFORMTMP} ${APPLICATIONTMP}
+${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PLATFORMTMP} ${APPLICATIONTMP} &
+pid=$!
+wait "${pid}"
 status=$?
+pid=""
 
-if [ -z "${KEEP}" ] ; then
-    if [ -z "${PLATFORM}" ]; then
-        rm ${PLATFORMTMP}
-    fi
-    if [ ${HOSTFILETMP} = 1 ] ; then
-        rm ${HOSTFILE}
-    fi
-    if [ ${UNROLLEDHOSTFILETMP} = 1 ] ; then
-        rm ${UNROLLEDHOSTFILE}
-    fi
-    rm ${APPLICATIONTMP}
-fi
+smpirun_cleanup
 
 exit $status
diff --git a/tools/tesh/tesh.pl b/tools/tesh/tesh.pl
index 6acee9a38d..2dd685b593 100755
--- a/tools/tesh/tesh.pl
+++ b/tools/tesh/tesh.pl
@@ -365,7 +365,7 @@ sub parse_out {
 
   # Did we timeout ? If yes, handle it. If not, kill the forked process.
 
-  if($timeout==-1 and $gotret eq "got signal SIGKILL"){
+  if($timeout==-1 and ($gotret eq "got signal SIGTERM" or $gotret eq "got signal SIGKILL")){
     $gotret="return code 0";
     $timeout=1;
     $gotret= "timeout after $time_to_wait sec";
-- 
2.20.1