Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
tesh: Let a grace period of 1s for the child process to die properly
[simgrid.git] / src / smpi / smpirun.in
1 #! /bin/sh
2
3 # Copyright (c) 2007-2015. The SimGrid Team.
4 # All rights reserved.
5
6 # This program is free software; you can redistribute it and/or modify it
7 # under the terms of the license (GNU LGPL) which comes with this package.
8
9 @CMAKE_SMPI_COMMAND@
10
# Version strings; the @...@ placeholders are presumably filled in when this
# template is configured.
SIMGRID_VERSION="@SIMGRID_VERSION_STRING@"
SIMGRID_GITHASH="@SIMGRID_GITHASH@"

# Built-in defaults of the generated platform.
DEFAULT_LOOPBACK_BANDWIDTH=498000000
DEFAULT_LOOPBACK_LATENCY=0.000004
DEFAULT_NETWORK_BANDWIDTH=$((26 * 1024 * 1024))
DEFAULT_NETWORK_LATENCY=0.000005
DEFAULT_NUMPROCS=4
DEFAULT_POWER=100

# Working values, initialised from the defaults above.
LOOPBACK_BANDWIDTH=$DEFAULT_LOOPBACK_BANDWIDTH
LOOPBACK_LATENCY=$DEFAULT_LOOPBACK_LATENCY
NETWORK_BANDWIDTH=$DEFAULT_NETWORK_BANDWIDTH
NETWORK_LATENCY=$DEFAULT_NETWORK_LATENCY
POWER=$DEFAULT_POWER

# Options always passed on the command line of the executed program.
SIMOPTS="--cfg=surf/precision:1e-9 --cfg=network/model:SMPI --cfg=network/TCP_gamma:4194304"
28
# usage: print on stdout how this script should be called.
# The default trace file names quoted below must match the ones set in the
# tracing section of this script (smpi_simgrid.trace, or smpi_simgrid.txt
# for time independent traces).
usage () {
    cat <<EOF
Usage: $0 [OPTIONS] -platform <xmldesc> -hostfile <hostfile> program [program-options]
Options:
  -keep-temps                # don't remove the generated files after execution
  -wrapper <command>         # use command to run the program (e.g. "valgrind")
  -map                       # display the machine on which each process rank is mapped
  -np <numprocs>             # use that amount of processes from the hostfile.
                             # By default, all processes of the hostfile are used.
  -trace-ti                  # activate time independent tracing (for replay, default in smpi_simgrid.txt)
  -trace                     # activate tracing (Paje, default in smpi_simgrid.trace)
  -trace-comment <comment>   # put a comment on the top of the trace file
  -trace-comment-file <file> # put file contents on the top of the trace file as comment
  -trace-grouped             # group MPI processes by location
  -trace-resource            # trace resource utilization
  -trace-viva                # generate configuration for Viva's GraphView
  -trace-file <tracefile>    # name of the tracefile (default: smpi_simgrid.trace, or smpi_simgrid.txt with -trace-ti)
  -ext <value>               # additional parameter (reserved)
  --cfg=<value>              # add --cfg=<value> to the options passed to the program
  --log=<value>              # add --log=<value> to the options passed to the program

  -help                      # Displays this message
  -version                   # Displays the SimGrid version (human readable)
  -git-version               # Displays the git hash of SimGrid

or (deprecated usage):
  $0 [-keep-temps] [-np <numprocs>] [-bandwidth <bytes/sec>] [-latency <secs>] program [program-options]

EOF
}
57
# Called without any parameter: there is nothing to run, so show the help
# text and stop right away.
[ $# -ne 0 ] || {
    usage
    exit
}
64
# Reset every variable the option parser may set, so that a same-named
# variable inherited from the environment (PLATFORM is a common one) is never
# mistaken for a command-line option. All later tests only check for
# emptiness, so "empty" and "unset" are equivalent here.
EXTOPT=""
WRAPPER=""
HOSTFILE=""
NUMPROCS=""
PLATFORM=""
MAPOPT=""
KEEP=""
TRACE_ACTIVE=""
TRACE_TI_ACTIVE=""
TRACE_COMMENT=""
TRACE_COMMENT_FILE=""
TRACE_FILENAME=""
TRACE_GROUPED=""
TRACE_RESOURCE=""
TRACE_VIVA=""

# Abort unless option "$1" is followed by a value. "$2" is the number of
# positional parameters still to be parsed ($#), option included.
# Without this check a trailing option such as "-np" makes "shift 2" fail
# without consuming anything, and bash then loops forever on that argument.
require_value () {
    if [ "$2" -lt 2 ]; then
        echo "[$(basename "$0")] ** error: option '$1' requires an argument. Aborting." >&2
        exit 1
    fi
}

# Abort unless "$1" names an existing regular file.
require_file () {
    if [ ! -f "$1" ]; then
        echo "[$(basename "$0")] ** error: the file '$1' does not exist. Aborting." >&2
        exit 1
    fi
}

# Consume the leading options; the loop stops at the first argument that is
# not a known option (expected to be the program to run).
while true; do
    case "$1" in
        "-np" | "-n")
            require_value "$1" $#
            NUMPROCS="$2"
            shift 2
            ;;
        "-bandwidth")
            require_value "$1" $#
            NETWORK_BANDWIDTH="$2"
            shift 2
            ;;
        "-latency")
            require_value "$1" $#
            NETWORK_LATENCY="$2"
            shift 2
            ;;
        "-platform")
            require_value "$1" $#
            PLATFORM="$2"
            require_file "${PLATFORM}"
            shift 2
            ;;
        "-hostfile" | "-machinefile")
            require_value "$1" $#
            HOSTFILE="$2"
            require_file "${HOSTFILE}"
            shift 2
            ;;

        "-ext")
            require_value "$1" $#
            EXTOPT="$2"
            shift 2
            ;;

        "-map")
            MAPOPT="true"
            shift 1
            ;;

        "-trace")
            TRACE_ACTIVE="true"
            shift 1
            ;;

        "-trace-ti")
            TRACE_ACTIVE="true"
            TRACE_TI_ACTIVE="true"
            shift 1
            ;;

        "-trace-comment")
            require_value "$1" $#
            TRACE_COMMENT="$2"
            shift 2
            ;;

        "-trace-comment-file")
            require_value "$1" $#
            TRACE_COMMENT_FILE="$2"
            shift 2
            ;;

        "-trace-file")
            require_value "$1" $#
            TRACE_FILENAME="$2"
            shift 2
            ;;

        "-trace-grouped")
            TRACE_GROUPED="true"
            shift 1
            ;;

        "-trace-resource")
            TRACE_RESOURCE="true"
            shift 1
            ;;

        "-trace-viva")
            TRACE_VIVA="true"
            shift 1
            ;;

        "-keep-temps")
            KEEP="true"
            shift 1
            ;;

        "-wrapper")
            require_value "$1" $#
            WRAPPER="$2"
            shift 2
            ;;

        "-help" | "--help" | "-h")
            usage
            exit 0
            ;;

        "-version" | "--version" | "-v")
            printf '%b\n' "$SIMGRID_VERSION"
            exit 0
            ;;

        "-git-version" | "--git-version")
            printf '%b\n' "$SIMGRID_GITHASH"
            exit 0
            ;;

        "--cfg="*|"--log="*)
            # The value is deliberately left unquoted: "--cfg=a b" is split
            # and becomes "--cfg=a --cfg=b".
            for OPT in ${1#*=}
            do
                SIMOPTS="$SIMOPTS ${1%%=*}=$OPT"
            done
            shift 1
            ;;
        *)
            break
            ;;
    esac
done
197
# The first remaining argument is the program to run. Fail clearly when it
# is missing instead of going on with an empty command line.
if [ $# -eq 0 ]; then
    echo "[$(basename "$0")] ** error: no program to run was specified. Aborting." >&2
    usage
    exit 1
fi

if [ -n "$WRAPPER" ]; then
    EXEC="$WRAPPER $1"
else
    EXEC="$1"
fi
shift

# Steal the --cfg and --log options found among the program arguments: they
# are moved to SIMOPTS, every other argument is collected in PROC_ARGS.
# PROC_ARGS starts empty so that a same-named environment variable cannot
# inject extra arguments.
PROC_ARGS=""
while [ $# -gt 0 ]; do
    case "$1" in
        "--cfg="*|"--log="*)
            # Deliberately unquoted: "--cfg=a b" becomes "--cfg=a --cfg=b".
            for OPT in ${1#*=}
            do
                SIMOPTS="$SIMOPTS ${1%%=*}=$OPT"
            done
            shift 1
            ;;
        *)
            PROC_ARGS="${PROC_ARGS:+$PROC_ARGS }$1"
            shift
            ;;
    esac
done
221
222
223 ##-----------------------------------
224
225
# At least one of the hostfile and the platform must have been given:
# the concatenation of both names is empty only when both are missing.
case "${HOSTFILE}${PLATFORM}" in
    "")
        echo "No hostfile nor platform specified."
        usage
        exit 1
        ;;
esac
231
# When no hostfile was given, build a temporary one from the platform file.
# HOSTFILETMP is set to 1 to remember that HOSTFILE is a generated file.
HOSTFILETMP=0
if [ -z "${HOSTFILE}" ] ; then
    HOSTFILETMP=1
    # Stop here if the temporary file cannot be created (mktemp reports why).
    HOSTFILE="$(mktemp tmphostXXXXXX)" || exit 1
    # One line per <host .../> tag, containing the value of its id attribute.
    # File names are quoted so that paths containing spaces work.
    perl -ne 'print "$1\n" if /.*<host.*?id="(.*?)".*?\/>.*/' "${PLATFORM}" > "${HOSTFILE}"
    # Expand every <cluster> tag: the radical is a comma-separated list of
    # single values and "first-last" ranges, each element giving one host
    # named prefix + element + suffix.
    perl -ne 'if (/.*<cluster.*?prefix="(.*?)".*?radical="(.*?)".*?suffix="(.*?)".*/) { 
                my ($pre,$rad,$post)=($1,$2,$3); 
                for my $elm (split(",",$rad)) { 
                  if ($elm=~/^([^-]*?)-([^-]*)$/) { 
                     for (my $i=$1; $i<=$2;$i++) { 
                        print "$pre$i$post\n"; 
                     }
                  } else {
                     print "$pre$elm$post\n";
                  }
                }
              } elsif (/<cluster/) {
                die ("Unparsable cluster tag. Either provide an hostfile yourself or give the attributes prefix, radical and suffix in that order on the <cluster line");
              }' "${PLATFORM}" >> "${HOSTFILE}"
fi
252 UNROLLEDHOSTFILETMP=0
253
254 #parse if our lines are terminated by :num_process
255 multiple_processes=`grep -c ":" $HOSTFILE`
256 if [ "${multiple_processes}" -gt 0 ] ; then
257     UNROLLEDHOSTFILETMP=1
258     UNROLLEDHOSTFILE="$(mktemp tmphostXXXXXX)"
259     perl -ne ' do{ for ( 1 .. $2 ) { print "$1\n" } } if /(.*?):(\d+).*/'  ${HOSTFILE}  > ${UNROLLEDHOSTFILE}
260     if [ ${HOSTFILETMP} = 1 ] ; then
261         rm ${HOSTFILE}
262         HOSTFILETMP=0
263     fi
264     HOSTFILE=$UNROLLEDHOSTFILE
265 fi
266
267
# Don't use wc -l to compute it to avoid issues with trailing \n at EOF
hostfile_procs=$(grep -c "[a-zA-Z0-9]" "${HOSTFILE}")
# An unreadable hostfile gives an empty count; treat it like an empty file.
if [ "${hostfile_procs:-0}" = 0 ] ; then
   echo "[$(basename "$0")] ** error: the hostfile '${HOSTFILE}' is empty. Aborting." >&2
   exit 1
fi

if [ -z "${NUMPROCS}" ] ; then
    # Use the amount of processes in the hostfile as default value for the -np parameter
    NUMPROCS=$hostfile_procs
fi

# NUMPROCS is used in numeric tests and arithmetic below: reject anything
# that is not a plain non-negative integer instead of failing later on.
case "${NUMPROCS}" in
    *[!0-9]*)
        echo "[$(basename "$0")] ** error: invalid number of processes '${NUMPROCS}'. Aborting." >&2
        exit 1
        ;;
esac

# Only a warning: the generated mapping cycles over the hostfile entries.
if [ "${NUMPROCS}" -gt "${hostfile_procs}" ] ; then
    echo "You requested to use ${NUMPROCS} processes, but there is only ${hostfile_procs} processes in your hostfile..." >&2
fi
284
##-------------------------------- DEFAULT or SPECIFIED PLATFORM --------------------------------------

# Print on stdout a default platform description with "$1" hosts named
# host1 .. host$1. Each host gets a loopback link (loop$i) used for the
# route to itself and a network link (link$i); the route between two
# different hosts goes through the network links of both.
# Reads POWER, LOOPBACK_BANDWIDTH, LOOPBACK_LATENCY, NETWORK_BANDWIDTH and
# NETWORK_LATENCY. Like the rest of this script it uses the global
# variables i and j as loop counters.
print_default_platform () {
    cat <<PLATFORMHEAD
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
<platform version="3">
<AS id="AS0" routing="Full">
PLATFORMHEAD

    i=$1
    while [ "$i" -gt 0 ]; do
        echo "  <host id=\"host$i\" power=\"${POWER}\"/>"
        echo "  <link id=\"loop$i\" bandwidth=\"${LOOPBACK_BANDWIDTH}\" latency=\"${LOOPBACK_LATENCY}\"/>"
        echo "  <link id=\"link$i\" bandwidth=\"${NETWORK_BANDWIDTH}\" latency=\"${NETWORK_LATENCY}\"/>"
        i=$((i - 1))
    done

    i=$1
    while [ "$i" -gt 0 ]; do
        j=$1
        while [ "$j" -gt 0 ]; do
            if [ "$i" -eq "$j" ]; then
                echo "  <route src=\"host$i\" dst=\"host$j\"><link_ctn id=\"loop$i\"/></route>"
            else
                echo "  <route src=\"host$i\" dst=\"host$j\"><link_ctn id=\"link$i\"/><link_ctn id=\"link$j\"/></route>"
            fi
            j=$((j - 1))
        done
        i=$((i - 1))
    done

    cat <<PLATFORMFOOT
</AS>
</platform>
PLATFORMFOOT
}

if [ -z "${PLATFORM}" ]; then
    # No platform given: generate one in a temporary file. Stop here if the
    # file cannot be created (mktemp reports why).
    PLATFORMTMP="$(mktemp tmpXXXXXX)" || exit 1
    # The file is opened once for the whole description.
    print_default_platform "${NUMPROCS}" > "${PLATFORMTMP}"
else
    PLATFORMTMP=${PLATFORM}
fi
##-------------------------------- end DEFAULT or SPECIFIED PLATFORM --------------------------------------
327
##-------------------------------- DEFAULT APPLICATION --------------------------------------
# The application description is always generated, in a temporary file.
# Stop here if that file cannot be created (mktemp reports why) rather than
# going on with an empty file name.
APPLICATIONTMP="$(mktemp tmpXXXXXX)" || exit 1
#APPLICATIONTMP="app.xml"


cat > "${APPLICATIONTMP}" <<APPLICATIONHEAD
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
<platform version="3">
APPLICATIONHEAD
338
##---- cache hostnames of hostfile---------------
if [ -n "${HOSTFILE}" ] && [ -f "${HOSTFILE}" ]; then
    # All the host names on a single line, separated by blanks.
    hostnames=$(tr '\n\r' '  ' < "${HOSTFILE}")
    # Count the names the same way they are looked up later on (as
    # blank-separated words). Counting lines with wc -l gives 0 for a
    # one-line hostfile without a trailing newline, which then makes the
    # rank-to-host modulo fail, and miscounts files containing blank lines.
    NUMHOSTS=$(echo $hostnames | wc -w)
fi
344
# In replay mode (-ext smpi_replay) the program arguments name a file that
# lists the trace files, one per line; cache that list like the hostnames.
if [ "${EXTOPT}" = "smpi_replay" ]; then
    APP_TRACES=$PROC_ARGS
    if [ -n "${APP_TRACES}" ] && [ -f "${APP_TRACES}" ]; then
        hosttraces=$(tr '\n\r' '  ' < "${APP_TRACES}")
        NUMTRACES=$(wc -l < "${APP_TRACES}")
    else
        # No comma after the format string: it would be printed literally
        # at the start of the next line. When no file name was given at all,
        # the literal text ${APP_TRACES} is shown instead.
        printf "File not found: %s\n" "${APP_TRACES:-\${APP_TRACES\}}" >&2
        exit 1
    fi
fi
355
356 ##----------------------------------------------------------
357 ##  generate application.xml with hostnames from hostfile:
358 ##  the name of host_i (1<=i<=p, where -np p) is the line i
359 ##  in hostfile (where -hostfile hostfile), or "host$i" if
360 ##  hostfile has less than i lines.
361 ##----------------------------------------------------------
362
# Build in SEQ the list of ranks 0 .. NUMPROCS-1, with seq(1) when it is
# available and with a plain shell loop otherwise.
# command -v is the standard way to locate a command; it prints nothing
# when seq is missing.
HAVE_SEQ="$(command -v seq 2>/dev/null)"

if [ -n "${HAVE_SEQ}" ]; then
    SEQ=$("${HAVE_SEQ}" 0 $((${NUMPROCS}-1)))
else
    # Start from an empty list: the loop below only appends, so a SEQ
    # variable inherited from the environment would otherwise add ranks.
    SEQ=""
    cnt=0
    while [ "$cnt" -lt "${NUMPROCS}" ] ; do
        SEQ="$SEQ $cnt"
        cnt=$((cnt + 1))
    done
fi
374
##---- generate <process> tags------------------------------

# Print the host name found at 1-based position "$1" in the cached hostfile
# content ($hostnames, blank-separated), or the generated name "host$1" when
# there is no such entry.
host_at () {
    # $hostnames is left unquoted on purpose so that runs of blanks collapse
    # into single separators before cut selects the field.
    host_at_name="$(echo $hostnames|cut -d' ' -f"$1")"
    if [ -z "${host_at_name}" ]; then
        printf '%s\n' "host$1"
    else
        printf '%s\n' "${host_at_name}"
    fi
}

for i in ${SEQ}
do
    # Ranks are distributed over the hostfile entries in a round-robin way.
    if [ -n "${HOSTFILE}" ]; then
        j=$(( $i % ${NUMHOSTS} + 1 ))
    fi
    # Look the host up once per rank.
    host="$(host_at "$j")"

    ##---- optional display of ranks to process mapping
    if [ -n "${MAPOPT}" ]; then
        echo "[rank $i] -> ${host}"
    fi

    # The file is opened once per process instead of once per line.
    {
        echo "  <process host=\"${host}\" function=\"$i\"> <!-- function name used only for logging -->"
        echo "    <argument value=\"1\"/> <!-- instance -->"
        echo "    <argument value=\"$i\"/> <!-- rank -->"
        if [ "${EXTOPT}" = "smpi_replay" ]; then
            # With several trace files, each process takes the one at the
            # same position as its host; otherwise all share the only one.
            if  [ ${NUMTRACES} -gt 1 ]; then
                echo "    <argument value=\"$(echo $hosttraces|cut -d' ' -f$j)\"/>"
            else
                echo "    <argument value=\"$(echo $hosttraces|cut -d' ' -f1)\"/>"
            fi
        else
            # PROC_ARGS is split on blanks: one <argument> per word.
            for ARG in $PROC_ARGS; do
                echo "    <argument value=\"${ARG}\"/>"
            done
        fi
        echo "  </process>"
    } >> "${APPLICATIONTMP}"
done

cat >> "${APPLICATIONTMP}" <<APPLICATIONFOOT
</platform>
APPLICATIONFOOT
##-------------------------------- end DEFAULT APPLICATION --------------------------------------
413
##---------------------- SMPI TRACING OPTIONS ---------------------------------
# TRACEOPTIONS is only built when TRACE_ACTIVE is set.
if [ -n "${TRACE_ACTIVE}" ]; then
    # Base options: the time independent format has its own default file
    # name and extra settings. The default name is applied only when no
    # file name has been chosen.
    if [ -n "${TRACE_TI_ACTIVE}" ]; then
        : "${TRACE_FILENAME:=smpi_simgrid.txt}"
        TRACEOPTIONS="--cfg=tracing:yes --cfg=tracing/filename:${TRACE_FILENAME} --cfg=tracing/smpi:yes --cfg=tracing/smpi/format:TI --cfg=tracing/smpi/computing:yes"
    else
        : "${TRACE_FILENAME:=smpi_simgrid.trace}"
        TRACEOPTIONS="--cfg=tracing:yes --cfg=tracing/filename:${TRACE_FILENAME} --cfg=tracing/smpi:yes"
    fi

    # Every optional setting appends its own options when it is non-empty.
    TRACEOPTIONS="${TRACEOPTIONS}${TRACE_COMMENT:+ --cfg=tracing/comment:${TRACE_COMMENT}}"
    TRACEOPTIONS="${TRACEOPTIONS}${TRACE_COMMENT_FILE:+ --cfg=tracing/comment_file:${TRACE_COMMENT_FILE}}"
    TRACEOPTIONS="${TRACEOPTIONS}${TRACE_GROUPED:+ --cfg=tracing/smpi/group:yes}"
    TRACEOPTIONS="${TRACEOPTIONS}${TRACE_RESOURCE:+ --cfg=tracing/categorized:yes --cfg=tracing/uncategorized:yes}"
    TRACEOPTIONS="${TRACEOPTIONS}${TRACE_VIVA:+ --cfg=viva/categorized:smpi_cat.plist --cfg=viva/uncategorized:smpi_uncat.plist}"
fi
##---------------------- end SMPI TRACING OPTIONS ---------------------------------
450
# Run the program, then remove the generated files unless -keep-temps was
# given. The script exits with the status of the program.
export SMPI_GLOBAL_SIZE="${NUMPROCS}"
if [ -n "${KEEP}" ] ; then
    # Show the command that is about to be run and the generated files kept.
    echo ${EXEC} ${TRACEOPTIONS} ${SIMOPTS} "${PLATFORMTMP}" "${APPLICATIONTMP}"
    if [ "${HOSTFILETMP}" = 1 ] ; then
        echo "Generated hostfile ${HOSTFILE} keeped."
    fi
    if [ "${UNROLLEDHOSTFILETMP}" = 1 ] ; then
        echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} keeped."
    fi
fi
# EXEC, TRACEOPTIONS and SIMOPTS are deliberately left unquoted: each of them
# holds several blank-separated words that must become separate arguments.
# The two file names are single arguments and are quoted so that a platform
# path containing spaces is passed intact.
${EXEC} ${TRACEOPTIONS} ${SIMOPTS} "${PLATFORMTMP}" "${APPLICATIONTMP}"
status=$?

if [ -z "${KEEP}" ] ; then
    # PLATFORMTMP is a generated file only when no platform was given.
    if [ -z "${PLATFORM}" ]; then
        rm -f -- "${PLATFORMTMP}"
    fi
    if [ "${HOSTFILETMP}" = 1 ] ; then
        rm -f -- "${HOSTFILE}"
    fi
    if [ "${UNROLLEDHOSTFILETMP}" = 1 ] ; then
        rm -f -- "${UNROLLEDHOSTFILE}"
    fi
    rm -f -- "${APPLICATIONTMP}"
fi

exit $status