Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
kill a useless function
[simgrid.git] / src / smpi / smpirun.in
1 #! /bin/sh
2
3 # Copyright (c) 2007-2016, The SimGrid Team. All rights reserved.
4
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the license (GNU LGPL) which comes with this package.
7
8 @CMAKE_SMPI_COMMAND@
9
# Version strings; the @...@ placeholders are presumably substituted when this
# template is configured (TODO confirm against the build system).
SIMGRID_VERSION='@SIMGRID_VERSION_STRING@'
SIMGRID_GITHASH='@SIMGRID_GITHASH@'

# Built-in defaults for the auto-generated platform.
DEFAULT_LOOPBACK_BANDWIDTH=498000000Bps
DEFAULT_LOOPBACK_LATENCY=0.000004s
DEFAULT_NETWORK_BANDWIDTH=$((26 * 1024 * 1024))Bps
DEFAULT_NETWORK_LATENCY=0.000005s
DEFAULT_NUMPROCS=4
DEFAULT_SPEED=100flops

# Working values, seeded from the defaults above.
LOOPBACK_BANDWIDTH=$DEFAULT_LOOPBACK_BANDWIDTH
LOOPBACK_LATENCY=$DEFAULT_LOOPBACK_LATENCY
NETWORK_BANDWIDTH=$DEFAULT_NETWORK_BANDWIDTH
NETWORK_LATENCY=$DEFAULT_NETWORK_LATENCY
SPEED=$DEFAULT_SPEED

# Option string enabling the privatization of global variables.
PRIVATIZE='--cfg=smpi/privatize-global-variables:yes'

# Simulator options; every --cfg=/--log= given on the command line is appended.
SIMOPTS='--cfg=surf/precision:1e-9 --cfg=network/model:SMPI --cfg=network/TCP-gamma:4194304'
29
# Print the command-line synopsis and the list of supported options on stdout.
# $0 is expanded inside the here-document, so the help text shows the name
# under which the script was invoked.
usage () {
    cat <<EOF
Usage: $0 [OPTIONS] -platform <xmldesc> -hostfile <hostfile> program [program-options]
Options:
  -keep-temps                # don't remove the generated files after execution
  -wrapper <command>         # use command to run the program (e.g. "valgrind")
  -map                       # display the machine on which each process rank is mapped
  -np <numprocs>             # use that amount of processes from the hostfile.
                             # By default, all processes of the hostfile are used.
  -no-privatize              # Disable the globals privatization, that is activated by default
  -trace-ti                  # activate time independent tracing (for replay, default in smpi_simgrid.txt)
  -trace                     # activate tracing (Paje, default in smpi_simgrid.trace)
  -trace-comment <comment>   # put a comment on the top of the trace file
  -trace-comment-file <file> # put file contents on the top of the trace file as comment
  -trace-grouped             # group MPI processes by location
  -trace-resource            # trace resource utilization
  -trace-viva                # generate configuration for Viva's GraphView
  -trace-file <tracefile>    # name of the tracefile (smpi_simgrid.trace)
  -ext <value>               # additional parameter (reserved)

  -version                   # Displays the SimGrid version (human readable)
  -git-version               # Displays the git hash of SimGrid

or (deprecated usage):
  $0 [-keep-temps] [-np <numprocs>] [-bandwidth <bytes/sec>] [-latency <secs>] program [program-options]

EOF
}
59
# Without any argument there is nothing to run: show the help and stop.
# The bare "exit" returns the status of the last command, i.e. of usage.
[ $# -ne 0 ] || {
    usage
    exit
}
66
# Option-backed settings filled in by the command-line parser.
EXTOPT=""
WRAPPER=""
HOSTFILE=""
# 1 when HOSTFILE was generated by this script (and may therefore be deleted).
HOSTFILETMP=0

# Internal state that later sections only append to or test. It is reset here
# so that identically named variables inherited from the caller's environment
# cannot leak in: smpirun_cleanup would otherwise "rm -f" an inherited
# APPLICATIONTMP/PLATFORMTMP/UNROLLEDHOSTFILE on an early signal, and PROC_ARGS,
# SEQ or TRACEOPTIONS would silently alter the simulated command line.
UNROLLEDHOSTFILE=""
UNROLLEDHOSTFILETMP=0
PLATFORMTMP=""
APPLICATIONTMP=""
PROC_ARGS=""
SEQ=""
TRACEOPTIONS=""

# PID of the simulation once it has been started in the background.
unset pid

# Signals on which the temporary files are removed and the child is killed.
trapped_signals="HUP INT QUIT ILL ABRT SEGV FPE ALRM TERM USR1 USR2 BUS"
75
# Remove the temporary files generated by this script, unless -keep-temps was
# given (KEEP non-empty).
#
# Globals read:    KEEP, PLATFORM, HOSTFILETMP, UNROLLEDHOSTFILETMP
# Globals cleared: PLATFORMTMP, HOSTFILE, UNROLLEDHOSTFILE, APPLICATIONTMP
#                  (each one only when the matching file is removed)
#
# The function may run from a signal handler before every variable has been
# assigned, so all expansions are quoted and given a default.
smpirun_cleanup()
{
  if [ -n "${KEEP:-}" ] ; then
      return 0
  fi

  # The platform file is ours to delete only when no -platform was given.
  if [ -z "${PLATFORM:-}" ] && [ -n "${PLATFORMTMP:-}" ]; then
      rm -f -- "${PLATFORMTMP}"
      PLATFORMTMP=""
  fi
  # Same for the hostfile: HOSTFILETMP=1 marks a generated one.
  if [ "${HOSTFILETMP:-0}" = 1 ] && [ -n "${HOSTFILE:-}" ] ; then
      rm -f -- "${HOSTFILE}"
      HOSTFILE=""
  fi
  if [ "${UNROLLEDHOSTFILETMP:-0}" = 1 ] && [ -n "${UNROLLEDHOSTFILE:-}" ] ; then
      rm -f -- "${UNROLLEDHOSTFILE}"
      UNROLLEDHOSTFILE=""
  fi
  # The application description is always generated.
  if [ -n "${APPLICATIONTMP:-}" ]; then
      rm -f -- "${APPLICATIONTMP}"
      APPLICATIONTMP=""
  fi
  return 0
}
97
# Signal handler: clean up, stop the simulation, then die from the same signal.
#   $1 - name of the signal that was caught (as listed in $trapped_signals)
# Reads the global $pid (PID of the background simulation, if started).
smpirun_trap() {
  local signame
  signame="$1"

  # Remove the temporary files, then terminate the child if there is one.
  smpirun_cleanup
  [ -z "$pid" ] || kill -TERM $pid
  unset pid

  # Restore the default dispositions so that re-sending the signal to
  # ourselves actually terminates this shell.
  trap - $trapped_signals
  kill -$signame $$

  # Not expected to be reached; make sure the script does not survive.
  kill -ABRT $$
  kill -TERM $$
}
117
# Install smpirun_trap for every signal of $trapped_signals; the handler
# receives the name of the signal as its argument.
for signame in $trapped_signals
do
  trap "smpirun_trap ${signame}" ${signame}
done
121
# Parse smpirun's own options. The loop stops at the first word that is not a
# known option: that word is the program to run and stays in "$1".
while true; do
    # Options that take a value: fail cleanly when the value is missing.
    # A failing "shift 2" would otherwise loop forever (bash) or abort (dash).
    case "$1" in
        "-np" | "-n" | "-bandwidth" | "-latency" | "-platform" | "-hostfile" | \
        "-machinefile" | "-ext" | "-trace-comment" | "-trace-comment-file" | \
        "-trace-file" | "-wrapper")
            if [ $# -lt 2 ]; then
                echo "[$(basename "$0")] ** error: option '$1' requires an argument. Aborting." >&2
                exit 1
            fi
            ;;
    esac

    case "$1" in
        "-np" | "-n")
            NUMPROCS="$2"
            shift 2
            ;;
        "-bandwidth")
            NETWORK_BANDWIDTH="$2"
            shift 2
            ;;
        "-latency")
            NETWORK_LATENCY="$2"
            shift 2
            ;;
        "-platform")
            PLATFORM="$2"
            if [ ! -f "${PLATFORM}" ]; then
                echo "[$(basename "$0")] ** error: the file '${PLATFORM}' does not exist. Aborting." >&2
                exit 1
            fi
            shift 2
            ;;
        "-hostfile" | "-machinefile")
            HOSTFILE="$2"
            if [ ! -f "${HOSTFILE}" ]; then
                echo "[$(basename "$0")] ** error: the file '${HOSTFILE}' does not exist. Aborting." >&2
                exit 1
            fi
            shift 2
            ;;
        "-ext")
            EXTOPT="$2"
            shift 2
            ;;
        "-no-privatize")
            PRIVATIZE=""
            shift 1
            ;;
        "-map")
            MAPOPT="true"
            shift 1
            ;;
        "-trace")
            TRACE_ACTIVE="true"
            shift 1
            ;;
        "-trace-ti")
            TRACE_ACTIVE="true"
            TRACE_TI_ACTIVE="true"
            shift 1
            ;;
        "-trace-comment")
            TRACE_COMMENT="$2"
            shift 2
            ;;
        "-trace-comment-file")
            TRACE_COMMENT_FILE="$2"
            shift 2
            ;;
        "-trace-file")
            TRACE_FILENAME="$2"
            shift 2
            ;;
        "-trace-grouped")
            TRACE_GROUPED="true"
            shift 1
            ;;
        "-trace-resource")
            TRACE_RESOURCE="true"
            shift 1
            ;;
        "-trace-viva")
            TRACE_VIVA="true"
            shift 1
            ;;
        "-keep-temps")
            KEEP="true"
            shift 1
            ;;
        "-wrapper")
            WRAPPER="$2"
            shift 2
            ;;
        "-help" | "--help" | "-h")
            usage
            exit 0
            ;;
        "-version" | "--version" | "-v")
            printf '%b\n' "$SIMGRID_VERSION"
            exit 0
            ;;
        "-git-version" | "--git-version")
            printf '%b\n' "$SIMGRID_GITHASH"
            exit 0
            ;;
        "--cfg="*|"--log="*)
            # One argument may carry several space-separated settings
            # (--cfg="a:1 b:2"); the unquoted expansion splits them on purpose
            # and each one is re-prefixed with --cfg= or --log=.
            for OPT in ${1#*=}
            do
                SIMOPTS="$SIMOPTS ${1%%=*}=$OPT"
            done
            shift 1
            ;;
        "-foreground")
            # Nothing to do, compatibility.
            shift 1
            ;;
        *)
            break
            ;;
    esac
done
241
# The first word left after option parsing is the program to simulate.
# Without it there is nothing to run, and "shift" below would fail.
if [ $# -eq 0 ]; then
    echo "[$(basename "$0")] ** error: no program to run was specified. Aborting." >&2
    usage >&2
    exit 1
fi

# EXEC is expanded unquoted when launched, so a wrapper made of several words
# (e.g. "valgrind -q") is split into command and arguments.
if [ -n "$WRAPPER" ]; then
    EXEC="$WRAPPER $1"
else
    EXEC="$1"
fi
shift
248
# Steal the --cfg and --log options found among the program's arguments: they
# are appended to SIMOPTS, everything else is joined (space-separated) into
# PROC_ARGS. All positional parameters are consumed.
while [ $# -ne 0 ]
do
    if [ "${1#--cfg=}" != "$1" ] || [ "${1#--log=}" != "$1" ]; then
        # Several space-separated settings may share one argument.
        for OPT in ${1#*=}; do
            SIMOPTS="$SIMOPTS ${1%%=*}=$OPT"
        done
    else
        PROC_ARGS="${PROC_ARGS:+$PROC_ARGS }$1"
    fi
    shift
done
265
# At least one of -hostfile and -platform must have been given.
if [ -z "${HOSTFILE}${PLATFORM}" ]; then
    echo "No hostfile nor platform specified."
    usage
    exit 1
fi
271
# No hostfile given: build a temporary one listing every host of the platform.
# The first perl pass extracts the id of each <host .../> tag, the second one
# expands <cluster> tags (prefix + each radical value + suffix).
# All paths are quoted so that a platform path containing spaces works.
if [ -z "${HOSTFILE}" ] ; then
    HOSTFILETMP=1
    HOSTFILE="$(mktemp smpitmp-hostfXXXXXX)" || exit 1
    perl -ne 'print "$1\n" if /.*<host.*?id="(.*?)".*?\/>.*/' "${PLATFORM}" > "${HOSTFILE}"
    perl -ne 'if (/.*<cluster.*?prefix="(.*?)".*?radical="(.*?)".*?suffix="(.*?)".*/) {
                my ($pre,$rad,$post)=($1,$2,$3);
                for my $elm (split(",",$rad)) {
                  if ($elm=~/^([^-]*?)-([^-]*)$/) {
                     for (my $i=$1; $i<=$2;$i++) {
                        print "$pre$i$post\n";
                     }
                  } else {
                     print "$pre$elm$post\n";
                  }
                }
              } elsif (/<cluster/) {
            die ("Unparsable cluster tag. smpirun uses a primitive regular expression to parse cluster tags. Either provide a hostfile yourself or give the attributes prefix, radical and suffix IN THAT ORDER and ON THE SAME LINE as the opening tag (<cluster)");
              }' "${PLATFORM}" >> "${HOSTFILE}"
fi
UNROLLEDHOSTFILETMP=0

# Lines of the form "host:N" stand for N processes on that host. When at least
# one line contains a colon, write an unrolled copy in which each such host is
# repeated N times and use it as the hostfile from now on.
# All paths are quoted so that a hostfile path containing spaces works.
multiple_processes=$(grep -c ":" "${HOSTFILE}")
if [ "${multiple_processes:-0}" -gt 0 ] ; then
    UNROLLEDHOSTFILETMP=1
    UNROLLEDHOSTFILE="$(mktemp smpitmp-hostfXXXXXX)" || exit 1
    perl -ne ' do{ for ( 1 .. $2 ) { print "$1\n" } } if /(.*?):(\d+).*/'  "${HOSTFILE}"  > "${UNROLLEDHOSTFILE}"
    # A generated hostfile is no longer needed once it has been unrolled.
    if [ "${HOSTFILETMP}" = 1 ] ; then
        rm -f -- "${HOSTFILE}"
        HOSTFILETMP=0
    fi
    HOSTFILE=$UNROLLEDHOSTFILE
fi
305
# Don't use wc -l to compute it to avoid issues with trailing \n at EOF
hostfile_procs=$(grep -c "[a-zA-Z0-9]" "${HOSTFILE}")
if [ "${hostfile_procs:-0}" = 0 ] ; then
   echo "[$(basename "$0")] ** error: the hostfile '${HOSTFILE}' is empty. Aborting." >&2
   exit 1
fi

if [ -z "${NUMPROCS}" ] ; then
    # Use the amount of processes in the hostfile as default value for the -np parameter
    NUMPROCS=$hostfile_procs
fi

# NUMPROCS is used in numeric tests and arithmetic below: reject anything that
# is not a plain non-negative integer instead of failing obscurely later on.
case "${NUMPROCS}" in
    *[!0-9]*)
        echo "[$(basename "$0")] ** error: invalid number of processes '${NUMPROCS}'. Aborting." >&2
        exit 1
        ;;
esac

# Oversubscription is allowed (ranks wrap around the hostfile), only warn.
if [ "${NUMPROCS}" -gt "${hostfile_procs}" ] ; then
    echo "You requested to use ${NUMPROCS} processes, but there is only ${hostfile_procs} processes in your hostfile..." >&2
fi
321
##-------------------------------- DEFAULT or SPECIFIED PLATFORM --------------------------------------
# With -platform the given file is used as is. Otherwise a platform of NUMPROCS
# hosts is generated: every host has a loopback link (used for the route to
# itself) and a network link; the route between two distinct hosts goes
# through the network links of both. The file is written in a single
# redirection, hosts and routes being listed from NUMPROCS down to 1.
if [ -n "${PLATFORM}" ]; then
    PLATFORMTMP=${PLATFORM}
else
    PLATFORMTMP="$(mktemp smpitmp-platfXXXXXX)"

    {
        cat <<PLATFORMHEAD
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4">
<AS id="AS0" routing="Full">
PLATFORMHEAD

        # Hosts and their two links.
        i=${NUMPROCS}
        while [ $i -gt 0 ]; do
            echo "  <host id=\"host$i\" speed=\"${SPEED}\"/>"
            echo "  <link id=\"loop$i\" bandwidth=\"${LOOPBACK_BANDWIDTH}\" latency=\"${LOOPBACK_LATENCY}\"/>"
            echo "  <link id=\"link$i\" bandwidth=\"${NETWORK_BANDWIDTH}\" latency=\"${NETWORK_LATENCY}\"/>"
            i=$((i - 1))
        done

        # Full routing table.
        i=${NUMPROCS}
        while [ $i -gt 0 ]; do
            j=${NUMPROCS}
            while [ $j -gt 0 ]; do
                if [ $i -ne $j ]; then
                    echo "  <route src=\"host$i\" dst=\"host$j\"><link_ctn id=\"link$i\"/><link_ctn id=\"link$j\"/></route>"
                else
                    echo "  <route src=\"host$i\" dst=\"host$j\"><link_ctn id=\"loop$i\"/></route>"
                fi
                j=$((j - 1))
            done
            i=$((i - 1))
        done

        cat <<PLATFORMFOOT
</AS>
</platform>
PLATFORMFOOT
    } > ${PLATFORMTMP}
fi
##-------------------------------- end DEFAULT or SPECIFIED PLATFORM --------------------------------------
##-------------------------------- DEFAULT APPLICATION --------------------------------------
# Create the temporary deployment file and write its XML prologue; the
# <process> entries and the closing tag are appended further down.
APPLICATIONTMP="$(mktemp smpitmp-appXXXXXX)"
#APPLICATIONTMP="app.xml"

{
    printf '%s\n' "<?xml version='1.0'?>"
    printf '%s\n' '<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">'
    printf '%s\n' '<platform version="4">'
} > ${APPLICATIONTMP}
373
##---- cache hostnames of hostfile---------------
# hostnames: content of the hostfile with line ends turned into spaces.
# NUMHOSTS:  number of lines naming a host. It is counted the same way as the
#            hostfile size check earlier in this script (grep -c, not wc -l),
#            so a missing newline at EOF or blank lines do not skew the modulo
#            used to map ranks onto hosts.
if [ -n "${HOSTFILE}" ] && [ -f "${HOSTFILE}" ]; then
    hostnames=$(tr '\n\r' '  ' < "${HOSTFILE}")
    NUMHOSTS=$(grep -c "[a-zA-Z0-9]" "${HOSTFILE}")
fi
379
# Replay mode (-ext smpi_replay): the program argument is a file listing the
# trace files, one per line.
# hosttraces: that list with line ends turned into spaces.
# NUMTRACES:  number of non-blank lines. Counting with grep rather than wc -l
#             keeps a two-entry list without final newline from being treated
#             as a single shared trace.
if [ "${EXTOPT}" = "smpi_replay" ]; then
    APP_TRACES=$PROC_ARGS
    if [ -n "${APP_TRACES}" ] && [ -f "${APP_TRACES}" ]; then
        hosttraces=$(tr '\n\r' '  ' < "${APP_TRACES}")
        NUMTRACES=$(grep -c "[^[:space:]]" "${APP_TRACES}")
    else
        printf "File not found: %s\n" "${APP_TRACES:-\${APP_TRACES\}}" >&2
        exit 1
    fi
fi
390
391 ##----------------------------------------------------------
392 ##  generate application.xml with hostnames from hostfile:
393 ##  the name of host_i (1<=i<=p, where -np p) is the line i in hostfile (where -hostfile hostfile), or "host$i" if 
394 ##  hostfile has less than i lines.
395 ##----------------------------------------------------------
396
# SEQ: whitespace-separated list of the ranks 0 .. NUMPROCS-1.
# "command -v" is the standard way to look a utility up ("which" is not
# guaranteed to exist). SEQ is reset first so that the fallback loop, which
# appends to it, never starts from a stale or inherited value.
HAVE_SEQ="$(command -v seq 2>/dev/null)"
SEQ=""

if [ -n "${HAVE_SEQ}" ]; then
    SEQ=$("${HAVE_SEQ}" 0 $((${NUMPROCS}-1)))
else
    # seq is not available: count by hand.
    cnt=0
    while [ "$cnt" -lt "${NUMPROCS}" ] ; do
        SEQ="$SEQ $cnt"
        cnt=$((cnt + 1));
    done
fi
408
##---- generate <process> tags------------------------------
# One <process> per rank of ${SEQ}. Rank i runs on entry (i mod NUMHOSTS) + 1
# of the hostfile; if that entry turns out to be empty the generic name
# "host<j>" is used instead.
for i in ${SEQ}
do
    if [ -n "${HOSTFILE}" ]; then
        j=$(( $i % ${NUMHOSTS} + 1 ))
    fi

    # Look the host up once per rank. $hostnames is left unquoted on purpose:
    # word splitting collapses the blanks so that cut sees single spaces.
    host="$(echo $hostnames | cut -d' ' -f"$j")"
    if [ -z "${host}" ]; then
        host="host$j"
    fi

    ##---- optional display of ranks to process mapping
    if [ -n "${MAPOPT}" ]; then
        echo "[rank $i] -> ${host}"
    fi

    {
        echo "  <process host=\"${host}\" function=\"$i\"> <!-- function name used only for logging -->"
        echo "    <argument value=\"1\"/> <!-- instance -->"
        echo "    <argument value=\"$i\"/> <!-- rank -->"
        if [ "${EXTOPT}" = "smpi_replay" ]; then
            # Several traces: pick the one matching this rank's host index,
            # otherwise every rank shares the first (only) trace.
            if  [ "${NUMTRACES}" -gt 1 ]; then
                echo "    <argument value=\"$(echo $hosttraces|cut -d' ' -f"$j")\"/>"
            else
                echo "    <argument value=\"$(echo $hosttraces|cut -d' ' -f1)\"/>"
            fi
        else
            # PROC_ARGS is a space-joined list: split it back into arguments.
            for ARG in $PROC_ARGS; do
                echo "    <argument value=\"${ARG}\"/>"
            done
        fi
        echo "  </process>"
    } >> "${APPLICATIONTMP}"
done

cat >> "${APPLICATIONTMP}" <<APPLICATIONFOOT
</platform>
APPLICATIONFOOT
##-------------------------------- end DEFAULT APPLICATION --------------------------------------
##---------------------- SMPI TRACING OPTIONS ---------------------------------
# Translate the -trace* options into --cfg settings collected in TRACEOPTIONS.
# Nothing is generated unless tracing was requested (-trace or -trace-ti).
if [ -n "${TRACE_ACTIVE}" ]; then
    # The default trace file name depends on the format; -trace-file wins.
    if [ -z "${TRACE_TI_ACTIVE}" ]; then
        : "${TRACE_FILENAME:=smpi_simgrid.trace}"
        TRACEOPTIONS="--cfg=tracing:yes --cfg=tracing/filename:${TRACE_FILENAME} --cfg=tracing/smpi:yes"
    else
        : "${TRACE_FILENAME:=smpi_simgrid.txt}"
        TRACEOPTIONS="--cfg=tracing:yes --cfg=tracing/filename:${TRACE_FILENAME} --cfg=tracing/smpi:yes --cfg=tracing/smpi/format:TI --cfg=tracing/smpi/computing:yes"
    fi

    # Optional extras, appended in a fixed order.
    [ -z "${TRACE_COMMENT}" ] ||
        TRACEOPTIONS="${TRACEOPTIONS} --cfg=tracing/comment:${TRACE_COMMENT}"

    [ -z "${TRACE_COMMENT_FILE}" ] ||
        TRACEOPTIONS="${TRACEOPTIONS} --cfg=tracing/comment-file:${TRACE_COMMENT_FILE}"

    [ -z "${TRACE_GROUPED}" ] ||
        TRACEOPTIONS="${TRACEOPTIONS} --cfg=tracing/smpi/group:yes"

    [ -z "${TRACE_RESOURCE}" ] ||
        TRACEOPTIONS="${TRACEOPTIONS} --cfg=tracing/categorized:yes --cfg=tracing/uncategorized:yes"

    [ -z "${TRACE_VIVA}" ] ||
        TRACEOPTIONS="${TRACEOPTIONS} --cfg=viva/categorized:smpi_cat.plist --cfg=viva/uncategorized:smpi_uncat.plist"
fi
##---------------------- end SMPI TRACING OPTIONS ---------------------------------
482
# Make the number of ranks available to the simulated program's environment.
SMPI_GLOBAL_SIZE=${NUMPROCS}
export SMPI_GLOBAL_SIZE

# With -keep-temps, print a command line and name the generated files that
# are left behind.
if [ -n "${KEEP}" ] ; then
    echo ${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PRIVATIZE} ${PLATFORMTMP} ${APPLICATIONTMP}
    [ ${HOSTFILETMP} = 1 ] &&
        echo "Generated hostfile ${HOSTFILE} kept."
    [ ${UNROLLEDHOSTFILETMP} = 1 ] &&
        echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} kept."
fi
493
# Execute the process
#
# The shell still needs to be alive for the duration in order to do some cleanup after the process.
#
# We are going through great lengths in order to both keep stdin and be able to handle signals:
#
# * The job is launched in the background in order to be able to handle signals.
#
# * FD 3 is used to temporarily hold a copy of FD 0 (stdin). This is because the shell connects the stdin of a
#   command launched in the background to /dev/null: this can be overridden in bash but not in standard bourne shell.
#
# ${PRIVATIZE} is part of the command, at the same position as in the command line echoed for -keep-temps:
# without it neither the default privatization nor -no-privatize has any effect.
# The expansions are left unquoted on purpose: each variable holds a space-separated list of words.
exec 3<&0
${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PRIVATIZE} ${PLATFORMTMP} ${APPLICATIONTMP} <&3 3>&- &
pid=$!
# The child has its own copy of stdin now; drop the temporary descriptor.
exec 3>&-
wait "$pid"
status=$?
# The child is gone: nothing left for the signal handler to kill.
pid=""

smpirun_cleanup

# Report the simulation's own exit status.
exit $status