{
return simgrid::smpi::app::universe_size;
}
+
+/**
+ * @brief Auxiliary method to get the list of hosts on which to deploy the app
+ *
+ * @param e        engine queried for the hosts
+ * @param hostfile path of a file listing one host name per line; if empty, all hosts of the engine are returned
+ * @return the hosts, in file order when a hostfile is given (a host listed several times appears several times)
+ *
+ * Aborts (xbt_assert) if the hostfile cannot be opened or contains no host name.
+ */
+static std::vector<simgrid::s4u::Host*> smpi_get_hosts(simgrid::s4u::Engine* e, const std::string& hostfile)
+{
+  if (hostfile.empty())
+    return e->get_all_hosts();
+
+  std::vector<simgrid::s4u::Host*> hosts;
+  std::ifstream in(hostfile);
+  xbt_assert(in, "smpirun: Cannot open the host file: %s", hostfile.c_str());
+  std::string line;
+  while (std::getline(in, line)) {
+    // Trim surrounding blanks and the '\r' left by CRLF files, so that "node1\r" or " node1 " still names node1
+    const auto first = line.find_first_not_of(" \t\r");
+    if (first == std::string::npos)
+      continue; // empty or whitespace-only line
+    const auto last = line.find_last_not_of(" \t\r");
+    hosts.emplace_back(e->host_by_name(line.substr(first, last - first + 1)));
+  }
+  xbt_assert(not hosts.empty(), "smpirun: the hostfile '%s' is empty", hostfile.c_str());
+  return hosts;
+}
+
+/**
+ * @brief Read the replay configuration from a file
+ *
+ * @param replayfile path of a file listing one entry per line; an empty path means "no replay"
+ * @return the non-blank lines of the file, in order (empty vector when replayfile is empty)
+ *
+ * Aborts (xbt_assert) if the file cannot be opened.
+ */
+static std::vector<std::string> smpi_read_replay(const std::string& replayfile)
+{
+  std::vector<std::string> replay;
+  if (replayfile.empty())
+    return replay;
+
+  std::ifstream in(replayfile);
+  xbt_assert(in, "smpirun: Cannot open the replay file: %s", replayfile.c_str());
+  std::string line;
+  while (std::getline(in, line)) {
+    // Trim surrounding blanks and the '\r' left by CRLF files, so that entries are not stored as "name\r"
+    const auto first = line.find_first_not_of(" \t\r");
+    if (first == std::string::npos)
+      continue; // empty or whitespace-only line
+    const auto last = line.find_last_not_of(" \t\r");
+    replay.emplace_back(line.substr(first, last - first + 1));
+  }
+
+  return replay;
+}
+
+/**
+ * @brief Build the argument vector to pass to one process
+ *
+ * @param rank_id rank of the process; always becomes the first argument
+ * @param replay  entries read from the replay file (empty when not replaying)
+ * @param argc    number of entries of argv to forward
+ * @param argv    application arguments, forwarded only when not replaying
+ * @return {rank_id, argv...} when not replaying, {rank_id, replay[rank_id]} when there is one trace per process,
+ *         {rank_id} when a single trace is shared by all processes
+ *
+ * Aborts (xbt_assert) if there are several traces but none for this rank.
+ */
+static std::vector<std::string> smpi_deployment_get_args(int rank_id, const std::vector<std::string>& replay, int argc,
+                                                          char* argv[])
+{
+  std::vector<std::string> args{std::to_string(rank_id)};
+  if (replay.empty()) {
+    // pass arguments to process only if not a replay execution
+    for (int i = 0; i < argc; i++)
+      args.emplace_back(argv[i]);
+  } else if (replay.size() > 1) {
+    /* one trace per process: refuse to read past the end of the list when more ranks than traces are requested */
+    xbt_assert(rank_id >= 0 && static_cast<size_t>(rank_id) < replay.size(),
+               "smpirun: the replay file only lists %zu traces, but rank %d needs its own trace", replay.size(),
+               rank_id);
+    args.push_back(replay[rank_id]);
+  }
+  return args;
+}
+
+/**
+ * @brief Deploy an SMPI application from a smpirun call
+ *
+ * This used to be done by the smpirun script, parsing either the hostfile or the platform XML.
+ * If no hostfile is provided, the list of hosts is taken from the engine.
+ * Ranks are placed on the hosts in round-robin order (rank i goes to host i modulo the number of hosts).
+ *
+ * @param e          engine in which the actors are created
+ * @param hostfile   path of the hostfile, or empty to use every host of the engine
+ * @param np         number of ranks to create; 0 means one rank per host
+ * @param replayfile path of the replay file, or empty when not replaying
+ * @param map        if non-zero, print the rank-to-host mapping
+ * @param argc       number of application arguments in argv
+ * @param argv       application arguments, forwarded to every rank unless replaying
+ * @return the number of ranks created
+ */
+int smpi_deployment_smpirun(simgrid::s4u::Engine* e, const std::string& hostfile, int np, const std::string& replayfile,
+                            int map, int argc, char* argv[])
+{
+  const auto hosts     = smpi_get_hosts(e, hostfile);
+  const auto replay    = smpi_read_replay(replayfile);
+  const int hosts_size = static_cast<int>(hosts.size());
+  if (np == 0)
+    np = hosts_size;
+
+  xbt_assert(np > 0, "Invalid number of process (np must be > 0). Check your np parameter, platform or hostfile");
+  // With an explicit np and a platform without any host, the round-robin placement below would divide by zero
+  xbt_assert(hosts_size > 0, "smpirun: no host available to run the %d requested ranks. Check your platform", np);
+
+  if (np > hosts_size) {
+    printf("You requested to use %d ranks, but there is only %d processes in your hostfile...\n", np, hosts_size);
+  }
+
+  const bool is_replay = not replay.empty();
+  for (int i = 0; i < np; i++) {
+    simgrid::s4u::Host* host  = hosts[i % hosts_size];
+    const std::string rank_id = std::to_string(i);
+    const auto args           = smpi_deployment_get_args(i, replay, argc, argv);
+    /* the rank is used both as actor name and as function name */
+    auto actor = simgrid::s4u::Actor::create(rank_id, host, rank_id, args);
+    /* keeping the same behavior as done in smpirun script, print mapping rank/process */
+    if (map != 0) {
+      printf("[rank %d] -> %s\n", i, host->get_cname());
+    }
+    actor->set_property("instance_id", "smpirun");
+    actor->set_property("rank", rank_id);
+    if (is_replay)
+      actor->set_property("smpi_replay", "true");
+    /* shared trace file, set it to rank 0 */
+    if (i == 0 && replay.size() == 1)
+      actor->set_property("tracefile", replay[0]);
+  }
+  return np;
+}
\ No newline at end of file
static simgrid::config::Flag<double> smpi_init_sleep(
"smpi/init", "Time to inject inside a call to MPI_Init", 0.0);
+static simgrid::config::Flag<std::string>
+ smpi_hostfile("smpi/hostfile",
+ "Classical MPI hostfile containing list of machines to dispatch "
+ "the processes, one per line",
+ ""); // empty (default): the ranks are deployed on all the hosts returned by the engine
+// smpi/replay: path of the replay file; empty (default) means the application arguments are forwarded to the ranks
+static simgrid::config::Flag<std::string> smpi_replay("smpi/replay",
+ "Replay a trace instead of executing the application", "");
+// smpi/np: 0 (default) means one rank per host found in the hostfile or in the engine
+static simgrid::config::Flag<int> smpi_np("smpi/np", "Number of processes to be created", 0);
+// smpi/map: any non-zero value prints one "[rank N] -> host" line per rank at deployment time
+static simgrid::config::Flag<int> smpi_map("smpi/map", "Display the mapping between nodes and processes", 0);
+
void (*smpi_comm_copy_data_callback)(simgrid::kernel::activity::CommImpl*, void*,
size_t) = &smpi_comm_copy_buffer_callback;
SMPI_init();
- /* This is a ... heavy way to count the MPI ranks */
- int rank_counts = 0;
- simgrid::s4u::Actor::on_creation.connect([&rank_counts](const simgrid::s4u::Actor& actor) {
- if (not actor.is_daemon())
- rank_counts++;
- });
- engine->load_deployment(argv[2]);
+ int rank_counts = smpi_deployment_smpirun(engine, smpi_hostfile.get(), smpi_np.get(), smpi_replay.get(),
+ smpi_map.get(), argc - 2, argv + 2);
SMPI_app_instance_register(smpi_default_instance_name.c_str(), nullptr, rank_counts);
MPI_COMM_WORLD = *smpi_deployment_comm_world(smpi_default_instance_name);
PRIVATIZE="--cfg=smpi/privatization:${SMPI_PRIVATIZATION:-@HAVE_PRIVATIZATION@}"
+NUMPROCS=0
+DEPLOYOPTS=""
+
SIMOPTS="--cfg=surf/precision:1e-9 --cfg=network/model:SMPI"
SMPITMPDIR="$(dirname $(mktemp -u))"
WRAPPER=""
HOSTFILE=""
-HOSTFILETMP=0
-MAPOPT=0
-REPLAY=0
QUIET=""
unset pid
rm -f "${PLATFORMTMP}"
PLATFORMTMP=""
fi
- if [ ${HOSTFILETMP} = 1 ] && [ -n "$HOSTFILE" ] ; then
- rm -f "${HOSTFILE}"
- HOSTFILE=""
- fi
if [ "${UNROLLEDHOSTFILETMP}" = 1 ] && [ -n "$UNROLLEDHOSTFILE" ] ; then
rm -f "${UNROLLEDHOSTFILE}"
UNROLLEDHOSTFILE=""
fi
- if [ -n "${APPLICATIONTMP}" ]; then
- rm -f "${APPLICATIONTMP}"
- APPLICATIONTMP=""
- fi
fi
}
;;
"-replay")
APP_TRACES="$2"
+ if [ ! -f "${APP_TRACES}" ]; then
+ die "the file '${APP_TRACES}' does not exist"
+ fi
+ DEPLOYOPTS="${DEPLOYOPTS} --cfg=smpi/replay:${APP_TRACES}"
shift 2
;;
"-no-privatize")
shift 1
;;
"-map")
- MAPOPT=1
+ DEPLOYOPTS="${DEPLOYOPTS} --cfg=smpi/map:1"
shift 1
;;
"-tmpdir")
exit 1
fi
-if [ -z "${HOSTFILE}" ] ; then
- HOSTFILETMP=1
- HOSTFILE="$(mktemp smpitmp-hostfXXXXXX)"
- @PYTHON_EXECUTABLE@ -c '
-import xml.etree.ElementTree as ET
-import sys
-import re
-
-tree = ET.parse(sys.stdin)
-
-for elem in tree.findall(".//host"):
- print(elem.attrib["id"])
-
-for elem in tree.findall(".//cluster"):
- prefix = elem.attrib["prefix"]
- radical = elem.attrib["radical"]
- suffix = elem.attrib["suffix"]
- for r in radical.split(","):
- m = re.match("^([^-]*?)-([^-]*)$", r)
- if m:
- for i in range(int(m.group(1)), int(m.group(2))):
- print(prefix + str(i) + suffix)
- else:
- print(prefix + r + suffix)
- ' < "${PLATFORM}" > "${HOSTFILE}"
-fi
UNROLLEDHOSTFILETMP=0
# parse if our lines are terminated by :num_process
-if grep -q ':' "$HOSTFILE" ; then
+if [ -n "${HOSTFILE}" ] && grep -q ':' "${HOSTFILE}" ; then
UNROLLEDHOSTFILETMP=1
UNROLLEDHOSTFILE="$(mktemp smpitmp-hostfXXXXXX)"
@PYTHON_EXECUTABLE@ -c '
else:
print(line.strip())
' < "${HOSTFILE}" > "${UNROLLEDHOSTFILE}"
- if [ ${HOSTFILETMP} = 1 ] ; then
- rm "${HOSTFILE}"
- HOSTFILETMP=0
- fi
HOSTFILE=$UNROLLEDHOSTFILE
fi
-# Don't use wc -l to compute it to avoid issues with trailing \n at EOF
-hostfile_procs=$(grep -c "[a-zA-Z0-9]" "$HOSTFILE")
-if [ "${hostfile_procs}" = 0 ] ; then
- die "the hostfile '${HOSTFILE}' is empty"
-fi
-
-if [ -z "${NUMPROCS}" ] ; then
- # Use the amount of processes in the hostfile as default value for the -np parameter
- NUMPROCS=$hostfile_procs
-fi
+DEPLOYOPTS="${DEPLOYOPTS} --cfg=smpi/np:${NUMPROCS}"
+DEPLOYOPTS="${DEPLOYOPTS} --cfg=smpi/hostfile:${HOSTFILE}"
-if [ "${NUMPROCS}" -gt "${hostfile_procs}" ] ; then
- echo "You requested to use ${NUMPROCS} ranks, but there is only ${hostfile_procs} processes in your hostfile..." >&2
-fi
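+# NOTE: hostfile_procs is no longer computed by this script; the "more ranks than hosts" warning is now printed
+# by smpi_deployment_smpirun() on the C++ side. The former shell check is kept below, disabled, for reference:
+#if [ "${NUMPROCS}" -gt "${hostfile_procs}" ] ; then
+# echo "You requested to use ${NUMPROCS} ranks, but there is only ${hostfile_procs} processes in your hostfile..." >&2
+#fi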
##-------------------------------- DEFAULT or SPECIFIED PLATFORM --------------------------------------
if [ -z "${PLATFORM}" ]; then
PLATFORMTMP=${PLATFORM}
fi
##-------------------------------- end DEFAULT or SPECIFIED PLATFORM --------------------------------------
-##-------------------------------- DEFAULT APPLICATION --------------------------------------
-APPLICATIONTMP="$(mktemp smpitmp-appXXXXXX)"
-#APPLICATIONTMP="app.xml"
-
-cat > "${APPLICATIONTMP}" <<APPLICATIONHEAD
-<?xml version='1.0'?>
-<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
-<platform version="4.1">
-APPLICATIONHEAD
-
-##---- cache hostnames of hostfile---------------
-if [ -n "${HOSTFILE}" ] && [ -f "${HOSTFILE}" ]; then
- hostnames=$(< "${HOSTFILE}" tr '\n\r' ' ')
-fi
-
-if [ -n "${APP_TRACES}" ]; then
- if [ -f "${APP_TRACES}" ]; then
- hosttraces=$(< "${APP_TRACES}" tr '\n\r' ' ' )
- NUMTRACES=$(< "${APP_TRACES}" wc -l)
- REPLAY=1
- else
- printf "File not found: %s\n" "${APP_TRACES:-\${APP_TRACES\}}" >&2
- exit 1
- fi
-fi
-
-##----------------------------------------------------------
-## generate application.xml with hostnames from hostfile:
-## the name of host_i (1<=i<=p, where -np p) is the line i in hostfile (where -hostfile hostfile), or "host$i" if
-## hostfile has less than i lines.
-##----------------------------------------------------------
-
-HAVE_SEQ="$(which seq 2>/dev/null)"
-
-if [ -n "${HAVE_SEQ}" ]; then
- SEQ=$(${HAVE_SEQ} 0 $(( NUMPROCS - 1)))
-else
- cnt=0
- while [ $cnt -lt "${NUMPROCS}" ] ; do
- SEQ="$SEQ $cnt"
- cnt=$((cnt + 1));
- done
-fi
-
-set -- $hostnames
-
-##---- generate <actor> tags------------------------------
-#prepare arguments at once
-for ARG in $PROC_ARGS; do
- XML_ARGS="${XML_ARGS}""<argument value=\"${ARG}\"/>
-"
-done
-
-for i in ${SEQ}
-do
- j=$(( i % hostfile_procs + 1 ))
- host=$(eval "echo \${$j}")
-
- ##---- optional display of ranks to actor mapping
- if [ ${MAPOPT} = 1 ]; then
- echo "[rank $i] -> $host"
- fi
- {
- echo " <actor host=\"${host}\" function=\"$i\"> <!-- function name used only for logging -->
- <prop id=\"instance_id\" value=\"smpirun\"/>
- <prop id=\"rank\" value=\"$i\"/>"
- if [ ${REPLAY} = 1 ]; then
- echo " <prop id=\"smpi_replay\" value=\"true\"/>"
- if [ "${NUMTRACES}" -gt 1 ]; then
- # Give each process its trace file (as an argument)
- echo " <argument value=\"$(echo "$hosttraces"|cut -d' ' -f$j)\"/>"
- elif [ ${i} = 0 ]; then
- # Give the shared trace file to rank 0 (as a property)
- echo " <prop id=\"tracefile\" value=\"$(echo "$hosttraces"|cut -d' ' -f1)\"/>"
- fi
- else
- echo "${XML_ARGS}"
- fi
- echo " </actor>"
- } >> "${APPLICATIONTMP}"
-done
-
-cat >> "${APPLICATIONTMP}" <<APPLICATIONFOOT
-</platform>
-APPLICATIONFOOT
-##-------------------------------- end DEFAULT APPLICATION --------------------------------------
##---------------------- SMPI TRACING OPTIONS ---------------------------------
if [ -n "${TRACE_ACTIVE}" ]; then
#define trace filename
# Do not remove, this variable may be used by user code (e.g. StarPU)
export SMPI_GLOBAL_SIZE=${NUMPROCS}
if [ -n "${KEEP}" ] && [ -z "${QUIET}" ] ; then
- echo "${EXEC}" ${PRIVATIZE} "${TRACEOPTIONS}" "${SIMOPTS}" "${PLATFORMTMP}" "${APPLICATIONTMP}"
- if [ ${HOSTFILETMP} = 1 ] ; then
- echo "Generated hostfile ${HOSTFILE} kept."
- fi
+ # Echo the same options as the actual launch line, so that the printed command reproduces the run
+ echo "${EXEC}" ${PRIVATIZE} ${DEPLOYOPTS} "${TRACEOPTIONS}" "${SIMOPTS}" "${PLATFORMTMP}" ${PROC_ARGS}
if [ ${UNROLLEDHOSTFILETMP} = 1 ] ; then
echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} kept."
fi
# * The FD 3 is used to temporarily store FD 1. This is because the shell connects FD 1 to /dev/null when the command
# is launched in the background: this can be overridden in bash but not in standard bourne shell.
exec 3<&0
-${WRAPPER} "@SMPIMAIN@" "${EXEC}" ${PRIVATIZE} ${TRACEOPTIONS} ${SIMOPTS} "${PLATFORMTMP}" "${APPLICATIONTMP}" <&3 3>&- &
+${WRAPPER} "@SMPIMAIN@" "${EXEC}" ${PRIVATIZE} ${DEPLOYOPTS} ${TRACEOPTIONS} ${SIMOPTS} "${PLATFORMTMP}" ${PROC_ARGS} <&3 3>&- &
pid=$!
exec 3>&-
wait $pid
#
if [ ${status} -ne 0 ] ; then
if [ -z "${KEEP}" ] && [ -z "${QUIET}" ]; then
- echo "${EXEC}" ${PRIVATIZE} "${TRACEOPTIONS}" "${SIMOPTS}" "${PLATFORMTMP}" "${APPLICATIONTMP}"
- if [ ${HOSTFILETMP} = 1 ] ; then
- echo "Generated hostfile ${HOSTFILE} kept."
- fi
+ # Echo the same options as the actual launch line, so that the failing command can be re-run as printed
+ echo "${EXEC}" ${PRIVATIZE} ${DEPLOYOPTS} "${TRACEOPTIONS}" "${SIMOPTS}" "${PLATFORMTMP}" ${PROC_ARGS}
if [ ${UNROLLEDHOSTFILETMP} = 1 ] ; then
echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} kept."
fi