X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/c02acff354ef915c0c88943e73959ca5c081b524..321102577020b194dfb7ba89e48687952816849e:/src/smpi/internals/smpi_deployment.cpp diff --git a/src/smpi/internals/smpi_deployment.cpp b/src/smpi/internals/smpi_deployment.cpp index d69dbed712..18abaa6af5 100644 --- a/src/smpi/internals/smpi_deployment.cpp +++ b/src/smpi/internals/smpi_deployment.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2004-2021. The SimGrid Team. +/* Copyright (c) 2004-2022. The SimGrid Team. * All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it @@ -7,10 +7,11 @@ #include "smpi_host.hpp" #include "private.hpp" #include "simgrid/s4u/Engine.hpp" +#include "simgrid/s4u/Barrier.hpp" #include "smpi_comm.hpp" #include -XBT_LOG_EXTERNAL_CATEGORY(smpi); +XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(smpi); namespace simgrid { namespace smpi { @@ -20,17 +21,14 @@ static int universe_size = 0; class Instance { public: - Instance(int max_no_processes, MPI_Comm comm) : size_(max_no_processes), comm_world_(comm) + explicit Instance(int max_no_processes) : size_(max_no_processes) { auto* group = new simgrid::smpi::Group(size_); comm_world_ = new simgrid::smpi::Comm(group, nullptr, false, -1); - // FIXME : using MPI_Attr_put with MPI_UNIVERSE_SIZE is forbidden and we make it a no-op (which triggers a warning - // as MPI_ERR_ARG is returned). Directly calling Comm::attr_put breaks for now, as MPI_UNIVERSE_SIZE,is <0 - // instance.comm_world->attr_put(MPI_UNIVERSE_SIZE, reinterpret_cast(instance.size)); - universe_size += max_no_processes; + bar_ = s4u::Barrier::create(size_); } - + s4u::BarrierPtr bar_; unsigned int size_; unsigned int finalized_ranks_ = 0; MPI_Comm comm_world_; @@ -58,9 +56,7 @@ void SMPI_app_instance_register(const char *name, xbt_main_func_t code, int num_ if (code != nullptr) // When started with smpirun, we will not execute a function simgrid::s4u::Engine::get_instance()->register_function(name, code); - Instance instance(num_processes, MPI_COMM_NULL); - - smpi_instances.insert(std::pair(name, instance)); + smpi_instances.try_emplace(name, num_processes); } void smpi_deployment_register_process(const std::string& instance_id, int rank, const simgrid::s4u::Actor* actor) @@ -69,6 +65,12 @@ void smpi_deployment_register_process(const std::string& instance_id, int rank, instance.comm_world_->group()->set_mapping(actor->get_pid(), rank); } +void smpi_deployment_startup_barrier(const std::string& instance_id) +{ + const Instance& instance = smpi_instances.at(instance_id); + instance.bar_->wait(); +} + void smpi_deployment_unregister_process(const std::string& instance_id) { Instance& instance = smpi_instances.at(instance_id); @@ -91,9 +93,8 @@ MPI_Comm* smpi_deployment_comm_world(const std::string& instance_id) } void smpi_deployment_cleanup_instances(){ - for (auto const& item : smpi_instances) { - XBT_CINFO(smpi, "Stalling SMPI instance: %s. Do all your MPI ranks call MPI_Finalize()?", item.first.c_str()); - Instance instance = item.second; + for (auto const& [name, instance] : smpi_instances) { + XBT_INFO("Stalling SMPI instance: %s. Do all your MPI ranks call MPI_Finalize()?", name.c_str()); simgrid::smpi::Comm::destroy(instance.comm_world_); } smpi_instances.clear(); @@ -105,7 +106,7 @@ int smpi_get_universe_size() } /** @brief Auxiliary method to get list of hosts to deploy app */ -static std::vector smpi_get_hosts(simgrid::s4u::Engine* e, const std::string& hostfile) +static std::vector smpi_get_hosts(const simgrid::s4u::Engine* e, const std::string& hostfile) { if (hostfile == "") { return e->get_all_hosts(); @@ -115,10 +116,10 @@ static std::vector smpi_get_hosts(simgrid::s4u::Engine* e, xbt_assert(in, "smpirun: Cannot open the host file: %s", hostfile.c_str()); std::string str; while (std::getline(in, str)) { - if (str.size() > 0) + if (not str.empty()) hosts.emplace_back(e->host_by_name(str)); } - xbt_assert(hosts.size(), "smpirun: the hostfile '%s' is empty", hostfile.c_str()); + xbt_assert(not hosts.empty(), "smpirun: the hostfile '%s' is empty", hostfile.c_str()); return hosts; } @@ -133,7 +134,7 @@ static std::vector smpi_read_replay(const std::string& replayfile) xbt_assert(in, "smpirun: Cannot open the replay file: %s", replayfile.c_str()); std::string str; while (std::getline(in, str)) { - if (str.size() > 0) + if (not str.empty()) replay.emplace_back(str); } @@ -141,20 +142,16 @@ static std::vector smpi_read_replay(const std::string& replayfile) } /** @brief Build argument vector to pass to process */ -static std::vector smpi_deployment_get_args(int rank_id, const std::vector& replay, int argc, - char* argv[]) +static std::vector smpi_deployment_get_args(int rank_id, const std::vector& replay, + const std::vector& run_args) { std::vector args{std::to_string(rank_id)}; // pass arguments to process only if not a replay execution - if (replay.size() == 0) { - for (int i = 0; i < argc; i++) { - args.push_back(argv[i]); - } - } + if (replay.empty()) + args.insert(args.end(), begin(run_args), end(run_args)); /* one trace per process */ - if (replay.size() > 1) { - args.push_back(replay[rank_id]); - } + if (replay.size() > 1) + args.emplace_back(replay[rank_id]); return args; } @@ -164,8 +161,8 @@ static std::vector smpi_deployment_get_args(int rank_id, const std: * This used to be done at smpirun script, parsing either the hostfile or the platform XML. * If hostfile isn't provided, get the list of hosts from engine. */ -int smpi_deployment_smpirun(simgrid::s4u::Engine* e, const std::string& hostfile, int np, const std::string& replayfile, - int map, int argc, char* argv[]) +int smpi_deployment_smpirun(const simgrid::s4u::Engine* e, const std::string& hostfile, int np, + const std::string& replayfile, int map, const std::vector& run_args) { auto hosts = smpi_get_hosts(e, hostfile); auto replay = smpi_read_replay(replayfile); @@ -176,25 +173,25 @@ int smpi_deployment_smpirun(simgrid::s4u::Engine* e, const std::string& hostfile xbt_assert(np > 0, "Invalid number of process (np must be > 0). Check your np parameter, platform or hostfile"); if (np > hosts_size) { - printf("You requested to use %d ranks, but there is only %d processes in your hostfile...\n", np, hosts_size); + XBT_INFO("You requested to use %d ranks, but there is only %d processes in your hostfile...", np, hosts_size); } for (int i = 0; i < np; i++) { simgrid::s4u::Host* host = hosts[i % hosts_size]; std::string rank_id = std::to_string(i); - auto args = smpi_deployment_get_args(i, replay, argc, argv); + auto args = smpi_deployment_get_args(i, replay, run_args); auto actor = simgrid::s4u::Actor::create(rank_id, host, rank_id, args); /* keeping the same behavior as done in smpirun script, print mapping rank/process */ if (map != 0) { - printf("[rank %d] -> %s\n", i, host->get_cname()); + XBT_INFO("[rank %d] -> %s", i, host->get_cname()); } actor->set_property("instance_id", "smpirun"); actor->set_property("rank", rank_id); - if (replay.size() > 0) + if (not replay.empty()) actor->set_property("smpi_replay", "true"); /* shared trace file, set it to rank 0 */ if (i == 0 && replay.size() == 1) actor->set_property("tracefile", replay[0]); } return np; -} \ No newline at end of file +}