From 4d9809aa64447811ca65a1242d4f9e07972c6472 Mon Sep 17 00:00:00 2001 From: Millian Poquet Date: Fri, 16 Nov 2018 16:24:07 +0100 Subject: [PATCH 1/1] [smpi args cleanup] internal changes for clean-up - use actor properties instead of args for instance_id and rank - remove argc/argv from several functions - either by making the desired arguments explicit (replay/offline) - or by using the actor properties (online) --- include/smpi/smpi.h | 7 ++-- src/smpi/bindings/smpi_pmpi.cpp | 23 +++++------ src/smpi/include/smpi_actor.hpp | 4 +- src/smpi/internals/smpi_actor.cpp | 62 ++++++++++++------------------ src/smpi/internals/smpi_global.cpp | 23 ++++++----- src/smpi/internals/smpi_replay.cpp | 24 ++++++------ src/smpi/smpirun.in | 4 +- 7 files changed, 72 insertions(+), 75 deletions(-) diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index e6572c94e0..8128abfa54 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -1002,9 +1002,10 @@ XBT_PUBLIC int smpi_main(const char* program, int argc, char* argv[]); XBT_PUBLIC void smpi_process_init(int* argc, char*** argv); /* Trace replay specific stuff */ -XBT_PUBLIC void smpi_replay_init(int* argc, char*** argv); // Only initialization -XBT_PUBLIC void smpi_replay_main(int* argc, char*** argv); // Launch the replay once init is done -XBT_PUBLIC void smpi_replay_run(int* argc, char*** argv); // Both init and start +XBT_PUBLIC void smpi_replay_init(const char* instance_id, int rank, double start_delay_flops); // Only initialization +XBT_PUBLIC void smpi_replay_main(int rank, const char* trace_filename); // Launch the replay once init is done +XBT_PUBLIC void smpi_replay_run(const char* instance_id, int rank, double start_delay_flops, + const char* trace_filename); // Both init and start XBT_PUBLIC void SMPI_app_instance_register(const char* name, xbt_main_func_t code, int num_processes); XBT_PUBLIC void SMPI_init(); diff --git a/src/smpi/bindings/smpi_pmpi.cpp b/src/smpi/bindings/smpi_pmpi.cpp index 08daa6639b..76da5cd4c3 100644 --- a/src/smpi/bindings/smpi_pmpi.cpp +++ b/src/smpi/bindings/smpi_pmpi.cpp @@ -31,17 +31,18 @@ int PMPI_Init(int *argc, char ***argv) { xbt_assert(simgrid::s4u::Engine::is_initialized(), "Your MPI program was not properly initialized. The easiest is to use smpirun to start it."); + // Init is called only once per SMPI process if (not smpi_process()->initializing()){ - simgrid::smpi::ActorExt::init(argc, argv); + simgrid::smpi::ActorExt::init(); } if (not smpi_process()->initialized()){ - int rank = simgrid::s4u::this_actor::get_pid(); - TRACE_smpi_init(rank); - TRACE_smpi_comm_in(rank, __func__, new simgrid::instr::NoOpTIData("init")); - TRACE_smpi_comm_out(rank); - TRACE_smpi_computing_init(rank); - TRACE_smpi_sleeping_init(rank); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_init(rank_traced); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::NoOpTIData("init")); + TRACE_smpi_comm_out(rank_traced); + TRACE_smpi_computing_init(rank_traced); + TRACE_smpi_sleeping_init(rank_traced); smpi_bench_begin(); smpi_process()->mark_as_initialized(); } @@ -54,13 +55,13 @@ int PMPI_Init(int *argc, char ***argv) int PMPI_Finalize() { smpi_bench_end(); - int rank = simgrid::s4u::this_actor::get_pid(); - TRACE_smpi_comm_in(rank, __func__, new simgrid::instr::NoOpTIData("finalize")); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::NoOpTIData("finalize")); smpi_process()->finalize(); - TRACE_smpi_comm_out(rank); - TRACE_smpi_finalize(rank); + TRACE_smpi_comm_out(rank_traced); + TRACE_smpi_finalize(rank_traced); return MPI_SUCCESS; } diff --git a/src/smpi/include/smpi_actor.hpp b/src/smpi/include/smpi_actor.hpp index 0f1a13ea4d..c493f3abb7 100644 --- a/src/smpi/include/smpi_actor.hpp +++ b/src/smpi/include/smpi_actor.hpp @@ -41,7 +41,7 @@ private: public: explicit ActorExt(simgrid::s4u::ActorPtr actor, simgrid::s4u::Barrier* barrier); ~ActorExt(); - void set_data(int* argc, char*** argv); + void set_data(const char* instance_id); void finalize(); int finalized(); int initializing(); @@ -68,7 +68,7 @@ public: void set_comm_intra(MPI_Comm comm); void set_sampling(int s); int sampling(); - static void init(int* argc, char*** argv); + static void init(); simgrid::s4u::ActorPtr get_actor(); int get_optind(); void set_optind(int optind); diff --git a/src/smpi/internals/smpi_actor.cpp b/src/smpi/internals/smpi_actor.cpp index c358275cd4..1e43e29712 100644 --- a/src/smpi/internals/smpi_actor.cpp +++ b/src/smpi/internals/smpi_actor.cpp @@ -61,20 +61,14 @@ ActorExt::~ActorExt() xbt_mutex_destroy(mailboxes_mutex_); } -void ActorExt::set_data(int* argc, char*** argv) +void ActorExt::set_data(const char* instance_id) { - instance_id_ = std::string((*argv)[1]); + instance_id_ = std::string(instance_id); comm_world_ = smpi_deployment_comm_world(instance_id_); simgrid::s4u::Barrier* barrier = smpi_deployment_finalization_barrier(instance_id_); if (barrier != nullptr) // don't overwrite the current one if the instance has none finalization_barrier_ = barrier; - if (*argc > 3) { - memmove(&(*argv)[0], &(*argv)[2], sizeof(char*) * (*argc - 2)); - (*argv)[(*argc) - 1] = nullptr; - (*argv)[(*argc) - 2] = nullptr; - } - (*argc) -= 2; // set the process attached to the mailbox mailbox_small_->set_receiver(actor_); XBT_DEBUG("<%ld> SMPI process has been initialized: %p", actor_->get_pid(), actor_.get()); @@ -233,42 +227,36 @@ int ActorExt::sampling() return sampling_; } -void ActorExt::init(int* argc, char*** argv) +void ActorExt::init() { - if (smpi_process_count() == 0) { xbt_die("SimGrid was not initialized properly before entering MPI_Init. Aborting, please check compilation process " "and use smpirun\n"); } - if (argc != nullptr && argv != nullptr) { - simgrid::s4u::ActorPtr proc = simgrid::s4u::Actor::self(); - proc->get_impl()->context_->set_cleanup(&SIMIX_process_cleanup); - // cheinrich: I'm not sure what the impact of the SMPI_switch_data_segment on this call is. I moved - // this up here so that I can set the privatized region before the switch. - ActorExt* process = smpi_process_remote(proc); - //if we are in MPI_Init and argc handling has already been done. - if (process->initialized()) - return; - - process->state_ = SmpiProcessState::INITIALIZING; - - char* instance_id = (*argv)[1]; - try { - int rank = std::stoi(std::string((*argv)[2])); - smpi_deployment_register_process(instance_id, rank, proc); - } catch (std::invalid_argument& ia) { - throw std::invalid_argument(std::string("Invalid rank: ") + (*argv)[2]); - } - if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) { - /* Now using the segment index of this process */ - process->set_privatized_region(smpi_init_global_memory_segment_process()); - /* Done at the process's creation */ - SMPI_switch_data_segment(proc); - } + simgrid::s4u::ActorPtr proc = simgrid::s4u::Actor::self(); + proc->get_impl()->context_->set_cleanup(&SIMIX_process_cleanup); + // cheinrich: I'm not sure what the impact of the SMPI_switch_data_segment on this call is. I moved + // this up here so that I can set the privatized region before the switch. + ActorExt* process = smpi_process_remote(proc); + // if we are in MPI_Init and argc handling has already been done. + if (process->initialized()) + return; + + if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) { + /* Now using the segment index of this process */ + process->set_privatized_region(smpi_init_global_memory_segment_process()); + /* Done at the process's creation */ + SMPI_switch_data_segment(proc); + } + + const char* instance_id = simgrid::s4u::Actor::self()->get_property("instance_id"); + const int rank = xbt_str_parse_int(simgrid::s4u::Actor::self()->get_property("rank"), "Cannot parse rank"); + + process->state_ = SmpiProcessState::INITIALIZING; + smpi_deployment_register_process(instance_id, rank, proc); - process->set_data(argc, argv); - } + process->set_data(instance_id); } int ActorExt::get_optind() diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 5a9137953c..dcc7d0fed2 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -129,7 +129,7 @@ MPI_Comm smpi_process_comm_self(){ } void smpi_process_init(int *argc, char ***argv){ - simgrid::smpi::ActorExt::init(argc, argv); + simgrid::smpi::ActorExt::init(); } void * smpi_process_get_user_data(){ @@ -423,12 +423,17 @@ typedef std::function smpi_entry_point_type; typedef int (* smpi_c_entry_point_type)(int argc, char **argv); typedef void (*smpi_fortran_entry_point_type)(); -static int smpi_run_entry_point(smpi_entry_point_type entry_point, std::vector args) +static int smpi_run_entry_point(smpi_entry_point_type entry_point, const std::string& executable_path, + std::vector args) { // copy C strings, we need them writable std::vector* args4argv = new std::vector(args.size()); std::transform(begin(args), end(args), begin(*args4argv), [](const std::string& s) { return xbt_strdup(s.c_str()); }); + // set argv[0] to executable_path + xbt_free((*args4argv)[0]); + (*args4argv)[0] = xbt_strdup(executable_path.c_str()); + #if !SMPI_IFORT // take a copy of args4argv to keep reference of the allocated strings const std::vector args2str(*args4argv); @@ -437,7 +442,7 @@ static int smpi_run_entry_point(smpi_entry_point_type entry_point, std::vectorpush_back(nullptr); char** argv = args4argv->data(); - simgrid::smpi::ActorExt::init(&argc, &argv); + simgrid::smpi::ActorExt::init(); #if SMPI_IFORT for_rtl_init_ (&argc, argv); #elif SMPI_FLANG @@ -539,7 +544,7 @@ static int visit_libs(struct dl_phdr_info* info, size_t, void* data) } #endif -static void smpi_init_privatization_dlopen(std::string executable) +static void smpi_init_privatization_dlopen(const std::string& executable) { // Prepare the copy of the binary (get its size) struct stat fdin_stat; @@ -575,7 +580,6 @@ static void smpi_init_privatization_dlopen(std::string executable) simix_global->default_function = [executable, fdin_size](std::vector args) { return std::function([executable, fdin_size, args] { - // Copy the dynamic library: std::string target_executable = executable + "_" + std::to_string(getpid()) + "_" + std::to_string(rank) + ".so"; @@ -628,12 +632,12 @@ static void smpi_init_privatization_dlopen(std::string executable) smpi_entry_point_type entry_point = smpi_resolve_function(handle); if (not entry_point) xbt_die("Could not resolve entry point"); - smpi_run_entry_point(entry_point, args); + smpi_run_entry_point(entry_point, executable, args); }); }; } -static void smpi_init_privatization_no_dlopen(std::string executable) +static void smpi_init_privatization_no_dlopen(const std::string& executable) { if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) smpi_prepare_global_memory_segment(); @@ -648,8 +652,9 @@ static void smpi_init_privatization_no_dlopen(std::string executable) smpi_backup_global_memory_segment(); // Execute the same entry point for each simulated process: - simix_global->default_function = [entry_point](std::vector args) { - return std::function([entry_point, args] { smpi_run_entry_point(entry_point, args); }); + simix_global->default_function = [entry_point, executable](std::vector args) { + return std::function( + [entry_point, executable, args] { smpi_run_entry_point(entry_point, executable, args); }); }; } diff --git a/src/smpi/internals/smpi_replay.cpp b/src/smpi/internals/smpi_replay.cpp index b4830ffcf0..627d061bf1 100644 --- a/src/smpi/internals/smpi_replay.cpp +++ b/src/smpi/internals/smpi_replay.cpp @@ -701,10 +701,12 @@ void AllToAllVAction::kernel(simgrid::xbt::ReplayAction& action) static std::unordered_map storage; /** @brief Only initialize the replay, don't do it for real */ -void smpi_replay_init(int* argc, char*** argv) +void smpi_replay_init(const char* instance_id, int rank, double start_delay_flops) { if (not smpi_process()->initializing()){ - simgrid::smpi::ActorExt::init(argc, argv); + simgrid::s4u::Actor::self()->set_property("instance_id", instance_id); + simgrid::s4u::Actor::self()->set_property("rank", std::to_string(rank)); + simgrid::smpi::ActorExt::init(); } smpi_process()->mark_as_initialized(); smpi_process()->set_replaying(true); @@ -743,10 +745,9 @@ void smpi_replay_init(int* argc, char*** argv) xbt_replay_action_register("compute", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::ComputeAction().execute(action); }); //if we have a delayed start, sleep here. - if(*argc>2){ - double value = xbt_str_parse_double((*argv)[2], "%s is not a double"); - XBT_VERB("Delayed start for instance - Sleeping for %f flops ",value ); - smpi_execute_flops(value); + if (start_delay_flops > 0) { + XBT_VERB("Delayed start for instance - Sleeping for %f flops ", start_delay_flops); + smpi_execute_flops(start_delay_flops); } else { // Wait for the other actors to initialize also simgrid::s4u::this_actor::yield(); @@ -754,12 +755,13 @@ void smpi_replay_init(int* argc, char*** argv) } /** @brief actually run the replay after initialization */ -void smpi_replay_main(int* argc, char*** argv) +void smpi_replay_main(int rank, const char* trace_filename) { static int active_processes = 0; active_processes++; storage[simgrid::s4u::this_actor::get_pid()] = simgrid::smpi::replay::RequestStorage(); - simgrid::xbt::replay_runner(*argc, *argv); + std::string rank_string = std::to_string(rank); + simgrid::xbt::replay_runner(rank_string.c_str(), trace_filename); /* and now, finalize everything */ /* One active process will stop. Decrease the counter*/ @@ -794,8 +796,8 @@ void smpi_replay_main(int* argc, char*** argv) } /** @brief chain a replay initialization and a replay start */ -void smpi_replay_run(int* argc, char*** argv) +void smpi_replay_run(const char* instance_id, int rank, double start_delay_flops, const char* trace_filename) { - smpi_replay_init(argc, argv); - smpi_replay_main(argc, argv); + smpi_replay_init(instance_id, rank, start_delay_flops); + smpi_replay_main(rank, trace_filename); } diff --git a/src/smpi/smpirun.in b/src/smpi/smpirun.in index 858bc05cdd..5b2b05350a 100755 --- a/src/smpi/smpirun.in +++ b/src/smpi/smpirun.in @@ -424,8 +424,8 @@ do fi echo " - - " >> ${APPLICATIONTMP} + + " >> ${APPLICATIONTMP} if [ ${REPLAY} = 1 ]; then if [ ${NUMTRACES} -gt 1 ]; then echo " " >> ${APPLICATIONTMP} -- 2.20.1