From: Martin Quinson <624847+mquinson@users.noreply.github.com> Date: Wed, 21 Nov 2018 10:43:13 +0000 (+0100) Subject: Merge pull request #314 from simgrid/smpi-args-cleanup X-Git-Tag: v3_22~795^2~1 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/cbd8dae6d524fc62b0fb5fcf7b2604e98f953615?hp=64cedab280b98b26b3b336f0a8dcabbaca553c6c Merge pull request #314 from simgrid/smpi-args-cleanup SMPI args cleanup --- diff --git a/.gitignore b/.gitignore index fd10bb3070..a3295e5f05 100644 --- a/.gitignore +++ b/.gitignore @@ -210,7 +210,6 @@ examples/smpi/ampi/smpi_ampi examples/smpi/energy/f77/sef examples/smpi/energy/f90/sef90 examples/smpi/energy/smpi_energy -examples/smpi/load_balancer_replay/load_balancer_replay examples/smpi/mc/smpi_bugged1 examples/smpi/mc/smpi_bugged1_liveness examples/smpi/mc/smpi_bugged2 diff --git a/examples/s4u/replay-comm/s4u-replay-comm.cpp b/examples/s4u/replay-comm/s4u-replay-comm.cpp index 2152cb4624..9eb3379c11 100644 --- a/examples/s4u/replay-comm/s4u-replay-comm.cpp +++ b/examples/s4u/replay-comm/s4u-replay-comm.cpp @@ -30,16 +30,9 @@ class Replayer { public: explicit Replayer(std::vector args) { - int argc; - char* argv[2]; - argv[0] = &args.at(0)[0]; - if (args.size() == 1) { - argc = 1; - } else { - argc = 2; - argv[1] = &args.at(1)[0]; - } - simgrid::xbt::replay_runner(argc, argv); + const char* actor_name = args[0].c_str(); + const char* trace_filename = args[1].c_str(); + simgrid::xbt::replay_runner(actor_name, trace_filename); } void operator()() @@ -97,7 +90,6 @@ int main(int argc, char* argv[]) argv[0], argv[0], argv[0]); e.load_platform(argv[1]); - e.register_default(&simgrid::xbt::replay_runner); e.register_actor("p0"); e.register_actor("p1"); e.load_deployment(argv[2]); diff --git a/examples/s4u/replay-storage/s4u-replay-storage.cpp b/examples/s4u/replay-storage/s4u-replay-storage.cpp index f1584eb7b2..677f5be491 100644 --- a/examples/s4u/replay-storage/s4u-replay-storage.cpp +++ b/examples/s4u/replay-storage/s4u-replay-storage.cpp @@ -40,16 +40,8 @@ class Replayer { public: explicit Replayer(std::vector args) { - int argc; - char* argv[2]; - argv[0] = &args.at(0)[0]; - if (args.size() == 1) { - argc = 1; - } else { - argc = 2; - argv[1] = &args.at(1)[0]; - } - simgrid::xbt::replay_runner(argc, argv); + const char* actor_name = args[0].c_str(); + simgrid::xbt::replay_runner(actor_name, nullptr); } void operator()() @@ -112,7 +104,6 @@ int main(int argc, char* argv[]) argv[0], argv[0], argv[0]); e.load_platform(argv[1]); - e.register_default(&simgrid::xbt::replay_runner); e.register_actor("p0"); e.load_deployment(argv[2]); diff --git a/examples/smpi/load_balancer_replay/CMakeLists.txt b/examples/smpi/load_balancer_replay/CMakeLists.txt deleted file mode 100644 index 73aae8733e..0000000000 --- a/examples/smpi/load_balancer_replay/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -if(enable_smpi) - set(CMAKE_C_COMPILER "${CMAKE_BINARY_DIR}/smpi_script/bin/smpicc") - set(CMAKE_CXX_COMPILER "${CMAKE_BINARY_DIR}/smpi_script/bin/smpicxx") - include_directories(BEFORE "${CMAKE_HOME_DIRECTORY}/include/smpi") - - add_executable (load_balancer_replay load_balancer_replay.cpp) - target_link_libraries(load_balancer_replay simgrid) - # ADD_TESH(sampi-load-balancer-replay --setenv srcdir=${CMAKE_CURRENT_SOURCE_DIR} --setenv bindir=${CMAKE_CURRENT_BINARY_DIR} --cd ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/load_balancer_replay.tesh) -endif() - -# Uncomment this to install tesh files -#set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/load_balancer_replay.tesh PARENT_SCOPE) -set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/load_balancer_replay.cpp PARENT_SCOPE) diff --git a/examples/smpi/load_balancer_replay/load_balancer_replay.cpp b/examples/smpi/load_balancer_replay/load_balancer_replay.cpp deleted file mode 100644 index 7c0bfb4a62..0000000000 --- a/examples/smpi/load_balancer_replay/load_balancer_replay.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2009-2018. The SimGrid Team. All rights reserved. */ - -/* This program is free software; you can redistribute it and/or modify it - * under the terms of the license (GNU LGPL) which comes with this package. */ - -#include "smpi/smpi.h" -#include "smpi/sampi.h" -#include -#include -#include - -XBT_LOG_NEW_DEFAULT_CATEGORY(sampi_load_balancer_test, "Messages specific for this sampi example"); - - -int main(int argc, char* argv[]) -{ - sg_host_load_plugin_init(); - smpi_replay_init(&argc, &argv); - sg_load_balancer_plugin_init(); // Must be called after smpi_replay_init as this will overwrite some replay actions - - smpi_replay_main(&argc, &argv); - return 0; -} diff --git a/examples/smpi/replay/replay.cpp b/examples/smpi/replay/replay.cpp index 48923f933d..807beb2077 100644 --- a/examples/smpi/replay/replay.cpp +++ b/examples/smpi/replay/replay.cpp @@ -4,7 +4,13 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "xbt/replay.hpp" +#include "simgrid/s4u/Actor.hpp" #include "smpi/smpi.h" +#include "xbt/asserts.h" +#include "xbt/str.h" + +#include "xbt/log.h" +XBT_LOG_NEW_DEFAULT_CATEGORY(msg_test, "Messages specific for this msg example"); /* This shows how to extend the trace format by adding a new kind of events. This function is registered through xbt_replay_action_register() below. */ @@ -22,20 +28,31 @@ static void overriding_send(simgrid::xbt::ReplayAction& args) int main(int argc, char* argv[]) { + const char* instance_id = simgrid::s4u::Actor::self()->get_property("instance_id"); + const int rank = xbt_str_parse_int(simgrid::s4u::Actor::self()->get_property("rank"), "Cannot parse rank"); + const char* trace_filename = argv[1]; + double start_delay_flops = 0; + + if (argc > 2) { + start_delay_flops = xbt_str_parse_double(argv[2], "Cannot parse start_delay_flops"); + } + /* Setup things and register default actions */ - smpi_replay_init(&argc, &argv); + smpi_replay_init(instance_id, rank, start_delay_flops); /* Connect your callback function to the "blah" event in the trace files */ xbt_replay_action_register("blah", action_blah); /* The send action is an override, so we have to first save its previous value in a global */ - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + int new_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &new_rank); + if (new_rank != rank) + XBT_WARN("Rank inconsistency. Got %d, expected %d", new_rank, rank); if (rank == 0) { previous_send = xbt_replay_action_get("send"); xbt_replay_action_register("send", overriding_send); } /* The regular run of the replayer */ - smpi_replay_main(&argc, &argv); + smpi_replay_main(rank, trace_filename); return 0; } diff --git a/examples/smpi/replay_multiple/replay_multiple.c b/examples/smpi/replay_multiple/replay_multiple.c index 2dc72e2b2a..f7a8ef8e3a 100644 --- a/examples/smpi/replay_multiple/replay_multiple.c +++ b/examples/smpi/replay_multiple/replay_multiple.c @@ -13,7 +13,16 @@ XBT_LOG_NEW_DEFAULT_CATEGORY(msg_test, "Messages specific for this msg example"); static int smpi_replay(int argc, char *argv[]) { - smpi_replay_run(&argc, &argv); + const char* instance_id = argv[1]; + int rank = xbt_str_parse_int(argv[2], "Cannot parse rank '%s'"); + const char* trace_filename = argv[3]; + double start_delay_flops = 0; + + if (argc > 4) { + start_delay_flops = xbt_str_parse_double(argv[4], "Cannot parse start_delay_flops"); + } + + smpi_replay_run(instance_id, rank, start_delay_flops, trace_filename); return 0; } diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual.cpp b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual.cpp index 2fc978d750..4ccbb8aa7a 100644 --- a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual.cpp +++ b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual.cpp @@ -56,26 +56,12 @@ static bool job_comparator(const Job* j1, const Job* j2) static void smpi_replay_process(Job* job, simgrid::s4u::BarrierPtr barrier, int rank) { - // Prepare data for smpi_replay_run - int argc = 5; - char** argv = xbt_new(char*, argc); - argv[0] = xbt_strdup("1"); // log only? - argv[1] = xbt_strdup(job->smpi_app_name.c_str()); // application instance - argv[2] = bprintf("%d", rank); // rank - argv[3] = xbt_strdup(job->traces_filenames[rank].c_str()); // smpi trace file for this rank - argv[4] = xbt_strdup("0"); // ? - XBT_INFO("Replaying rank %d of job %d (smpi_app '%s')", rank, job->unique_job_number, job->smpi_app_name.c_str()); - smpi_replay_run(&argc, &argv); + smpi_replay_run(job->smpi_app_name.c_str(), rank, 0, job->traces_filenames[rank].c_str()); XBT_INFO("Finished replaying rank %d of job %d (smpi_app '%s')", rank, job->unique_job_number, job->smpi_app_name.c_str()); barrier->wait(); - - // Memory clean-up — leaks can come from argc/argv modifications from smpi_replay_run - for (int i = 0; i < argc; ++i) - xbt_free(argv[i]); - xbt_free(argv); } // Sleeps for a given amount of time diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr.tesh index dca9faf68e..8eaaa2e6aa 100644 --- a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr.tesh +++ b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr.tesh @@ -7,17 +7,17 @@ $ ./replay_multiple_manual ${srcdir:=.}/../../platforms/small_platform_with_rout > [ 0.000000] (maestro@) Job read: app='job1', file='mixed.txt', size=2, start=0, alloc='0,1' > [ 0.000000] (workload@Bourassa) Launching the job executor of job 0 (app 'job0') > [ 0.000000] (job_0000@Bourassa) Executing job 0 (smpi_app 'job0') -> [ 0.000000] (workload@Bourassa) Launching the job executor of job 1 (app 'job1') -> [ 0.000000] (job_0001@Bourassa) Executing job 1 (smpi_app 'job1') > [ 0.000000] (rank_0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'job0') > [ 0.000000] (rank_0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'job0') +> [ 0.000000] (workload@Bourassa) Launching the job executor of job 1 (app 'job1') +> [ 0.000000] (job_0001@Bourassa) Executing job 1 (smpi_app 'job1') > [ 0.000000] (rank_1_0@Bourassa) Replaying rank 0 of job 1 (smpi_app 'job1') > [ 0.000000] (rank_1_1@Fafard) Replaying rank 1 of job 1 (smpi_app 'job1') -> [1473.975664] (rank_1_0@Bourassa) Simulation time 1473.975664 -> [1473.975664] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0') +> [1473.975664] (rank_0_0@Bourassa) Simulation time 1473.975664 > [1473.975664] (rank_1_0@Bourassa) Finished replaying rank 0 of job 1 (smpi_app 'job1') -> [1473.975664] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0') +> [1473.975664] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0') > [1473.975664] (rank_1_1@Fafard) Finished replaying rank 1 of job 1 (smpi_app 'job1') +> [1473.975664] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0') > [1474.975664] (job_0000@Bourassa) Finished job 0 (smpi_app 'job0') > [1474.975664] (job_0001@Bourassa) Finished job 1 (smpi_app 'job1') > [1474.975664] (maestro@) Simulation finished! Final time: 1474.98 diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr_noise.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr_noise.tesh index 9f2d990df1..d72130b63b 100644 --- a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr_noise.tesh +++ b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_st_sr_noise.tesh @@ -13,11 +13,11 @@ $ ./replay_multiple_manual ${srcdir:=.}/../../platforms/small_platform_with_rout > [ 0.000000] (job_0001@Bourassa) Executing job 1 (smpi_app 'job1') > [ 0.000000] (rank_1_0@Bourassa) Replaying rank 0 of job 1 (smpi_app 'job1') > [ 0.000000] (rank_1_1@Fafard) Replaying rank 1 of job 1 (smpi_app 'job1') -> [1473.975664] (rank_1_0@Bourassa) Simulation time 1473.975664 -> [1473.975664] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0') +> [1473.975664] (rank_0_0@Bourassa) Simulation time 1473.975664 > [1473.975664] (rank_1_0@Bourassa) Finished replaying rank 0 of job 1 (smpi_app 'job1') -> [1473.975664] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0') +> [1473.975664] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0') > [1473.975664] (rank_1_1@Fafard) Finished replaying rank 1 of job 1 (smpi_app 'job1') +> [1473.975664] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0') > [1474.975664] (job_0000@Bourassa) Finished job 0 (smpi_app 'job0') > [1474.975664] (job_0001@Bourassa) Finished job 1 (smpi_app 'job1') > [1474.975664] (maestro@) Simulation finished! Final time: 1474.98 diff --git a/examples/smpi/smpi_msg_masterslave/deployment_masterslave_mailbox_smpi.xml b/examples/smpi/smpi_msg_masterslave/deployment_masterslave_mailbox_smpi.xml index 88ae1240c4..e9a3320fa1 100644 --- a/examples/smpi/smpi_msg_masterslave/deployment_masterslave_mailbox_smpi.xml +++ b/examples/smpi/smpi_msg_masterslave/deployment_masterslave_mailbox_smpi.xml @@ -13,27 +13,27 @@ - - + + - - + + - - + + - - + + - - + + - - + + diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index e6572c94e0..8128abfa54 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -1002,9 +1002,10 @@ XBT_PUBLIC int smpi_main(const char* program, int argc, char* argv[]); XBT_PUBLIC void smpi_process_init(int* argc, char*** argv); /* Trace replay specific stuff */ -XBT_PUBLIC void smpi_replay_init(int* argc, char*** argv); // Only initialization -XBT_PUBLIC void smpi_replay_main(int* argc, char*** argv); // Launch the replay once init is done -XBT_PUBLIC void smpi_replay_run(int* argc, char*** argv); // Both init and start +XBT_PUBLIC void smpi_replay_init(const char* instance_id, int rank, double start_delay_flops); // Only initialization +XBT_PUBLIC void smpi_replay_main(int rank, const char* trace_filename); // Launch the replay once init is done +XBT_PUBLIC void smpi_replay_run(const char* instance_id, int rank, double start_delay_flops, + const char* trace_filename); // Both init and start XBT_PUBLIC void SMPI_app_instance_register(const char* name, xbt_main_func_t code, int num_processes); XBT_PUBLIC void SMPI_init(); diff --git a/include/xbt/replay.hpp b/include/xbt/replay.hpp index 06581483e2..5ddaaccf12 100644 --- a/include/xbt/replay.hpp +++ b/include/xbt/replay.hpp @@ -21,7 +21,7 @@ namespace xbt { typedef std::vector ReplayAction; XBT_PUBLIC_DATA std::ifstream* action_fs; -XBT_PUBLIC int replay_runner(int argc, char* argv[]); +XBT_PUBLIC int replay_runner(const char* actor_name, const char* trace_filename); } } diff --git a/src/smpi/bindings/smpi_pmpi.cpp b/src/smpi/bindings/smpi_pmpi.cpp index 08daa6639b..76da5cd4c3 100644 --- a/src/smpi/bindings/smpi_pmpi.cpp +++ b/src/smpi/bindings/smpi_pmpi.cpp @@ -31,17 +31,18 @@ int PMPI_Init(int *argc, char ***argv) { xbt_assert(simgrid::s4u::Engine::is_initialized(), "Your MPI program was not properly initialized. The easiest is to use smpirun to start it."); + // Init is called only once per SMPI process if (not smpi_process()->initializing()){ - simgrid::smpi::ActorExt::init(argc, argv); + simgrid::smpi::ActorExt::init(); } if (not smpi_process()->initialized()){ - int rank = simgrid::s4u::this_actor::get_pid(); - TRACE_smpi_init(rank); - TRACE_smpi_comm_in(rank, __func__, new simgrid::instr::NoOpTIData("init")); - TRACE_smpi_comm_out(rank); - TRACE_smpi_computing_init(rank); - TRACE_smpi_sleeping_init(rank); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_init(rank_traced); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::NoOpTIData("init")); + TRACE_smpi_comm_out(rank_traced); + TRACE_smpi_computing_init(rank_traced); + TRACE_smpi_sleeping_init(rank_traced); smpi_bench_begin(); smpi_process()->mark_as_initialized(); } @@ -54,13 +55,13 @@ int PMPI_Init(int *argc, char ***argv) int PMPI_Finalize() { smpi_bench_end(); - int rank = simgrid::s4u::this_actor::get_pid(); - TRACE_smpi_comm_in(rank, __func__, new simgrid::instr::NoOpTIData("finalize")); + int rank_traced = simgrid::s4u::this_actor::get_pid(); + TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::NoOpTIData("finalize")); smpi_process()->finalize(); - TRACE_smpi_comm_out(rank); - TRACE_smpi_finalize(rank); + TRACE_smpi_comm_out(rank_traced); + TRACE_smpi_finalize(rank_traced); return MPI_SUCCESS; } diff --git a/src/smpi/include/smpi_actor.hpp b/src/smpi/include/smpi_actor.hpp index 0f1a13ea4d..c493f3abb7 100644 --- a/src/smpi/include/smpi_actor.hpp +++ b/src/smpi/include/smpi_actor.hpp @@ -41,7 +41,7 @@ private: public: explicit ActorExt(simgrid::s4u::ActorPtr actor, simgrid::s4u::Barrier* barrier); ~ActorExt(); - void set_data(int* argc, char*** argv); + void set_data(const char* instance_id); void finalize(); int finalized(); int initializing(); @@ -68,7 +68,7 @@ public: void set_comm_intra(MPI_Comm comm); void set_sampling(int s); int sampling(); - static void init(int* argc, char*** argv); + static void init(); simgrid::s4u::ActorPtr get_actor(); int get_optind(); void set_optind(int optind); diff --git a/src/smpi/internals/smpi_actor.cpp b/src/smpi/internals/smpi_actor.cpp index c358275cd4..1e43e29712 100644 --- a/src/smpi/internals/smpi_actor.cpp +++ b/src/smpi/internals/smpi_actor.cpp @@ -61,20 +61,14 @@ ActorExt::~ActorExt() xbt_mutex_destroy(mailboxes_mutex_); } -void ActorExt::set_data(int* argc, char*** argv) +void ActorExt::set_data(const char* instance_id) { - instance_id_ = std::string((*argv)[1]); + instance_id_ = std::string(instance_id); comm_world_ = smpi_deployment_comm_world(instance_id_); simgrid::s4u::Barrier* barrier = smpi_deployment_finalization_barrier(instance_id_); if (barrier != nullptr) // don't overwrite the current one if the instance has none finalization_barrier_ = barrier; - if (*argc > 3) { - memmove(&(*argv)[0], &(*argv)[2], sizeof(char*) * (*argc - 2)); - (*argv)[(*argc) - 1] = nullptr; - (*argv)[(*argc) - 2] = nullptr; - } - (*argc) -= 2; // set the process attached to the mailbox mailbox_small_->set_receiver(actor_); XBT_DEBUG("<%ld> SMPI process has been initialized: %p", actor_->get_pid(), actor_.get()); @@ -233,42 +227,36 @@ int ActorExt::sampling() return sampling_; } -void ActorExt::init(int* argc, char*** argv) +void ActorExt::init() { - if (smpi_process_count() == 0) { xbt_die("SimGrid was not initialized properly before entering MPI_Init. Aborting, please check compilation process " "and use smpirun\n"); } - if (argc != nullptr && argv != nullptr) { - simgrid::s4u::ActorPtr proc = simgrid::s4u::Actor::self(); - proc->get_impl()->context_->set_cleanup(&SIMIX_process_cleanup); - // cheinrich: I'm not sure what the impact of the SMPI_switch_data_segment on this call is. I moved - // this up here so that I can set the privatized region before the switch. - ActorExt* process = smpi_process_remote(proc); - //if we are in MPI_Init and argc handling has already been done. - if (process->initialized()) - return; - - process->state_ = SmpiProcessState::INITIALIZING; - - char* instance_id = (*argv)[1]; - try { - int rank = std::stoi(std::string((*argv)[2])); - smpi_deployment_register_process(instance_id, rank, proc); - } catch (std::invalid_argument& ia) { - throw std::invalid_argument(std::string("Invalid rank: ") + (*argv)[2]); - } - if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) { - /* Now using the segment index of this process */ - process->set_privatized_region(smpi_init_global_memory_segment_process()); - /* Done at the process's creation */ - SMPI_switch_data_segment(proc); - } + simgrid::s4u::ActorPtr proc = simgrid::s4u::Actor::self(); + proc->get_impl()->context_->set_cleanup(&SIMIX_process_cleanup); + // cheinrich: I'm not sure what the impact of the SMPI_switch_data_segment on this call is. I moved + // this up here so that I can set the privatized region before the switch. + ActorExt* process = smpi_process_remote(proc); + // if we are in MPI_Init and argc handling has already been done. + if (process->initialized()) + return; + + if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) { + /* Now using the segment index of this process */ + process->set_privatized_region(smpi_init_global_memory_segment_process()); + /* Done at the process's creation */ + SMPI_switch_data_segment(proc); + } + + const char* instance_id = simgrid::s4u::Actor::self()->get_property("instance_id"); + const int rank = xbt_str_parse_int(simgrid::s4u::Actor::self()->get_property("rank"), "Cannot parse rank"); + + process->state_ = SmpiProcessState::INITIALIZING; + smpi_deployment_register_process(instance_id, rank, proc); - process->set_data(argc, argv); - } + process->set_data(instance_id); } int ActorExt::get_optind() diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 5a9137953c..dcc7d0fed2 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -129,7 +129,7 @@ MPI_Comm smpi_process_comm_self(){ } void smpi_process_init(int *argc, char ***argv){ - simgrid::smpi::ActorExt::init(argc, argv); + simgrid::smpi::ActorExt::init(); } void * smpi_process_get_user_data(){ @@ -423,12 +423,17 @@ typedef std::function smpi_entry_point_type; typedef int (* smpi_c_entry_point_type)(int argc, char **argv); typedef void (*smpi_fortran_entry_point_type)(); -static int smpi_run_entry_point(smpi_entry_point_type entry_point, std::vector args) +static int smpi_run_entry_point(smpi_entry_point_type entry_point, const std::string& executable_path, + std::vector args) { // copy C strings, we need them writable std::vector* args4argv = new std::vector(args.size()); std::transform(begin(args), end(args), begin(*args4argv), [](const std::string& s) { return xbt_strdup(s.c_str()); }); + // set argv[0] to executable_path + xbt_free((*args4argv)[0]); + (*args4argv)[0] = xbt_strdup(executable_path.c_str()); + #if !SMPI_IFORT // take a copy of args4argv to keep reference of the allocated strings const std::vector args2str(*args4argv); @@ -437,7 +442,7 @@ static int smpi_run_entry_point(smpi_entry_point_type entry_point, std::vectorpush_back(nullptr); char** argv = args4argv->data(); - simgrid::smpi::ActorExt::init(&argc, &argv); + simgrid::smpi::ActorExt::init(); #if SMPI_IFORT for_rtl_init_ (&argc, argv); #elif SMPI_FLANG @@ -539,7 +544,7 @@ static int visit_libs(struct dl_phdr_info* info, size_t, void* data) } #endif -static void smpi_init_privatization_dlopen(std::string executable) +static void smpi_init_privatization_dlopen(const std::string& executable) { // Prepare the copy of the binary (get its size) struct stat fdin_stat; @@ -575,7 +580,6 @@ static void smpi_init_privatization_dlopen(std::string executable) simix_global->default_function = [executable, fdin_size](std::vector args) { return std::function([executable, fdin_size, args] { - // Copy the dynamic library: std::string target_executable = executable + "_" + std::to_string(getpid()) + "_" + std::to_string(rank) + ".so"; @@ -628,12 +632,12 @@ static void smpi_init_privatization_dlopen(std::string executable) smpi_entry_point_type entry_point = smpi_resolve_function(handle); if (not entry_point) xbt_die("Could not resolve entry point"); - smpi_run_entry_point(entry_point, args); + smpi_run_entry_point(entry_point, executable, args); }); }; } -static void smpi_init_privatization_no_dlopen(std::string executable) +static void smpi_init_privatization_no_dlopen(const std::string& executable) { if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) smpi_prepare_global_memory_segment(); @@ -648,8 +652,9 @@ static void smpi_init_privatization_no_dlopen(std::string executable) smpi_backup_global_memory_segment(); // Execute the same entry point for each simulated process: - simix_global->default_function = [entry_point](std::vector args) { - return std::function([entry_point, args] { smpi_run_entry_point(entry_point, args); }); + simix_global->default_function = [entry_point, executable](std::vector args) { + return std::function( + [entry_point, executable, args] { smpi_run_entry_point(entry_point, executable, args); }); }; } diff --git a/src/smpi/internals/smpi_replay.cpp b/src/smpi/internals/smpi_replay.cpp index b4830ffcf0..627d061bf1 100644 --- a/src/smpi/internals/smpi_replay.cpp +++ b/src/smpi/internals/smpi_replay.cpp @@ -701,10 +701,12 @@ void AllToAllVAction::kernel(simgrid::xbt::ReplayAction& action) static std::unordered_map storage; /** @brief Only initialize the replay, don't do it for real */ -void smpi_replay_init(int* argc, char*** argv) +void smpi_replay_init(const char* instance_id, int rank, double start_delay_flops) { if (not smpi_process()->initializing()){ - simgrid::smpi::ActorExt::init(argc, argv); + simgrid::s4u::Actor::self()->set_property("instance_id", instance_id); + simgrid::s4u::Actor::self()->set_property("rank", std::to_string(rank)); + simgrid::smpi::ActorExt::init(); } smpi_process()->mark_as_initialized(); smpi_process()->set_replaying(true); @@ -743,10 +745,9 @@ void smpi_replay_init(int* argc, char*** argv) xbt_replay_action_register("compute", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::replay::ComputeAction().execute(action); }); //if we have a delayed start, sleep here. - if(*argc>2){ - double value = xbt_str_parse_double((*argv)[2], "%s is not a double"); - XBT_VERB("Delayed start for instance - Sleeping for %f flops ",value ); - smpi_execute_flops(value); + if (start_delay_flops > 0) { + XBT_VERB("Delayed start for instance - Sleeping for %f flops ", start_delay_flops); + smpi_execute_flops(start_delay_flops); } else { // Wait for the other actors to initialize also simgrid::s4u::this_actor::yield(); @@ -754,12 +755,13 @@ void smpi_replay_init(int* argc, char*** argv) } /** @brief actually run the replay after initialization */ -void smpi_replay_main(int* argc, char*** argv) +void smpi_replay_main(int rank, const char* trace_filename) { static int active_processes = 0; active_processes++; storage[simgrid::s4u::this_actor::get_pid()] = simgrid::smpi::replay::RequestStorage(); - simgrid::xbt::replay_runner(*argc, *argv); + std::string rank_string = std::to_string(rank); + simgrid::xbt::replay_runner(rank_string.c_str(), trace_filename); /* and now, finalize everything */ /* One active process will stop. Decrease the counter*/ @@ -794,8 +796,8 @@ void smpi_replay_main(int* argc, char*** argv) } /** @brief chain a replay initialization and a replay start */ -void smpi_replay_run(int* argc, char*** argv) +void smpi_replay_run(const char* instance_id, int rank, double start_delay_flops, const char* trace_filename) { - smpi_replay_init(argc, argv); - smpi_replay_main(argc, argv); + smpi_replay_init(instance_id, rank, start_delay_flops); + smpi_replay_main(rank, trace_filename); } diff --git a/src/smpi/smpirun.in b/src/smpi/smpirun.in index 858bc05cdd..5b2b05350a 100755 --- a/src/smpi/smpirun.in +++ b/src/smpi/smpirun.in @@ -424,8 +424,8 @@ do fi echo " - - " >> ${APPLICATIONTMP} + + " >> ${APPLICATIONTMP} if [ ${REPLAY} = 1 ]; then if [ ${NUMTRACES} -gt 1 ]; then echo " " >> ${APPLICATIONTMP} diff --git a/src/xbt/xbt_replay.cpp b/src/xbt/xbt_replay.cpp index 7ee69817d6..e782ab2c7a 100644 --- a/src/xbt/xbt_replay.cpp +++ b/src/xbt/xbt_replay.cpp @@ -55,7 +55,7 @@ bool ReplayReader::get(ReplayAction* action) return not fs->eof(); } -static ReplayAction* get_action(char* name) +static ReplayAction* get_action(const char* name) { ReplayAction* action; @@ -117,32 +117,31 @@ static void handle_action(ReplayAction& action) * @ingroup XBT_replay * @brief function used internally to actually run the replay */ -int replay_runner(int argc, char* argv[]) +int replay_runner(const char* actor_name, const char* trace_filename) { - if (simgrid::xbt::action_fs) { // A unique trace file + std::string actor_name_string(actor_name); + if (simgrid::xbt::action_fs) { // * myqueue = action_queues.at(std::string(argv[0])); + if (action_queues.find(actor_name_string) != action_queues.end()) { + std::queue* myqueue = action_queues.at(actor_name_string); delete myqueue; - action_queues.erase(std::string(argv[0])); + action_queues.erase(actor_name_string); } } else { // Should have got my trace file in argument + xbt_assert(trace_filename != nullptr); simgrid::xbt::ReplayAction evt; - xbt_assert(argc >= 2, "No '%s' agent function provided, no simulation-wide trace file provided, " - "and no process-wide trace file provided in deployment file. Aborting.", - argv[0]); - simgrid::xbt::ReplayReader reader(argv[1]); + simgrid::xbt::ReplayReader reader(trace_filename); while (reader.get(&evt)) { - if (evt.front().compare(argv[0]) == 0) { + if (evt.front().compare(actor_name) == 0) { simgrid::xbt::handle_action(evt); } else { - XBT_WARN("Ignore trace element not for me"); + XBT_WARN("Ignore trace element not for me (target='%s', I am '%s')", evt.front().c_str(), actor_name); } evt.clear(); } diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index 5353dc066d..8b92768ff5 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -1013,7 +1013,6 @@ set(CMAKEFILES_TXT examples/smpi/smpi_msg_masterslave/CMakeLists.txt examples/smpi/replay_multiple/CMakeLists.txt examples/smpi/replay_multiple_manual_deploy/CMakeLists.txt - examples/smpi/load_balancer_replay/CMakeLists.txt examples/smpi/energy/f77/CMakeLists.txt examples/smpi/energy/f90/CMakeLists.txt