From: SUTER Frederic Date: Mon, 17 May 2021 08:23:13 +0000 (+0200) Subject: SIMIX_run becomes EngineImpl::run X-Git-Tag: v3.28~277 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/b94ef9f53ddfc2ac1f56fee3e5b1962da78cafe9 SIMIX_run becomes EngineImpl::run --- diff --git a/include/simgrid/simix.h b/include/simgrid/simix.h index e69bb74621..89daa7025e 100644 --- a/include/simgrid/simix.h +++ b/include/simgrid/simix.h @@ -45,7 +45,7 @@ XBT_PUBLIC void SIMIX_global_init(int* argc, char** argv); XBT_PUBLIC void SIMIX_set_maestro(void (*code)(void*), void* data); /* Simulation execution */ -XBT_PUBLIC void SIMIX_run(); +XBT_ATTRIB_DEPRECATED_v332("Please use EngineImpl:run()") XBT_PUBLIC void SIMIX_run(); XBT_PUBLIC double SIMIX_get_clock(); XBT_ATTRIB_DEPRECATED_v329("Please use simgrid::kernel::timer::Timer::set()") XBT_PUBLIC smx_timer_t diff --git a/src/kernel/EngineImpl.cpp b/src/kernel/EngineImpl.cpp index 0635354b61..b119614040 100644 --- a/src/kernel/EngineImpl.cpp +++ b/src/kernel/EngineImpl.cpp @@ -4,18 +4,27 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "src/kernel/EngineImpl.hpp" +#include "mc/mc.h" #include "simgrid/Exception.hpp" #include "simgrid/kernel/routing/NetPoint.hpp" #include "simgrid/kernel/routing/NetZoneImpl.hpp" #include "simgrid/s4u/Host.hpp" +#include "simgrid/sg_config.hpp" +#include "src/include/surf/surf.hpp" //get_clock() and surf_solve() #include "src/kernel/resource/DiskImpl.hpp" +#include "src/mc/mc_record.hpp" +#include "src/mc/mc_replay.hpp" #include "src/simix/smx_private.hpp" #include "src/surf/network_interface.hpp" #include "src/surf/xml/platf.hpp" // FIXME: KILLME. There must be a better way than mimicking XML here +XBT_LOG_NEW_DEFAULT_CATEGORY(ker_engine, "Logging specific to Engine (kernel)"); + namespace simgrid { namespace kernel { +config::Flag cfg_breakpoint{"debug/breakpoint", + "When non-negative, raise a SIGTRAP after given (simulated) time", -1.0}; EngineImpl::~EngineImpl() { /* Since hosts_ is a std::map, the hosts are destroyed in the lexicographic order, which ensures that the output is @@ -67,5 +76,163 @@ void EngineImpl::add_model(std::shared_ptr model, const std::ve models_prio_[model_name] = std::move(model); } +void EngineImpl::run() +{ + if (MC_record_replay_is_active()) { + mc::replay(MC_record_path()); + return; + } + + double time = 0; + + do { + XBT_DEBUG("New Schedule Round; size(queue)=%zu", simix_global->actors_to_run.size()); + + if (cfg_breakpoint >= 0.0 && surf_get_clock() >= cfg_breakpoint) { + XBT_DEBUG("Breakpoint reached (%g)", cfg_breakpoint.get()); + cfg_breakpoint = -1.0; +#ifdef SIGTRAP + std::raise(SIGTRAP); +#else + std::raise(SIGABRT); +#endif + } + + simix_global->execute_tasks(); + + while (not simix_global->actors_to_run.empty()) { + XBT_DEBUG("New Sub-Schedule Round; size(queue)=%zu", simix_global->actors_to_run.size()); + + /* Run all processes that are ready to run, possibly in parallel */ + simix_global->run_all_actors(); + + /* answer sequentially and in a fixed arbitrary order all the simcalls that were issued during that sub-round */ + + /* WARNING, the order *must* be fixed or you'll jeopardize the simulation reproducibility (see RR-7653) */ + + /* Here, the order is ok because: + * + * Short proof: only maestro adds stuff to the actors_to_run array, so the execution order of user contexts do + * not impact its order. + * + * Long proof: actors remain sorted through an arbitrary (implicit, complex but fixed) order in all cases. + * + * - if there is no kill during the simulation, actors remain sorted according by their PID. + * Rationale: This can be proved inductively. + * Assume that actors_to_run is sorted at a beginning of one round (it is at round 0: the deployment file + * is parsed linearly). + * Let's show that it is still so at the end of this round. + * - if an actor is added when being created, that's from maestro. It can be either at startup + * time (and then in PID order), or in response to a process_create simcall. Since simcalls are handled + * in arbitrary order (inductive hypothesis), we are fine. + * - If an actor is added because it's getting killed, its subsequent actions shouldn't matter + * - If an actor gets added to actors_to_run because one of their blocking action constituting the meat + * of a simcall terminates, we're still good. Proof: + * - You are added from ActorImpl::simcall_answer() only. When this function is called depends on the + * resource kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications + * as an example. + * - For communications, this function is called from SIMIX_comm_finish(). + * This function itself don't mess with the order since simcalls are handled in FIFO order. + * The function is called: + * - before the comm starts (invalid parameters, or resource already dead or whatever). + * The order then trivial holds since maestro didn't interrupt its handling of the simcall yet + * - because the communication failed or were canceled after startup. In this case, it's called from + * the function we are in, by the chunk: + * set = model->states.failed_action_set; + * while ((synchro = extract(set))) + * SIMIX_simcall_post((smx_synchro_t) synchro->data); + * This order is also fixed because it depends of the order in which the surf actions were + * added to the system, and only maestro can add stuff this way, through simcalls. + * We thus use the inductive hypothesis once again to conclude that the order in which synchros are + * popped out of the set does not depend on the user code's execution order. + * - because the communication terminated. In this case, synchros are served in the order given by + * set = model->states.done_action_set; + * while ((synchro = extract(set))) + * SIMIX_simcall_post((smx_synchro_t) synchro->data); + * and the argument is very similar to the previous one. + * So, in any case, the orders of calls to CommImpl::finish() do not depend on the order in which user + * actors are executed. + * So, in any cases, the orders of actors within actors_to_run do not depend on the order in which + * user actors were executed previously. + * So, if there is no killing in the simulation, the simulation reproducibility is not jeopardized. + * - If there is some actor killings, the order is changed by this decision that comes from user-land + * But this decision may not have been motivated by a situation that were different because the simulation is + * not reproducible. + * So, even the order change induced by the actor killing is perfectly reproducible. + * + * So science works, bitches [http://xkcd.com/54/]. + * + * We could sort the actors_that_ran array completely so that we can describe the order in which simcalls are + * handled (like "according to the PID of issuer"), but it's not mandatory (order is fixed already even if + * unfriendly). + * That would thus be a pure waste of time. + */ + + for (auto const& actor : simix_global->actors_that_ran) { + if (actor->simcall_.call_ != simix::Simcall::NONE) { + actor->simcall_handle(0); + } + } + + simix_global->execute_tasks(); + do { + simix_global->wake_all_waiting_actors(); + } while (simix_global->execute_tasks()); + + /* If only daemon processes remain, cancel their actions, mark them to die and reschedule them */ + if (simix_global->process_list.size() == simix_global->daemons.size()) + for (auto const& dmon : simix_global->daemons) { + XBT_DEBUG("Kill %s", dmon->get_cname()); + simix_global->maestro_->kill(dmon); + } + } + + time = timer::Timer::next(); + if (time > -1.0 || not simix_global->process_list.empty()) { + XBT_DEBUG("Calling surf_solve"); + time = surf_solve(time); + XBT_DEBUG("Moving time ahead : %g", time); + } + + /* Notify all the hosts that have failed */ + /* FIXME: iterate through the list of failed host and mark each of them */ + /* as failed. On each host, signal all the running processes with host_fail */ + + // Execute timers and tasks until there isn't anything to be done: + bool again = false; + do { + again = timer::Timer::execute_all(); + if (simix_global->execute_tasks()) + again = true; + simix_global->wake_all_waiting_actors(); + } while (again); + + /* Clean actors to destroy */ + simix_global->empty_trash(); + + XBT_DEBUG("### time %f, #processes %zu, #to_run %zu", time, simix_global->process_list.size(), + simix_global->actors_to_run.size()); + + if (time < 0. && simix_global->actors_to_run.empty() && not simix_global->process_list.empty()) { + if (simix_global->process_list.size() <= simix_global->daemons.size()) { + XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) " + "once the simulation is over. Please fix your on_exit() functions."); + } else { + XBT_CRITICAL("Oops! Deadlock or code not perfectly clean."); + } + simix_global->display_all_actor_status(); + simgrid::s4u::Engine::on_deadlock(); + for (auto const& kv : simix_global->process_list) { + XBT_DEBUG("Kill %s", kv.second->get_cname()); + simix_global->maestro_->kill(kv.second); + } + } + } while (time > -1.0 || not simix_global->actors_to_run.empty()); + + if (not simix_global->process_list.empty()) + THROW_IMPOSSIBLE; + + simgrid::s4u::Engine::on_simulation_end(); +} } // namespace kernel } // namespace simgrid diff --git a/src/kernel/EngineImpl.hpp b/src/kernel/EngineImpl.hpp index 69963c84db..151c9ae99a 100644 --- a/src/kernel/EngineImpl.hpp +++ b/src/kernel/EngineImpl.hpp @@ -62,6 +62,9 @@ public: else return res->second; } + + /** @brief Run the main simulation loop. */ + void run(); }; } // namespace kernel diff --git a/src/s4u/s4u_Engine.cpp b/src/s4u/s4u_Engine.cpp index 6f68977e20..8a083e16a9 100644 --- a/src/s4u/s4u_Engine.cpp +++ b/src/s4u/s4u_Engine.cpp @@ -318,7 +318,7 @@ void Engine::run() const if (MC_is_active()) { MC_run(); } else { - SIMIX_run(); + pimpl->run(); } } diff --git a/src/simix/smx_global.cpp b/src/simix/smx_global.cpp index 6afc6430b8..618ee85353 100644 --- a/src/simix/smx_global.cpp +++ b/src/simix/smx_global.cpp @@ -256,8 +256,6 @@ void Global::display_all_actor_status() const } } -config::Flag cfg_breakpoint{"debug/breakpoint", - "When non-negative, raise a SIGTRAP after given (simulated) time", -1.0}; } // namespace simix } // namespace simgrid @@ -388,167 +386,9 @@ double SIMIX_get_clock() } } -/** - * @ingroup SIMIX_API - * @brief Run the main simulation loop. - */ -void SIMIX_run() +void SIMIX_run() // XBT_ATTRIB_DEPRECATED_v332 { - if (MC_record_replay_is_active()) { - simgrid::mc::replay(MC_record_path()); - return; - } - - double time = 0; - - do { - XBT_DEBUG("New Schedule Round; size(queue)=%zu", simix_global->actors_to_run.size()); - - if (simgrid::simix::cfg_breakpoint >= 0.0 && surf_get_clock() >= simgrid::simix::cfg_breakpoint) { - XBT_DEBUG("Breakpoint reached (%g)", simgrid::simix::cfg_breakpoint.get()); - simgrid::simix::cfg_breakpoint = -1.0; -#ifdef SIGTRAP - std::raise(SIGTRAP); -#else - std::raise(SIGABRT); -#endif - } - - simix_global->execute_tasks(); - - while (not simix_global->actors_to_run.empty()) { - XBT_DEBUG("New Sub-Schedule Round; size(queue)=%zu", simix_global->actors_to_run.size()); - - /* Run all processes that are ready to run, possibly in parallel */ - simix_global->run_all_actors(); - - /* answer sequentially and in a fixed arbitrary order all the simcalls that were issued during that sub-round */ - - /* WARNING, the order *must* be fixed or you'll jeopardize the simulation reproducibility (see RR-7653) */ - - /* Here, the order is ok because: - * - * Short proof: only maestro adds stuff to the actors_to_run array, so the execution order of user contexts do - * not impact its order. - * - * Long proof: actors remain sorted through an arbitrary (implicit, complex but fixed) order in all cases. - * - * - if there is no kill during the simulation, actors remain sorted according by their PID. - * Rationale: This can be proved inductively. - * Assume that actors_to_run is sorted at a beginning of one round (it is at round 0: the deployment file - * is parsed linearly). - * Let's show that it is still so at the end of this round. - * - if an actor is added when being created, that's from maestro. It can be either at startup - * time (and then in PID order), or in response to a process_create simcall. Since simcalls are handled - * in arbitrary order (inductive hypothesis), we are fine. - * - If an actor is added because it's getting killed, its subsequent actions shouldn't matter - * - If an actor gets added to actors_to_run because one of their blocking action constituting the meat - * of a simcall terminates, we're still good. Proof: - * - You are added from ActorImpl::simcall_answer() only. When this function is called depends on the - * resource kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications - * as an example. - * - For communications, this function is called from SIMIX_comm_finish(). - * This function itself don't mess with the order since simcalls are handled in FIFO order. - * The function is called: - * - before the comm starts (invalid parameters, or resource already dead or whatever). - * The order then trivial holds since maestro didn't interrupt its handling of the simcall yet - * - because the communication failed or were canceled after startup. In this case, it's called from - * the function we are in, by the chunk: - * set = model->states.failed_action_set; - * while ((synchro = extract(set))) - * SIMIX_simcall_post((smx_synchro_t) synchro->data); - * This order is also fixed because it depends of the order in which the surf actions were - * added to the system, and only maestro can add stuff this way, through simcalls. - * We thus use the inductive hypothesis once again to conclude that the order in which synchros are - * popped out of the set does not depend on the user code's execution order. - * - because the communication terminated. In this case, synchros are served in the order given by - * set = model->states.done_action_set; - * while ((synchro = extract(set))) - * SIMIX_simcall_post((smx_synchro_t) synchro->data); - * and the argument is very similar to the previous one. - * So, in any case, the orders of calls to CommImpl::finish() do not depend on the order in which user - * actors are executed. - * So, in any cases, the orders of actors within actors_to_run do not depend on the order in which - * user actors were executed previously. - * So, if there is no killing in the simulation, the simulation reproducibility is not jeopardized. - * - If there is some actor killings, the order is changed by this decision that comes from user-land - * But this decision may not have been motivated by a situation that were different because the simulation is - * not reproducible. - * So, even the order change induced by the actor killing is perfectly reproducible. - * - * So science works, bitches [http://xkcd.com/54/]. - * - * We could sort the actors_that_ran array completely so that we can describe the order in which simcalls are - * handled (like "according to the PID of issuer"), but it's not mandatory (order is fixed already even if - * unfriendly). - * That would thus be a pure waste of time. - */ - - for (auto const& actor : simix_global->actors_that_ran) { - if (actor->simcall_.call_ != simgrid::simix::Simcall::NONE) { - actor->simcall_handle(0); - } - } - - simix_global->execute_tasks(); - do { - simix_global->wake_all_waiting_actors(); - } while (simix_global->execute_tasks()); - - /* If only daemon processes remain, cancel their actions, mark them to die and reschedule them */ - if (simix_global->process_list.size() == simix_global->daemons.size()) - for (auto const& dmon : simix_global->daemons) { - XBT_DEBUG("Kill %s", dmon->get_cname()); - simix_global->maestro_->kill(dmon); - } - } - - time = simgrid::kernel::timer::Timer::next(); - if (time > -1.0 || not simix_global->process_list.empty()) { - XBT_DEBUG("Calling surf_solve"); - time = surf_solve(time); - XBT_DEBUG("Moving time ahead : %g", time); - } - - /* Notify all the hosts that have failed */ - /* FIXME: iterate through the list of failed host and mark each of them */ - /* as failed. On each host, signal all the running processes with host_fail */ - - // Execute timers and tasks until there isn't anything to be done: - bool again = false; - do { - again = simgrid::kernel::timer::Timer::execute_all(); - if (simix_global->execute_tasks()) - again = true; - simix_global->wake_all_waiting_actors(); - } while (again); - - /* Clean actors to destroy */ - simix_global->empty_trash(); - - XBT_DEBUG("### time %f, #processes %zu, #to_run %zu", time, simix_global->process_list.size(), - simix_global->actors_to_run.size()); - - if (time < 0. && simix_global->actors_to_run.empty() && not simix_global->process_list.empty()) { - if (simix_global->process_list.size() <= simix_global->daemons.size()) { - XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) " - "once the simulation is over. Please fix your on_exit() functions."); - } else { - XBT_CRITICAL("Oops! Deadlock or code not perfectly clean."); - } - simix_global->display_all_actor_status(); - simgrid::s4u::Engine::on_deadlock(); - for (auto const& kv : simix_global->process_list) { - XBT_DEBUG("Kill %s", kv.second->get_cname()); - simix_global->maestro_->kill(kv.second); - } - } - } while (time > -1.0 || not simix_global->actors_to_run.empty()); - - if (not simix_global->process_list.empty()) - THROW_IMPOSSIBLE; - - simgrid::s4u::Engine::on_simulation_end(); + simgrid::kernel::EngineImpl::get_instance()->run(); } double SIMIX_timer_next() // XBT_ATTRIB_DEPRECATED_v329 diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 1f2f20c601..ba06a81e18 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -4,12 +4,13 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "mc/mc.h" -#include "simgrid/s4u/Engine.hpp" #include "simgrid/plugins/file_system.h" +#include "simgrid/s4u/Engine.hpp" #include "smpi_coll.hpp" +#include "smpi_config.hpp" #include "smpi_f2c.hpp" #include "smpi_host.hpp" -#include "smpi_config.hpp" +#include "src/kernel/EngineImpl.hpp" #include "src/kernel/activity/CommImpl.hpp" #include "src/simix/smx_private.hpp" #include "src/smpi/include/smpi_actor.hpp" @@ -564,7 +565,7 @@ int smpi_main(const char* executable, int argc, char* argv[]) if (MC_is_active()) { MC_run(); } else { - SIMIX_run(); + simgrid::kernel::EngineImpl::get_instance()->run(); xbt_os_walltimer_stop(global_timer); simgrid::smpi::utils::print_time_analysis(xbt_os_timer_elapsed(global_timer));