X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/76d4849864c227687e17bdd93c5b1338e9b4cb50..617d14f39e028a3bf39fc8e0ce691ed2e709bc8e:/src/kernel/EngineImpl.cpp diff --git a/src/kernel/EngineImpl.cpp b/src/kernel/EngineImpl.cpp index be693d6f5e..6572a174e4 100644 --- a/src/kernel/EngineImpl.cpp +++ b/src/kernel/EngineImpl.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-2021. The SimGrid Team. All rights reserved. */ +/* Copyright (c) 2016-2022. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ @@ -41,24 +41,6 @@ config::Flag cfg_breakpoint{"debug/breakpoint", "When non-negative, raise a SIGTRAP after given (simulated) time", -1.0}; config::Flag cfg_verbose_exit{"debug/verbose-exit", "Display the actor status at exit", true}; -xbt_dynar_t get_actors_addr() -{ -#if SIMGRID_HAVE_MC - return EngineImpl::get_instance()->get_actors_vector(); -#else - xbt_die("This function is intended to be used when compiling with MC"); -#endif -} - -xbt_dynar_t get_dead_actors_addr() -{ -#if SIMGRID_HAVE_MC - return EngineImpl::get_instance()->get_dead_actors_vector(); -#else - xbt_die("This function is intended to be used when compiling with MC"); -#endif -} - constexpr std::initializer_list> context_factories = { #if HAVE_RAW_CONTEXTS {"raw", &context::raw_factory}, @@ -200,10 +182,17 @@ EngineImpl::~EngineImpl() for (auto const& kv : mailboxes_) delete kv.second; - /* Free the remaining data structures */ + /* Kill all actors (but maestro) */ + maestro_->kill_all(); + run_all_actors(); + empty_trash(); + + delete maestro_; + delete context_factory_; + + /* Free the remaining data structures */ #if SIMGRID_HAVE_MC xbt_dynar_free(&actors_vector_); - xbt_dynar_free(&dead_actors_vector_); #endif /* clear models before freeing handle, network models can use external callback defined in the handle */ models_prio_.clear(); @@ -216,9 +205,9 @@ void EngineImpl::initialize(int* argc, char** argv) EngineImpl::instance_ = this; #if SIMGRID_HAVE_MC // The communication initialization is done ASAP, as we need to get some init parameters from the MC for different - // layers. But simix_global needs to be created, as we send the address of some of its fields to the MC that wants to + // layers. But instance_ needs to be created, as we send the address of some of its fields to the MC that wants to // read them directly. - simgrid::mc::AppSide::initialize(); + simgrid::mc::AppSide::initialize(actors_vector_); #endif if (xbt_initialized == 0) { @@ -323,18 +312,6 @@ void EngineImpl::shutdown() xbt_die("Bailing out to avoid that stop-before-start madness. Please fix your code."); } - /* Kill all actors (but maestro) */ - instance_->maestro_->kill_all(); - instance_->run_all_actors(); - instance_->empty_trash(); - - /* Let's free maestro now */ - delete instance_->maestro_; - instance_->maestro_ = nullptr; - - /* Finish context module and SURF */ - instance_->destroy_context_factory(); - while (not timer::kernel_timers().empty()) { delete timer::kernel_timers().top().second; timer::kernel_timers().pop(); @@ -349,6 +326,12 @@ void EngineImpl::shutdown() void EngineImpl::seal_platform() const { + /* Seal only once */ + static bool sealed = false; + if (sealed) + return; + sealed = true; + /* sealing resources before run: links */ for (auto const& kv : links_) kv.second->get_iface()->seal(); @@ -359,7 +342,7 @@ void EngineImpl::seal_platform() const void EngineImpl::load_platform(const std::string& platf) { double start = xbt_os_time(); - if (boost::algorithm::ends_with(platf, ".so") or boost::algorithm::ends_with(platf, ".dylib")) { + if (boost::algorithm::ends_with(platf, ".so") || boost::algorithm::ends_with(platf, ".dylib")) { #ifdef _WIN32 xbt_die("loading platform through shared library isn't supported on windows"); #else @@ -419,15 +402,14 @@ void EngineImpl::add_split_duplex_link(const std::string& name, std::unique_ptr< } /** Wake up all actors waiting for a Surf action to finish */ -void EngineImpl::wake_all_waiting_actors() const +void EngineImpl::handle_ended_actions() const { for (auto const& model : models_) { XBT_DEBUG("Handling the failed actions (if any)"); while (auto* action = model->extract_failed_action()) { XBT_DEBUG(" Handling Action %p", action); - if (action->get_activity() != nullptr) { - // If nobody told the interface that the activity has failed, that's because no actor waits on it (maestro - // started it). SimDAG I see you! + if (action->get_activity() != nullptr) { // Skip vcpu actions + // Action failures are not automatically reported when the action is started by maestro (as in SimDAG) if (action->get_activity()->get_actor() == maestro_) action->get_activity()->get_iface()->complete(s4u::Activity::State::FAILED); @@ -437,11 +419,8 @@ void EngineImpl::wake_all_waiting_actors() const XBT_DEBUG("Handling the terminated actions (if any)"); while (auto* action = model->extract_done_action()) { XBT_DEBUG(" Handling Action %p", action); - if (action->get_activity() == nullptr) - XBT_DEBUG("probably vcpu's action %p, skip", action); - else { - // If nobody told the interface that the activity is finished, that's because no actor waits on it (maestro - // started it). SimDAG I see you! + if (action->get_activity() != nullptr) { + // Action termination are not automatically reported when the action is started by maestro (as in SimDAG) action->get_activity()->set_finish_time(action->get_finish_time()); if (action->get_activity()->get_actor() == maestro_) @@ -461,7 +440,11 @@ void EngineImpl::wake_all_waiting_actors() const */ void EngineImpl::run_all_actors() { - instance_->get_context_factory()->run_all(); + instance_->get_context_factory()->run_all(actors_to_run_); + + for (auto const& actor : actors_to_run_) + if (actor->to_be_freed()) + actor->cleanup_from_kernel(); actors_to_run_.swap(actors_that_ran_); actors_to_run_.clear(); @@ -473,32 +456,7 @@ actor::ActorImpl* EngineImpl::get_actor_by_pid(aid_t pid) if (item != actor_list_.end()) return item->second; - // Search the trash - for (auto& a : actors_to_destroy_) - if (a.get_pid() == pid) - return &a; - return nullptr; // Not found, even in the trash -} - -/** Execute all the tasks that are queued, e.g. `.then()` callbacks of futures. */ -bool EngineImpl::execute_tasks() -{ - if (tasks.empty()) - return false; - - std::vector> tasksTemp; - do { - // We don't want the callbacks to modify the vector we are iterating over: - tasks.swap(tasksTemp); - - // Execute all the queued tasks: - for (auto& task : tasksTemp) - task(); - - tasksTemp.clear(); - } while (not tasks.empty()); - - return true; + return nullptr; // Not found } void EngineImpl::remove_daemon(actor::ActorImpl* actor) @@ -531,9 +489,6 @@ void EngineImpl::empty_trash() XBT_DEBUG("Getting rid of %s (refcount: %d)", actor->get_cname(), actor->get_refcount()); intrusive_ptr_release(actor); } -#if SIMGRID_HAVE_MC - xbt_dynar_reset(dead_actors_vector_); -#endif } void EngineImpl::display_all_actor_status() const @@ -542,7 +497,7 @@ void EngineImpl::display_all_actor_status() const /* List the actors and their state */ XBT_INFO("Legend of the following listing: \"Actor (@): \""); for (auto const& kv : actor_list_) { - actor::ActorImpl* actor = kv.second; + const actor::ActorImpl* actor = kv.second; if (actor->waiting_synchro_) { const char* synchro_description = "unknown"; @@ -556,19 +511,19 @@ void EngineImpl::display_all_actor_status() const if (boost::dynamic_pointer_cast(actor->waiting_synchro_) != nullptr) synchro_description = "sleeping"; - if (boost::dynamic_pointer_cast(actor->waiting_synchro_) != nullptr) + if (boost::dynamic_pointer_cast(actor->waiting_synchro_) != nullptr) synchro_description = "synchronization"; if (boost::dynamic_pointer_cast(actor->waiting_synchro_) != nullptr) synchro_description = "I/O"; - XBT_INFO("Actor %ld (%s@%s): waiting for %s activity %#zx (%s) in state %d to finish", actor->get_pid(), + XBT_INFO("Actor %ld (%s@%s): waiting for %s activity %#zx (%s) in state %s to finish", actor->get_pid(), actor->get_cname(), actor->get_host()->get_cname(), synchro_description, (xbt_log_no_loc ? (size_t)0xDEADBEEF : (size_t)actor->waiting_synchro_.get()), - actor->waiting_synchro_->get_cname(), (int)actor->waiting_synchro_->state_); + actor->waiting_synchro_->get_cname(), actor->waiting_synchro_->get_state_str()); } else { XBT_INFO("Actor %ld (%s@%s) simcall %s", actor->get_pid(), actor->get_cname(), actor->get_host()->get_cname(), - SIMIX_simcall_name(actor->simcall_)); + actor->simcall_.get_cname()); } } } @@ -700,8 +655,17 @@ void EngineImpl::run(double max_date) { seal_platform(); + if (MC_is_active()) { +#if SIMGRID_HAVE_MC + mc::AppSide::get()->main_loop(); +#else + xbt_die("MC_is_active() is not supposed to return true in non-MC settings"); +#endif + THROW_IMPOSSIBLE; // main_loop never returns + } + if (MC_record_replay_is_active()) { - mc::replay(MC_record_path()); + mc::RecordTrace::replay(MC_record_path()); empty_trash(); return; } @@ -714,7 +678,7 @@ void EngineImpl::run(double max_date) if (cfg_breakpoint >= 0.0 && simgrid_get_clock() >= cfg_breakpoint) { XBT_DEBUG("Breakpoint reached (%g)", cfg_breakpoint.get()); - cfg_breakpoint = -1.0; + cfg_breakpoint = -1.0; // Let the simulation continue without hiting the breakpoint again and again #ifdef SIGTRAP std::raise(SIGTRAP); #else @@ -722,86 +686,22 @@ void EngineImpl::run(double max_date) #endif } - execute_tasks(); - while (not actors_to_run_.empty()) { XBT_DEBUG("New Sub-Schedule Round; size(queue)=%zu", actors_to_run_.size()); /* Run all actors that are ready to run, possibly in parallel */ run_all_actors(); - /* answer sequentially and in a fixed arbitrary order all the simcalls that were issued during that sub-round */ - - /* WARNING, the order *must* be fixed or you'll jeopardize the simulation reproducibility (see RR-7653) */ - - /* Here, the order is ok because: - * - * Short proof: only maestro adds stuff to the actors_to_run array, so the execution order of user contexts do - * not impact its order. - * - * Long proof: actors remain sorted through an arbitrary (implicit, complex but fixed) order in all cases. - * - * - if there is no kill during the simulation, actors remain sorted according by their PID. - * Rationale: This can be proved inductively. - * Assume that actors_to_run is sorted at a beginning of one round (it is at round 0: the deployment file - * is parsed linearly). - * Let's show that it is still so at the end of this round. - * - if an actor is added when being created, that's from maestro. It can be either at startup - * time (and then in PID order), or in response to a process_create simcall. Since simcalls are handled - * in arbitrary order (inductive hypothesis), we are fine. - * - If an actor is added because it's getting killed, its subsequent actions shouldn't matter - * - If an actor gets added to actors_to_run because one of their blocking action constituting the meat - * of a simcall terminates, we're still good. Proof: - * - You are added from ActorImpl::simcall_answer() only. When this function is called depends on the - * resource kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications - * as an example. - * - For communications, this function is called from CommImpl::finish(). - * This function itself don't mess with the order since simcalls are handled in FIFO order. - * The function is called: - * - before the comm starts (invalid parameters, or resource already dead or whatever). - * The order then trivial holds since maestro didn't interrupt its handling of the simcall yet - * - because the communication failed or were canceled after startup. In this case, it's called from - * the function we are in, by the chunk: - * set = model->states.failed_action_set; - * while ((synchro = extract(set))) - * SIMIX_simcall_post((smx_synchro_t) synchro->data); - * This order is also fixed because it depends of the order in which the surf actions were - * added to the system, and only maestro can add stuff this way, through simcalls. - * We thus use the inductive hypothesis once again to conclude that the order in which synchros are - * popped out of the set does not depend on the user code's execution order. - * - because the communication terminated. In this case, synchros are served in the order given by - * set = model->states.done_action_set; - * while ((synchro = extract(set))) - * SIMIX_simcall_post((smx_synchro_t) synchro->data); - * and the argument is very similar to the previous one. - * So, in any case, the orders of calls to CommImpl::finish() do not depend on the order in which user - * actors are executed. - * So, in any cases, the orders of actors within actors_to_run do not depend on the order in which - * user actors were executed previously. - * So, if there is no killing in the simulation, the simulation reproducibility is not jeopardized. - * - If there is some actor killings, the order is changed by this decision that comes from user-land - * But this decision may not have been motivated by a situation that were different because the simulation is - * not reproducible. - * So, even the order change induced by the actor killing is perfectly reproducible. - * - * So science works, bitches [http://xkcd.com/54/]. - * - * We could sort the actors_that_ran array completely so that we can describe the order in which simcalls are - * handled (like "according to the PID of issuer"), but it's not mandatory (order is fixed already even if - * unfriendly). - * That would thus be a pure waste of time. + /* answer sequentially and in a fixed arbitrary order all the simcalls that were issued during that sub-round. + * The order must be fixed for the simulation to be reproducible (see RR-7653). It's OK here because only maestro + * changes the list. Killer actors are moved to the end to let victims finish their simcall before dying, but + * the order remains reproducible (even if arbitrarily). No need to sort the vector for sake of reproducibility. */ - - for (auto const& actor : actors_that_ran_) { - if (actor->simcall_.call_ != simix::Simcall::NONE) { + for (auto const& actor : actors_that_ran_) + if (actor->simcall_.call_ != actor::Simcall::Type::NONE) actor->simcall_handle(0); - } - } - execute_tasks(); - do { - wake_all_waiting_actors(); - } while (execute_tasks()); + handle_ended_actions(); /* If only daemon actors remain, cancel their actions, mark them to die and reschedule them */ if (actor_list_.size() == daemons_.size()) @@ -824,17 +724,11 @@ void EngineImpl::run(double max_date) elapsed_time = solve(next_time); XBT_DEBUG("Moving time ahead. NOW=%g; elapsed: %g", NOW, elapsed_time); - /* Notify all the hosts that have failed */ - /* FIXME: iterate through the list of failed host and mark each of them */ - /* as failed. On each host, signal all the running actors with host_fail */ - - // Execute timers and tasks until there isn't anything to be done: + // Execute timers until there isn't anything to be done: bool again = false; do { again = timer::Timer::execute_all(); - if (execute_tasks()) - again = true; - wake_all_waiting_actors(); + handle_ended_actions(); } while (again); /* Clean actors to destroy */ @@ -857,7 +751,6 @@ void EngineImpl::run(double max_date) maestro_->kill(kv.second); } } - } while ((vetoed_activities == nullptr || vetoed_activities->empty()) && ((elapsed_time > -1.0 && not double_equals(max_date, NOW, 0.00001)) || has_actors_to_run())); @@ -873,8 +766,3 @@ double EngineImpl::get_clock() } } // namespace kernel } // namespace simgrid - -void SIMIX_run() // XBT_ATTRIB_DEPRECATED_v332 -{ - simgrid::kernel::EngineImpl::get_instance()->run(-1); -}