From: Martin Quinson Date: Wed, 6 May 2020 20:27:27 +0000 (+0200) Subject: the on_exit() of each actor is also executed when the simulation deadlocks X-Git-Tag: v3.26~629 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/57b4c9b9a53ac467c4b41700cba7f0dce1d4c445 the on_exit() of each actor is also executed when the simulation deadlocks --- diff --git a/ChangeLog b/ChangeLog index 6b6b66c586..6b426d9558 100644 --- a/ChangeLog +++ b/ChangeLog @@ -17,6 +17,9 @@ Important user-visible changes: these values if needed. Network and routing models already providing separate loopbacks (clusters and such) are not impacted by this change. +S4U: + - the on_exit() of each actor is also executed when the simulation deadlocks. + General: - LTO is now enabled for Intel/clang compilers. - LTO behavior on GCC can be parameterized using LTO_EXTRA_FLAG in cmake. diff --git a/examples/s4u/actor-exiting/s4u-actor-exiting.cpp b/examples/s4u/actor-exiting/s4u-actor-exiting.cpp index dfd0f80aef..aa2215bea5 100644 --- a/examples/s4u/actor-exiting/s4u-actor-exiting.cpp +++ b/examples/s4u/actor-exiting/s4u-actor-exiting.cpp @@ -40,12 +40,32 @@ static void actor_a() // Register a lambda function to be executed once it stops simgrid::s4u::this_actor::on_exit([](bool /*failed*/) { XBT_INFO("I stop now"); }); - simgrid::s4u::this_actor::execute(1e9); + simgrid::s4u::this_actor::sleep_for(1); } static void actor_b() { - simgrid::s4u::this_actor::execute(2e9); + simgrid::s4u::this_actor::sleep_for(2); +} + +static void actor_c() +{ + // Register a lambda function to be executed once it stops + simgrid::s4u::this_actor::on_exit([](bool failed) { + if (failed) { + XBT_INFO("I was killed!"); + if (xbt_log_no_loc) + XBT_INFO("The backtrace would be displayed here if --log=no_loc would not have been passed"); + else + xbt_backtrace_display_current(); + } else + XBT_INFO("Exiting gracefully."); + }); + + simgrid::s4u::this_actor::sleep_for(3); + XBT_INFO("And now, induce 
a deadlock by waiting for a message that will never come\n\n"); + simgrid::s4u::Mailbox::by_name("nobody")->get(); + xbt_die("Receiving is not supposed to succeed when nobody is sending"); } int main(int argc, char* argv[]) @@ -65,6 +85,7 @@ int main(int argc, char* argv[]) /* Create some actors */ simgrid::s4u::Actor::create("A", simgrid::s4u::Host::by_name("Tremblay"), actor_a); simgrid::s4u::Actor::create("B", simgrid::s4u::Host::by_name("Fafard"), actor_b); + simgrid::s4u::Actor::create("C", simgrid::s4u::Host::by_name("Ginette"), actor_c); e.run(); /* - Run the simulation */ diff --git a/examples/s4u/actor-exiting/s4u-actor-exiting.tesh b/examples/s4u/actor-exiting/s4u-actor-exiting.tesh index e59ceb8e02..dbbf4db1c0 100644 --- a/examples/s4u/actor-exiting/s4u-actor-exiting.tesh +++ b/examples/s4u/actor-exiting/s4u-actor-exiting.tesh @@ -1,8 +1,19 @@ #!/usr/bin/env tesh -$ ${bindir:=.}/s4u-actor-exiting ${platfdir}/small_platform.xml "--log=root.fmt:[%10.6r]%e(%P@%h)%e%m%n" -> [ 10.194200] (A@Tremblay) I stop now -> [ 10.194200] (maestro@) Actor A terminates now -> [ 26.213694] (maestro@) Actor A gets destroyed now -> [ 26.213694] (maestro@) Actor B terminates now -> [ 26.213694] (maestro@) Actor B gets destroyed now +$ ${bindir:=.}/s4u-actor-exiting ${platfdir}/small_platform.xml "--log=root.fmt:[%10.6r]%e(%P@%h)%e%m%n" --log=no_loc +> [ 1.000000] (A@Tremblay) I stop now +> [ 1.000000] (maestro@) Actor A terminates now +> [ 2.000000] (maestro@) Actor A gets destroyed now +> [ 2.000000] (maestro@) Actor B terminates now +> [ 3.000000] (maestro@) Actor B gets destroyed now +> [ 3.000000] (C@Ginette) And now, induce a deadlock by waiting for a message that will never come +> +> +> [ 3.000000] (maestro@) Oops! Deadlock or code not perfectly clean. +> [ 3.000000] (maestro@) 1 actors are still running, waiting for something. 
+> [ 3.000000] (maestro@) Legend of the following listing: "Actor <pid> (<name>@<host>): <status>" +> [ 3.000000] (maestro@) Actor 3 (C@Ginette): waiting for communication activity 0xdeadbeef () in state 0 to finish +> [ 3.000000] (C@Ginette) I was killed! +> [ 3.000000] (C@Ginette) The backtrace would be displayed here if --log=no_loc would not have been passed +> [ 3.000000] (maestro@) Actor C terminates now +> [ 3.000000] (maestro@) Actor C gets destroyed now diff --git a/include/simgrid/s4u/Actor.hpp b/include/simgrid/s4u/Actor.hpp index 6d8ef155f4..a8c3da8cb4 100644 --- a/include/simgrid/s4u/Actor.hpp +++ b/include/simgrid/s4u/Actor.hpp @@ -367,7 +367,7 @@ XBT_PUBLIC void exit(); * blocking functions forbidden in this setting, but also modifications to the global state. * * The parameter of on_exit's callbacks denotes whether or not the actor's execution failed. - * It will be set to true if the actor was killed or failed because of an exception, + * It will be set to true if the actor was killed or failed because of an exception or if the simulation deadlocked, * while it will remain to false if the actor terminated gracefully. */ diff --git a/include/simgrid/s4u/Engine.hpp b/include/simgrid/s4u/Engine.hpp index fc1c747927..7294e60ea8 100644 --- a/include/simgrid/s4u/Engine.hpp +++ b/include/simgrid/s4u/Engine.hpp @@ -181,7 +181,8 @@ public: /** Callback fired when the time jumps into the future */ static xbt::signal<void(double)> on_time_advance; - /** Callback fired when the time cannot advance because of inter-actors deadlock */ + /** Callback fired when the time cannot advance because of inter-actors deadlock. Note that the on_exit of each actor + * is also executed on deadlock. 
*/ static xbt::signal<void(void)> on_deadlock; private: diff --git a/src/simix/smx_global.cpp b/src/simix/smx_global.cpp index 20b1f4afb8..36454f61f6 100644 --- a/src/simix/smx_global.cpp +++ b/src/simix/smx_global.cpp @@ -251,8 +251,9 @@ void Global::display_all_actor_status() synchro_description = "I/O"; XBT_INFO("Actor %ld (%s@%s): waiting for %s activity %p (%s) in state %d to finish", actor->get_pid(), - actor->get_cname(), actor->get_host()->get_cname(), synchro_description, actor->waiting_synchro.get(), - name, (int)actor->waiting_synchro->state_); + actor->get_cname(), actor->get_host()->get_cname(), synchro_description, + (xbt_log_no_loc ? (void*)0xDEADBEEF : actor->waiting_synchro.get()), name, + (int)actor->waiting_synchro->state_); } else { XBT_INFO("Actor %ld (%s@%s)", actor->get_pid(), actor->get_cname(), actor->get_host()->get_cname()); } @@ -548,19 +549,26 @@ void SIMIX_run() XBT_DEBUG("### time %f, #processes %zu, #to_run %zu", time, simix_global->process_list.size(), simix_global->actors_to_run.size()); - } while (time > -1.0 || not simix_global->actors_to_run.empty()); - if (not simix_global->process_list.empty()) { - if (simix_global->process_list.size() <= simix_global->daemons.size()) { - XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) " - "once the simulation is over. Please fix your on_exit() functions."); - } else { - XBT_CRITICAL("Oops! Deadlock or code not perfectly clean."); + if (time < 0. && simix_global->actors_to_run.empty() && not simix_global->process_list.empty()) { + if (simix_global->process_list.size() <= simix_global->daemons.size()) { + XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) " + "once the simulation is over. Please fix your on_exit() functions."); + } else { + XBT_CRITICAL("Oops! 
Deadlock or code not perfectly clean."); + } + simix_global->display_all_actor_status(); + simgrid::s4u::Engine::on_deadlock(); + for (auto const& kv : simix_global->process_list) { + XBT_DEBUG("Kill %s", kv.second->get_cname()); + simix_global->maestro_->kill(kv.second); + } } - simix_global->display_all_actor_status(); - simgrid::s4u::Engine::on_deadlock(); - xbt_abort(); - } + } while (time > -1.0 || not simix_global->actors_to_run.empty()); + + if (not simix_global->process_list.empty()) + THROW_IMPOSSIBLE; + simgrid::s4u::Engine::on_simulation_end(); }