X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/a4c8f1898670317d0fa33bf1b1a904ea922b78cc..739a8e3ebb789b7155f033bfdacba9478095e190:/src/simix/smx_global.cpp diff --git a/src/simix/smx_global.cpp b/src/simix/smx_global.cpp index ca3e4b9edb..68a74dae84 100644 --- a/src/simix/smx_global.cpp +++ b/src/simix/smx_global.cpp @@ -34,16 +34,16 @@ void (*SMPI_switch_data_segment)(simgrid::s4u::ActorPtr) = nullptr; namespace simgrid { namespace simix { -simgrid::config::Flag cfg_verbose_exit{ - "debug/verbose-exit", {"verbose-exit"}, "Display the actor status at exit", true}; -} +config::Flag cfg_verbose_exit{"debug/verbose-exit", {"verbose-exit"}, "Display the actor status at exit", true}; +} // namespace simix } // namespace simgrid + XBT_ATTRIB_NORETURN static void inthandler(int) { if (simgrid::simix::cfg_verbose_exit) { XBT_INFO("CTRL-C pressed. The current status will be displayed before exit (disable that behavior with option " "'debug/verbose-exit')."); - SIMIX_display_process_status(); + simix_global->display_all_actor_status(); } else { XBT_INFO("CTRL-C pressed, exiting. Hiding the current process status since 'debug/verbose-exit' is set to false."); @@ -54,24 +54,25 @@ XBT_ATTRIB_NORETURN static void inthandler(int) #ifndef _WIN32 static void segvhandler(int signum, siginfo_t* siginfo, void* /*context*/) { - if (siginfo->si_signo == SIGSEGV && siginfo->si_code == SEGV_ACCERR) { - fprintf(stderr, "Access violation detected.\n" - "This probably comes from a programming error in your code, or from a stack\n" - "overflow. If you are certain of your code, try increasing the stack size\n" - " --cfg=contexts/stack-size=XXX (current size is %u KiB).\n" - "\n" - "If it does not help, this may have one of the following causes:\n" - "a bug in SimGrid, a bug in the OS or a bug in a third-party libraries.\n" - "Failing hardware can sometimes generate such errors too.\n" - "\n" - "If you think you've found a bug in SimGrid, please report it along with a\n" - "Minimal Working Example (MWE) reproducing your problem and a full backtrace\n" - "of the fault captured with gdb or valgrind.\n", + if ((siginfo->si_signo == SIGSEGV && siginfo->si_code == SEGV_ACCERR) || siginfo->si_signo == SIGBUS) { + fprintf(stderr, + "Access violation or Bus error detected.\n" + "This probably comes from a programming error in your code, or from a stack\n" + "overflow. If you are certain of your code, try increasing the stack size\n" + " --cfg=contexts/stack-size=XXX (current size is %u KiB).\n" + "\n" + "If it does not help, this may have one of the following causes:\n" + "a bug in SimGrid, a bug in the OS or a bug in a third-party libraries.\n" + "Failing hardware can sometimes generate such errors too.\n" + "\n" + "If you think you've found a bug in SimGrid, please report it along with a\n" + "Minimal Working Example (MWE) reproducing your problem and a full backtrace\n" + "of the fault captured with gdb or valgrind.\n", smx_context_stack_size / 1024); } else if (siginfo->si_signo == SIGSEGV) { fprintf(stderr, "Segmentation fault.\n"); #if HAVE_SMPI - if (smpi_enabled() && smpi_privatize_global_variables == SmpiPrivStrategies::NONE) { + if (smpi_enabled() && smpi_cfg_privatization() == SmpiPrivStrategies::NONE) { #if HAVE_PRIVATIZATION fprintf(stderr, "Try to enable SMPI variable privatization with --cfg=smpi/privatization:yes.\n"); #else @@ -113,14 +114,17 @@ static void install_segvhandler() action.sa_flags = SA_ONSTACK | SA_RESETHAND | SA_SIGINFO; sigemptyset(&action.sa_mask); - if (sigaction(SIGSEGV, &action, &old_action) == -1) { - XBT_WARN("Failed to register signal handler for SIGSEGV: %s", strerror(errno)); - return; - } - if ((old_action.sa_flags & SA_SIGINFO) || old_action.sa_handler != SIG_DFL) { - XBT_DEBUG("A signal handler was already installed for SIGSEGV (%p). Restore it.", - (old_action.sa_flags & SA_SIGINFO) ? (void*)old_action.sa_sigaction : (void*)old_action.sa_handler); - sigaction(SIGSEGV, &old_action, nullptr); + /* Linux tend to raise only SIGSEGV where other systems also raise SIGBUS on severe error */ + for (int sig : {SIGSEGV, SIGBUS}) { + if (sigaction(sig, &action, &old_action) == -1) { + XBT_WARN("Failed to register signal handler for signal %d: %s", sig, strerror(errno)); + continue; + } + if ((old_action.sa_flags & SA_SIGINFO) || old_action.sa_handler != SIG_DFL) { + XBT_DEBUG("A signal handler was already installed for signal %d (%p). Restore it.", sig, + (old_action.sa_flags & SA_SIGINFO) ? (void*)old_action.sa_sigaction : (void*)old_action.sa_handler); + sigaction(sig, &old_action, nullptr); + } } } @@ -130,7 +134,7 @@ static void install_segvhandler() namespace simgrid { namespace simix { -Timer* Timer::set(double date, simgrid::xbt::Task&& callback) +Timer* Timer::set(double date, xbt::Task&& callback) { Timer* timer = new Timer(date, std::move(callback)); timer->handle_ = simix_timers.emplace(std::make_pair(date, timer)); @@ -140,7 +144,7 @@ Timer* Timer::set(double date, simgrid::xbt::Task&& callback) /** @brief cancels a timer that was added earlier */ void Timer::remove() { - simgrid::simix::simix_timers.erase(handle_); + simix_timers.erase(handle_); delete this; } @@ -169,7 +173,7 @@ bool Global::execute_tasks() void Global::empty_trash() { while (not actors_to_destroy.empty()) { - smx_actor_t actor = &actors_to_destroy.front(); + kernel::actor::ActorImpl* actor = &actors_to_destroy.front(); actors_to_destroy.pop_front(); XBT_DEBUG("Getting rid of %s (refcount: %d)", actor->get_cname(), actor->get_refcount()); intrusive_ptr_release(actor); @@ -193,11 +197,73 @@ void Global::run_all_actors() actors_to_run.clear(); } -simgrid::config::Flag cfg_breakpoint{ - "debug/breakpoint", {"simix/breakpoint"}, "When non-negative, raise a SIGTRAP after given (simulated) time", -1.0}; +/** Wake up all actors waiting for a Surf action to finish */ +void Global::wake_all_waiting_actors() +{ + for (auto const& model : all_existing_models) { + kernel::resource::Action* action; + + XBT_DEBUG("Handling the failed actions (if any)"); + while ((action = model->extract_failed_action())) { + XBT_DEBUG(" Handling Action %p", action); + if (action->get_activity() != nullptr) + kernel::activity::ActivityImplPtr(action->get_activity())->post(); + } + XBT_DEBUG("Handling the terminated actions (if any)"); + while ((action = model->extract_done_action())) { + XBT_DEBUG(" Handling Action %p", action); + if (action->get_activity() == nullptr) + XBT_DEBUG("probably vcpu's action %p, skip", action); + else + kernel::activity::ActivityImplPtr(action->get_activity())->post(); + } + } } + +void Global::display_all_actor_status() +{ + XBT_INFO("%zu actors are still running, waiting for something.", process_list.size()); + /* List the actors and their state */ + XBT_INFO("Legend of the following listing: \"Actor (@): \""); + for (auto const& kv : process_list) { + kernel::actor::ActorImpl* actor = kv.second; + + if (actor->waiting_synchro) { + const char* synchro_description = "unknown"; + // we don't care about the Activity type to get its name, use RawImpl + const char* name = boost::static_pointer_cast>( + actor->waiting_synchro) + ->get_cname(); + + if (boost::dynamic_pointer_cast(actor->waiting_synchro) != nullptr) + synchro_description = "execution"; + + if (boost::dynamic_pointer_cast(actor->waiting_synchro) != nullptr) + synchro_description = "communication"; + + if (boost::dynamic_pointer_cast(actor->waiting_synchro) != nullptr) + synchro_description = "sleeping"; + + if (boost::dynamic_pointer_cast(actor->waiting_synchro) != nullptr) + synchro_description = "synchronization"; + + if (boost::dynamic_pointer_cast(actor->waiting_synchro) != nullptr) + synchro_description = "I/O"; + + XBT_INFO("Actor %ld (%s@%s): waiting for %s activity %p (%s) in state %d to finish", actor->get_pid(), + actor->get_cname(), actor->get_host()->get_cname(), synchro_description, actor->waiting_synchro.get(), + name, (int)actor->waiting_synchro->state_); + } else { + XBT_INFO("Actor %ld (%s@%s)", actor->get_pid(), actor->get_cname(), actor->get_host()->get_cname()); + } + } } +config::Flag cfg_breakpoint{ + "debug/breakpoint", {"simix/breakpoint"}, "When non-negative, raise a SIGTRAP after given (simulated) time", -1.0}; +} // namespace simix +} // namespace simgrid + static simgrid::simix::ActorCode maestro_code; void SIMIX_set_maestro(void (*code)(void*), void* data) { @@ -223,11 +289,11 @@ void SIMIX_global_init(int *argc, char **argv) surf_init(argc, argv); /* Initialize SURF structures */ simix_global.reset(new simgrid::simix::Global()); - simix_global->maestro_process = nullptr; + simix_global->maestro_ = nullptr; SIMIX_context_mod_init(); // Either create a new context with maestro or create - // a context object with the current context mestro): + // a context object with the current context maestro): simgrid::kernel::actor::create_maestro(maestro_code); /* Prepare to display some more info when dying on Ctrl-C pressing */ @@ -238,12 +304,7 @@ void SIMIX_global_init(int *argc, char **argv) #endif /* register a function to be called by SURF after the environment creation */ sg_platf_init(); - simgrid::s4u::on_platform_created.connect(surf_presolve); - - simgrid::s4u::Storage::on_creation.connect([](simgrid::s4u::Storage const& storage) { - sg_storage_t s = simgrid::s4u::Storage::by_name(storage.get_name()); - xbt_assert(s != nullptr, "Storage not found for name %s", storage.get_cname()); - }); + simgrid::s4u::Engine::on_platform_created.connect(surf_presolve); } if (simgrid::config::get_value("debug/clean-atexit")) @@ -272,7 +333,7 @@ void SIMIX_clean() } #if HAVE_SMPI - if (SIMIX_process_count()>0){ + if (simix_global->process_list.size() > 0) { if(smpi_process()->initialized()){ xbt_die("Process exited without calling MPI_Finalize - Killing simulation"); }else{ @@ -283,8 +344,8 @@ void SIMIX_clean() #endif /* Kill all processes (but maestro) */ - simix_global->maestro_process->kill_all(); - simix_global->context_factory->run_all(); + simix_global->maestro_->kill_all(); + simix_global->run_all_actors(); simix_global->empty_trash(); /* Exit the SIMIX network module */ @@ -306,8 +367,8 @@ void SIMIX_clean() #endif /* Let's free maestro now */ - delete simix_global->maestro_process; - simix_global->maestro_process = nullptr; + delete simix_global->maestro_; + simix_global->maestro_ = nullptr; /* Finish context module and SURF */ SIMIX_context_mod_exit(); @@ -332,30 +393,7 @@ double SIMIX_get_clock() } } -/** Wake up all processes waiting for a Surf action to finish */ -static void SIMIX_wake_processes() -{ - for (auto const& model : all_existing_models) { - simgrid::kernel::resource::Action* action; - - XBT_DEBUG("Handling the processes whose action failed (if any)"); - while ((action = model->extract_failed_action())) { - XBT_DEBUG(" Handling Action %p",action); - if (action->get_activity() != nullptr) - simgrid::kernel::activity::ActivityImplPtr(action->get_activity())->post(); - } - XBT_DEBUG("Handling the processes whose action terminated normally (if any)"); - while ((action = model->extract_done_action())) { - XBT_DEBUG(" Handling Action %p",action); - if (action->get_activity() == nullptr) - XBT_DEBUG("probably vcpu's action %p, skip", action); - else - simgrid::kernel::activity::ActivityImplPtr(action->get_activity())->post(); - } - } -} - -/** Handle any pending timer */ +/** Handle any pending timer. Returns if something was actually run. */ static bool SIMIX_execute_timers() { bool result = false; @@ -413,22 +451,22 @@ void SIMIX_run() * Short proof: only maestro adds stuff to the actors_to_run array, so the execution order of user contexts do * not impact its order. * - * Long proof: processes remain sorted through an arbitrary (implicit, complex but fixed) order in all cases. + * Long proof: actors remain sorted through an arbitrary (implicit, complex but fixed) order in all cases. * - * - if there is no kill during the simulation, processes remain sorted according by their PID. + * - if there is no kill during the simulation, actors remain sorted according by their PID. * Rationale: This can be proved inductively. * Assume that actors_to_run is sorted at a beginning of one round (it is at round 0: the deployment file * is parsed linearly). * Let's show that it is still so at the end of this round. - * - if a process is added when being created, that's from maestro. It can be either at startup + * - if an actor is added when being created, that's from maestro. It can be either at startup * time (and then in PID order), or in response to a process_create simcall. Since simcalls are handled * in arbitrary order (inductive hypothesis), we are fine. - * - If a process is added because it's getting killed, its subsequent actions shouldn't matter - * - If a process gets added to actors_to_run because one of their blocking action constituting the meat + * - If an actor is added because it's getting killed, its subsequent actions shouldn't matter + * - If an actor gets added to actors_to_run because one of their blocking action constituting the meat * of a simcall terminates, we're still good. Proof: - * - You are added from SIMIX_simcall_answer() only. When this function is called depends on the resource - * kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications as an - * example. + * - You are added from ActorImpl::simcall_answer() only. When this function is called depends on the + * resource kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications + * as an example. * - For communications, this function is called from SIMIX_comm_finish(). * This function itself don't mess with the order since simcalls are handled in FIFO order. * The function is called: @@ -442,21 +480,21 @@ void SIMIX_run() * This order is also fixed because it depends of the order in which the surf actions were * added to the system, and only maestro can add stuff this way, through simcalls. * We thus use the inductive hypothesis once again to conclude that the order in which synchros are - * poped out of the set does not depend on the user code's execution order. + * popped out of the set does not depend on the user code's execution order. * - because the communication terminated. In this case, synchros are served in the order given by * set = model->states.done_action_set; * while ((synchro = extract(set))) * SIMIX_simcall_post((smx_synchro_t) synchro->data); * and the argument is very similar to the previous one. - * So, in any case, the orders of calls to SIMIX_comm_finish() do not depend on the order in which user - * processes are executed. - * So, in any cases, the orders of processes within actors_to_run do not depend on the order in which - * user processes were executed previously. + * So, in any case, the orders of calls to CommImpl::finish() do not depend on the order in which user + * actors are executed. + * So, in any cases, the orders of actors within actors_to_run do not depend on the order in which + * user actors were executed previously. * So, if there is no killing in the simulation, the simulation reproducibility is not jeopardized. - * - If there is some process killings, the order is changed by this decision that comes from user-land + * - If there is some actor killings, the order is changed by this decision that comes from user-land * But this decision may not have been motivated by a situation that were different because the simulation is * not reproducible. - * So, even the order change induced by the process killing is perfectly reproducible. + * So, even the order change induced by the actor killing is perfectly reproducible. * * So science works, bitches [http://xkcd.com/54/]. * @@ -466,22 +504,22 @@ void SIMIX_run() * That would thus be a pure waste of time. */ - for (smx_actor_t const& process : simix_global->actors_that_ran) { - if (process->simcall.call != SIMCALL_NONE) { - SIMIX_simcall_handle(&process->simcall, 0); + for (auto const& actor : simix_global->actors_that_ran) { + if (actor->simcall.call_ != SIMCALL_NONE) { + actor->simcall_handle(0); } } simix_global->execute_tasks(); do { - SIMIX_wake_processes(); + simix_global->wake_all_waiting_actors(); } while (simix_global->execute_tasks()); /* If only daemon processes remain, cancel their actions, mark them to die and reschedule them */ if (simix_global->process_list.size() == simix_global->daemons.size()) for (auto const& dmon : simix_global->daemons) { XBT_DEBUG("Kill %s", dmon->get_cname()); - simix_global->maestro_process->kill(dmon); + simix_global->maestro_->kill(dmon); } } @@ -502,7 +540,7 @@ void SIMIX_run() again = SIMIX_execute_timers(); if (simix_global->execute_tasks()) again = true; - SIMIX_wake_processes(); + simix_global->wake_all_waiting_actors(); } while (again); /* Clean actors to destroy */ @@ -510,95 +548,53 @@ void SIMIX_run() XBT_DEBUG("### time %f, #processes %zu, #to_run %zu", time, simix_global->process_list.size(), simix_global->actors_to_run.size()); - } while (time > -1.0 || not simix_global->actors_to_run.empty()); if (not simix_global->process_list.empty()) { - if (simix_global->process_list.size() <= simix_global->daemons.size()) { XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) " "once the simulation is over. Please fix your on_exit() functions."); } else { XBT_CRITICAL("Oops! Deadlock or code not perfectly clean."); } - SIMIX_display_process_status(); - simgrid::s4u::on_deadlock(); + simix_global->display_all_actor_status(); + simgrid::s4u::Engine::on_deadlock(); xbt_abort(); } - simgrid::s4u::on_simulation_end(); + simgrid::s4u::Engine::on_simulation_end(); } -double SIMIX_timer_next() +double SIMIX_timer_next() // XBT_ATTRIB_DEPRECATED_v329 { return simgrid::simix::Timer::next(); } -smx_timer_t SIMIX_timer_set(double date, void (*callback)(void*), void *arg) +smx_timer_t SIMIX_timer_set(double date, void (*callback)(void*), void* arg) // XBT_ATTRIB_DEPRECATED_v329 { return simgrid::simix::Timer::set(date, std::bind(callback, arg)); } -smx_timer_t SIMIX_timer_set(double date, simgrid::xbt::Task&& callback) // deprecated -{ - return simgrid::simix::Timer::set(date, std::move(callback)); -} - /** @brief cancels a timer that was added earlier */ -void SIMIX_timer_remove(smx_timer_t timer) { +void SIMIX_timer_remove(smx_timer_t timer) // XBT_ATTRIB_DEPRECATED_v329 +{ timer->remove(); } /** @brief Returns the date at which the timer will trigger (or 0 if nullptr timer) */ -double SIMIX_timer_get_date(smx_timer_t timer) { +double SIMIX_timer_get_date(smx_timer_t timer) // XBT_ATTRIB_DEPRECATED_v329 +{ return timer ? timer->get_date() : 0; } -void SIMIX_display_process_status() +void SIMIX_display_process_status() // XBT_ATTRIB_DEPRECATED_v329 { - int nbprocess = simix_global->process_list.size(); - - XBT_INFO("%d processes are still running, waiting for something.", nbprocess); - /* List the process and their state */ - XBT_INFO("Legend of the following listing: \"Process (@): \""); - for (auto const& kv : simix_global->process_list) { - smx_actor_t process = kv.second; - - if (process->waiting_synchro) { - - const char* synchro_description = "unknown"; - // we don't care about the Activity type to get its name, use RawImpl - const char* name = - boost::static_pointer_cast>( - process->waiting_synchro) - ->get_cname(); - - if (boost::dynamic_pointer_cast(process->waiting_synchro) != nullptr) - synchro_description = "execution"; - - if (boost::dynamic_pointer_cast(process->waiting_synchro) != nullptr) - synchro_description = "communication"; - - if (boost::dynamic_pointer_cast(process->waiting_synchro) != nullptr) - synchro_description = "sleeping"; - - if (boost::dynamic_pointer_cast(process->waiting_synchro) != nullptr) - synchro_description = "synchronization"; - - if (boost::dynamic_pointer_cast(process->waiting_synchro) != nullptr) - synchro_description = "I/O"; - - XBT_INFO("Process %ld (%s@%s): waiting for %s synchro %p (%s) in state %d to finish", process->get_pid(), - process->get_cname(), process->get_host()->get_cname(), synchro_description, - process->waiting_synchro.get(), name, (int)process->waiting_synchro->state_); - } - else { - XBT_INFO("Process %ld (%s@%s)", process->get_pid(), process->get_cname(), process->get_host()->get_cname()); - } - } + simix_global->display_all_actor_status(); } int SIMIX_is_maestro() { - smx_actor_t self = SIMIX_process_self(); - return simix_global == nullptr /*SimDag*/ || self == nullptr || self == simix_global->maestro_process; + if (simix_global == nullptr) // SimDag + return true; + simgrid::kernel::actor::ActorImpl* self = SIMIX_process_self(); + return self == nullptr || self == simix_global->maestro_; }