X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/1f5cc4e090af49a98da0e25e9ee21a8ce6ce30f8..9ceefed14c83a0f6ea5f78e3acafd53181dc4fa1:/src/kernel/EngineImpl.cpp diff --git a/src/kernel/EngineImpl.cpp b/src/kernel/EngineImpl.cpp index cb65b17d0b..24116e2e14 100644 --- a/src/kernel/EngineImpl.cpp +++ b/src/kernel/EngineImpl.cpp @@ -17,8 +17,7 @@ #include "src/mc/mc_replay.hpp" #include "src/smpi/include/smpi_actor.hpp" #include "src/surf/network_interface.hpp" -#include "src/surf/xml/platf.hpp" // FIXME: KILLME. There must be a better way than mimicking XML here -#include "surf/surf.hpp" //surf_presolve() and surf_solve() +#include "src/surf/xml/platf.hpp" #include "xbt/xbt_modinter.h" /* whether initialization was already done */ #include @@ -30,7 +29,10 @@ #include "src/mc/remote/AppSide.hpp" #endif +double NOW = 0; + XBT_LOG_NEW_DEFAULT_CATEGORY(ker_engine, "Logging specific to Engine (kernel)"); + namespace simgrid { namespace kernel { EngineImpl* EngineImpl::instance_ = nullptr; /* That singleton is awful too. */ @@ -57,6 +59,37 @@ xbt_dynar_t get_dead_actors_addr() #endif } +constexpr std::initializer_list> context_factories = { +#if HAVE_RAW_CONTEXTS + {"raw", &context::raw_factory}, +#endif +#if HAVE_UCONTEXT_CONTEXTS + {"ucontext", &context::sysv_factory}, +#endif +#if HAVE_BOOST_CONTEXTS + {"boost", &context::boost_factory}, +#endif + {"thread", &context::thread_factory}, +}; + +static_assert(context_factories.size() > 0, "No context factories are enabled for this build"); + +// Create the list of possible contexts: +static inline std::string contexts_list() +{ + std::string res; + std::string sep = ""; + for (auto const& factory : context_factories) { + res += sep + factory.first; + sep = ", "; + } + return res; +} + +static config::Flag context_factory_name("contexts/factory", + (std::string("Possible values: ") + contexts_list()).c_str(), + context_factories.begin()->first); + } // namespace kernel } // namespace simgrid @@ -89,7 +122,7 @@ static void segvhandler(int signum, siginfo_t* siginfo, void* /*context*/) "If you think you've found a bug in SimGrid, please report it along with a\n" "Minimal Working Example (MWE) reproducing your problem and a full backtrace\n" "of the fault captured with gdb or valgrind.\n", - smx_context_stack_size / 1024); + simgrid::kernel::context::stack_size / 1024); } else if (siginfo->si_signo == SIGSEGV) { fprintf(stderr, "Segmentation fault.\n"); #if HAVE_SMPI @@ -205,12 +238,68 @@ void EngineImpl::initialize(int* argc, char** argv) /* register a function to be called by SURF after the environment creation */ sg_platf_init(); - s4u::Engine::on_platform_created.connect(surf_presolve); + s4u::Engine::on_platform_created.connect([this]() { this->presolve(); }); if (config::get_value("debug/clean-atexit")) atexit(shutdown); } +void EngineImpl::context_mod_init() const +{ + xbt_assert(not instance_->has_context_factory()); + +#if HAVE_SMPI && defined(__NetBSD__) + smpi_init_options_internal(false); + std::string priv = config::get_value("smpi/privatization"); + if (context_factory_name == "thread" && (priv == "dlopen" || priv == "yes" || priv == "default" || priv == "1")) { + XBT_WARN("dlopen+thread broken on Apple and BSD. Switching to raw contexts."); + context_factory_name = "raw"; + } +#endif + +#if HAVE_SMPI && defined(__FreeBSD__) + smpi_init_options_internal(false); + if (context_factory_name == "thread" && config::get_value("smpi/privatization") != "no") { + XBT_WARN("mmap broken on FreeBSD, but dlopen+thread broken too. Switching to dlopen+raw contexts."); + context_factory_name = "raw"; + } +#endif + + /* select the context factory to use to create the contexts */ + if (context::factory_initializer != nullptr) { // Give Java a chance to hijack the factory mechanism + instance_->set_context_factory(context::factory_initializer()); + return; + } + /* use the factory specified by --cfg=contexts/factory:value */ + for (auto const& factory : context_factories) + if (context_factory_name == factory.first) { + instance_->set_context_factory(factory.second()); + break; + } + + if (not instance_->has_context_factory()) { + XBT_ERROR("Invalid context factory specified. Valid factories on this machine:"); +#if HAVE_RAW_CONTEXTS + XBT_ERROR(" raw: high performance context factory implemented specifically for SimGrid"); +#else + XBT_ERROR(" (raw contexts were disabled at compilation time on this machine -- check configure logs for details)"); +#endif +#if HAVE_UCONTEXT_CONTEXTS + XBT_ERROR(" ucontext: classical system V contexts (implemented with makecontext, swapcontext and friends)"); +#else + XBT_ERROR(" (ucontext was disabled at compilation time on this machine -- check configure logs for details)"); +#endif +#if HAVE_BOOST_CONTEXTS + XBT_ERROR(" boost: this uses the boost libraries context implementation"); +#else + XBT_ERROR(" (boost was disabled at compilation time on this machine -- check configure logs for details. Did you " + "install the libboost-context-dev package?)"); +#endif + XBT_ERROR(" thread: slow portability layer using pthreads as provided by gcc"); + xbt_die("Please use a valid factory."); + } +} + void EngineImpl::shutdown() { if (EngineImpl::instance_ == nullptr) @@ -240,7 +329,8 @@ void EngineImpl::shutdown() instance_->empty_trash(); /* Let's free maestro now */ - instance_->destroy_maestro(); + delete instance_->maestro_; + instance_->maestro_ = nullptr; /* Finish context module and SURF */ instance_->destroy_context_factory(); @@ -326,16 +416,35 @@ void EngineImpl::wake_all_waiting_actors() const XBT_DEBUG("Handling the failed actions (if any)"); while (auto* action = model->extract_failed_action()) { XBT_DEBUG(" Handling Action %p", action); - if (action->get_activity() != nullptr) + if (action->get_activity() != nullptr) { + // If nobody told the interface that the activity has failed, that's because no actor waits on it (maestro + // started it). SimDAG I see you! + if (action->get_activity()->get_actor() == maestro_) + action->get_activity()->get_iface()->complete(s4u::Activity::State::FAILED); + activity::ActivityImplPtr(action->get_activity())->post(); + } } XBT_DEBUG("Handling the terminated actions (if any)"); while (auto* action = model->extract_done_action()) { XBT_DEBUG(" Handling Action %p", action); if (action->get_activity() == nullptr) XBT_DEBUG("probably vcpu's action %p, skip", action); - else + else { + // If nobody told the interface that the activity is finished, that's because no actor waits on it (maestro + // started it). SimDAG I see you! + // TODO: do the same for other activity kinds once comms are cleaned up + auto* exec = dynamic_cast(action->get_activity()); + if (exec != nullptr && exec->get_actor() == maestro_) { + exec->set_finish_time(action->get_finish_time()); + exec->get_iface()->complete(s4u::Activity::State::FINISHED); + } + auto* io = dynamic_cast(action->get_activity()); + if (io != nullptr && io->get_actor() == maestro_) + io->get_iface()->complete(s4u::Activity::State::FINISHED); + activity::ActivityImplPtr(action->get_activity())->post(); + } } } } @@ -460,7 +569,130 @@ void EngineImpl::display_all_actor_status() const } } -void EngineImpl::run() +void EngineImpl::presolve() const +{ + XBT_DEBUG("Consume all trace events occurring before the starting time."); + double next_event_date; + while ((next_event_date = profile::future_evt_set.next_date()) != -1.0) { + if (next_event_date > NOW) + break; + + double value = -1.0; + resource::Resource* resource = nullptr; + while (auto* event = profile::future_evt_set.pop_leq(next_event_date, &value, &resource)) { + if (value >= 0) + resource->apply_event(event, value); + } + } + + XBT_DEBUG("Set every models in the right state by updating them to 0."); + for (auto const& model : models_) + model->update_actions_state(NOW, 0.0); +} + +double EngineImpl::solve(double max_date) const +{ + double time_delta = -1.0; /* duration */ + double value = -1.0; + resource::Resource* resource = nullptr; + + if (max_date != -1.0) { + xbt_assert(max_date >= NOW, "You asked to simulate up to %f, but that's in the past already", max_date); + + time_delta = max_date - NOW; + } + + XBT_DEBUG("Looking for next event in all models"); + for (auto model : models_) { + if (not model->next_occurring_event_is_idempotent()) { + continue; + } + double next_event = model->next_occurring_event(NOW); + if ((time_delta < 0.0 || next_event < time_delta) && next_event >= 0.0) { + time_delta = next_event; + } + } + + XBT_DEBUG("Min for resources (remember that NS3 don't update that value): %f", time_delta); + + XBT_DEBUG("Looking for next trace event"); + + while (true) { // Handle next occurring events until none remains + double next_event_date = profile::future_evt_set.next_date(); + XBT_DEBUG("Next TRACE event: %f", next_event_date); + + for (auto model : models_) { + /* Skip all idempotent models, they were already treated above + * NS3 is the one to handled here */ + if (model->next_occurring_event_is_idempotent()) + continue; + + if (next_event_date != -1.0) { + time_delta = std::min(next_event_date - NOW, time_delta); + } else { + time_delta = std::max(next_event_date - NOW, time_delta); // Get the positive component + } + + XBT_DEBUG("Run the NS3 network at most %fs", time_delta); + // run until min or next flow + double model_next_action_end = model->next_occurring_event(time_delta); + + XBT_DEBUG("Min for network : %f", model_next_action_end); + if (model_next_action_end >= 0.0) + time_delta = model_next_action_end; + } + + if (next_event_date < 0.0 || (next_event_date > NOW + time_delta)) { + // next event may have already occurred or will after the next resource change, then bail out + XBT_DEBUG("no next usable TRACE event. Stop searching for it"); + break; + } + + XBT_DEBUG("Updating models (min = %g, NOW = %g, next_event_date = %g)", time_delta, NOW, next_event_date); + + while (auto* event = profile::future_evt_set.pop_leq(next_event_date, &value, &resource)) { + if (resource->is_used() || (watched_hosts().find(resource->get_cname()) != watched_hosts().end())) { + time_delta = next_event_date - NOW; + XBT_DEBUG("This event invalidates the next_occurring_event() computation of models. Next event set to %f", + time_delta); + } + // FIXME: I'm too lame to update NOW live, so I change it and restore it so that the real update with surf_min + // will work + double round_start = NOW; + NOW = next_event_date; + /* update state of the corresponding resource to the new value. Does not touch lmm. + It will be modified if needed when updating actions */ + XBT_DEBUG("Calling update_resource_state for resource %s", resource->get_cname()); + resource->apply_event(event, value); + NOW = round_start; + } + } + + /* FIXME: Moved this test to here to avoid stopping simulation if there are actions running on cpus and all cpus are + * with availability = 0. This may cause an infinite loop if one cpu has a trace with periodicity = 0 and the other a + * trace with periodicity > 0. + * The options are: all traces with same periodicity(0 or >0) or we need to change the way how the events are managed + */ + if (time_delta < 0) { + XBT_DEBUG("No next event at all. Bail out now."); + return -1.0; + } + + XBT_DEBUG("Duration set to %f", time_delta); + + // Bump the time: jump into the future + NOW = NOW + time_delta; + + // Inform the models of the date change + for (auto const& model : models_) + model->update_actions_state(NOW, time_delta); + + s4u::Engine::on_time_advance(time_delta); + + return time_delta; +} + +void EngineImpl::run(double max_date) { if (MC_record_replay_is_active()) { mc::replay(MC_record_path()); @@ -468,7 +700,8 @@ void EngineImpl::run() return; } - double time = 0; + double elapsed_time = -1; + std::set* vetoed_activities = s4u::Activity::get_vetoed_activities(); do { XBT_DEBUG("New Schedule Round; size(queue)=%zu", actors_to_run_.size()); @@ -516,7 +749,7 @@ void EngineImpl::run() * - You are added from ActorImpl::simcall_answer() only. When this function is called depends on the * resource kind (network, cpu, disk, whatever), but the same arguments hold. Let's take communications * as an example. - * - For communications, this function is called from SIMIX_comm_finish(). + * - For communications, this function is called from CommImpl::finish(). * This function itself don't mess with the order since simcalls are handled in FIFO order. * The function is called: * - before the comm starts (invalid parameters, or resource already dead or whatever). @@ -572,13 +805,19 @@ void EngineImpl::run() } } - time = timer::Timer::next(); - if (time > -1.0 || not actor_list_.empty()) { - XBT_DEBUG("Calling surf_solve"); - time = surf_solve(time); - XBT_DEBUG("Moving time ahead : %g", time); + // Compute the max_date of the next solve. + // It's either when a timer occurs, or when user-specified deadline is reached, or -1 if none is given + double next_time = timer::Timer::next(); + if (next_time < 0 && max_date > -1) { + next_time = max_date; + } else if (next_time > -1 && max_date > -1) { // either both <0, or both >0 + next_time = std::min(next_time, max_date); } + XBT_DEBUG("Calling solve(%g) %g", next_time, NOW); + elapsed_time = solve(next_time); + XBT_DEBUG("Moving time ahead. NOW=%g; elapsed: %g", NOW, elapsed_time); + /* Notify all the hosts that have failed */ /* FIXME: iterate through the list of failed host and mark each of them */ /* as failed. On each host, signal all the running actors with host_fail */ @@ -595,9 +834,10 @@ void EngineImpl::run() /* Clean actors to destroy */ empty_trash(); - XBT_DEBUG("### time %f, #actors %zu, #to_run %zu", time, actor_list_.size(), actors_to_run_.size()); + XBT_DEBUG("### elapsed time %f, #actors %zu, #to_run %zu, #vetoed %d", elapsed_time, actor_list_.size(), + actors_to_run_.size(), (vetoed_activities == nullptr ? -1 : static_cast(vetoed_activities->size()))); - if (time < 0. && actors_to_run_.empty() && not actor_list_.empty()) { + if (elapsed_time < 0. && actors_to_run_.empty() && not actor_list_.empty()) { if (actor_list_.size() <= daemons_.size()) { XBT_CRITICAL("Oops! Daemon actors cannot do any blocking activity (communications, synchronization, etc) " "once the simulation is over. Please fix your on_exit() functions."); @@ -611,17 +851,24 @@ void EngineImpl::run() maestro_->kill(kv.second); } } - } while (time > -1.0 || has_actors_to_run()); - if (not actor_list_.empty()) + } while ((vetoed_activities == nullptr || vetoed_activities->empty()) && + ((elapsed_time > -1.0 && not double_equals(max_date, NOW, 0.00001)) || has_actors_to_run())); + + if (not actor_list_.empty() && max_date < 0 && not(vetoed_activities == nullptr || vetoed_activities->empty())) THROW_IMPOSSIBLE; simgrid::s4u::Engine::on_simulation_end(); } + +double EngineImpl::get_clock() +{ + return NOW; +} } // namespace kernel } // namespace simgrid void SIMIX_run() // XBT_ATTRIB_DEPRECATED_v332 { - simgrid::kernel::EngineImpl::get_instance()->run(); + simgrid::kernel::EngineImpl::get_instance()->run(-1); }