X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/08e7455d67920bbd7a87f440d00f2c1e071314a0..64a60fa58ca5dc30df8b2780c4901773f219d8ea:/src/kernel/activity/ExecImpl.cpp diff --git a/src/kernel/activity/ExecImpl.cpp b/src/kernel/activity/ExecImpl.cpp index 3e27a03b91..460edbc072 100644 --- a/src/kernel/activity/ExecImpl.cpp +++ b/src/kernel/activity/ExecImpl.cpp @@ -5,7 +5,10 @@ #include "src/kernel/activity/ExecImpl.hpp" #include "simgrid/Exception.hpp" +#include "simgrid/kernel/routing/NetPoint.hpp" #include "simgrid/modelchecker.h" +#include "simgrid/s4u/Exec.hpp" +#include "src/mc/checker/SimcallObserver.hpp" #include "src/mc/mc_replay.hpp" #include "src/surf/HostImpl.hpp" #include "src/surf/cpu_interface.hpp" @@ -13,50 +16,15 @@ #include "simgrid/s4u/Host.hpp" -#include - XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_process); -void simcall_HANDLER_execution_waitany_for(smx_simcall_t simcall, simgrid::kernel::activity::ExecImpl* execs[], - size_t count, double timeout) -{ - if (timeout < 0.0) { - simcall->timeout_cb_ = nullptr; - } else { - simcall->timeout_cb_ = simgrid::simix::Timer::set(SIMIX_get_clock() + timeout, [simcall, execs, count]() { - simcall->timeout_cb_ = nullptr; - for (size_t i = 0; i < count; i++) { - // Remove the first occurrence of simcall: - auto* exec = execs[i]; - auto j = boost::range::find(exec->simcalls_, simcall); - if (j != exec->simcalls_.end()) - exec->simcalls_.erase(j); - } - simcall_execution_waitany_for__set__result(simcall, -1); - simcall->issuer_->simcall_answer(); - }); - } - - for (size_t i = 0; i < count; i++) { - /* associate this simcall to the the synchro */ - auto* exec = execs[i]; - exec->simcalls_.push_back(simcall); - - /* see if the synchro is already finished */ - if (exec->state_ != simgrid::kernel::activity::State::WAITING && - exec->state_ != simgrid::kernel::activity::State::RUNNING) { - exec->finish(); - break; - } - } -} - namespace simgrid { namespace kernel { namespace activity { 
ExecImpl::ExecImpl() { + piface_ = new s4u::Exec(this); actor::ActorImpl* self = actor::ActorImpl::self(); if (self) { actor_ = self; @@ -78,7 +46,7 @@ ExecImpl& ExecImpl::set_hosts(const std::vector& hosts) ExecImpl& ExecImpl::set_timeout(double timeout) { - if (timeout > 0 && not MC_is_active() && not MC_record_replay_is_active()) { + if (timeout >= 0 && not MC_is_active() && not MC_record_replay_is_active()) { timeout_detector_.reset(hosts_.front()->pimpl_cpu->sleep(timeout)); timeout_detector_->set_activity(this); } @@ -113,10 +81,14 @@ ExecImpl* ExecImpl::start() surf_action_->set_sharing_penalty(sharing_penalty_); surf_action_->set_category(get_tracing_category()); - if (bound_ > 0) + if (bound_ > 0) { surf_action_->set_bound(bound_); + surf_action_->set_user_bound(bound_); + } } else { - surf_action_ = surf_host_model->execute_parallel(hosts_, flops_amounts_.data(), bytes_amounts_.data(), -1); + // FIXME[donassolo]: verify if all hosts belong to the same netZone? + auto host_model = hosts_.front()->get_netpoint()->get_englobing_zone()->get_host_model(); + surf_action_ = host_model->execute_parallel(hosts_, flops_amounts_.data(), bytes_amounts_.data(), -1); } surf_action_->set_activity(this); } @@ -150,19 +122,27 @@ ExecImpl& ExecImpl::set_sharing_penalty(double sharing_penalty) void ExecImpl::post() { - if (hosts_.size() == 1 && not hosts_.front()->is_on()) { /* FIXME: handle resource failure for parallel tasks too */ - /* If the host running the synchro failed, notice it. This way, the asking + xbt_assert(surf_action_ != nullptr); + if (std::any_of(hosts_.begin(), hosts_.end(), [](const s4u::Host* host) { return not host->is_on(); })) { + /* If one of the hosts running the synchro failed, notice it. 
This way, the asking * process can be killed if it runs on that host itself */ state_ = State::FAILED; - } else if (surf_action_ && surf_action_->get_state() == resource::Action::State::FAILED) { - /* If the host running the synchro didn't fail, then the synchro was canceled */ + } else if (surf_action_->get_state() == resource::Action::State::FAILED) { + /* If none of the hosts running the synchro failed, then the synchro was canceled */ state_ = State::CANCELED; } else if (timeout_detector_ && timeout_detector_->get_state() == resource::Action::State::FINISHED) { - state_ = State::TIMEOUT; + if (surf_action_->get_remains() > 0.0) { + surf_action_->set_state(resource::Action::State::FAILED); + state_ = State::TIMEOUT; + } else { + state_ = State::DONE; + } } else { state_ = State::DONE; } + get_iface()->set_finish_time(surf_action_->get_finish_time()); + clean_action(); timeout_detector_.reset(); if (actor_) { @@ -184,17 +164,12 @@ void ExecImpl::finish() * simcall */ if (simcall->call_ == simix::Simcall::NONE) // FIXME: maybe a better way to handle this case - continue; // if process handling comm is killed - if (simcall->call_ == simix::Simcall::EXECUTION_WAITANY_FOR) { - simgrid::kernel::activity::ExecImpl** execs = simcall_execution_waitany_for__get__execs(simcall); - size_t count = simcall_execution_waitany_for__get__count(simcall); - - for (size_t i = 0; i < count; i++) { - // Remove the first occurrence of simcall: - auto* exec = execs[i]; - auto j = boost::range::find(exec->simcalls_, simcall); - if (j != exec->simcalls_.end()) - exec->simcalls_.erase(j); + continue; // if process handling comm is killed + if (auto* observer = dynamic_cast(simcall->observer_)) { // simcall is a wait_any? 
+ const auto* execs = observer->get_execs(); + + for (auto* exec : *execs) { + exec->unregister_simcall(simcall); if (simcall->timeout_cb_) { simcall->timeout_cb_->remove(); @@ -203,12 +178,11 @@ void ExecImpl::finish() } if (not MC_is_active() && not MC_record_replay_is_active()) { - ExecImpl** element = std::find(execs, execs + count, this); - int rank = (element != execs + count) ? element - execs : -1; - simcall_execution_waitany_for__set__result(simcall, rank); + auto element = std::find(execs->begin(), execs->end(), this); + int rank = element != execs->end() ? static_cast(std::distance(execs->begin(), element)) : -1; + observer->set_result(rank); } } - switch (state_) { case State::DONE: /* do nothing, synchro done */ @@ -256,9 +230,7 @@ ActivityImpl* ExecImpl::migrate(s4u::Host* to) new_action->set_remains(old_action->get_remains()); new_action->set_activity(this); new_action->set_sharing_penalty(old_action->get_sharing_penalty()); - - // FIXME: the user-defined bound seem to not be kept by LMM, that seem to overwrite it for the multi-core modeling. - // I hope that the user did not provide any. 
+ new_action->set_user_bound(old_action->get_user_bound()); old_action->set_activity(nullptr); old_action->cancel(); @@ -270,6 +242,33 @@ ActivityImpl* ExecImpl::migrate(s4u::Host* to) return this; } +void ExecImpl::wait_any_for(actor::ActorImpl* issuer, const std::vector* execs, double timeout) +{ + if (timeout < 0.0) { + issuer->simcall_.timeout_cb_ = nullptr; + } else { + issuer->simcall_.timeout_cb_ = simgrid::simix::Timer::set(SIMIX_get_clock() + timeout, [issuer, execs]() { + issuer->simcall_.timeout_cb_ = nullptr; + for (auto* exec : *execs) + exec->unregister_simcall(&issuer->simcall_); + // default result (-1) is set in mc::ExecutionWaitanySimcall + issuer->simcall_answer(); + }); + } + + for (auto* exec : *execs) { + /* associate this simcall to the synchro */ + exec->simcalls_.push_back(&issuer->simcall_); + + /* see if the synchro is already finished */ + if (exec->state_ != simgrid::kernel::activity::State::WAITING && + exec->state_ != simgrid::kernel::activity::State::RUNNING) { + exec->finish(); + break; + } + } +} + /************* * Callbacks * *************/