X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/eb08fb3c624342be6f198555c0ebb0d847c65d65..19aa33ec87e87e1ed8f4742cc4dd60aedb837892:/src/kernel/activity/ExecImpl.cpp diff --git a/src/kernel/activity/ExecImpl.cpp b/src/kernel/activity/ExecImpl.cpp index 76b136ff85..0f925c9600 100644 --- a/src/kernel/activity/ExecImpl.cpp +++ b/src/kernel/activity/ExecImpl.cpp @@ -1,11 +1,13 @@ -/* Copyright (c) 2007-2019. The SimGrid Team. All rights reserved. */ +/* Copyright (c) 2007-2021. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ #include "src/kernel/activity/ExecImpl.hpp" #include "simgrid/Exception.hpp" +#include "simgrid/kernel/routing/NetPoint.hpp" #include "simgrid/modelchecker.h" +#include "simgrid/s4u/Exec.hpp" #include "src/mc/mc_replay.hpp" #include "src/surf/HostImpl.hpp" #include "src/surf/cpu_interface.hpp" @@ -17,51 +19,6 @@ XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_process); -void simcall_HANDLER_execution_wait(smx_simcall_t simcall, simgrid::kernel::activity::ExecImpl* synchro, double timeout) -{ - XBT_DEBUG("Wait for execution of synchro %p, state %d", synchro, (int)synchro->state_); - xbt_assert(std::isfinite(timeout), "timeout is not finite!"); - - /* Associate this simcall to the synchro */ - synchro->register_simcall(simcall); - - /* set surf's synchro */ - if (MC_is_active() || MC_record_replay_is_active()) { - int idx = SIMCALL_GET_MC_VALUE(*simcall); - if (idx == 0) { - synchro->state_ = simgrid::kernel::activity::State::DONE; - } else { - /* If we reached this point, the wait simcall must have a timeout */ - /* Otherwise it shouldn't be enabled and executed by the MC */ - if (timeout < 0.0) - THROW_IMPOSSIBLE; - synchro->state_ = simgrid::kernel::activity::State::TIMEOUT; - } - synchro->finish(); - return; - } - - /* If the synchro is already finished then perform the error handling */ - if (synchro->state_ != simgrid::kernel::activity::State::RUNNING) { - synchro->finish(); - } else { /* we need a sleep action (even when there is no timeout) to be notified of host failures */ - synchro->set_timeout(timeout); - } -} - -void simcall_HANDLER_execution_test(smx_simcall_t simcall, simgrid::kernel::activity::ExecImpl* synchro) -{ - bool res = (synchro->state_ != simgrid::kernel::activity::State::WAITING && - synchro->state_ != simgrid::kernel::activity::State::RUNNING); - if (res) { - synchro->simcalls_.push_back(simcall); - synchro->finish(); - } else { - simcall->issuer_->simcall_answer(); - } - simcall_execution_test__set__result(simcall, res); -} - void simcall_HANDLER_execution_waitany_for(smx_simcall_t simcall, simgrid::kernel::activity::ExecImpl* execs[], size_t count, double timeout) { @@ -69,6 +26,7 @@ void simcall_HANDLER_execution_waitany_for(smx_simcall_t simcall, simgrid::kerne simcall->timeout_cb_ = nullptr; } else { simcall->timeout_cb_ = simgrid::simix::Timer::set(SIMIX_get_clock() + timeout, [simcall, execs, count]() { + simcall->timeout_cb_ = nullptr; for (size_t i = 0; i < count; i++) { // Remove the first occurrence of simcall: auto* exec = execs[i]; @@ -99,18 +57,19 @@ namespace simgrid { namespace kernel { namespace activity { -ExecImpl::~ExecImpl() +ExecImpl::ExecImpl() { - if (timeout_detector_) - timeout_detector_->unref(); - XBT_DEBUG("Destroy exec %p", this); + piface_ = new s4u::Exec(this); + actor::ActorImpl* self = actor::ActorImpl::self(); + if (self) { + actor_ = self; + self->activities_.emplace_back(this); + } } ExecImpl& ExecImpl::set_host(s4u::Host* host) { - if (not hosts_.empty()) - hosts_.clear(); - hosts_.push_back(host); + hosts_.assign(1, host); return *this; } @@ -122,8 +81,8 @@ ExecImpl& ExecImpl::set_hosts(const std::vector& hosts) ExecImpl& ExecImpl::set_timeout(double timeout) { - if (timeout > 0 && not MC_is_active() && not MC_record_replay_is_active()) { - timeout_detector_ = hosts_.front()->pimpl_cpu->sleep(timeout); + if (timeout >= 0 && not MC_is_active() && not MC_record_replay_is_active()) { + timeout_detector_.reset(hosts_.front()->pimpl_cpu->sleep(timeout)); timeout_detector_->set_activity(this); } return *this; @@ -131,9 +90,7 @@ ExecImpl& ExecImpl::set_timeout(double timeout) ExecImpl& ExecImpl::set_flops_amount(double flops_amount) { - if (not flops_amounts_.empty()) - flops_amounts_.clear(); - flops_amounts_.push_back(flops_amount); + flops_amounts_.assign(1, flops_amount); return *this; } @@ -159,10 +116,14 @@ ExecImpl* ExecImpl::start() surf_action_->set_sharing_penalty(sharing_penalty_); surf_action_->set_category(get_tracing_category()); - if (bound_ > 0) + if (bound_ > 0) { surf_action_->set_bound(bound_); + surf_action_->set_user_bound(bound_); + } } else { - surf_action_ = surf_host_model->execute_parallel(hosts_, flops_amounts_.data(), bytes_amounts_.data(), -1); + // FIXME[donassolo]: verify if all hosts belongs to the same netZone? + auto host_model = hosts_.front()->get_netpoint()->get_englobing_zone()->get_host_model(); + surf_action_ = host_model->execute_parallel(hosts_, flops_amounts_.data(), bytes_amounts_.data(), -1); } surf_action_->set_activity(this); } @@ -196,26 +157,33 @@ ExecImpl& ExecImpl::set_sharing_penalty(double sharing_penalty) void ExecImpl::post() { - if (hosts_.size() == 1 && not hosts_.front()->is_on()) { /* FIXME: handle resource failure for parallel tasks too */ - /* If the host running the synchro failed, notice it. This way, the asking + xbt_assert(surf_action_ != nullptr); + if (std::any_of(hosts_.begin(), hosts_.end(), [](const s4u::Host* host) { return not host->is_on(); })) { + /* If one of the hosts running the synchro failed, notice it. This way, the asking * process can be killed if it runs on that host itself */ state_ = State::FAILED; - } else if (surf_action_ && surf_action_->get_state() == resource::Action::State::FAILED) { - /* If the host running the synchro didn't fail, then the synchro was canceled */ + } else if (surf_action_->get_state() == resource::Action::State::FAILED) { + /* If all the hosts are running the synchro didn't fail, then the synchro was canceled */ state_ = State::CANCELED; } else if (timeout_detector_ && timeout_detector_->get_state() == resource::Action::State::FINISHED) { - state_ = State::TIMEOUT; + if (surf_action_->get_remains() > 0.0) { + surf_action_->set_state(resource::Action::State::FAILED); + state_ = State::TIMEOUT; + } else { + state_ = State::DONE; + } } else { state_ = State::DONE; } - clean_action(); + get_iface()->set_finish_time(surf_action_->get_finish_time()); - if (timeout_detector_) { - timeout_detector_->unref(); - timeout_detector_ = nullptr; + clean_action(); + timeout_detector_.reset(); + if (actor_) { + actor_->activities_.remove(this); + actor_ = nullptr; } - /* Answer all simcalls associated with the synchro */ finish(); } @@ -230,9 +198,9 @@ void ExecImpl::finish() * list. Afterwards, get the position of the actual synchro in the waitany list and return it as the result of the * simcall */ - if (simcall->call_ == SIMCALL_NONE) // FIXME: maybe a better way to handle this case - continue; // if process handling comm is killed - if (simcall->call_ == SIMCALL_EXECUTION_WAITANY_FOR) { + if (simcall->call_ == simix::Simcall::NONE) // FIXME: maybe a better way to handle this case + continue; // if process handling comm is killed + if (simcall->call_ == simix::Simcall::EXECUTION_WAITANY_FOR) { simgrid::kernel::activity::ExecImpl** execs = simcall_execution_waitany_for__get__execs(simcall); size_t count = simcall_execution_waitany_for__get__count(simcall); @@ -255,9 +223,7 @@ void ExecImpl::finish() simcall_execution_waitany_for__set__result(simcall, rank); } } - switch (state_) { - case State::DONE: /* do nothing, synchro done */ XBT_DEBUG("ExecImpl::finish(): execution successful"); @@ -265,7 +231,7 @@ void ExecImpl::finish() case State::FAILED: XBT_DEBUG("ExecImpl::finish(): host '%s' failed", simcall->issuer_->get_host()->get_cname()); - simcall->issuer_->context_->iwannadie = true; + simcall->issuer_->context_->set_wannadie(); if (simcall->issuer_->get_host()->is_on()) simcall->issuer_->exception_ = std::make_exception_ptr(simgrid::HostFailureException(XBT_THROW_POINT, "Host failed")); @@ -287,12 +253,12 @@ void ExecImpl::finish() xbt_die("Internal error in ExecImpl::finish(): unexpected synchro state %d", static_cast(state_)); } - simcall->issuer_->waiting_synchro = nullptr; + simcall->issuer_->waiting_synchro_ = nullptr; /* Fail the process if the host is down */ if (simcall->issuer_->get_host()->is_on()) simcall->issuer_->simcall_answer(); else - simcall->issuer_->context_->iwannadie = true; + simcall->issuer_->context_->set_wannadie(); } } @@ -304,9 +270,7 @@ ActivityImpl* ExecImpl::migrate(s4u::Host* to) new_action->set_remains(old_action->get_remains()); new_action->set_activity(this); new_action->set_sharing_penalty(old_action->get_sharing_penalty()); - - // FIXME: the user-defined bound seem to not be kept by LMM, that seem to overwrite it for the multi-core modeling. - // I hope that the user did not provide any. + new_action->set_user_bound(old_action->get_user_bound()); old_action->set_activity(nullptr); old_action->cancel();