Algorithmique Numérique Distribuée
Public GIT Repository
projects
/
simgrid.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
| inline |
side by side
Further simplify the way host failures are detected
[simgrid.git] / src / kernel / activity / CommImpl.cpp
diff --git a/src/kernel/activity/CommImpl.cpp b/src/kernel/activity/CommImpl.cpp
index 27055bc..4c1deb7 100644 (file)
--- a/src/kernel/activity/CommImpl.cpp
+++ b/src/kernel/activity/CommImpl.cpp
@@ -96,7 +96,7 @@ CommImpl::~CommImpl()
{
XBT_DEBUG("Really free communication %p in state %s (detached = %d)", this, get_state_str(), detached_);
- cleanup_surf();
+ clean_action();
if (detached_ && get_state() != State::DONE) {
/* the communication has failed and was detached:
@@ -121,25 +121,25 @@ CommImpl* CommImpl::start()
/* Getting the network_model from the origin host
* Valid while we have a single network model, otherwise we would need to change this function to first get the
- * routes and later create the respective surf actions */
+ * routes and later create the respective model actions */
auto net_model = from_->get_netpoint()->get_englobing_zone()->get_network_model();
- surf_action_ = net_model->communicate(from_, to_, size_, rate_, false);
- surf_action_->set_activity(this);
- surf_action_->set_category(get_tracing_category());
- set_start_time(surf_action_->get_start_time());
+ model_action_ = net_model->communicate(from_, to_, size_, rate_, false);
+ model_action_->set_activity(this);
+ model_action_->set_category(get_tracing_category());
+ set_start_time(model_action_->get_start_time());
set_state(State::RUNNING);
on_start(*this);
- XBT_DEBUG("Starting communication %p from '%s' to '%s' (surf_action: %p; state: %s)", this, from_->get_cname(),
- to_->get_cname(), surf_action_, get_state_str());
+ XBT_DEBUG("Starting communication %p from '%s' to '%s' (model action: %p; state: %s)", this, from_->get_cname(),
+ to_->get_cname(), model_action_, get_state_str());
/* If a link is failed, detect it immediately */
- if (surf_action_->get_state() == resource::Action::State::FAILED) {
+ if (model_action_->get_state() == resource::Action::State::FAILED) {
XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure", from_->get_cname(),
to_->get_cname());
set_state(State::LINK_FAILURE);
- post();
+ finish();
} else if ((src_actor_ != nullptr && src_actor_->is_suspended()) ||
(dst_actor_ != nullptr && dst_actor_->is_suspended())) {
@@ -154,7 +154,7 @@ CommImpl* CommImpl::start()
"communication",
dst_actor_->get_cname(), dst_actor_->get_host()->get_cname());
- surf_action_->suspend();
+ model_action_->suspend();
}
}
@@ -282,7 +282,7 @@ ActivityImplPtr CommImpl::irecv(actor::CommIrecvSimcall* observer)
// find a match in the list of already received comms
other_comm = mbox->find_matching_comm(CommImplType::SEND, observer->get_match_fun(), observer->get_payload(),
this_synchro, /*done*/ true, /*remove_matching*/ true);
- if (other_comm && other_comm->surf_action_ && other_comm->get_remaining() < 1e-12) {
+ if (other_comm && other_comm->model_action_ && other_comm->get_remaining() < 1e-12) {
XBT_DEBUG("comm %p has been already sent, and is finished, destroy it", other_comm.get());
other_comm->set_state(State::DONE);
other_comm->set_mailbox(nullptr);
@@ -370,16 +370,16 @@ void CommImpl::wait_any_for(actor::ActorImpl* issuer, const std::vector<CommImpl
void CommImpl::suspend()
{
/* FIXME: shall we suspend also the timeout synchro? */
- if (surf_action_)
- surf_action_->suspend();
+ if (model_action_)
+ model_action_->suspend();
/* if not created yet, the action will be suspended on creation, in CommImpl::start() */
}
void CommImpl::resume()
{
/*FIXME: check what happen with the timeouts */
- if (surf_action_)
- surf_action_->resume();
+ if (model_action_)
+ model_action_->resume();
/* in the other case, the synchro were not really suspended yet, see CommImpl::suspend() and CommImpl::start() */
}
@@ -391,58 +391,12 @@ void CommImpl::cancel()
mbox_->remove(this);
set_state(State::CANCELED);
}
- } else if (not MC_is_active() /* when running the MC there are no surf actions */
+ } else if (not MC_is_active() /* when running the MC there are no model actions */
&& not MC_record_replay_is_active() && (get_state() == State::READY || get_state() == State::RUNNING)) {
- surf_action_->cancel();
+ model_action_->cancel();
}
}
-/** @brief This is part of the cleanup process, probably an internal command */
-void CommImpl::cleanup_surf()
-{
- clean_action();
-
- if (src_timeout_) {
- src_timeout_->unref();
- src_timeout_ = nullptr;
- }
-
- if (dst_timeout_) {
- dst_timeout_->unref();
- dst_timeout_ = nullptr;
- }
-}
-
-void CommImpl::post()
-{
- on_completion(*this);
-
- /* Update synchro state */
- if (src_timeout_ && src_timeout_->get_state() == resource::Action::State::FINISHED)
- set_state(State::SRC_TIMEOUT);
- else if (dst_timeout_ && dst_timeout_->get_state() == resource::Action::State::FINISHED)
- set_state(State::DST_TIMEOUT);
- else if ((from_ && not from_->is_on()) || (src_timeout_ && src_timeout_->get_state() == resource::Action::State::FAILED))
- set_state(State::SRC_HOST_FAILURE);
- else if ((to_ && not to_->is_on()) || (dst_timeout_ && dst_timeout_->get_state() == resource::Action::State::FAILED))
- set_state(State::DST_HOST_FAILURE);
- else if (surf_action_ && surf_action_->get_state() == resource::Action::State::FAILED) {
- set_state(State::LINK_FAILURE);
- } else if (get_state() == State::RUNNING) {
- xbt_assert(from_ && from_->is_on());
- xbt_assert(to_ && to_->is_on());
- set_state(State::DONE);
- }
-
- XBT_DEBUG("CommImpl::post(): comm %p, state %s, src_proc %p, dst_proc %p, detached: %d", this, get_state_str(),
- src_actor_.get(), dst_actor_.get(), detached_);
-
- /* destroy the surf actions associated with the Simix communication */
- cleanup_surf();
-
- /* Answer all simcalls associated with the synchro */
- finish();
-}
void CommImpl::set_exception(actor::ActorImpl* issuer)
{
switch (get_state()) {
@@ -510,7 +464,34 @@ void CommImpl::set_exception(actor::ActorImpl* issuer)
void CommImpl::finish()
{
- XBT_DEBUG("CommImpl::finish() in state %s", get_state_str());
+ XBT_DEBUG("CommImpl::finish() comm %p, state %s, src_proc %p, dst_proc %p, detached: %d", this, get_state_str(),
+ src_actor_.get(), dst_actor_.get(), detached_);
+
+ on_completion(*this);
+
+ /* Update synchro state */
+ if (src_timeout_ && src_timeout_->get_state() == resource::Action::State::FINISHED)
+ set_state(State::SRC_TIMEOUT);
+ else if (dst_timeout_ && dst_timeout_->get_state() == resource::Action::State::FINISHED)
+ set_state(State::DST_TIMEOUT);
+ else if ((from_ && not from_->is_on()) ||
+ (src_timeout_ && src_timeout_->get_state() == resource::Action::State::FAILED))
+ set_state(State::SRC_HOST_FAILURE);
+ else if ((to_ && not to_->is_on()) || (dst_timeout_ && dst_timeout_->get_state() == resource::Action::State::FAILED))
+ set_state(State::DST_HOST_FAILURE);
+ else if (model_action_ && model_action_->get_state() == resource::Action::State::FAILED) {
+ set_state(State::LINK_FAILURE);
+ } else if (get_state() == State::RUNNING) {
+ xbt_assert(from_ && from_->is_on());
+ xbt_assert(to_ && to_->is_on());
+ set_state(State::DONE);
+ }
+ src_timeout_ = nullptr;
+ dst_timeout_ = nullptr;
+
+ /* destroy the model actions associated with the communication activity */
+ clean_action();
+
/* If the synchro is still in a rendez-vous point then remove from it */
if (mbox_)
mbox_->remove(this);