1 /* Copyright (c) 2009-2019. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "simgrid/Exception.hpp"
8 #include "src/kernel/activity/MailboxImpl.hpp"
9 #include "src/mc/mc_replay.hpp"
10 #include "src/simix/smx_private.hpp"
11 #include "src/surf/cpu_interface.hpp"
12 #include "src/surf/network_interface.hpp"
14 #include <boost/range/algorithm.hpp>
// Log category `simix_network`, declared under `simix`; presumably the default category
// picked up by the XBT_DEBUG calls in this file.
16 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix, "SIMIX network-related synchronization");
// Forward declaration: the helper is defined further down but is already needed by the
// waitany timeout callback and by SIMIX_comm_finish().
18 static void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall);
20 /******************************************************************************/
21 /* Communication synchros */
22 /******************************************************************************/
// Blocking-send handler: starts the send through simcall_HANDLER_comm_isend() (with a
// nullptr clean_fun) and then hands the resulting activity to simcall_HANDLER_comm_wait().
//
// NOTE(review): the embedded line numbering jumps (26 -> 29 and 30 -> 32), so the end of
// the parameter list (presumably the `timeout` used below), the opening brace and the last
// arguments of the isend call are not visible in this listing.
23 XBT_PRIVATE void simcall_HANDLER_comm_send(smx_simcall_t simcall, smx_actor_t src, smx_mailbox_t mbox, double task_size,
24 double rate, void* src_buff, size_t src_buff_size,
25 int (*match_fun)(void*, void*, simgrid::kernel::activity::CommImpl*),
26 void (*copy_data_fun)(smx_activity_t, void*, size_t), void* data,
29 smx_activity_t comm = simcall_HANDLER_comm_isend(simcall, src, mbox, task_size, rate,
30 src_buff, src_buff_size, match_fun, nullptr, copy_data_fun,
// Store 0 as the simcall's MC value; simcall_HANDLER_comm_wait() reads it back with
// SIMCALL_GET_MC_VALUE when the model checker or replay is active.
32 SIMCALL_SET_MC_VALUE(simcall, 0);
33 simcall_HANDLER_comm_wait(simcall, comm, timeout);
// Non-blocking send handler.
//
// Builds a SEND CommImpl describing the sender, asks the mailbox for a matching RECEIVE
// comm, fills in the sender-side fields of the resulting comm (actor, size, rate, buffer,
// user data, match/copy callbacks), records it in src_proc->comms and returns it.
// Returns nullptr instead of the comm when `detached` is non-zero.
//
// NOTE(review): the embedded line numbering has gaps (e.g. 55 -> 58, 65 -> 68, 76 -> 79,
// 99 -> 104): several branch guards, braces and possibly statements are not visible in
// this listing. Comments below describe only the visible statements.
36 XBT_PRIVATE smx_activity_t simcall_HANDLER_comm_isend(
37 smx_simcall_t /*simcall*/, smx_actor_t src_proc, smx_mailbox_t mbox, double task_size, double rate, void* src_buff,
38 size_t src_buff_size, int (*match_fun)(void*, void*, simgrid::kernel::activity::CommImpl*),
39 void (*clean_fun)(void*), // used to free the synchro in case of problem after a detached send
40 void (*copy_data_fun)(smx_activity_t, void*, size_t), // used to copy data if not default one
41 void* data, int detached)
43 XBT_DEBUG("send from mailbox %p", mbox);
45 /* Prepare a synchro describing us, so that it gets passed to the user-provided filter of other side */
46 simgrid::kernel::activity::CommImplPtr this_comm = simgrid::kernel::activity::CommImplPtr(
47 new simgrid::kernel::activity::CommImpl(simgrid::kernel::activity::CommImpl::Type::SEND));
49 /* Look for communication synchro matching our needs. We also provide a description of
50 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
52 * If it is not found then push our communication into the rendez-vous point */
53 simgrid::kernel::activity::CommImplPtr other_comm =
54 mbox->find_matching_comm(simgrid::kernel::activity::CommImpl::Type::RECEIVE, match_fun, data, this_comm,
55 /*done*/ false, /*remove_matching*/ true);
// Our own SEND comm takes over as `other_comm`; this_comm is moved-from afterwards.
// NOTE(review): presumably only when no matching receive was found -- the guard (line 57)
// is not visible here.
58 other_comm = std::move(this_comm);
// With a permanent receiver the comm is marked READY, bound to that receiver and stored
// in the mailbox's done_comm_queue_.
60 if (mbox->permanent_receiver_ != nullptr) {
61 //this mailbox is for small messages, which have to be sent right now
62 other_comm->state_ = SIMIX_READY;
63 other_comm->dst_actor_ = mbox->permanent_receiver_.get();
64 mbox->done_comm_queue_.push_back(other_comm);
65 XBT_DEBUG("pushing a message into the permanent receive list %p, comm %p", mbox, other_comm.get());
// NOTE(review): presumably the alternative branch (no permanent receiver) -- its `else`
// is not visible here.
68 mbox->push(other_comm);
// NOTE(review): presumably the "matching receive found" branch -- its guard is not visible.
71 XBT_DEBUG("Receive already pushed");
73 other_comm->state_ = SIMIX_READY;
74 other_comm->type = simgrid::kernel::activity::CommImpl::Type::READY;
// Track the comm in the sender's list of communications.
76 src_proc->comms.push_back(other_comm);
// NOTE(review): the two clean_fun assignments below are presumably the two arms of a test
// on `detached` (lines 78 and 81 are not visible).
79 other_comm->detached = true;
80 other_comm->clean_fun = clean_fun;
82 other_comm->clean_fun = nullptr;
85 /* Setup the communication synchro */
86 other_comm->src_actor_ = src_proc;
87 other_comm->task_size_ = task_size;
88 other_comm->rate_ = rate;
89 other_comm->src_buff_ = src_buff;
90 other_comm->src_buff_size_ = src_buff_size;
91 other_comm->src_data_ = data;
93 other_comm->match_fun = match_fun;
94 other_comm->copy_data_fun = copy_data_fun;
// Under model checking or record/replay the comm is flagged RUNNING and returned at once.
97 if (MC_is_active() || MC_record_replay_is_active()) {
98 other_comm->state_ = SIMIX_RUNNING;
99 return (detached ? nullptr : other_comm);
// NOTE(review): lines 100-103 are not visible; whatever starts the comm in the regular
// (non-MC) case would sit there -- confirm against the full file.
104 return (detached ? nullptr : other_comm);
// Blocking-receive handler: posts the receive through simcall_HANDLER_comm_irecv() and
// then waits on the resulting activity via simcall_HANDLER_comm_wait() with `timeout`.
// NOTE(review): the braces of the body (lines 112 and 117) are not visible in this listing.
107 XBT_PRIVATE void simcall_HANDLER_comm_recv(smx_simcall_t simcall, smx_actor_t receiver, smx_mailbox_t mbox,
108 void* dst_buff, size_t* dst_buff_size,
109 int (*match_fun)(void*, void*, simgrid::kernel::activity::CommImpl*),
110 void (*copy_data_fun)(smx_activity_t, void*, size_t), void* data,
111 double timeout, double rate)
113 smx_activity_t comm = simcall_HANDLER_comm_irecv(simcall, receiver, mbox, dst_buff, dst_buff_size, match_fun,
114 copy_data_fun, data, rate);
// Store 0 as the simcall's MC value; the wait handler reads it back under MC/replay.
115 SIMCALL_SET_MC_VALUE(simcall, 0);
116 simcall_HANDLER_comm_wait(simcall, comm, timeout);
// Non-blocking receive handler.
//
// Builds a RECEIVE CommImpl describing the receiver and tries to pair it with a SEND comm
// from the mailbox: first among the already-completed comms when the mailbox has a
// permanent receiver and a non-empty done_comm_queue_, otherwise among the pending ones.
// If nothing matches, the receiver's own comm is pushed into the mailbox. The receiver-side
// fields (actor, buffer, buffer size, user data, callbacks) are then filled in.
//
// NOTE(review): the embedded line numbering has gaps: the end of the parameter list
// (presumably the `rate` used below), several braces/`else` lines and everything after
// line 188 (including the return statement) are not visible in this listing.
119 XBT_PRIVATE smx_activity_t simcall_HANDLER_comm_irecv(smx_simcall_t /*simcall*/, smx_actor_t receiver,
120 smx_mailbox_t mbox, void* dst_buff, size_t* dst_buff_size,
121 simix_match_func_t match_fun,
122 void (*copy_data_fun)(smx_activity_t, void*, size_t), void* data,
125 simgrid::kernel::activity::CommImplPtr this_synchro = simgrid::kernel::activity::CommImplPtr(
126 new simgrid::kernel::activity::CommImpl(simgrid::kernel::activity::CommImpl::Type::RECEIVE));
127 XBT_DEBUG("recv from mbox %p. this_synchro=%p", mbox, this_synchro.get());
129 simgrid::kernel::activity::CommImplPtr other_comm;
130 //communication already done, get it inside the list of completed comms
131 if (mbox->permanent_receiver_ != nullptr && not mbox->done_comm_queue_.empty()) {
133 XBT_DEBUG("We have a comm that has probably already been received, trying to match it, to skip the communication");
134 //find a match in the list of already received comms
135 other_comm = mbox->find_matching_comm(simgrid::kernel::activity::CommImpl::Type::SEND, match_fun, data,
136 this_synchro, /*done*/ true,
137 /*remove_matching*/ true);
138 //if not found, assume the receiver came first, register it to the mailbox in the classical way
139 if (not other_comm) {
140 XBT_DEBUG("We have messages in the permanent receive list, but not the one we are looking for, pushing request into list");
141 other_comm = std::move(this_synchro);
142 mbox->push(other_comm);
// A matched comm that has a surf action with (almost) nothing left to transfer is marked
// DONE and unlinked from its mailbox.
// NOTE(review): presumably this is the `else` arm of the test above (line 143 not visible).
144 if (other_comm->surf_action_ && other_comm->remains() < 1e-12) {
145 XBT_DEBUG("comm %p has been already sent, and is finished, destroy it", other_comm.get());
146 other_comm->state_ = SIMIX_DONE;
147 other_comm->type = simgrid::kernel::activity::CommImpl::Type::DONE;
148 other_comm->mbox = nullptr;
// NOTE(review): what follows is presumably the branch taken when the completed-comm
// shortcut above does not apply (lines 149-151 are not visible).
152 /* Prepare a comm describing us, so that it gets passed to the user-provided filter of other side */
154 /* Look for communication activity matching our needs. We also provide a description of
155 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
157 * If it is not found then push our communication into the rendez-vous point */
158 other_comm = mbox->find_matching_comm(simgrid::kernel::activity::CommImpl::Type::SEND, match_fun, data,
159 this_synchro, /*done*/ false,
160 /*remove_matching*/ true);
162 if (other_comm == nullptr) {
163 XBT_DEBUG("Receive pushed first (%zu comm enqueued so far)", mbox->comm_queue_.size());
164 other_comm = std::move(this_synchro);
165 mbox->push(other_comm);
// NOTE(review): presumably the `else` arm (a pending send was found); line 166 not visible.
167 XBT_DEBUG("Match my %p with the existing %p", this_synchro.get(), other_comm.get());
169 other_comm->state_ = SIMIX_READY;
170 other_comm->type = simgrid::kernel::activity::CommImpl::Type::READY;
// Track the comm in the receiver's list of communications.
172 receiver->comms.push_back(other_comm);
175 /* Setup communication synchro */
176 other_comm->dst_actor_ = receiver;
177 other_comm->dst_buff_ = dst_buff;
178 other_comm->dst_buff_size_ = dst_buff_size;
179 other_comm->dst_data_ = data;
// Apply the receiver's rate only if it is set (> -1) and the comm has no rate yet
// (negative) or a higher one: the smaller of the two rates wins.
181 if (rate > -1.0 && (other_comm->rate_ < 0.0 || rate < other_comm->rate_))
182 other_comm->rate_ = rate;
184 other_comm->match_fun = match_fun;
185 other_comm->copy_data_fun = copy_data_fun;
// Under model checking or record/replay the comm is flagged RUNNING.
187 if (MC_is_active() || MC_record_replay_is_active()) {
188 other_comm->state_ = SIMIX_RUNNING;
// Handler of the comm_wait simcall.
//
// Registers `simcall` on `synchro` and records the synchro as what the issuer is waiting
// on. Under model checking / replay the outcome is decided from the simcall's MC value and
// the comm is finished immediately. Otherwise an already-terminated synchro is finished
// right away, and a still-pending one gets a CPU sleep action of duration `timeout`
// attached on the issuer's side.
//
// NOTE(review): the embedded line numbering has gaps (205, 207, 210-212, 217, 219-220,
// 222-224, 237, ...): the test on `idx`, the `else` keywords and the closing braces are
// not visible in this listing.
195 void simcall_HANDLER_comm_wait(smx_simcall_t simcall, smx_activity_t synchro, double timeout)
197 /* Associate this simcall to the wait synchro */
198 XBT_DEBUG("simcall_HANDLER_comm_wait, %p", synchro.get());
200 synchro->simcalls_.push_back(simcall);
201 simcall->issuer->waiting_synchro = synchro;
203 if (MC_is_active() || MC_record_replay_is_active()) {
204 int idx = SIMCALL_GET_MC_VALUE(simcall);
// NOTE(review): presumably guarded by a test on idx (line 205 not visible); the timeout
// handling below would then be the other arm.
206 synchro->state_ = SIMIX_DONE;
208 /* If we reached this point, the wait simcall must have a timeout */
209 /* Otherwise it shouldn't be enabled and executed by the MC */
213 simgrid::kernel::activity::CommImplPtr comm =
214 boost::static_pointer_cast<simgrid::kernel::activity::CommImpl>(synchro);
// The timeout is attributed to the side that issued the wait.
// NOTE(review): the `else` between the two assignments (line 217) is not visible.
215 if (comm->src_actor_ == simcall->issuer)
216 comm->state_ = SIMIX_SRC_TIMEOUT;
218 comm->state_ = SIMIX_DST_TIMEOUT;
221 SIMIX_comm_finish(synchro);
225 /* If the synchro has already finished, perform the error handling, */
226 /* otherwise set up a waiting timeout on the right side */
227 if (synchro->state_ != SIMIX_WAITING && synchro->state_ != SIMIX_RUNNING) {
228 SIMIX_comm_finish(synchro);
229 } else { /* we need a sleep action (even when there is no timeout) to be notified of host failures */
230 simgrid::kernel::resource::Action* sleep = simcall->issuer->get_host()->pimpl_cpu->sleep(timeout);
231 sleep->set_data(synchro.get());
233 simgrid::kernel::activity::CommImplPtr comm =
234 boost::static_pointer_cast<simgrid::kernel::activity::CommImpl>(synchro);
// Store the sleep action on the issuer's side of the comm.
// NOTE(review): the `else` between the two assignments (line 237) is not visible.
235 if (simcall->issuer == comm->src_actor_)
236 comm->src_timeout_ = sleep;
238 comm->dst_timeout_ = sleep;
// Handler of the comm_test simcall: computes whether the communication is over, stores
// that as the simcall result, and either finishes the comm (result true) or answers the
// simcall directly.
//
// NOTE(review): the embedded line numbering has gaps: the declaration of `res`
// (lines 246-248) and several braces/guards (251, 253, 261, ...) are not visible here.
242 void simcall_HANDLER_comm_test(smx_simcall_t simcall, smx_activity_t synchro)
244 simgrid::kernel::activity::CommImplPtr comm =
245 boost::static_pointer_cast<simgrid::kernel::activity::CommImpl>(synchro);
// Under model checking / replay, the comm counts as testable once both peers are known.
249 if (MC_is_active() || MC_record_replay_is_active()){
250 res = comm->src_actor_ && comm->dst_actor_;
// NOTE(review): presumably guarded by `res` (line 251 not visible).
252 synchro->state_ = SIMIX_DONE;
// Regular case (presumably the `else` arm): over means neither WAITING nor RUNNING.
254 res = synchro->state_ != SIMIX_WAITING && synchro->state_ != SIMIX_RUNNING;
257 simcall_comm_test__set__result(simcall, res);
258 if (simcall_comm_test__get__result(simcall)) {
// Register the simcall on the synchro so that SIMIX_comm_finish() processes it.
259 synchro->simcalls_.push_back(simcall);
260 SIMIX_comm_finish(synchro);
// NOTE(review): presumably the `else` arm of the result test (line 261 not visible).
262 SIMIX_simcall_answer(simcall);
// Handler of the comm_testany simcall: looks for a terminated comm among `comms`; if one
// is found its index becomes the simcall result and the comm is finished, otherwise the
// result stays -1 and the simcall is answered directly.
//
// NOTE(review): the embedded line numbering has gaps: the rest of the parameter list
// (presumably the `count` used by the loop), the test on `idx`, and the statements that
// leave the function/loop after a hit (lines 283-286, 293-295) are not visible here.
266 void simcall_HANDLER_comm_testany(smx_simcall_t simcall, simgrid::kernel::activity::ActivityImplPtr comms[],
269 // The default result is -1 -- this means, "nothing is ready".
270 // It can be changed below, but only if something matches.
271 simcall_comm_testany__set__result(simcall, -1);
// Under model checking / replay the index to report is taken from the simcall's MC value.
273 if (MC_is_active() || MC_record_replay_is_active()){
274 int idx = SIMCALL_GET_MC_VALUE(simcall);
// NOTE(review): presumably the "nothing ready" arm of a test on idx (line 275 not visible).
276 SIMIX_simcall_answer(simcall);
// NOTE(review): presumably the other arm: report comms[idx] as done and finish it.
278 simgrid::kernel::activity::ActivityImplPtr synchro = comms[idx];
279 simcall_comm_testany__set__result(simcall, idx);
280 synchro->simcalls_.push_back(simcall);
281 synchro->state_ = SIMIX_DONE;
282 SIMIX_comm_finish(synchro);
// Regular case: scan the comms in order for one that is neither WAITING nor RUNNING.
287 for (std::size_t i = 0; i != count; ++i) {
288 simgrid::kernel::activity::ActivityImplPtr synchro = comms[i];
289 if (synchro->state_ != SIMIX_WAITING && synchro->state_ != SIMIX_RUNNING) {
290 simcall_comm_testany__set__result(simcall, i);
291 synchro->simcalls_.push_back(simcall);
292 SIMIX_comm_finish(synchro);
// Nothing matched: answer with the default result set above.
296 SIMIX_simcall_answer(simcall);
// Handler of the comm_waitany simcall: registers `simcall` on every synchro of the dynar
// and finishes the first one found already terminated. A timer can be armed that, when it
// fires, unregisters the simcall from all synchros and answers it with result -1.
//
// NOTE(review): the embedded line numbering has gaps: the guards on `timeout` (lines 302,
// 313, 315), the declaration of `cursor`, and the statements leaving the function/loop
// (lines 310-312, 333-335) are not visible in this listing.
299 void simcall_HANDLER_comm_waitany(smx_simcall_t simcall, xbt_dynar_t synchros, double timeout)
301 if (MC_is_active() || MC_record_replay_is_active()){
// NOTE(review): presumably only reached when a timeout was requested (guard not visible).
303 xbt_die("Timeout not implemented for waitany in the model-checker");
// Under model checking / replay the index of the comm to complete comes from the MC value.
304 int idx = SIMCALL_GET_MC_VALUE(simcall);
305 smx_activity_t synchro = xbt_dynar_get_as(synchros, idx, smx_activity_t);
306 synchro->simcalls_.push_back(simcall);
307 simcall_comm_waitany__set__result(simcall, idx);
308 synchro->state_ = SIMIX_DONE;
309 SIMIX_comm_finish(synchro);
// NOTE(review): the two timer assignments below are presumably the two arms of a test on
// `timeout` (no timer vs. timer armed at now + timeout).
314 simcall->timer = NULL;
316 simcall->timer = SIMIX_timer_set(SIMIX_get_clock() + timeout, [simcall]() {
317 SIMIX_waitany_remove_simcall_from_actions(simcall);
318 simcall_comm_waitany__set__result(simcall, -1);
319 SIMIX_simcall_answer(simcall);
324 simgrid::kernel::activity::ActivityImpl* ptr;
325 xbt_dynar_foreach(synchros, cursor, ptr){
326 smx_activity_t synchro = simgrid::kernel::activity::ActivityImplPtr(ptr);
327 /* associate this simcall to the synchro */
328 synchro->simcalls_.push_back(simcall);
330 /* see if the synchro is already finished */
331 if (synchro->state_ != SIMIX_WAITING && synchro->state_ != SIMIX_RUNNING) {
332 SIMIX_comm_finish(synchro);
// Unregisters a waitany simcall from every synchro it was attached to: for each synchro
// in the simcall's comms dynar, the first entry of simcalls_ equal to `simcall` (if any)
// is erased.
// NOTE(review): the braces of the body are not visible in this listing.
338 void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall)
340 unsigned int cursor = 0;
341 xbt_dynar_t synchros = simcall_comm_waitany__get__comms(simcall);
343 simgrid::kernel::activity::ActivityImpl* ptr;
344 xbt_dynar_foreach(synchros, cursor, ptr){
345 smx_activity_t synchro = simgrid::kernel::activity::ActivityImplPtr(ptr);
347 // Remove the first occurrence of simcall:
348 auto i = boost::range::find(synchro->simcalls_, simcall);
349 if (i != synchro->simcalls_.end())
350 synchro->simcalls_.erase(i);
// SIMIX_comm_finish: drains synchro->simcalls_. For every pending simcall it
//  - skips simcalls whose call is SIMCALL_NONE;
//  - for a waitany simcall, unregisters it from the other synchros, removes its timer and
//    (outside MC/replay) stores the index of this synchro as the result;
//  - turns the comm's final state into an exception on the issuer where applicable;
//  - for waitany/testany, looks up the rank of the failing synchro and re-creates the
//    exception;
//  - clears the issuer's waiting_synchro and removes the synchro from the comm lists of
//    the actors involved;
//  - answers the simcall if the issuer's host is on.
//
// NOTE(review): this listing has lost many original lines (the embedded numbering has
// gaps). Not visible: the delimiters of the doc comment just below, the end of the comment
// opened on line 367, the guard before line 386, several `case`/`default` labels and
// `break`s of the switch, the `else` keywords, the declaration of `rank`, the `try`
// keyword and the first statement of each catch handler. Comments describe only what is
// visible.
355 * @brief Answers the SIMIX simcalls associated to a communication synchro.
356 * @param synchro a finished communication synchro
358 void SIMIX_comm_finish(smx_activity_t synchro)
360 simgrid::kernel::activity::CommImplPtr comm =
361 boost::static_pointer_cast<simgrid::kernel::activity::CommImpl>(synchro);
363 while (not synchro->simcalls_.empty()) {
364 smx_simcall_t simcall = synchro->simcalls_.front();
365 synchro->simcalls_.pop_front();
367 /* If a waitany simcall is waiting for this synchro to finish, then remove it from the other synchros in the waitany
368 * list. Afterwards, get the position of the actual synchro in the waitany dynar and return it as the result of the
371 if (simcall->call == SIMCALL_NONE) //FIXME: maybe a better way to handle this case
372 continue; // if process handling comm is killed
373 if (simcall->call == SIMCALL_COMM_WAITANY) {
374 SIMIX_waitany_remove_simcall_from_actions(simcall);
375 if (simcall->timer) {
376 SIMIX_timer_remove(simcall->timer);
377 simcall->timer = nullptr;
379 if (not MC_is_active() && not MC_record_replay_is_active())
380 simcall_comm_waitany__set__result(simcall,
381 xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro));
384 /* If the synchro is still in a rendez-vous point then remove from it */
386 comm->mbox->remove(comm);
388 XBT_DEBUG("SIMIX_comm_finish: synchro state = %d", (int)synchro->state_);
390 /* Check out for errors */
// Issuer's host is off: flag the issuer's context with iwannadie and record a
// HostFailureException on the issuer.
392 if (not simcall->issuer->get_host()->is_on()) {
393 simcall->issuer->context_->iwannadie = true;
394 simcall->issuer->exception_ =
395 std::make_exception_ptr(simgrid::HostFailureException(XBT_THROW_POINT, "Host failed"));
// NOTE(review): presumably the `else` arm (host is on): dispatch on the comm's final state.
397 switch (comm->state_) {
// NOTE(review): the case label for this successful-completion message is not visible.
400 XBT_DEBUG("Communication %p complete!", synchro.get());
404 case SIMIX_SRC_TIMEOUT:
405 simcall->issuer->exception_ = std::make_exception_ptr(
406 simgrid::TimeoutError(XBT_THROW_POINT, "Communication timeouted because of the sender"));
409 case SIMIX_DST_TIMEOUT:
410 simcall->issuer->exception_ = std::make_exception_ptr(
411 simgrid::TimeoutError(XBT_THROW_POINT, "Communication timeouted because of the receiver"));
// Host failure on one side: flag iwannadie when the issuer is that side, record a
// NetworkFailureException otherwise.
// NOTE(review): the `else` between the two statements is not visible.
414 case SIMIX_SRC_HOST_FAILURE:
415 if (simcall->issuer == comm->src_actor_)
416 simcall->issuer->context_->iwannadie = true;
418 simcall->issuer->exception_ =
419 std::make_exception_ptr(simgrid::NetworkFailureException(XBT_THROW_POINT, "Remote peer failed"));
422 case SIMIX_DST_HOST_FAILURE:
423 if (simcall->issuer == comm->dst_actor_)
424 simcall->issuer->context_->iwannadie = true;
426 simcall->issuer->exception_ =
427 std::make_exception_ptr(simgrid::NetworkFailureException(XBT_THROW_POINT, "Remote peer failed"));
430 case SIMIX_LINK_FAILURE:
// NOTE(review): a null actor makes the host name argument nullptr for a %s conversion;
// also, the end of the format string (line 432) is not visible.
431 XBT_DEBUG("Link failure in synchro %p between '%s' and '%s': posting an exception to the issuer: %s (%p) "
433 synchro.get(), comm->src_actor_ ? comm->src_actor_->get_host()->get_cname() : nullptr,
434 comm->dst_actor_ ? comm->dst_actor_->get_host()->get_cname() : nullptr,
435 simcall->issuer->get_cname(), simcall->issuer, comm->detached);
436 if (comm->src_actor_ == simcall->issuer) {
437 XBT_DEBUG("I'm source");
438 } else if (comm->dst_actor_ == simcall->issuer) {
439 XBT_DEBUG("I'm dest");
441 XBT_DEBUG("I'm neither source nor dest");
// Unlike the other cases, this one goes through issuer->throw_exception() instead of
// assigning exception_ directly.
443 simcall->issuer->throw_exception(
444 std::make_exception_ptr(simgrid::NetworkFailureException(XBT_THROW_POINT, "Link failure")));
// NOTE(review): presumably the "canceled" case (its label, line 447, is not visible): the
// message names the other side as the one that canceled.
448 if (simcall->issuer == comm->dst_actor_)
449 simcall->issuer->exception_ = std::make_exception_ptr(
450 simgrid::CancelException(XBT_THROW_POINT, "Communication canceled by the sender"));
452 simcall->issuer->exception_ = std::make_exception_ptr(
453 simgrid::CancelException(XBT_THROW_POINT, "Communication canceled by the receiver"));
// NOTE(review): presumably the `default` label (not visible): any other state is fatal.
457 xbt_die("Unexpected synchro state in SIMIX_comm_finish: %d", (int)synchro->state_);
461 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
462 if (simcall->issuer->exception_ &&
463 (simcall->call == SIMCALL_COMM_WAITANY || simcall->call == SIMCALL_COMM_TESTANY)) {
464 // First retrieve the rank of our failing synchro
466 if (simcall->call == SIMCALL_COMM_WAITANY) {
467 rank = xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro);
468 } else if (simcall->call == SIMCALL_COMM_TESTANY) {
// testany keeps its comms in a plain array: linear search, rank is the element's offset.
470 auto* comms = simcall_comm_testany__get__comms(simcall);
471 auto count = simcall_comm_testany__get__count(simcall);
472 auto element = std::find(comms, comms + count, synchro);
// NOTE(review): the handling of "not found" (lines 474-475) is not visible.
473 if (element == comms + count)
476 rank = element - comms;
479 // In order to modify the exception we have to rethrow it:
481 std::rethrow_exception(simcall->issuer->exception_);
// Each handler stores a copy of the caught exception back on the issuer.
// NOTE(review): the first statement of each handler (lines 483, 486, 489) is not visible;
// presumably that is where `rank` gets recorded in the exception -- confirm.
482 } catch (simgrid::TimeoutError& e) {
484 simcall->issuer->exception_ = std::make_exception_ptr(e);
485 } catch (simgrid::NetworkFailureException& e) {
487 simcall->issuer->exception_ = std::make_exception_ptr(e);
488 } catch (simgrid::CancelException& e) {
490 simcall->issuer->exception_ = std::make_exception_ptr(e);
// The issuer no longer waits on this synchro; drop the synchro from its comm list.
494 simcall->issuer->waiting_synchro = nullptr;
495 simcall->issuer->comms.remove(synchro);
// Also drop it from the peer's list (when the peer exists).
497 if (simcall->issuer == comm->src_actor_) {
498 if (comm->dst_actor_)
499 comm->dst_actor_->comms.remove(synchro);
500 } else if (simcall->issuer == comm->dst_actor_) {
501 if (comm->src_actor_)
502 comm->src_actor_->comms.remove(synchro);
// NOTE(review): presumably the final `else` (issuer is neither side); both actors are
// dereferenced here without the null checks used in the branches above.
505 comm->dst_actor_->comms.remove(synchro);
506 comm->src_actor_->comms.remove(synchro);
510 if (simcall->issuer->get_host()->is_on())
511 SIMIX_simcall_answer(simcall);
// NOTE(review): presumably the `else` arm (line 512 not visible): the host is off.
513 simcall->issuer->context_->iwannadie = true;
// Copy callback: copies `buff_size` bytes from `buff` into the comm's destination buffer
// with memcpy; for a detached comm it also resets src_buff_ to nullptr.
// NOTE(review): line 525 (between the `detached` test and the reset of src_buff_) and the
// closing braces are not visible in this listing, so what else happens in the detached
// case cannot be told from here.
517 void SIMIX_comm_copy_buffer_callback(smx_activity_t synchro, void* buff, size_t buff_size)
519 simgrid::kernel::activity::CommImplPtr comm =
520 boost::static_pointer_cast<simgrid::kernel::activity::CommImpl>(synchro);
522 XBT_DEBUG("Copy the data over");
523 memcpy(comm->dst_buff_, buff, buff_size);
524 if (comm->detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
526 comm->src_buff_ = nullptr;