1 /* Copyright (c) 2009-2016. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 #include "src/surf/surf_interface.hpp"
7 #include "src/simix/smx_private.h"
10 #include "src/mc/mc_replay.h"
12 #include "simgrid/s4u/mailbox.hpp"
14 #include "src/simix/SynchroComm.hpp"
// Log sub-category "simix_network" (child of "simix"); declared as the default category for this file.
16 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix, "SIMIX network-related synchronization");
// Global dictionary of mailboxes, looked up by name elsewhere in this file.
// SIMIX_mbox_free is handed to the dictionary constructor as its element free function.
18 static void SIMIX_mbox_free(void *data);
19 static xbt_dict_t mailboxes = xbt_dict_new_homogeneous(SIMIX_mbox_free);
// Forward declarations of helpers that are defined further down in this file.
21 static void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall);
22 static void SIMIX_comm_copy_data(smx_synchro_t comm);
23 static smx_synchro_t SIMIX_comm_new(e_smx_comm_type_t type);
24 static inline void SIMIX_mbox_push(smx_mailbox_t mbox, smx_synchro_t comm);
25 static smx_synchro_t _find_matching_comm(std::deque<smx_synchro_t> *deque, e_smx_comm_type_t type,
26 int (*match_fun)(void *, void *,smx_synchro_t), void *user_data, smx_synchro_t my_synchro, bool remove_matching);
27 static void SIMIX_comm_start(smx_synchro_t synchro);
29 void SIMIX_mailbox_exit(void)
31 xbt_dict_free(&mailboxes);
34 /******************************************************************************/
35 /* Rendez-Vous Points */
36 /******************************************************************************/
// Creates the mailbox called `name` and registers it in the global `mailboxes` dictionary.
// \param name Name of the mailbox; must be non-NULL (enforced by xbt_assert)
// NOTE(review): the lookup result is presumably tested before allocating, and the mailbox
// presumably returned at the end; neither the guard nor the return is visible in this listing - confirm.
38 smx_mailbox_t SIMIX_mbox_create(const char *name)
40 xbt_assert(name, "Mailboxes must have a name");
41 /* two processes may have pushed the same mbox_create simcall at the same time */
42 smx_mailbox_t mbox = (smx_mailbox_t) xbt_dict_get_or_null(mailboxes, name);
// Fresh mailbox: zero-initialized struct, private copy of the name, empty pending queue.
45 mbox = xbt_new0(s_smx_mailbox_t, 1);
46 mbox->name = xbt_strdup(name);
47 mbox->comm_queue = new std::deque<smx_synchro_t>();
48 mbox->done_comm_queue = nullptr; // Allocated on need only
49 mbox->permanent_receiver=NULL;
51 XBT_DEBUG("Creating a mailbox at %p with name %s", mbox, name);
// The dictionary key is the mailbox's own copy of the name.
52 xbt_dict_set(mailboxes, mbox->name, mbox, NULL);
// Element free function of the `mailboxes` dictionary: releases the queues owned by one mailbox.
// \param data The mailbox to release, passed as an untyped pointer
// NOTE(review): releasing the name string and the struct itself is not visible in this listing - confirm.
57 void SIMIX_mbox_free(void *data)
59 XBT_DEBUG("mbox free %p", data);
60 smx_mailbox_t mbox = (smx_mailbox_t) data;
// done_comm_queue may still be nullptr here; deleting a null pointer is a no-op.
62 delete mbox->comm_queue;
63 delete mbox->done_comm_queue;
68 smx_mailbox_t SIMIX_mbox_get_by_name(const char *name)
70 return (smx_mailbox_t) xbt_dict_get_or_null(mailboxes, name);
73 smx_synchro_t SIMIX_mbox_get_head(smx_mailbox_t mbox)
75 return mbox->comm_queue->empty()? nullptr:mbox->comm_queue->front();
79 * \brief get the receiver (process associated to the mailbox)
80 * \param mbox The rendez-vous point
81 * \return process The receiving process (NULL if not set)
83 smx_process_t SIMIX_mbox_get_receiver(smx_mailbox_t mbox)
85 return mbox->permanent_receiver;
89 * \brief set the receiver of the rendez vous point to allow eager sends
90 * \param mbox The rendez-vous point
91 * \param process The receiving process
93 void SIMIX_mbox_set_receiver(smx_mailbox_t mbox, smx_process_t process)
95 mbox->permanent_receiver=process;
96 if (mbox->done_comm_queue == nullptr)
97 mbox->done_comm_queue = new std::deque<smx_synchro_t>();
// Appends a communication to the pending queue (comm_queue) of a mailbox.
101 * \brief Pushes a communication synchro into a rendez-vous point
102 * \param mbox The mailbox
103 * \param comm The communication synchro
105 static inline void SIMIX_mbox_push(smx_mailbox_t mbox, smx_synchro_t synchro)
107 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
// Queued at the back: _find_matching_comm scans from the front, so older requests are examined first.
109 mbox->comm_queue->push_back(comm);
// Removes a communication from the pending queue of a mailbox; dies (xbt_die) if the scan
// completes without removing anything.
// NOTE(review): the test that selects which queue element to erase, and the exit from the loop
// after erasing, are not visible in this listing - confirm against the full source.
114 * \brief Removes a communication synchro from a rendez-vous point
115 * \param mbox The rendez-vous point
116 * \param comm The communication synchro
118 void SIMIX_mbox_remove(smx_mailbox_t mbox, smx_synchro_t synchro)
120 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
// Linear scan of the pending queue, front to back.
123 for (auto it = mbox->comm_queue->begin(); it != mbox->comm_queue->end(); it++)
125 mbox->comm_queue->erase(it);
128 xbt_die("Cannot remove this comm that is not part of the mailbox");
// Scans `deque` front to back for a queued communication of type `type` accepted by both filters:
//  - match_fun (ours, may be NULL) is called with (this_user_data, other_user_data, candidate)
//  - the candidate's own match_fun (may be NULL) is called with (other_user_data, this_user_data, my_synchro)
// A NULL filter accepts everything.
// NOTE(review): what happens on a match (refcount handling, use of remove_matching, the return)
// is only partly visible in this listing - confirm against the full source.
132 * \brief Checks if there is a communication synchro queued in a deque matching our needs
133 * \param type The type of communication we are looking for (comm_send, comm_recv)
134 * \return The communication synchro if found, NULL otherwise
136 static smx_synchro_t _find_matching_comm(std::deque<smx_synchro_t> *deque, e_smx_comm_type_t type,
137 int (*match_fun)(void *, void *,smx_synchro_t), void *this_user_data, smx_synchro_t my_synchro, bool remove_matching)
139 void* other_user_data = NULL;
141 for(auto it = deque->begin(); it != deque->end(); it++){
142 smx_synchro_t synchro = *it;
143 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
// The candidate's user data lives on the side that posted it: src_data for a send, dst_data for a receive.
145 if (comm->type == SIMIX_COMM_SEND) {
146 other_user_data = comm->src_data;
147 } else if (comm->type == SIMIX_COMM_RECEIVE) {
148 other_user_data = comm->dst_data;
// Type must agree, and each side's filter (when set) must accept the other side.
150 if (comm->type == type &&
151 (! match_fun || match_fun(this_user_data, other_user_data, synchro)) &&
152 (!comm->match_fun || comm->match_fun(other_user_data, this_user_data, my_synchro))) {
153 XBT_DEBUG("Found a matching communication synchro %p", comm);
// Keeps a copy of the mailbox pointer in mbox_cpy.
158 comm->mbox_cpy = comm->mbox;
163 XBT_DEBUG("Sorry, communication synchro %p does not match our needs:"
164 " its type is %d but we are looking for a comm of type %d (or maybe the filtering didn't match)",
165 comm, (int)comm->type, (int)type);
167 XBT_DEBUG("No matching communication synchro found");
171 /******************************************************************************/
172 /* Communication synchros */
173 /******************************************************************************/
// Allocates a simgrid::simix::Comm and puts it in the SIMIX_WAITING state.
// NOTE(review): the remaining field initialization (including the use of `type`) and the return
// are not visible in this listing - confirm against the full source.
176 * \brief Creates a new communicate synchro
177 * \param type The direction of communication (comm_send, comm_recv)
178 * \return The new communicate synchro
180 smx_synchro_t SIMIX_comm_new(e_smx_comm_type_t type)
182 simgrid::simix::Comm *comm = new simgrid::simix::Comm();
183 comm->state = SIMIX_WAITING;
189 XBT_DEBUG("Create communicate synchro %p", comm);
// Drops one reference on a communication and tears it down once it is no longer referenced.
// Calling this on a comm whose refcount is already <= 0 is treated as a fatal bug (backtrace + xbt_die).
// NOTE(review): the refcount decrement, the early return taken while references remain, and the
// final deallocation are not visible in this listing - confirm against the full source.
195 * \brief Destroy a communicate synchro
196 * \param synchro The communicate synchro to be destroyed
198 void SIMIX_comm_destroy(smx_synchro_t synchro)
200 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
202 XBT_DEBUG("Destroy synchro %p (refcount: %d), state: %d", comm, comm->refcount, (int)comm->state);
204 if (comm->refcount <= 0) {
205 xbt_backtrace_display_current();
206 xbt_die("The refcount of comm %p is already 0 before decreasing it. "
207 "That's a bug! If you didn't test and/or wait the same communication twice in your code, then the bug is SimGrid's...", synchro);
210 if (comm->refcount > 0)
212 XBT_DEBUG("Really free communication %p; refcount is now %d", comm, comm->refcount);
// Release the surf actions (network action and both timeouts) attached to the comm.
214 SIMIX_comm_destroy_internal_actions(synchro);
216 if (comm->detached && comm->state != SIMIX_DONE) {
217 /* the communication has failed and was detached:
218 * we have to free the buffer */
219 if (comm->clean_fun) {
220 comm->clean_fun(comm->src_buff);
222 comm->src_buff = NULL;
226 SIMIX_mbox_remove(comm->mbox, comm);
231 void SIMIX_comm_destroy_internal_actions(smx_synchro_t synchro)
233 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
234 if (comm->surf_comm){
235 comm->surf_comm->unref();
236 comm->surf_comm = NULL;
239 if (comm->src_timeout){
240 comm->src_timeout->unref();
241 comm->src_timeout = NULL;
244 if (comm->dst_timeout){
245 comm->dst_timeout->unref();
246 comm->dst_timeout = NULL;
// Blocking send: posts the send through simcall_HANDLER_comm_isend (with a NULL clean_fun),
// then waits on the resulting communication with the given timeout.
// NOTE(review): the tail of the isend argument list is not visible in this listing.
250 void simcall_HANDLER_comm_send(smx_simcall_t simcall, smx_process_t src, smx_mailbox_t mbox,
251 double task_size, double rate,
252 void *src_buff, size_t src_buff_size,
253 int (*match_fun)(void *, void *,smx_synchro_t),
254 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
255 void *data, double timeout){
256 smx_synchro_t comm = simcall_HANDLER_comm_isend(simcall, src, mbox, task_size, rate,
257 src_buff, src_buff_size, match_fun, NULL, copy_data_fun,
// The MC value is set to 0 before waiting; simcall_HANDLER_comm_wait reads it back when the MC is active.
259 SIMCALL_SET_MC_VALUE(simcall, 0);
260 simcall_HANDLER_comm_wait(simcall, comm, timeout);
// Non-blocking send. Looks for a pending receive in the mailbox; when none matches, the send is
// either stored in the permanent-receiver queue (eager send) or pushed into the regular queue.
// The sender-side fields of the communication are then filled in and the comm is started,
// unless the model checker or record/replay is active.
// Returns NULL for a detached send, the communication otherwise.
// NOTE(review): several branch lines (else branches, the `detached` test) are not visible in
// this listing; the comments below describe only the visible statements.
262 smx_synchro_t simcall_HANDLER_comm_isend(smx_simcall_t simcall, smx_process_t src_proc, smx_mailbox_t mbox,
263 double task_size, double rate,
264 void *src_buff, size_t src_buff_size,
265 int (*match_fun)(void *, void *,smx_synchro_t),
266 void (*clean_fun)(void *), // used to free the synchro in case of problem after a detached send
267 void (*copy_data_fun)(smx_synchro_t, void*, size_t),// used to copy data if not default one
268 void *data, int detached)
270 XBT_DEBUG("send from %p", mbox);
272 /* Prepare a synchro describing us, so that it gets passed to the user-provided filter of other side */
273 smx_synchro_t this_synchro = SIMIX_comm_new(SIMIX_COMM_SEND);
275 /* Look for communication synchro matching our needs. We also provide a description of
276 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
278 * If it is not found then push our communication into the rendez-vous point */
279 smx_synchro_t other_synchro =
280 _find_matching_comm(mbox->comm_queue, SIMIX_COMM_RECEIVE, match_fun, data, this_synchro, /*remove_matching*/true);
281 simgrid::simix::Comm *other_comm = static_cast<simgrid::simix::Comm*>(other_synchro);
// No receiver is waiting: our own synchro becomes the communication.
284 if (!other_synchro) {
285 other_synchro = this_synchro;
286 other_comm = static_cast<simgrid::simix::Comm*>(other_synchro);
288 if (mbox->permanent_receiver!=NULL){
289 //this mailbox is for small messages, which have to be sent right now
290 other_synchro->state = SIMIX_READY;
291 other_comm->dst_proc=mbox->permanent_receiver;
292 other_comm->refcount++;
293 mbox->done_comm_queue->push_back(other_synchro);
294 other_comm->mbox=mbox;
295 XBT_DEBUG("pushing a message into the permanent receive fifo %p, comm %p", mbox, &(other_comm));
298 SIMIX_mbox_push(mbox, this_synchro);
301 XBT_DEBUG("Receive already pushed");
// A receive was found: our temporary synchro is released and the receiver's comm is reused.
303 SIMIX_comm_destroy(this_synchro);
305 other_comm->state = SIMIX_READY;
306 other_comm->type = SIMIX_COMM_READY;
// Record the communication in the sender's list of comms.
309 xbt_fifo_push(src_proc->comms, other_synchro);
311 /* if the communication synchro is detached then decrease the refcount
312 * by one, so it will be eliminated by the receiver's destroy call */
314 other_comm->detached = 1;
315 other_comm->refcount--;
316 other_comm->clean_fun = clean_fun;
318 other_comm->clean_fun = NULL;
321 /* Setup the communication synchro */
322 other_comm->src_proc = src_proc;
323 other_comm->task_size = task_size;
324 other_comm->rate = rate;
325 other_comm->src_buff = src_buff;
326 other_comm->src_buff_size = src_buff_size;
327 other_comm->src_data = data;
329 other_comm->match_fun = match_fun;
330 other_comm->copy_data_fun = copy_data_fun;
// With the model checker or record/replay active, the comm is marked running without calling SIMIX_comm_start.
333 if (MC_is_active() || MC_record_replay_is_active()) {
334 other_comm->state = SIMIX_RUNNING;
335 return (detached ? NULL : other_comm);
338 SIMIX_comm_start(other_comm);
339 return (detached ? NULL : other_comm);
342 void simcall_HANDLER_comm_recv(smx_simcall_t simcall, smx_process_t receiver, smx_mailbox_t mbox,
343 void *dst_buff, size_t *dst_buff_size,
344 int (*match_fun)(void *, void *, smx_synchro_t),
345 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
346 void *data, double timeout, double rate)
348 smx_synchro_t comm = SIMIX_comm_irecv(receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, rate);
349 SIMCALL_SET_MC_VALUE(simcall, 0);
350 simcall_HANDLER_comm_wait(simcall, comm, timeout);
353 smx_synchro_t simcall_HANDLER_comm_irecv(smx_simcall_t simcall, smx_process_t receiver, smx_mailbox_t mbox,
354 void *dst_buff, size_t *dst_buff_size,
355 int (*match_fun)(void *, void *, smx_synchro_t),
356 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
357 void *data, double rate)
359 return SIMIX_comm_irecv(receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, rate);
// Non-blocking receive. When the mailbox has a permanent receiver and already-sent messages,
// those are searched first; otherwise the regular pending queue is searched for a matching send.
// When nothing matches, the receive itself is pushed into the mailbox. The receiver-side fields
// are then filled in and the comm is started, unless the model checker or record/replay is active.
// NOTE(review): several branch lines (else branches) are not visible in this listing; the comments
// below describe only the visible statements.
362 smx_synchro_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_mailbox_t mbox, void *dst_buff, size_t *dst_buff_size,
363 int (*match_fun)(void *, void *, smx_synchro_t),
364 void (*copy_data_fun)(smx_synchro_t, void*, size_t), // used to copy data if not default one
365 void *data, double rate)
367 XBT_DEBUG("recv from %p %p", mbox, mbox->comm_queue);
368 smx_synchro_t this_synchro = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
370 smx_synchro_t other_synchro;
371 //communication already done, get it inside the fifo of completed comms
372 if (mbox->permanent_receiver && ! mbox->done_comm_queue->empty()) {
374 XBT_DEBUG("We have a comm that has probably already been received, trying to match it, to skip the communication");
375 //find a match in the already received fifo
376 other_synchro = _find_matching_comm(mbox->done_comm_queue, SIMIX_COMM_SEND, match_fun, data, this_synchro,/*remove_matching*/true);
377 //if not found, assume the receiver came first, register it to the mailbox in the classical way
378 if (!other_synchro) {
379 XBT_DEBUG("We have messages in the permanent receive list, but not the one we are looking for, pushing request into fifo");
380 other_synchro = this_synchro;
381 SIMIX_mbox_push(mbox, this_synchro);
383 simgrid::simix::Comm *other_comm = static_cast<simgrid::simix::Comm*>(other_synchro);
// An eager send whose network action has nothing left to transfer is marked done right away.
385 if(other_comm->surf_comm && SIMIX_comm_get_remains(other_comm)==0.0) {
386 XBT_DEBUG("comm %p has been already sent, and is finished, destroy it",other_comm);
387 other_comm->state = SIMIX_DONE;
388 other_comm->type = SIMIX_COMM_DONE;
389 other_comm->mbox = NULL;
391 other_comm->refcount--;
392 SIMIX_comm_destroy(this_synchro);
395 /* Prepare a synchro describing us, so that it gets passed to the user-provided filter of other side */
397 /* Look for communication synchro matching our needs. We also provide a description of
398 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
400 * If it is not found then push our communication into the rendez-vous point */
401 other_synchro = _find_matching_comm(mbox->comm_queue, SIMIX_COMM_SEND, match_fun, data, this_synchro,/*remove_matching*/true);
403 if (!other_synchro) {
404 XBT_DEBUG("Receive pushed first %zu", mbox->comm_queue->size());
405 other_synchro = this_synchro;
406 SIMIX_mbox_push(mbox, this_synchro);
// A send was found: our temporary synchro is released and the sender's comm is reused.
408 SIMIX_comm_destroy(this_synchro);
409 simgrid::simix::Comm *other_comm = static_cast<simgrid::simix::Comm*>(other_synchro);
411 other_comm->state = SIMIX_READY;
412 other_comm->type = SIMIX_COMM_READY;
// Record the communication in the receiver's list of comms.
414 xbt_fifo_push(dst_proc->comms, other_synchro);
417 /* Setup communication synchro */
418 simgrid::simix::Comm *other_comm = static_cast<simgrid::simix::Comm*>(other_synchro);
419 other_comm->dst_proc = dst_proc;
420 other_comm->dst_buff = dst_buff;
421 other_comm->dst_buff_size = dst_buff_size;
422 other_comm->dst_data = data;
// -1.0 means "no rate limit": the receiver's rate applies when the comm has none, or when it is the lower of the two.
424 if (rate != -1.0 && (other_comm->rate == -1.0 || rate < other_comm->rate))
425 other_comm->rate = rate;
427 other_comm->match_fun = match_fun;
428 other_comm->copy_data_fun = copy_data_fun;
// With the model checker or record/replay active, the comm is marked running without calling SIMIX_comm_start.
430 if (MC_is_active() || MC_record_replay_is_active()) {
431 other_synchro->state = SIMIX_RUNNING;
432 return other_synchro;
435 SIMIX_comm_start(other_synchro);
436 return other_synchro;
// Handler of the iprobe simcall: forwards to SIMIX_comm_iprobe, using the simcall issuer as the probing process.
// NOTE(review): the end of the parameter list (declaring `data`) is not visible in this listing.
439 smx_synchro_t simcall_HANDLER_comm_iprobe(smx_simcall_t simcall, smx_mailbox_t mbox,
440 int type, int src, int tag,
441 int (*match_fun)(void *, void *, smx_synchro_t),
443 return SIMIX_comm_iprobe(simcall->issuer, mbox, type, src, tag, match_fun, data);
// Probes a mailbox for a matching communication without consuming it: both searches are done with
// remove_matching=false. A temporary synchro describing the prober is created for the filters and
// destroyed before returning. Returns the communication found, or NULL.
// NOTE(review): the test on `type` that picks the probe direction, the declaration of smx_type and
// the conditions around the second search and the refcount decrement are not visible in this listing.
446 smx_synchro_t SIMIX_comm_iprobe(smx_process_t dst_proc, smx_mailbox_t mbox, int type, int src,
447 int tag, int (*match_fun)(void *, void *, smx_synchro_t), void *data)
449 XBT_DEBUG("iprobe from %p %p", mbox, mbox->comm_queue);
450 smx_synchro_t this_synchro;
// The temporary synchro has the opposite type of the communications being looked for.
453 this_synchro=SIMIX_comm_new(SIMIX_COMM_SEND);
454 smx_type = SIMIX_COMM_RECEIVE;
456 this_synchro=SIMIX_comm_new(SIMIX_COMM_RECEIVE);
457 smx_type = SIMIX_COMM_SEND;
459 smx_synchro_t other_synchro=NULL;
460 if(mbox->permanent_receiver && ! mbox->done_comm_queue->empty()){
461 XBT_DEBUG("first check in the permanent recv mailbox, to see if we already got something");
463 _find_matching_comm(mbox->done_comm_queue, (e_smx_comm_type_t) smx_type, match_fun, data, this_synchro,/*remove_matching*/false);
466 XBT_DEBUG("check if we have more luck in the normal mailbox");
467 other_synchro = _find_matching_comm(mbox->comm_queue, (e_smx_comm_type_t) smx_type, match_fun, data, this_synchro,/*remove_matching*/false);
471 simgrid::simix::Comm *other_comm = static_cast<simgrid::simix::Comm*>(other_synchro);
472 other_comm->refcount--;
475 SIMIX_comm_destroy(this_synchro);
476 return other_synchro;
// Handler of the wait simcall (also reached from the blocking send and recv handlers).
// Registers the simcall on the communication, then either finishes it at once (already terminated,
// or decided by the model checker) or arms a sleep action on the issuer's host as timeout.
// NOTE(review): the declaration of `sleep`, the test on `idx` and the else/return lines of the
// MC branch are not visible in this listing.
479 void simcall_HANDLER_comm_wait(smx_simcall_t simcall, smx_synchro_t synchro, double timeout)
481 /* the simcall may be a wait, a send or a recv */
484 /* Associate this simcall to the wait synchro */
485 XBT_DEBUG("simcall_HANDLER_comm_wait, %p", synchro);
487 xbt_fifo_push(synchro->simcalls, simcall);
488 simcall->issuer->waiting_synchro = synchro;
// Under the model checker or record/replay, the outcome is derived from the simcall's MC value.
490 if (MC_is_active() || MC_record_replay_is_active()) {
491 int idx = SIMCALL_GET_MC_VALUE(simcall);
493 synchro->state = SIMIX_DONE;
495 /* If we reached this point, the wait simcall must have a timeout */
496 /* Otherwise it shouldn't be enabled and executed by the MC */
// The timeout is attributed to whichever side issued the wait.
500 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
501 if (comm->src_proc == simcall->issuer)
502 comm->state = SIMIX_SRC_TIMEOUT;
504 comm->state = SIMIX_DST_TIMEOUT;
507 SIMIX_comm_finish(synchro);
511 /* If the synchro has already finished perform the error handling, */
512 /* otherwise set up a waiting timeout on the right side */
513 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING) {
514 SIMIX_comm_finish(synchro);
515 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
516 sleep = surf_host_sleep(simcall->issuer->host, timeout);
517 sleep->setData(synchro);
// Store the sleep action as the timeout of the issuer's side of the communication.
519 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
520 if (simcall->issuer == comm->src_proc)
521 comm->src_timeout = sleep;
523 comm->dst_timeout = sleep;
// Handler of the test simcall: stores in the simcall result whether the communication is over,
// finishes the communication when it is, and otherwise answers the simcall directly.
// NOTE(review): the else/return lines separating the branches are not visible in this listing.
527 void simcall_HANDLER_comm_test(smx_simcall_t simcall, smx_synchro_t synchro)
529 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
// Under the model checker or record/replay, the test succeeds as soon as both peers are known.
531 if(MC_is_active() || MC_record_replay_is_active()){
532 simcall_comm_test__set__result(simcall, comm->src_proc && comm->dst_proc);
533 if(simcall_comm_test__get__result(simcall)){
534 synchro->state = SIMIX_DONE;
535 xbt_fifo_push(synchro->simcalls, simcall);
536 SIMIX_comm_finish(synchro);
538 SIMIX_simcall_answer(simcall);
// Regular run: the test succeeds when the state is neither WAITING nor RUNNING.
543 simcall_comm_test__set__result(simcall, (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING));
544 if (simcall_comm_test__get__result(simcall)) {
545 xbt_fifo_push(synchro->simcalls, simcall);
546 SIMIX_comm_finish(synchro);
548 SIMIX_simcall_answer(simcall);
// Handler of the testany simcall: the result starts at -1 and is replaced by the index of a
// terminated communication when one is finished from here.
// NOTE(review): the declaration of `cursor`, the test on `idx` and the else/return lines of both
// branches are not visible in this listing.
552 void simcall_HANDLER_comm_testany(smx_simcall_t simcall, xbt_dynar_t synchros)
555 smx_synchro_t synchro;
556 simcall_comm_testany__set__result(simcall, -1);
// Under the model checker or record/replay, the index comes from the simcall's MC value.
558 if (MC_is_active() || MC_record_replay_is_active()){
559 int idx = SIMCALL_GET_MC_VALUE(simcall);
561 SIMIX_simcall_answer(simcall);
563 synchro = xbt_dynar_get_as(synchros, idx, smx_synchro_t);
564 simcall_comm_testany__set__result(simcall, idx);
565 xbt_fifo_push(synchro->simcalls, simcall);
566 synchro->state = SIMIX_DONE;
567 SIMIX_comm_finish(synchro);
// Regular run: look for a communication whose state is neither WAITING nor RUNNING.
572 xbt_dynar_foreach(simcall_comm_testany__get__comms(simcall), cursor,synchro) {
573 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING) {
574 simcall_comm_testany__set__result(simcall, cursor);
575 xbt_fifo_push(synchro->simcalls, simcall);
576 SIMIX_comm_finish(synchro);
580 SIMIX_simcall_answer(simcall);
// Handler of the waitany simcall: registers the simcall on the listed communications and
// finishes one that is found already terminated.
// NOTE(review): the return closing the MC branch and the exit from the loop after a finish are
// not visible in this listing.
583 void simcall_HANDLER_comm_waitany(smx_simcall_t simcall, xbt_dynar_t synchros)
585 smx_synchro_t synchro;
586 unsigned int cursor = 0;
// Under the model checker or record/replay, the communication at the MC-chosen index is declared done.
588 if (MC_is_active() || MC_record_replay_is_active()){
589 int idx = SIMCALL_GET_MC_VALUE(simcall);
590 synchro = xbt_dynar_get_as(synchros, idx, smx_synchro_t);
591 xbt_fifo_push(synchro->simcalls, simcall);
592 simcall_comm_waitany__set__result(simcall, idx);
593 synchro->state = SIMIX_DONE;
594 SIMIX_comm_finish(synchro);
598 xbt_dynar_foreach(synchros, cursor, synchro){
599 /* associate this simcall to the synchro */
600 xbt_fifo_push(synchro->simcalls, simcall);
602 /* see if the synchro is already finished */
603 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING){
604 SIMIX_comm_finish(synchro);
610 void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall)
612 smx_synchro_t synchro;
613 unsigned int cursor = 0;
614 xbt_dynar_t synchros = simcall_comm_waitany__get__comms(simcall);
616 xbt_dynar_foreach(synchros, cursor, synchro) {
617 xbt_fifo_remove(synchro->simcalls, simcall);
// Starts the network transfer of a communication that is in the SIMIX_READY state: creates the
// surf network action between the two hosts, flags an immediate link failure, and suspends the
// action when either peer is suspended.
// NOTE(review): the `else` between the two debug messages of the suspension case is not visible
// in this listing.
622 * \brief Starts the simulation of a communication synchro.
623 * \param synchro the communication synchro
625 static inline void SIMIX_comm_start(smx_synchro_t synchro)
627 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
629 /* If both the sender and the receiver are already there, start the communication */
630 if (synchro->state == SIMIX_READY) {
632 sg_host_t sender = comm->src_proc->host;
633 sg_host_t receiver = comm->dst_proc->host;
635 XBT_DEBUG("Starting communication %p from '%s' to '%s'", synchro, sg_host_get_name(sender), sg_host_get_name(receiver));
// The action carries the synchro as user data.
637 comm->surf_comm = surf_network_model_communicate(surf_network_model, sender, receiver, comm->task_size, comm->rate);
638 comm->surf_comm->setData(synchro);
639 comm->state = SIMIX_RUNNING;
641 /* If a link is failed, detect it immediately */
642 if (comm->surf_comm->getState() == simgrid::surf::Action::State::failed) {
643 XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
644 sg_host_get_name(sender), sg_host_get_name(receiver));
645 comm->state = SIMIX_LINK_FAILURE;
646 SIMIX_comm_destroy_internal_actions(synchro);
649 /* If any of the process is suspend, create the synchro but stop its execution,
650 it will be restarted when the sender process resume */
651 if (SIMIX_process_is_suspended(comm->src_proc) ||
652 SIMIX_process_is_suspended(comm->dst_proc)) {
653 /* FIXME: check what should happen with the synchro state */
655 if (SIMIX_process_is_suspended(comm->src_proc))
656 XBT_DEBUG("The communication is suspended on startup because src (%s:%s) were suspended since it initiated the communication",
657 sg_host_get_name(comm->src_proc->host), comm->src_proc->name);
659 XBT_DEBUG("The communication is suspended on startup because dst (%s:%s) were suspended since it initiated the communication",
660 sg_host_get_name(comm->dst_proc->host), comm->dst_proc->name);
662 comm->surf_comm->suspend();
// Answers every simcall registered on a terminated communication. For each simcall: removes it
// from the other communications of a waitany, reports the final state to the issuer (data copy on
// success, exception otherwise), detaches the communication from the processes' comm lists, and
// answers the simcall. The communication is then destroyed `destroy_count` times.
// NOTE(review): several lines are not visible in this listing (case labels, `break`s, `else`s,
// the guards around SIMIX_mbox_remove and around the peer lookups, the destroy_count increment).
668 * \brief Answers the SIMIX simcalls associated to a communication synchro.
669 * \param synchro a finished communication synchro
671 void SIMIX_comm_finish(smx_synchro_t synchro)
673 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
674 unsigned int destroy_count = 0;
675 smx_simcall_t simcall;
// Pop the registered simcalls one by one until the list is empty.
677 while ((simcall = (smx_simcall_t) xbt_fifo_shift(synchro->simcalls))) {
679 /* If a waitany simcall is waiting for this synchro to finish, then remove
680 it from the other synchros in the waitany list. Afterwards, get the
681 position of the actual synchro in the waitany dynar and
682 return it as the result of the simcall */
684 if (simcall->call == SIMCALL_NONE) //FIXME: maybe a better way to handle this case
685 continue; // if process handling comm is killed
686 if (simcall->call == SIMCALL_COMM_WAITANY) {
687 SIMIX_waitany_remove_simcall_from_actions(simcall);
688 if (!MC_is_active() && !MC_record_replay_is_active())
689 simcall_comm_waitany__set__result(simcall, xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro));
692 /* If the synchro is still in a rendez-vous point then remove from it */
694 SIMIX_mbox_remove(comm->mbox, synchro);
696 XBT_DEBUG("SIMIX_comm_finish: synchro state = %d", (int)synchro->state);
698 /* Check out for errors */
// An issuer whose host is off is flagged to die and gets a host_error exception.
700 if (simcall->issuer->host->isOff()) {
701 simcall->issuer->context->iwannadie = 1;
702 SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
// Otherwise the outcome depends on the final state of the communication.
705 switch (synchro->state) {
708 XBT_DEBUG("Communication %p complete!", synchro);
709 SIMIX_comm_copy_data(synchro);
712 case SIMIX_SRC_TIMEOUT:
713 SMX_EXCEPTION(simcall->issuer, timeout_error, 0, "Communication timeouted because of sender");
716 case SIMIX_DST_TIMEOUT:
717 SMX_EXCEPTION(simcall->issuer, timeout_error, 0, "Communication timeouted because of receiver");
720 case SIMIX_SRC_HOST_FAILURE:
721 if (simcall->issuer == comm->src_proc)
722 simcall->issuer->context->iwannadie = 1;
723 // SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
725 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
728 case SIMIX_DST_HOST_FAILURE:
729 if (simcall->issuer == comm->dst_proc)
730 simcall->issuer->context->iwannadie = 1;
731 // SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
733 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
736 case SIMIX_LINK_FAILURE:
738 XBT_DEBUG("Link failure in synchro %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d",
740 comm->src_proc ? sg_host_get_name(comm->src_proc->host) : NULL,
741 comm->dst_proc ? sg_host_get_name(comm->dst_proc->host) : NULL,
742 simcall->issuer->name, simcall->issuer, comm->detached);
743 if (comm->src_proc == simcall->issuer) {
744 XBT_DEBUG("I'm source");
745 } else if (comm->dst_proc == simcall->issuer) {
746 XBT_DEBUG("I'm dest");
748 XBT_DEBUG("I'm neither source nor dest");
750 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Link failure");
754 if (simcall->issuer == comm->dst_proc)
755 SMX_EXCEPTION(simcall->issuer, cancel_error, 0, "Communication canceled by the sender");
757 SMX_EXCEPTION(simcall->issuer, cancel_error, 0, "Communication canceled by the receiver");
761 xbt_die("Unexpected synchro state in SIMIX_comm_finish: %d", (int)synchro->state);
764 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
765 if (simcall->issuer->doexception) {
766 if (simcall->call == SIMCALL_COMM_WAITANY) {
767 simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro);
769 else if (simcall->call == SIMCALL_COMM_TESTANY) {
770 simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_testany__get__comms(simcall), &synchro);
774 if (simcall->issuer->host->isOff()) {
775 simcall->issuer->context->iwannadie = 1;
// The issuer no longer waits on this communication; drop it from its list of comms.
778 simcall->issuer->waiting_synchro = NULL;
779 xbt_fifo_remove(simcall->issuer->comms, synchro);
// Also drop the communication from the peer's list of comms.
781 if(simcall->issuer == comm->src_proc){
783 xbt_fifo_remove(comm->dst_proc->comms, synchro);
785 if(simcall->issuer == comm->dst_proc){
787 xbt_fifo_remove(comm->src_proc->comms, synchro);
790 SIMIX_simcall_answer(simcall);
794 while (destroy_count-- > 0)
795 SIMIX_comm_destroy(synchro);
// Translates the state of the surf actions attached to a communication into its SIMIX state,
// releases those actions, and answers the registered simcalls if there are any.
// Checks are ordered: source timeout done, destination timeout done, source timeout failed
// (source host failure), destination timeout failed (destination host failure), network action
// failed (link failure); the first one that applies wins.
// NOTE(review): the `else` introducing the final SIMIX_DONE assignment is not visible in this listing.
799 * \brief This function is called when a Surf communication synchro is finished.
800 * \param synchro the corresponding Simix communication
802 void SIMIX_post_comm(smx_synchro_t synchro)
804 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
806 /* Update synchro state */
807 if (comm->src_timeout &&
808 comm->src_timeout->getState() == simgrid::surf::Action::State::done)
809 synchro->state = SIMIX_SRC_TIMEOUT;
810 else if (comm->dst_timeout &&
811 comm->dst_timeout->getState() == simgrid::surf::Action::State::done)
812 synchro->state = SIMIX_DST_TIMEOUT;
813 else if (comm->src_timeout &&
814 comm->src_timeout->getState() == simgrid::surf::Action::State::failed)
815 synchro->state = SIMIX_SRC_HOST_FAILURE;
816 else if (comm->dst_timeout &&
817 comm->dst_timeout->getState() == simgrid::surf::Action::State::failed)
818 synchro->state = SIMIX_DST_HOST_FAILURE;
819 else if (comm->surf_comm &&
820 comm->surf_comm->getState() == simgrid::surf::Action::State::failed) {
821 synchro->state = SIMIX_LINK_FAILURE;
823 synchro->state = SIMIX_DONE;
825 XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
826 comm, (int)comm->state, comm->src_proc, comm->dst_proc, comm->detached);
828 /* destroy the surf actions associated with the Simix communication */
829 SIMIX_comm_destroy_internal_actions(comm);
831 /* if there are simcalls associated with the synchro, then answer them */
832 if (xbt_fifo_size(synchro->simcalls)) {
833 SIMIX_comm_finish(comm);
837 void SIMIX_comm_cancel(smx_synchro_t synchro)
839 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
841 /* if the synchro is a waiting state means that it is still in a mbox */
842 /* so remove from it and delete it */
843 if (comm->state == SIMIX_WAITING) {
844 SIMIX_mbox_remove(comm->mbox, synchro);
845 comm->state = SIMIX_CANCELED;
847 else if (!MC_is_active() /* when running the MC there are no surf actions */
848 && !MC_record_replay_is_active()
849 && (comm->state == SIMIX_READY || comm->state == SIMIX_RUNNING)) {
851 comm->surf_comm->cancel();
// Suspends the surf network action of a communication.
// NOTE(review): the trailing comment implies the call is guarded by a test on comm->surf_comm;
// that guard is not visible in this listing - confirm.
855 void SIMIX_comm_suspend(smx_synchro_t synchro)
857 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
859 /*FIXME: shall we suspend also the timeout synchro? */
861 comm->surf_comm->suspend();
862 /* in the other case, the action will be suspended on creation, in SIMIX_comm_start() */
// Resumes the surf network action of a communication.
// NOTE(review): the trailing comment implies the call is guarded by a test on comm->surf_comm;
// that guard is not visible in this listing - confirm.
865 void SIMIX_comm_resume(smx_synchro_t synchro)
867 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
869 /*FIXME: check what happens with the timeouts */
871 comm->surf_comm->resume();
872 /* in the other case, the synchro was not really suspended yet, see SIMIX_comm_suspend() and SIMIX_comm_start() */
876 /************* synchro Getters **************/
// Reports how much of a communication is left to transfer: the value comes from the surf network
// action in one branch of the switch on the state, and is 0 in the other visible branches.
// NOTE(review): the case labels, the declaration of `remains`, and the return are not visible in
// this listing, so which state maps to which branch cannot be told from here.
879 * \brief get the amount remaining from the communication
880 * \param synchro The communication
882 double SIMIX_comm_get_remains(smx_synchro_t synchro)
886 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
889 switch (synchro->state) {
892 remains = comm->surf_comm->getRemains();
897 remains = 0; /*FIXME: check what should be returned */
901 remains = 0; /*FIXME: is this correct? */
907 e_smx_state_t SIMIX_comm_get_state(smx_synchro_t synchro)
909 return synchro->state;
913 * \brief Return the user data associated to the sender of the communication
914 * \param synchro The communication
915 * \return the user data
917 void* SIMIX_comm_get_src_data(smx_synchro_t synchro)
919 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
921 return comm->src_data;
925 * \brief Return the user data associated to the receiver of the communication
926 * \param synchro The communication
927 * \return the user data
929 void* SIMIX_comm_get_dst_data(smx_synchro_t synchro)
931 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
933 return comm->dst_data;
936 smx_process_t SIMIX_comm_get_src_proc(smx_synchro_t synchro)
938 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
940 return comm->src_proc;
943 smx_process_t SIMIX_comm_get_dst_proc(smx_synchro_t synchro)
945 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
947 return comm->dst_proc;
950 /******************************************************************************/
951 /* SIMIX_comm_copy_data callbacks */
952 /******************************************************************************/
// Copy callback used when a communication carries no copy_data_fun of its own.
// Starts out as the pointer-copy callback; replaced through SIMIX_comm_set_copy_data_callback().
953 static void (*SIMIX_comm_copy_data_callback) (smx_synchro_t, void*, size_t) = &SIMIX_comm_copy_pointer_callback;
955 void SIMIX_comm_set_copy_data_callback(void (*callback) (smx_synchro_t, void*, size_t))
957 SIMIX_comm_copy_data_callback = callback;
960 void SIMIX_comm_copy_pointer_callback(smx_synchro_t synchro, void* buff, size_t buff_size)
962 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
964 xbt_assert((buff_size == sizeof(void *)), "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
965 *(void **) (comm->dst_buff) = buff;
// Copy callback that duplicates the payload: copies buff_size bytes from `buff` into the
// receiver's buffer, and clears the comm's src_buff for a detached send.
// NOTE(review): the comment on the detached branch suggests the duplicated source buffer is also
// released there; no such statement is visible in this listing - confirm.
968 void SIMIX_comm_copy_buffer_callback(smx_synchro_t synchro, void* buff, size_t buff_size)
970 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
972 XBT_DEBUG("Copy the data over");
973 memcpy(comm->dst_buff, buff, buff_size);
974 if (comm->detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
976 comm->src_buff = NULL;
982 * \brief Copy the communication data from the sender's buffer to the receiver's one
983 * \param comm The communication
985 void SIMIX_comm_copy_data(smx_synchro_t synchro)
987 simgrid::simix::Comm *comm = static_cast<simgrid::simix::Comm*>(synchro);
989 size_t buff_size = comm->src_buff_size;
990 /* If there is no data to copy then return */
991 if (!comm->src_buff || !comm->dst_buff || comm->copied)
994 XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
996 comm->src_proc ? sg_host_get_name(comm->src_proc->host) : "a finished process",
998 comm->dst_proc ? sg_host_get_name(comm->dst_proc->host) : "a finished process",
999 comm->dst_buff, buff_size);
1001 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
1002 if (comm->dst_buff_size)
1003 buff_size = MIN(buff_size, *(comm->dst_buff_size));
1005 /* Update the receiver's buffer size to the copied amount */
1006 if (comm->dst_buff_size)
1007 *comm->dst_buff_size = buff_size;
1010 if(comm->copy_data_fun)
1011 comm->copy_data_fun (comm, comm->src_buff, buff_size);
1013 SIMIX_comm_copy_data_callback (comm, comm->src_buff, buff_size);
1017 /* Set the copied flag so we copy data only once */
1018 /* (this function might be called from both communication ends) */