1 /* Copyright (c) 2009-2015. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "src/surf/surf_interface.hpp"
8 #include "src/simix/smx_private.h"
11 #include "src/mc/mc_replay.h"
13 #include "simgrid/s4u/mailbox.hpp"
/* Logging sub-category used by the XBT_DEBUG messages of this file. */
15 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix, "SIMIX network-related synchronization");
/* Global dictionary of mailboxes, looked up by name elsewhere in this file. SIMIX_mbox_free is the
 * function handed to xbt_dict_new_homogeneous (presumably the per-element destructor -- confirm
 * against the xbt_dict API). */
17 static void SIMIX_mbox_free(void *data);
18 static xbt_dict_t mailboxes = xbt_dict_new_homogeneous(SIMIX_mbox_free);
/* Forward declarations of helpers that are defined further down in this file. */
20 static void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall);
21 static void SIMIX_comm_copy_data(smx_synchro_t comm);
22 static smx_synchro_t SIMIX_comm_new(e_smx_comm_type_t type);
23 static inline void SIMIX_mbox_push(smx_mailbox_t mbox, smx_synchro_t comm);
24 static smx_synchro_t SIMIX_fifo_probe_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
25 int (*match_fun)(void *, void *,smx_synchro_t),
26 void *user_data, smx_synchro_t my_synchro);
27 static smx_synchro_t SIMIX_fifo_get_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
28 int (*match_fun)(void *, void *,smx_synchro_t),
29 void *user_data, smx_synchro_t my_synchro);
30 static void SIMIX_comm_start(smx_synchro_t synchro);
/* Releases the global `mailboxes` dictionary. */
32 void SIMIX_mailbox_exit(void)
34 xbt_dict_free(&mailboxes);
37 /******************************************************************************/
38 /* Rendez-Vous Points */
39 /******************************************************************************/
/* Returns the mailbox registered under `name`, creating and registering it when needed.
 * `name` may be NULL, in which case no dictionary lookup is done.
 * NOTE(review): the condition guarding the allocation and the return statement are not visible in
 * this listing. */
41 smx_mailbox_t SIMIX_mbox_create(const char *name)
43 /* two processes may have pushed the same mbox_create simcall at the same time */
44 smx_mailbox_t mbox = name ? (smx_mailbox_t) xbt_dict_get_or_null(mailboxes, name) : NULL;
/* Fresh mailbox: zero-allocated, owns a private copy of the name, two empty fifos, no permanent receiver */
47 mbox = xbt_new0(s_smx_mailbox_t, 1);
48 mbox->name = name ? xbt_strdup(name) : NULL;
49 mbox->comm_fifo = xbt_fifo_new();
50 mbox->done_comm_fifo = xbt_fifo_new();
51 mbox->permanent_receiver=NULL;
/* NOTE(review): `name` is treated as nullable above but is formatted with %s here -- confirm that
 * the logging backend tolerates a NULL string. */
53 XBT_DEBUG("Creating a mailbox at %p with name %s", mbox, name);
/* Register the mailbox under its own copy of the name */
56 xbt_dict_set(mailboxes, mbox->name, mbox, NULL);
/* Removes `mbox` from the global dictionary, using its name as the key. */
61 void SIMIX_mbox_destroy(smx_mailbox_t mbox)
64 xbt_dict_remove(mailboxes, mbox->name);
/* Destructor registered with the `mailboxes` dictionary: `data` is a smx_mailbox_t.
 * The visible statements free both fifos of the mailbox.
 * NOTE(review): any release of the name and of the structure itself is not visible in this listing. */
67 void SIMIX_mbox_free(void *data)
69 XBT_DEBUG("mbox free %p", data);
70 smx_mailbox_t mbox = (smx_mailbox_t) data;
72 xbt_fifo_free(mbox->comm_fifo);
73 xbt_fifo_free(mbox->done_comm_fifo);
/* Returns the mailbox registered under `name` in the global dictionary, or NULL when there is none. */
78 smx_mailbox_t SIMIX_mbox_get_by_name(const char *name)
80 return (smx_mailbox_t) xbt_dict_get_or_null(mailboxes, name);
/* Walks the pending communications of `mbox` and tests, for each one, whether its source process
 * runs on `host`.
 * NOTE(review): the counter and the return statement are not visible in this listing; judging by
 * the name, the number of matching communications is returned -- confirm. */
83 int SIMIX_mbox_comm_count_by_host(smx_mailbox_t mbox, sg_host_t host)
85 smx_synchro_t comm = NULL;
86 xbt_fifo_item_t item = NULL;
89 xbt_fifo_foreach(mbox->comm_fifo, item, comm, smx_synchro_t) {
90 if (comm->comm.src_proc->host == host)
/* Returns the content of the first item of the mailbox's pending-communication fifo, without removing it. */
97 smx_synchro_t SIMIX_mbox_get_head(smx_mailbox_t mbox)
99 return (smx_synchro_t) xbt_fifo_get_item_content(
100 xbt_fifo_get_first_item(mbox->comm_fifo));
104 * \brief get the receiver (process associated to the mailbox)
105 * \param mbox The rendez-vous point
106 * \return process The receiving process (NULL if not set)
/* Plain accessor for the mailbox's permanent_receiver field. */
108 smx_process_t SIMIX_mbox_get_receiver(smx_mailbox_t mbox)
110 return mbox->permanent_receiver;
114 * \brief set the receiver of the rendez-vous point to allow eager sends
115 * \param mbox The rendez-vous point
116 * \param process The receiving process
/* Stores `process` as the mailbox's permanent receiver; the send and receive paths of this file test
 * this field to decide whether to use the done_comm_fifo. */
118 void SIMIX_mbox_set_receiver(smx_mailbox_t mbox, smx_process_t process)
120 mbox->permanent_receiver=process;
124 * \brief Pushes a communication synchro into a rendez-vous point
125 * \param mbox The mailbox
126 * \param comm The communication synchro
/* Appends `comm` to the mailbox's pending fifo and records the mailbox in the synchro. */
128 static inline void SIMIX_mbox_push(smx_mailbox_t mbox, smx_synchro_t comm)
130 xbt_fifo_push(mbox->comm_fifo, comm);
131 comm->comm.mbox = mbox;
135 * \brief Removes a communication synchro from a rendez-vous point
136 * \param mbox The rendez-vous point
137 * \param comm The communication synchro
/* Takes `comm` out of the mailbox's pending fifo and clears the synchro's mailbox pointer. */
139 void SIMIX_mbox_remove(smx_mailbox_t mbox, smx_synchro_t comm)
141 xbt_fifo_remove(mbox->comm_fifo, comm);
142 comm->comm.mbox = NULL;
146 * \brief Checks if there is a communication synchro queued in a fifo matching our needs
147 * \param type The type of communication we are looking for (comm_send, comm_recv)
148 * \return The communication synchro if found, NULL otherwise
/* Scans `fifo` for a queued synchro that matches, and takes it out of the fifo.
 *   fifo           fifo of queued communication synchros to scan
 *   type           type the queued synchro must have
 *   match_fun      optional filter of the caller, called as (this_user_data, other_user_data, queued synchro)
 *   this_user_data user data of the caller, handed to both filters
 *   my_synchro     synchro describing the caller, handed to the queued synchro's own filter
 * NOTE(review): the return statements are not visible in this listing. */
150 smx_synchro_t SIMIX_fifo_get_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
151 int (*match_fun)(void *, void *,smx_synchro_t),
152 void *this_user_data, smx_synchro_t my_synchro)
154 smx_synchro_t synchro;
155 xbt_fifo_item_t item;
156 void* other_user_data = NULL;
158 xbt_fifo_foreach(fifo, item, synchro, smx_synchro_t) {
/* The user data of the queued side lives in src_data for a send and in dst_data for a receive */
159 if (synchro->comm.type == SIMIX_COMM_SEND) {
160 other_user_data = synchro->comm.src_data;
161 } else if (synchro->comm.type == SIMIX_COMM_RECEIVE) {
162 other_user_data = synchro->comm.dst_data;
/* A match needs the right type and the agreement of both filters (an unset filter accepts anything) */
164 if (synchro->comm.type == type &&
165 (!match_fun || match_fun(this_user_data, other_user_data, synchro)) &&
166 (!synchro->comm.match_fun || synchro->comm.match_fun(other_user_data, this_user_data, my_synchro))) {
167 XBT_DEBUG("Found a matching communication synchro %p", synchro);
/* Unlink the fifo item, release it, and take one reference on the synchro */
168 xbt_fifo_remove_item(fifo, item);
169 xbt_fifo_free_item(item);
170 synchro->comm.refcount++;
/* Keep a copy of the mailbox pointer in mbox_cpy before clearing mbox
 * (NOTE(review): lines around this copy are not visible -- it may be conditionally compiled) */
172 synchro->comm.mbox_cpy = synchro->comm.mbox;
174 synchro->comm.mbox = NULL;
177 XBT_DEBUG("Sorry, communication synchro %p does not match our needs:"
178 " its type is %d but we are looking for a comm of type %d (or maybe the filtering didn't match)",
179 synchro, (int)synchro->comm.type, (int)type);
181 XBT_DEBUG("No matching communication synchro found");
187 * \brief Checks if there is a communication synchro queued in a fifo matching our needs, but leave it there
188 * \param type The type of communication we are looking for (comm_send, comm_recv)
189 * \return The communication synchro if found, NULL otherwise
/* Same matching rule as SIMIX_fifo_get_comm, but this function does not unlink the matched item
 * from `fifo`; it only takes a reference on the synchro.
 * Parameters have the same meaning as in SIMIX_fifo_get_comm.
 * NOTE(review): the return statements are not visible in this listing. */
191 smx_synchro_t SIMIX_fifo_probe_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
192 int (*match_fun)(void *, void *,smx_synchro_t),
193 void *this_user_data, smx_synchro_t my_synchro)
195 smx_synchro_t synchro;
196 xbt_fifo_item_t item;
197 void* other_user_data = NULL;
199 xbt_fifo_foreach(fifo, item, synchro, smx_synchro_t) {
/* The user data of the queued side lives in src_data for a send and in dst_data for a receive */
200 if (synchro->comm.type == SIMIX_COMM_SEND) {
201 other_user_data = synchro->comm.src_data;
202 } else if (synchro->comm.type == SIMIX_COMM_RECEIVE) {
203 other_user_data = synchro->comm.dst_data;
/* A match needs the right type and the agreement of both filters (an unset filter accepts anything) */
205 if (synchro->comm.type == type &&
206 (!match_fun || match_fun(this_user_data, other_user_data, synchro)) &&
207 (!synchro->comm.match_fun || synchro->comm.match_fun(other_user_data, this_user_data, my_synchro))) {
208 XBT_DEBUG("Found a matching communication synchro %p", synchro);
/* One reference is taken here; SIMIX_comm_iprobe gives it back after probing */
209 synchro->comm.refcount++;
213 XBT_DEBUG("Sorry, communication synchro %p does not match our needs:"
214 " its type is %d but we are looking for a comm of type %d (or maybe the filtering didn't match)",
215 synchro, (int)synchro->comm.type, (int)type);
217 XBT_DEBUG("No matching communication synchro found");
220 /******************************************************************************/
221 /* Communication synchros */
222 /******************************************************************************/
225 * \brief Creates a new communication synchro
226 * \param type The direction of communication (comm_send, comm_recv)
227 * \return The new communication synchro
/* Obtains a synchro from the global synchro mallocator and initialises it as a communication of the
 * given `type`: state SIMIX_WAITING, one reference, no user data on either side, no category.
 * NOTE(review): other field initialisations and the return statement are not visible in this listing. */
229 smx_synchro_t SIMIX_comm_new(e_smx_comm_type_t type)
231 smx_synchro_t synchro;
233 /* alloc structures */
234 synchro = (smx_synchro_t) xbt_mallocator_get(simix_global->synchro_mallocator);
236 synchro->type = SIMIX_SYNC_COMMUNICATE;
237 synchro->state = SIMIX_WAITING;
239 /* set communication */
240 synchro->comm.type = type;
241 synchro->comm.refcount = 1;
242 synchro->comm.src_data=NULL;
243 synchro->comm.dst_data=NULL;
245 synchro->category = NULL;
247 XBT_DEBUG("Create communicate synchro %p", synchro);
253 * \brief Destroy a communication synchro
254 * \param synchro The communication synchro to be destroyed
/* Drops one reference on `synchro`. The statements after the `refcount > 0` test release the
 * synchro's resources and hand it back to the mallocator.
 * Dies with a backtrace when the reference count is already <= 0 on entry.
 * NOTE(review): the statement executed when references remain (presumably an early return) is not
 * visible in this listing. */
256 void SIMIX_comm_destroy(smx_synchro_t synchro)
258 XBT_DEBUG("Destroy synchro %p (refcount: %d), state: %d",
259 synchro, synchro->comm.refcount, (int)synchro->state);
/* Guard against over-release */
261 if (synchro->comm.refcount <= 0) {
262 xbt_backtrace_display_current();
263 xbt_die("The refcount of comm %p is already 0 before decreasing it. "
264 "That's a bug! If you didn't test and/or wait the same communication twice in your code, then the bug is SimGrid's...", synchro);
266 synchro->comm.refcount--;
267 if (synchro->comm.refcount > 0)
269 XBT_DEBUG("Really free communication %p; refcount is now %d", synchro,
270 synchro->comm.refcount);
/* Release the name and the surf actions still attached to the communication */
272 xbt_free(synchro->name);
273 SIMIX_comm_destroy_internal_actions(synchro);
275 if (synchro->comm.detached && synchro->state != SIMIX_DONE) {
276 /* the communication has failed and was detached:
277 * we have to free the buffer */
278 if (synchro->comm.clean_fun) {
279 synchro->comm.clean_fun(synchro->comm.src_buff);
281 synchro->comm.src_buff = NULL;
/* A synchro that is still queued in a mailbox must be unlinked before being recycled */
284 if(synchro->comm.mbox)
285 SIMIX_mbox_remove(synchro->comm.mbox, synchro);
287 xbt_mallocator_release(simix_global->synchro_mallocator, synchro);
/* Drops the references held on the three surf actions of a communication (the transfer itself and
 * the two per-side timeouts) and resets the corresponding pointers to NULL. Unset actions are skipped. */
290 void SIMIX_comm_destroy_internal_actions(smx_synchro_t synchro)
292 if (synchro->comm.surf_comm){
293 synchro->comm.surf_comm->unref();
294 synchro->comm.surf_comm = NULL;
297 if (synchro->comm.src_timeout){
298 synchro->comm.src_timeout->unref();
299 synchro->comm.src_timeout = NULL;
302 if (synchro->comm.dst_timeout){
303 synchro->comm.dst_timeout->unref();
304 synchro->comm.dst_timeout = NULL;
/* Blocking send: issues an asynchronous send through simcall_HANDLER_comm_isend (with NULL as
 * clean_fun), sets the MC value of the simcall to 0, then waits on the resulting communication
 * with `timeout`.
 * NOTE(review): the tail of the isend argument list is not visible in this listing. */
308 void simcall_HANDLER_comm_send(smx_simcall_t simcall, smx_process_t src, smx_mailbox_t mbox,
309 double task_size, double rate,
310 void *src_buff, size_t src_buff_size,
311 int (*match_fun)(void *, void *,smx_synchro_t),
312 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
313 void *data, double timeout){
314 smx_synchro_t comm = simcall_HANDLER_comm_isend(simcall, src, mbox, task_size, rate,
315 src_buff, src_buff_size, match_fun, NULL, copy_data_fun,
317 SIMCALL_SET_MC_VALUE(simcall, 0);
318 simcall_HANDLER_comm_wait(simcall, comm, timeout);
/* Asynchronous send on `mbox`.
 * Tries to pair the send with a queued receive; when none matches, the send's own synchro is queued
 * (in the done_comm_fifo when the mailbox has a permanent receiver, in the regular fifo through
 * SIMIX_mbox_push otherwise). The sender-side fields are then filled in and the communication is
 * started unless the model checker or record/replay is active.
 * Returns the communication synchro, or NULL when `detached` is set.
 * NOTE(review): the else-branches and the `detached` test are not visible in this listing. */
320 smx_synchro_t simcall_HANDLER_comm_isend(smx_simcall_t simcall, smx_process_t src_proc, smx_mailbox_t mbox,
321 double task_size, double rate,
322 void *src_buff, size_t src_buff_size,
323 int (*match_fun)(void *, void *,smx_synchro_t),
324 void (*clean_fun)(void *), // used to free the synchro in case of problem after a detached send
325 void (*copy_data_fun)(smx_synchro_t, void*, size_t),// used to copy data if not default one
326 void *data, int detached)
328 XBT_DEBUG("send from %p", mbox);
330 /* Prepare a synchro describing us, so that it gets passed to the user-provided filter of other side */
331 smx_synchro_t this_synchro = SIMIX_comm_new(SIMIX_COMM_SEND);
333 /* Look for communication synchro matching our needs. We also provide a description of
334 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
336 * If it is not found then push our communication into the rendez-vous point */
337 smx_synchro_t other_synchro = SIMIX_fifo_get_comm(mbox->comm_fifo, SIMIX_COMM_RECEIVE, match_fun, data, this_synchro);
/* No receive was waiting: our own synchro becomes the communication */
339 if (!other_synchro) {
340 other_synchro = this_synchro;
342 if (mbox->permanent_receiver!=NULL){
343 //this mailbox is for small messages, which have to be sent right now
/* Mark the comm ready with the permanent receiver as destination, take an extra reference and
 * queue it in the done_comm_fifo, where SIMIX_comm_irecv looks first */
344 other_synchro->state = SIMIX_READY;
345 other_synchro->comm.dst_proc=mbox->permanent_receiver;
346 other_synchro->comm.refcount++;
347 xbt_fifo_push(mbox->done_comm_fifo,other_synchro);
348 other_synchro->comm.mbox=mbox;
349 XBT_DEBUG("pushing a message into the permanent receive fifo %p, comm %p", mbox, &(other_synchro->comm));
352 SIMIX_mbox_push(mbox, this_synchro);
355 XBT_DEBUG("Receive already pushed");
/* A queued receive matched: our temporary synchro is dropped and the receiver's one is used */
357 SIMIX_comm_destroy(this_synchro);
359 other_synchro->state = SIMIX_READY;
360 other_synchro->comm.type = SIMIX_COMM_READY;
/* Record the communication in the sender's list of comms */
363 xbt_fifo_push(src_proc->comms, other_synchro);
365 /* if the communication synchro is detached then decrease the refcount
366 * by one, so it will be eliminated by the receiver's destroy call */
368 other_synchro->comm.detached = 1;
369 other_synchro->comm.refcount--;
370 other_synchro->comm.clean_fun = clean_fun;
372 other_synchro->comm.clean_fun = NULL;
375 /* Setup the communication synchro */
376 other_synchro->comm.src_proc = src_proc;
377 other_synchro->comm.task_size = task_size;
378 other_synchro->comm.rate = rate;
379 other_synchro->comm.src_buff = src_buff;
380 other_synchro->comm.src_buff_size = src_buff_size;
381 other_synchro->comm.src_data = data;
383 other_synchro->comm.match_fun = match_fun;
384 other_synchro->comm.copy_data_fun = copy_data_fun;
/* Under the model checker or record/replay the comm is only flagged as running; SIMIX_comm_start
 * is not called on this path */
387 if (MC_is_active() || MC_record_replay_is_active()) {
388 other_synchro->state = SIMIX_RUNNING;
389 return (detached ? NULL : other_synchro);
392 SIMIX_comm_start(other_synchro);
393 return (detached ? NULL : other_synchro);
/* Blocking receive: issues an asynchronous receive through SIMIX_comm_irecv, sets the MC value of
 * the simcall to 0, then waits on the resulting communication with `timeout`. */
396 void simcall_HANDLER_comm_recv(smx_simcall_t simcall, smx_process_t receiver, smx_mailbox_t mbox,
397 void *dst_buff, size_t *dst_buff_size,
398 int (*match_fun)(void *, void *, smx_synchro_t),
399 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
400 void *data, double timeout, double rate)
402 smx_synchro_t comm = SIMIX_comm_irecv(receiver, mbox, dst_buff,
403 dst_buff_size, match_fun, copy_data_fun, data, rate);
404 SIMCALL_SET_MC_VALUE(simcall, 0);
405 simcall_HANDLER_comm_wait(simcall, comm, timeout);
/* Simcall entry point for the asynchronous receive: forwards every argument except `simcall`
 * to SIMIX_comm_irecv and returns its result. */
408 smx_synchro_t simcall_HANDLER_comm_irecv(smx_simcall_t simcall, smx_process_t receiver, smx_mailbox_t mbox,
409 void *dst_buff, size_t *dst_buff_size,
410 int (*match_fun)(void *, void *, smx_synchro_t),
411 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
412 void *data, double rate)
414 return SIMIX_comm_irecv(receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, rate);
/* Asynchronous receive on `mbox` for process `dst_proc`.
 * When the mailbox has a permanent receiver and its done_comm_fifo is not empty, a matching send is
 * first searched there; otherwise (or as the regular path) a matching send is searched in the
 * pending fifo. When nothing matches, the receive's own synchro is queued in the mailbox.
 * The receiver-side fields are then filled in and the communication is started unless the model
 * checker or record/replay is active. Returns the communication synchro.
 * NOTE(review): the else-branches are not visible in this listing. */
417 smx_synchro_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_mailbox_t mbox, void *dst_buff, size_t *dst_buff_size,
418 int (*match_fun)(void *, void *, smx_synchro_t),
419 void (*copy_data_fun)(smx_synchro_t, void*, size_t), // used to copy data if not default one
420 void *data, double rate)
422 XBT_DEBUG("recv from %p %p", mbox, mbox->comm_fifo);
423 smx_synchro_t this_synchro = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
425 smx_synchro_t other_synchro;
426 //communication already done, get it inside the fifo of completed comms
427 if (mbox->permanent_receiver && xbt_fifo_size(mbox->done_comm_fifo)!=0) {
429 XBT_DEBUG("We have a comm that has probably already been received, trying to match it, to skip the communication");
430 //find a match in the already received fifo
431 other_synchro = SIMIX_fifo_get_comm(mbox->done_comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_synchro);
432 //if not found, assume the receiver came first, register it to the mailbox in the classical way
433 if (!other_synchro) {
434 XBT_DEBUG("We have messages in the permanent receive list, but not the one we are looking for, pushing request into fifo");
435 other_synchro = this_synchro;
436 SIMIX_mbox_push(mbox, this_synchro);
/* The eager send has a surf action with nothing left to transfer: flag the comm as done */
438 if(other_synchro->comm.surf_comm && SIMIX_comm_get_remains(other_synchro)==0.0) {
439 XBT_DEBUG("comm %p has been already sent, and is finished, destroy it",&(other_synchro->comm));
440 other_synchro->state = SIMIX_DONE;
441 other_synchro->comm.type = SIMIX_COMM_DONE;
442 other_synchro->comm.mbox = NULL;
/* Drop one reference on the matched comm and release our temporary synchro */
444 other_synchro->comm.refcount--;
445 SIMIX_comm_destroy(this_synchro);
448 /* Prepare a synchro describing us, so that it gets passed to the user-provided filter of other side */
450 /* Look for communication synchro matching our needs. We also provide a description of
451 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
453 * If it is not found then push our communication into the rendez-vous point */
454 other_synchro = SIMIX_fifo_get_comm(mbox->comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_synchro);
456 if (!other_synchro) {
457 XBT_DEBUG("Receive pushed first %d", xbt_fifo_size(mbox->comm_fifo));
458 other_synchro = this_synchro;
459 SIMIX_mbox_push(mbox, this_synchro);
/* A queued send matched: our temporary synchro is dropped and the sender's one is used */
461 SIMIX_comm_destroy(this_synchro);
462 other_synchro->state = SIMIX_READY;
463 other_synchro->comm.type = SIMIX_COMM_READY;
464 //other_synchro->comm.refcount--;
/* Record the communication in the receiver's list of comms */
466 xbt_fifo_push(dst_proc->comms, other_synchro);
469 /* Setup communication synchro */
470 other_synchro->comm.dst_proc = dst_proc;
471 other_synchro->comm.dst_buff = dst_buff;
472 other_synchro->comm.dst_buff_size = dst_buff_size;
473 other_synchro->comm.dst_data = data;
/* -1.0 is treated as "no rate given": the receiver's rate is applied when the comm has none yet
 * or when it is lower than the one already set */
475 if (rate != -1.0 && (other_synchro->comm.rate == -1.0 || rate < other_synchro->comm.rate))
476 other_synchro->comm.rate = rate;
478 other_synchro->comm.match_fun = match_fun;
479 other_synchro->comm.copy_data_fun = copy_data_fun;
/* Under the model checker or record/replay the comm is only flagged as running; SIMIX_comm_start
 * is not called on this path */
481 if (MC_is_active() || MC_record_replay_is_active()) {
482 other_synchro->state = SIMIX_RUNNING;
483 return other_synchro;
486 SIMIX_comm_start(other_synchro);
487 return other_synchro;
/* Simcall entry point for the probe: calls SIMIX_comm_iprobe on behalf of the issuer of the simcall.
 * NOTE(review): the last parameter line of the signature is not visible in this listing. */
490 smx_synchro_t simcall_HANDLER_comm_iprobe(smx_simcall_t simcall, smx_mailbox_t mbox,
491 int type, int src, int tag,
492 int (*match_fun)(void *, void *, smx_synchro_t),
494 return SIMIX_comm_iprobe(simcall->issuer, mbox, type, src, tag, match_fun, data);
/* Looks for a queued communication on `mbox` matching `match_fun`/`data` using the probe variant
 * of the fifo search.
 * A temporary synchro is created to describe the caller to the other side's filter; the searched
 * type is the opposite one (a temporary SEND looks for RECEIVEs and vice versa).
 * Returns the matching synchro or NULL.
 * NOTE(review): the declaration of smx_type and the test on `type` selecting the direction, as well
 * as the condition guarding the second probe, are not visible in this listing. */
497 smx_synchro_t SIMIX_comm_iprobe(smx_process_t dst_proc, smx_mailbox_t mbox, int type, int src,
498 int tag, int (*match_fun)(void *, void *, smx_synchro_t), void *data)
500 XBT_DEBUG("iprobe from %p %p", mbox, mbox->comm_fifo);
501 smx_synchro_t this_synchro;
504 this_synchro=SIMIX_comm_new(SIMIX_COMM_SEND);
505 smx_type = SIMIX_COMM_RECEIVE;
507 this_synchro=SIMIX_comm_new(SIMIX_COMM_RECEIVE);
508 smx_type = SIMIX_COMM_SEND;
510 smx_synchro_t other_synchro=NULL;
511 if(mbox->permanent_receiver && xbt_fifo_size(mbox->done_comm_fifo)!=0){
512 //find a match in the already received fifo
513 XBT_DEBUG("first try in the perm recv mailbox");
515 other_synchro = SIMIX_fifo_probe_comm(
516 mbox->done_comm_fifo, (e_smx_comm_type_t) smx_type,
517 match_fun, data, this_synchro);
521 XBT_DEBUG("try in the normal mailbox");
522 other_synchro = SIMIX_fifo_probe_comm(
523 mbox->comm_fifo, (e_smx_comm_type_t) smx_type,
524 match_fun, data, this_synchro);
/* Give back the reference that SIMIX_fifo_probe_comm took on the match */
527 if(other_synchro)other_synchro->comm.refcount--;
/* The temporary synchro was only needed for the filters */
529 SIMIX_comm_destroy(this_synchro);
530 return other_synchro;
/* Makes the issuer of `simcall` wait for `synchro`.
 * The simcall is attached to the synchro and the synchro recorded as what the issuer waits for.
 * With the model checker or record/replay active, the outcome is derived from the MC value of the
 * simcall (done, or a timeout attributed to the side the issuer is on) and the comm is finished
 * right away. Otherwise an already finished comm is answered immediately, and a still pending one
 * gets a sleep action on the issuer's host, stored as the timeout of the issuer's side.
 * NOTE(review): the tests on `idx`, the declaration of `sleep`, and the else/return lines are not
 * visible in this listing. */
533 void simcall_HANDLER_comm_wait(smx_simcall_t simcall, smx_synchro_t synchro, double timeout)
535 /* the simcall may be a wait, a send or a recv */
538 /* Associate this simcall to the wait synchro */
539 XBT_DEBUG("simcall_HANDLER_comm_wait, %p", synchro);
541 xbt_fifo_push(synchro->simcalls, simcall);
542 simcall->issuer->waiting_synchro = synchro;
544 if (MC_is_active() || MC_record_replay_is_active()) {
545 int idx = SIMCALL_GET_MC_VALUE(simcall);
547 synchro->state = SIMIX_DONE;
549 /* If we reached this point, the wait simcall must have a timeout */
550 /* Otherwise it shouldn't be enabled and executed by the MC */
554 if (synchro->comm.src_proc == simcall->issuer)
555 synchro->state = SIMIX_SRC_TIMEOUT;
557 synchro->state = SIMIX_DST_TIMEOUT;
560 SIMIX_comm_finish(synchro);
564 /* If the synchro has already finished, perform the error handling, */
565 /* otherwise set up a waiting timeout on the right side */
566 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING) {
567 SIMIX_comm_finish(synchro);
568 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
569 sleep = surf_host_sleep(simcall->issuer->host, timeout);
/* Let the sleep action point back to the synchro it belongs to */
570 sleep->setData(synchro);
572 if (simcall->issuer == synchro->comm.src_proc)
573 synchro->comm.src_timeout = sleep;
575 synchro->comm.dst_timeout = sleep;
/* Non-blocking test of `synchro`; the boolean result is stored in the simcall.
 * With the model checker or record/replay active, the comm counts as finished as soon as both the
 * source and the destination process are known. Otherwise it counts as finished when its state is
 * neither SIMIX_WAITING nor SIMIX_RUNNING.
 * A finished comm gets the simcall attached and is completed through SIMIX_comm_finish; the other
 * visible call answers the simcall directly.
 * NOTE(review): the else/return lines separating these paths are not visible in this listing. */
579 void simcall_HANDLER_comm_test(smx_simcall_t simcall, smx_synchro_t synchro)
581 if(MC_is_active() || MC_record_replay_is_active()){
582 simcall_comm_test__set__result(simcall, synchro->comm.src_proc && synchro->comm.dst_proc);
583 if(simcall_comm_test__get__result(simcall)){
584 synchro->state = SIMIX_DONE;
585 xbt_fifo_push(synchro->simcalls, simcall);
586 SIMIX_comm_finish(synchro);
588 SIMIX_simcall_answer(simcall);
593 simcall_comm_test__set__result(simcall, (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING));
594 if (simcall_comm_test__get__result(simcall)) {
595 xbt_fifo_push(synchro->simcalls, simcall);
596 SIMIX_comm_finish(synchro);
598 SIMIX_simcall_answer(simcall);
/* Non-blocking test of a set of communications. The result stored in the simcall starts at -1 and
 * is replaced by the index of a finished communication when one is found.
 * With the model checker or record/replay active, the index comes from the MC value of the simcall.
 * Otherwise the comms of the simcall are scanned for one whose state is neither SIMIX_WAITING nor
 * SIMIX_RUNNING.
 * NOTE(review): the declaration of `cursor`, the test on `idx`, and the else/return lines are not
 * visible in this listing. */
602 void simcall_HANDLER_comm_testany(smx_simcall_t simcall, xbt_dynar_t synchros)
605 smx_synchro_t synchro;
606 simcall_comm_testany__set__result(simcall, -1);
608 if (MC_is_active() || MC_record_replay_is_active()){
609 int idx = SIMCALL_GET_MC_VALUE(simcall);
611 SIMIX_simcall_answer(simcall);
613 synchro = xbt_dynar_get_as(synchros, idx, smx_synchro_t);
614 simcall_comm_testany__set__result(simcall, idx);
615 xbt_fifo_push(synchro->simcalls, simcall);
616 synchro->state = SIMIX_DONE;
617 SIMIX_comm_finish(synchro);
622 xbt_dynar_foreach(simcall_comm_testany__get__comms(simcall), cursor,synchro) {
623 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING) {
624 simcall_comm_testany__set__result(simcall, cursor);
625 xbt_fifo_push(synchro->simcalls, simcall);
626 SIMIX_comm_finish(synchro);
630 SIMIX_simcall_answer(simcall);
/* Makes the issuer of `simcall` wait for any of the communications in `synchros`.
 * With the model checker or record/replay active, the comm designated by the MC value of the
 * simcall is flagged done and finished immediately, its index being the result.
 * Otherwise the simcall is attached to the comms of the set, and a comm that is already finished
 * (state neither SIMIX_WAITING nor SIMIX_RUNNING) is completed through SIMIX_comm_finish.
 * NOTE(review): the return/break lines are not visible in this listing. */
633 void simcall_HANDLER_comm_waitany(smx_simcall_t simcall, xbt_dynar_t synchros)
635 smx_synchro_t synchro;
636 unsigned int cursor = 0;
638 if (MC_is_active() || MC_record_replay_is_active()){
639 int idx = SIMCALL_GET_MC_VALUE(simcall);
640 synchro = xbt_dynar_get_as(synchros, idx, smx_synchro_t);
641 xbt_fifo_push(synchro->simcalls, simcall);
642 simcall_comm_waitany__set__result(simcall, idx);
643 synchro->state = SIMIX_DONE;
644 SIMIX_comm_finish(synchro);
648 xbt_dynar_foreach(synchros, cursor, synchro){
649 /* associate this simcall to the synchro */
650 xbt_fifo_push(synchro->simcalls, simcall);
652 /* see if the synchro is already finished */
653 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING){
654 SIMIX_comm_finish(synchro);
/* Detaches a waitany `simcall` from every communication of its set, i.e. undoes the
 * xbt_fifo_push(synchro->simcalls, simcall) done in simcall_HANDLER_comm_waitany. */
660 void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall)
662 smx_synchro_t synchro;
663 unsigned int cursor = 0;
664 xbt_dynar_t synchros = simcall_comm_waitany__get__comms(simcall);
666 xbt_dynar_foreach(synchros, cursor, synchro) {
667 xbt_fifo_remove(synchro->simcalls, simcall);
672 * \brief Starts the simulation of a communication synchro.
673 * \param synchro the communication synchro
/* Starts the network transfer of a communication whose state is SIMIX_READY; other states are left
 * untouched by the visible code.
 * A surf communication action is created between the hosts of the two processes and the synchro
 * switches to SIMIX_RUNNING. An action that is failed right after creation turns the synchro into
 * SIMIX_LINK_FAILURE and releases its actions. The code after that suspends the surf action when
 * either process is suspended.
 * NOTE(review): part of the surf_network_model_communicate argument list and the else lines are not
 * visible in this listing. */
675 static inline void SIMIX_comm_start(smx_synchro_t synchro)
677 /* If both the sender and the receiver are already there, start the communication */
678 if (synchro->state == SIMIX_READY) {
680 sg_host_t sender = synchro->comm.src_proc->host;
681 sg_host_t receiver = synchro->comm.dst_proc->host;
683 XBT_DEBUG("Starting communication %p from '%s' to '%s'", synchro,
684 sg_host_get_name(sender), sg_host_get_name(receiver));
686 synchro->comm.surf_comm = surf_network_model_communicate(surf_network_model,
688 synchro->comm.task_size, synchro->comm.rate);
/* Let the surf action point back to the synchro it belongs to */
690 synchro->comm.surf_comm->setData(synchro);
692 synchro->state = SIMIX_RUNNING;
694 /* If a link is failed, detect it immediately */
695 if (synchro->comm.surf_comm->getState() == simgrid::surf::Action::State::failed) {
696 XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
697 sg_host_get_name(sender), sg_host_get_name(receiver));
698 synchro->state = SIMIX_LINK_FAILURE;
699 SIMIX_comm_destroy_internal_actions(synchro);
702 /* If any of the processes is suspended, create the synchro but stop its execution,
703 it will be restarted when the sender process resume */
704 if (SIMIX_process_is_suspended(synchro->comm.src_proc) ||
705 SIMIX_process_is_suspended(synchro->comm.dst_proc)) {
706 /* FIXME: check what should happen with the synchro state */
708 if (SIMIX_process_is_suspended(synchro->comm.src_proc))
709 XBT_DEBUG("The communication is suspended on startup because src (%s:%s) were suspended since it initiated the communication",
710 sg_host_get_name(synchro->comm.src_proc->host), synchro->comm.src_proc->name);
712 XBT_DEBUG("The communication is suspended on startup because dst (%s:%s) were suspended since it initiated the communication",
713 sg_host_get_name(synchro->comm.dst_proc->host), synchro->comm.dst_proc->name);
715 synchro->comm.surf_comm->suspend();
722 * \brief Answers the SIMIX simcalls associated to a communication synchro.
723 * \param synchro a finished communication synchro
/* Answers every simcall attached to a finished communication.
 * For each simcall taken from synchro->simcalls: a waitany simcall is detached from the other comms
 * of its set and given its result index; the synchro is removed from its mailbox if still queued;
 * depending on the final state the data is copied or an exception is posted to the issuer; the
 * synchro is unlinked from the comm lists of the processes; finally the simcall is answered.
 * After the loop, SIMIX_comm_destroy is called destroy_count times.
 * NOTE(review): several case labels, break/else lines and the statements incrementing destroy_count
 * are not visible in this listing. */
725 void SIMIX_comm_finish(smx_synchro_t synchro)
727 unsigned int destroy_count = 0;
728 smx_simcall_t simcall;
730 while ((simcall = (smx_simcall_t) xbt_fifo_shift(synchro->simcalls))) {
732 /* If a waitany simcall is waiting for this synchro to finish, then remove
733 it from the other synchros in the waitany list. Afterwards, get the
734 position of the actual synchro in the waitany dynar and
735 return it as the result of the simcall */
737 if (simcall->call == SIMCALL_NONE) //FIXME: maybe a better way to handle this case
738 continue; // if process handling comm is killed
739 if (simcall->call == SIMCALL_COMM_WAITANY) {
740 SIMIX_waitany_remove_simcall_from_actions(simcall);
/* In MC / replay mode the result index was already set by simcall_HANDLER_comm_waitany */
741 if (!MC_is_active() && !MC_record_replay_is_active())
742 simcall_comm_waitany__set__result(simcall, xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro));
745 /* If the synchro is still in a rendez-vous point then remove from it */
746 if (synchro->comm.mbox)
747 SIMIX_mbox_remove(synchro->comm.mbox, synchro);
749 XBT_DEBUG("SIMIX_comm_finish: synchro state = %d", (int)synchro->state);
751 /* Check out for errors */
753 if (simcall->issuer->host->isOff()) {
754 simcall->issuer->context->iwannadie = 1;
755 SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
758 switch (synchro->state) {
761 XBT_DEBUG("Communication %p complete!", synchro);
762 SIMIX_comm_copy_data(synchro);
765 case SIMIX_SRC_TIMEOUT:
766 SMX_EXCEPTION(simcall->issuer, timeout_error, 0,
767 "Communication timeouted because of sender");
770 case SIMIX_DST_TIMEOUT:
771 SMX_EXCEPTION(simcall->issuer, timeout_error, 0,
772 "Communication timeouted because of receiver");
775 case SIMIX_SRC_HOST_FAILURE:
776 if (simcall->issuer == synchro->comm.src_proc)
777 simcall->issuer->context->iwannadie = 1;
778 // SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
780 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
783 case SIMIX_DST_HOST_FAILURE:
784 if (simcall->issuer == synchro->comm.dst_proc)
785 simcall->issuer->context->iwannadie = 1;
786 // SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
788 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
791 case SIMIX_LINK_FAILURE:
793 XBT_DEBUG("Link failure in synchro %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d",
795 synchro->comm.src_proc ? sg_host_get_name(synchro->comm.src_proc->host) : NULL,
796 synchro->comm.dst_proc ? sg_host_get_name(synchro->comm.dst_proc->host) : NULL,
797 simcall->issuer->name, simcall->issuer, synchro->comm.detached);
798 if (synchro->comm.src_proc == simcall->issuer) {
799 XBT_DEBUG("I'm source");
800 } else if (synchro->comm.dst_proc == simcall->issuer) {
801 XBT_DEBUG("I'm dest");
803 XBT_DEBUG("I'm neither source nor dest");
805 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Link failure");
/* The message tells the issuer which peer cancelled: a receiver issuer is told the sender did,
 * any other issuer that the receiver did */
809 if (simcall->issuer == synchro->comm.dst_proc)
810 SMX_EXCEPTION(simcall->issuer, cancel_error, 0,
811 "Communication canceled by the sender");
813 SMX_EXCEPTION(simcall->issuer, cancel_error, 0,
814 "Communication canceled by the receiver");
818 xbt_die("Unexpected synchro state in SIMIX_comm_finish: %d", (int)synchro->state);
821 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
822 if (simcall->issuer->doexception) {
823 if (simcall->call == SIMCALL_COMM_WAITANY) {
824 simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro);
826 else if (simcall->call == SIMCALL_COMM_TESTANY) {
827 simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_testany__get__comms(simcall), &synchro);
831 if (simcall->issuer->host->isOff()) {
832 simcall->issuer->context->iwannadie = 1;
/* The issuer no longer waits on anything; forget the comm in its list */
835 simcall->issuer->waiting_synchro = NULL;
836 xbt_fifo_remove(simcall->issuer->comms, synchro);
/* For a detached comm, also unlink the synchro from the comm list of the peer process */
837 if(synchro->comm.detached){
838 if(simcall->issuer == synchro->comm.src_proc){
839 if(synchro->comm.dst_proc)
840 xbt_fifo_remove(synchro->comm.dst_proc->comms, synchro);
842 if(simcall->issuer == synchro->comm.dst_proc){
843 if(synchro->comm.src_proc)
844 xbt_fifo_remove(synchro->comm.src_proc->comms, synchro);
847 SIMIX_simcall_answer(simcall);
/* Destruction is deferred until all simcalls have been processed */
851 while (destroy_count-- > 0)
852 SIMIX_comm_destroy(synchro);
856 * \brief This function is called when a Surf communication synchro is finished.
857 * \param synchro the corresponding Simix communication
/* Derives the final state of `synchro` from the states of its surf actions, releases those actions
 * and, when simcalls are attached to the synchro, answers them through SIMIX_comm_finish.
 * The tests are ordered: source timeout done, destination timeout done, source timeout failed,
 * destination timeout failed, transfer failed; SIMIX_DONE is the remaining outcome.
 * NOTE(review): the else line introducing the SIMIX_DONE assignment is not visible in this listing. */
859 void SIMIX_post_comm(smx_synchro_t synchro)
861 /* Update synchro state */
862 if (synchro->comm.src_timeout &&
863 synchro->comm.src_timeout->getState() == simgrid::surf::Action::State::done)
864 synchro->state = SIMIX_SRC_TIMEOUT;
865 else if (synchro->comm.dst_timeout &&
866 synchro->comm.dst_timeout->getState() == simgrid::surf::Action::State::done)
867 synchro->state = SIMIX_DST_TIMEOUT;
868 else if (synchro->comm.src_timeout &&
869 synchro->comm.src_timeout->getState() == simgrid::surf::Action::State::failed)
870 synchro->state = SIMIX_SRC_HOST_FAILURE;
871 else if (synchro->comm.dst_timeout &&
872 synchro->comm.dst_timeout->getState() == simgrid::surf::Action::State::failed)
873 synchro->state = SIMIX_DST_HOST_FAILURE;
874 else if (synchro->comm.surf_comm &&
875 synchro->comm.surf_comm->getState() == simgrid::surf::Action::State::failed) {
876 XBT_DEBUG("Puta madre. Surf says that the link broke");
877 synchro->state = SIMIX_LINK_FAILURE;
879 synchro->state = SIMIX_DONE;
881 XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
882 synchro, (int)synchro->state, synchro->comm.src_proc, synchro->comm.dst_proc, synchro->comm.detached);
884 /* destroy the surf actions associated with the Simix communication */
885 SIMIX_comm_destroy_internal_actions(synchro);
887 /* if there are simcalls associated with the synchro, then answer them */
888 if (xbt_fifo_size(synchro->simcalls)) {
889 SIMIX_comm_finish(synchro);
/* Cancels a communication.
 * A comm in state SIMIX_WAITING is removed from its mailbox and flagged SIMIX_CANCELED. A comm in
 * state SIMIX_READY or SIMIX_RUNNING has its surf action cancelled, but only outside the model
 * checker and record/replay. Other states are left untouched by the visible code. */
893 void SIMIX_comm_cancel(smx_synchro_t synchro)
895 /* if the synchro is a waiting state means that it is still in a mbox */
896 /* so remove from it and delete it */
897 if (synchro->state == SIMIX_WAITING) {
898 SIMIX_mbox_remove(synchro->comm.mbox, synchro);
899 synchro->state = SIMIX_CANCELED;
901 else if (!MC_is_active() /* when running the MC there are no surf actions */
902 && !MC_record_replay_is_active()
903 && (synchro->state == SIMIX_READY || synchro->state == SIMIX_RUNNING)) {
905 synchro->comm.surf_comm->cancel();
/* Suspends the surf transfer action of `synchro` when it exists; does nothing otherwise. */
909 void SIMIX_comm_suspend(smx_synchro_t synchro)
911 /*FIXME: shall we suspend also the timeout synchro? */
912 if (synchro->comm.surf_comm)
913 synchro->comm.surf_comm->suspend();
914 /* in the other case, the action will be suspended on creation, in SIMIX_comm_start() */
/* Resumes the surf transfer action of `synchro` when it exists; does nothing otherwise. */
917 void SIMIX_comm_resume(smx_synchro_t synchro)
919 /*FIXME: check what happen with the timeouts */
920 if (synchro->comm.surf_comm)
921 synchro->comm.surf_comm->resume();
922 /* in the other case, the synchro were not really suspended yet, see SIMIX_comm_suspend() and SIMIX_comm_start() */
926 /************* synchro Getters **************/
929 * \brief get the amount remaining from the communication
930 * \param synchro The communication
/* Computes the remaining amount of the communication with a switch on the synchro state: one branch
 * asks the surf transfer action through getRemains(), the other visible branches yield 0.
 * NOTE(review): the declaration of `remains`, the case labels, any code before the switch and the
 * return statement are not visible in this listing. */
932 double SIMIX_comm_get_remains(smx_synchro_t synchro)
940 switch (synchro->state) {
943 remains = synchro->comm.surf_comm->getRemains();
948 remains = 0; /*FIXME: check what should be returned */
952 remains = 0; /*FIXME: is this correct? */
/* Plain accessor for the state field of the synchro. */
958 e_smx_state_t SIMIX_comm_get_state(smx_synchro_t synchro)
960 return synchro->state;
964 * \brief Return the user data associated to the sender of the communication
965 * \param synchro The communication
966 * \return the user data
/* Plain accessor for comm.src_data, which simcall_HANDLER_comm_isend sets from its `data` argument. */
968 void* SIMIX_comm_get_src_data(smx_synchro_t synchro)
970 return synchro->comm.src_data;
974 * \brief Return the user data associated to the receiver of the communication
975 * \param synchro The communication
976 * \return the user data
/* Plain accessor for comm.dst_data, which SIMIX_comm_irecv sets from its `data` argument. */
978 void* SIMIX_comm_get_dst_data(smx_synchro_t synchro)
980 return synchro->comm.dst_data;
/* Plain accessor for the sending process of the communication (comm.src_proc). */
983 smx_process_t SIMIX_comm_get_src_proc(smx_synchro_t synchro)
985 return synchro->comm.src_proc;
/* Plain accessor for the receiving process of the communication (comm.dst_proc). */
988 smx_process_t SIMIX_comm_get_dst_proc(smx_synchro_t synchro)
990 return synchro->comm.dst_proc;
993 /******************************************************************************/
994 /* SIMIX_comm_copy_data callbacks */
995 /******************************************************************************/
/* File-wide copy callback used by SIMIX_comm_copy_data when a communication has no copy_data_fun
 * of its own; it initially points to the pointer-copy callback. */
996 static void (*SIMIX_comm_copy_data_callback) (smx_synchro_t, void*, size_t) =
997 &SIMIX_comm_copy_pointer_callback;
/* Replaces the file-wide copy callback with `callback`.
 * NOTE(review): the return-type line of this definition is not visible in this listing. */
1000 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_synchro_t, void*, size_t))
1002 SIMIX_comm_copy_data_callback = callback;
/* Copy callback that transfers a pointer rather than the pointed data: the receiver's buffer is
 * treated as a `void *` slot and receives the value of `buff` itself.
 * Asserts that `buff_size` equals sizeof(void *). */
1005 void SIMIX_comm_copy_pointer_callback(smx_synchro_t comm, void* buff, size_t buff_size)
1007 xbt_assert((buff_size == sizeof(void *)),
1008 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
1009 *(void **) (comm->comm.dst_buff) = buff;
/* Copy callback that copies `buff_size` bytes from `buff` into the receiver's buffer with memcpy.
 * For a detached communication the source buffer pointer of the comm is reset afterwards.
 * NOTE(review): the statement between the `detached` test and the reset (presumably the release of
 * the duplicated buffer) is not visible in this listing. */
1012 void SIMIX_comm_copy_buffer_callback(smx_synchro_t comm, void* buff, size_t buff_size)
1014 XBT_DEBUG("Copy the data over");
1015 memcpy(comm->comm.dst_buff, buff, buff_size);
1016 if (comm->comm.detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
1018 comm->comm.src_buff = NULL;
1024 * \brief Copy the communication data from the sender's buffer to the receiver's one
1025 * \param comm The communication
/* Transfers the payload of a communication from the sender's buffer to the receiver's.
 * The size to copy starts as src_buff_size and, when the receiver supplied a size pointer, is
 * capped to the value it points to; that pointed value is then overwritten with the size used.
 * The copy itself is delegated to the comm's own copy_data_fun when set, to the file-wide callback
 * otherwise. The `copied` flag is set at the end and tested at the top.
 * NOTE(review): the return after the first test, any guard around the copy and the else line are
 * not visible in this listing. */
1027 void SIMIX_comm_copy_data(smx_synchro_t comm)
1029 size_t buff_size = comm->comm.src_buff_size;
1030 /* If there is no data to copy, or it was copied already, then return */
1031 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied)
1034 XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
1036 comm->comm.src_proc ? sg_host_get_name(comm->comm.src_proc->host) : "a finished process",
1037 comm->comm.src_buff,
1038 comm->comm.dst_proc ? sg_host_get_name(comm->comm.dst_proc->host) : "a finished process",
1039 comm->comm.dst_buff, buff_size);
1041 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
1042 if (comm->comm.dst_buff_size)
1043 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
1045 /* Update the receiver's buffer size to the copied amount */
1046 if (comm->comm.dst_buff_size)
1047 *comm->comm.dst_buff_size = buff_size;
/* A per-communication copy function takes precedence over the file-wide callback */
1050 if(comm->comm.copy_data_fun)
1051 comm->comm.copy_data_fun (comm, comm->comm.src_buff, buff_size);
1053 SIMIX_comm_copy_data_callback (comm, comm->comm.src_buff, buff_size);
1057 /* Set the copied flag so we copy data only once */
1058 /* (this function might be called from both communication ends) */
1059 comm->comm.copied = 1;