1 /* Copyright (c) 2009-2015. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "src/surf/surf_interface.hpp"
8 #include "src/simix/smx_private.h"
11 #include "src/mc/mc_replay.h"
/* Logging sub-category that the XBT_DEBUG calls of this file report to. */
14 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix, "SIMIX network-related synchronization");
/* Registry of mailboxes, indexed by name; allocated by SIMIX_network_init()
 * and released by SIMIX_network_exit(). */
16 static xbt_dict_t mailboxes = NULL;
/* Global communication counter; the isend/irecv code below decrements it
 * whenever a freshly created synchro turns out to be unneeded. */
17 XBT_EXPORT_NO_IMPORT(unsigned long int) smx_total_comms = 0;
/* Forward declarations of helpers that are defined further down in this file. */
19 static void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall);
20 static void SIMIX_comm_copy_data(smx_synchro_t comm);
21 static smx_synchro_t SIMIX_comm_new(e_smx_comm_type_t type);
22 static inline void SIMIX_mbox_push(smx_mailbox_t mbox, smx_synchro_t comm);
23 static smx_synchro_t SIMIX_fifo_probe_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
24 int (*match_fun)(void *, void *,smx_synchro_t),
25 void *user_data, smx_synchro_t my_synchro);
26 static smx_synchro_t SIMIX_fifo_get_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
27 int (*match_fun)(void *, void *,smx_synchro_t),
28 void *user_data, smx_synchro_t my_synchro);
29 static void SIMIX_mbox_free(void *data);
30 static void SIMIX_comm_start(smx_synchro_t synchro);
32 void SIMIX_network_init(void)
34 mailboxes = xbt_dict_new_homogeneous(SIMIX_mbox_free);
37 void SIMIX_network_exit(void)
39 xbt_dict_free(&mailboxes);
42 /******************************************************************************/
43 /* Rendez-Vous Points */
44 /******************************************************************************/
46 smx_mailbox_t SIMIX_mbox_create(const char *name)
48 /* two processes may have pushed the same mbox_create simcall at the same time */
49 smx_mailbox_t mbox = name ? (smx_mailbox_t) xbt_dict_get_or_null(mailboxes, name) : NULL;
52 mbox = xbt_new0(s_smx_mailbox_t, 1);
53 mbox->name = name ? xbt_strdup(name) : NULL;
54 mbox->comm_fifo = xbt_fifo_new();
55 mbox->done_comm_fifo = xbt_fifo_new();
56 mbox->permanent_receiver=NULL;
58 XBT_DEBUG("Creating a mailbox at %p with name %s", mbox, name);
61 xbt_dict_set(mailboxes, mbox->name, mbox, NULL);
66 void SIMIX_mbox_destroy(smx_mailbox_t mbox)
69 xbt_dict_remove(mailboxes, mbox->name);
72 void SIMIX_mbox_free(void *data)
74 XBT_DEBUG("mbox free %p", data);
75 smx_mailbox_t mbox = (smx_mailbox_t) data;
77 xbt_fifo_free(mbox->comm_fifo);
78 xbt_fifo_free(mbox->done_comm_fifo);
83 smx_mailbox_t SIMIX_mbox_get_by_name(const char *name)
85 return (smx_mailbox_t) xbt_dict_get_or_null(mailboxes, name);
88 int SIMIX_mbox_comm_count_by_host(smx_mailbox_t mbox, sg_host_t host)
90 smx_synchro_t comm = NULL;
91 xbt_fifo_item_t item = NULL;
94 xbt_fifo_foreach(mbox->comm_fifo, item, comm, smx_synchro_t) {
95 if (comm->comm.src_proc->host == host)
102 smx_synchro_t SIMIX_mbox_get_head(smx_mailbox_t mbox)
104 return (smx_synchro_t) xbt_fifo_get_item_content(
105 xbt_fifo_get_first_item(mbox->comm_fifo));
109 * \brief get the receiver (process associated to the mailbox)
110 * \param mbox The rendez-vous point
111 * \return process The receiving process (NULL if not set)
113 smx_process_t SIMIX_mbox_get_receiver(smx_mailbox_t mbox)
115 return mbox->permanent_receiver;
119 * \brief set the receiver of the rendez vous point to allow eager sends
120 * \param mbox The rendez-vous point
121 * \param process The receiving process
123 void SIMIX_mbox_set_receiver(smx_mailbox_t mbox, smx_process_t process)
125 mbox->permanent_receiver=process;
129 * \brief Pushes a communication synchro into a rendez-vous point
130 * \param mbox The mailbox
131 * \param comm The communication synchro
133 static inline void SIMIX_mbox_push(smx_mailbox_t mbox, smx_synchro_t comm)
135 xbt_fifo_push(mbox->comm_fifo, comm);
136 comm->comm.mbox = mbox;
140 * \brief Removes a communication synchro from a rendez-vous point
141 * \param mbox The rendez-vous point
142 * \param comm The communication synchro
144 void SIMIX_mbox_remove(smx_mailbox_t mbox, smx_synchro_t comm)
146 xbt_fifo_remove(mbox->comm_fifo, comm);
147 comm->comm.mbox = NULL;
151 * \brief Checks if there is a communication synchro queued in a fifo matching our needs
152 * \param type The type of communication we are looking for (comm_send, comm_recv)
153 * \return The communication synchro if found, NULL otherwise
/* Walks `fifo` looking for a queued synchro of the requested `type` that both
 * sides accept: `match_fun` (ours, when set) is called with our user data, the
 * queued side's user data and the queued synchro; the queued synchro's own
 * match_fun (when set) is called with the arguments swapped and `my_synchro`.
 * On a match the item is taken out of the fifo and released, the synchro's
 * refcount is incremented, and its mailbox pointer is saved in mbox_cpy and
 * then cleared.
 * NOTE(review): this listing appears to have lost some short lines (braces,
 * return statements, possibly preprocessor guards); the comments describe the
 * visible statements only. */
155 smx_synchro_t SIMIX_fifo_get_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
156 int (*match_fun)(void *, void *,smx_synchro_t),
157 void *this_user_data, smx_synchro_t my_synchro)
159 smx_synchro_t synchro;
160 xbt_fifo_item_t item;
161 void* other_user_data = NULL;
163 xbt_fifo_foreach(fifo, item, synchro, smx_synchro_t) {
/* the queued side's user data lives in src_data for a send, dst_data for a receive */
164 if (synchro->comm.type == SIMIX_COMM_SEND) {
165 other_user_data = synchro->comm.src_data;
166 } else if (synchro->comm.type == SIMIX_COMM_RECEIVE) {
167 other_user_data = synchro->comm.dst_data;
/* a missing match function on either side means "accept anything" */
169 if (synchro->comm.type == type &&
170 (!match_fun || match_fun(this_user_data, other_user_data, synchro)) &&
171 (!synchro->comm.match_fun || synchro->comm.match_fun(other_user_data, this_user_data, my_synchro))) {
172 XBT_DEBUG("Found a matching communication synchro %p", synchro);
173 xbt_fifo_remove_item(fifo, item);
174 xbt_fifo_free_item(item);
175 synchro->comm.refcount++;
177 synchro->comm.mbox_cpy = synchro->comm.mbox;
179 synchro->comm.mbox = NULL;
182 XBT_DEBUG("Sorry, communication synchro %p does not match our needs:"
183 " its type is %d but we are looking for a comm of type %d (or maybe the filtering didn't match)",
184 synchro, (int)synchro->comm.type, (int)type);
186 XBT_DEBUG("No matching communication synchro found");
192 * \brief Checks if there is a communication synchro queued in a fifo matching our needs, but leave it there
193 * \param type The type of communication we are looking for (comm_send, comm_recv)
194 * \return The communication synchro if found, NULL otherwise
/* Same matching rules as SIMIX_fifo_get_comm(), but the matching synchro is
 * left in `fifo`: the only visible side effect on a match is the refcount
 * increment (SIMIX_comm_iprobe() decrements it again after the probe).
 * NOTE(review): this listing appears to have lost some short lines (braces,
 * return statements); the comments describe the visible statements only. */
196 smx_synchro_t SIMIX_fifo_probe_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
197 int (*match_fun)(void *, void *,smx_synchro_t),
198 void *this_user_data, smx_synchro_t my_synchro)
200 smx_synchro_t synchro;
201 xbt_fifo_item_t item;
202 void* other_user_data = NULL;
204 xbt_fifo_foreach(fifo, item, synchro, smx_synchro_t) {
/* the queued side's user data lives in src_data for a send, dst_data for a receive */
205 if (synchro->comm.type == SIMIX_COMM_SEND) {
206 other_user_data = synchro->comm.src_data;
207 } else if (synchro->comm.type == SIMIX_COMM_RECEIVE) {
208 other_user_data = synchro->comm.dst_data;
/* a missing match function on either side means "accept anything" */
210 if (synchro->comm.type == type &&
211 (!match_fun || match_fun(this_user_data, other_user_data, synchro)) &&
212 (!synchro->comm.match_fun || synchro->comm.match_fun(other_user_data, this_user_data, my_synchro))) {
213 XBT_DEBUG("Found a matching communication synchro %p", synchro);
214 synchro->comm.refcount++;
218 XBT_DEBUG("Sorry, communication synchro %p does not match our needs:"
219 " its type is %d but we are looking for a comm of type %d (or maybe the filtering didn't match)",
220 synchro, (int)synchro->comm.type, (int)type);
222 XBT_DEBUG("No matching communication synchro found");
225 /******************************************************************************/
226 /* Communication synchros */
227 /******************************************************************************/
230 * \brief Creates a new communicate synchro
231 * \param type The direction of communication (comm_send, comm_recv)
232 * \return The new communicate synchro
234 smx_synchro_t SIMIX_comm_new(e_smx_comm_type_t type)
236 smx_synchro_t synchro;
238 /* alloc structures */
239 synchro = (smx_synchro_t) xbt_mallocator_get(simix_global->synchro_mallocator);
241 synchro->type = SIMIX_SYNC_COMMUNICATE;
242 synchro->state = SIMIX_WAITING;
244 /* set communication */
245 synchro->comm.type = type;
246 synchro->comm.refcount = 1;
247 synchro->comm.src_data=NULL;
248 synchro->comm.dst_data=NULL;
250 synchro->category = NULL;
252 XBT_DEBUG("Create communicate synchro %p", synchro);
259 * \brief Destroy a communicate synchro
260 * \param synchro The communicate synchro to be destroyed
262 void SIMIX_comm_destroy(smx_synchro_t synchro)
264 XBT_DEBUG("Destroy synchro %p (refcount: %d), state: %d",
265 synchro, synchro->comm.refcount, (int)synchro->state);
267 if (synchro->comm.refcount <= 0) {
268 xbt_backtrace_display_current();
269 xbt_die("The refcount of comm %p is already 0 before decreasing it. "
270 "That's a bug! If you didn't test and/or wait the same communication twice in your code, then the bug is SimGrid's...", synchro);
272 synchro->comm.refcount--;
273 if (synchro->comm.refcount > 0)
275 XBT_DEBUG("Really free communication %p; refcount is now %d", synchro,
276 synchro->comm.refcount);
278 xbt_free(synchro->name);
279 SIMIX_comm_destroy_internal_actions(synchro);
281 if (synchro->comm.detached && synchro->state != SIMIX_DONE) {
282 /* the communication has failed and was detached:
283 * we have to free the buffer */
284 if (synchro->comm.clean_fun) {
285 synchro->comm.clean_fun(synchro->comm.src_buff);
287 synchro->comm.src_buff = NULL;
290 if(synchro->comm.mbox)
291 SIMIX_mbox_remove(synchro->comm.mbox, synchro);
293 xbt_mallocator_release(simix_global->synchro_mallocator, synchro);
296 void SIMIX_comm_destroy_internal_actions(smx_synchro_t synchro)
298 if (synchro->comm.surf_comm){
299 synchro->comm.surf_comm->unref();
300 synchro->comm.surf_comm = NULL;
303 if (synchro->comm.src_timeout){
304 synchro->comm.src_timeout->unref();
305 synchro->comm.src_timeout = NULL;
308 if (synchro->comm.dst_timeout){
309 synchro->comm.dst_timeout->unref();
310 synchro->comm.dst_timeout = NULL;
314 void simcall_HANDLER_comm_send(smx_simcall_t simcall, smx_process_t src, smx_mailbox_t mbox,
315 double task_size, double rate,
316 void *src_buff, size_t src_buff_size,
317 int (*match_fun)(void *, void *,smx_synchro_t),
318 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
319 void *data, double timeout){
320 smx_synchro_t comm = simcall_HANDLER_comm_isend(simcall, src, mbox, task_size, rate,
321 src_buff, src_buff_size, match_fun, NULL, copy_data_fun,
323 SIMCALL_SET_MC_VALUE(simcall, 0);
324 simcall_HANDLER_comm_wait(simcall, comm, timeout);
/* Handler of the asynchronous send simcall.
 * A SEND synchro describing the caller is created and mbox->comm_fifo is
 * searched for a queued RECEIVE that both sides accept. The sender-side fields
 * (process, size, rate, buffer, user data, match and copy functions) are then
 * stored in the resulting synchro, which is started with SIMIX_comm_start()
 * unless the model checker or record/replay is active, in which case it is
 * simply marked SIMIX_RUNNING.
 * Returns NULL for a detached send, the synchro otherwise.
 * NOTE(review): this listing appears to have lost some short lines (braces,
 * `else`, the test on `detached`); the comments describe the visible
 * statements only. */
326 smx_synchro_t simcall_HANDLER_comm_isend(smx_simcall_t simcall, smx_process_t src_proc, smx_mailbox_t mbox,
327 double task_size, double rate,
328 void *src_buff, size_t src_buff_size,
329 int (*match_fun)(void *, void *,smx_synchro_t),
330 void (*clean_fun)(void *), // used to free the synchro in case of problem after a detached send
331 void (*copy_data_fun)(smx_synchro_t, void*, size_t),// used to copy data if not default one
332 void *data, int detached)
334 XBT_DEBUG("send from %p", mbox);
336 /* Prepare a synchro describing us, so that it gets passed to the user-provided filter of other side */
337 smx_synchro_t this_synchro = SIMIX_comm_new(SIMIX_COMM_SEND);
339 /* Look for communication synchro matching our needs. We also provide a description of
340 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
342 * If it is not found then push our communication into the rendez-vous point */
343 smx_synchro_t other_synchro = SIMIX_fifo_get_comm(mbox->comm_fifo, SIMIX_COMM_RECEIVE, match_fun, data, this_synchro);
/* no receiver is waiting yet: our own synchro becomes the one to work on */
345 if (!other_synchro) {
346 other_synchro = this_synchro;
348 if (mbox->permanent_receiver!=NULL){
349 //this mailbox is for small messages, which have to be sent right now
350 other_synchro->state = SIMIX_READY;
351 other_synchro->comm.dst_proc=mbox->permanent_receiver;
352 other_synchro->comm.refcount++;
353 xbt_fifo_push(mbox->done_comm_fifo,other_synchro);
354 other_synchro->comm.mbox=mbox;
355 XBT_DEBUG("pushing a message into the permanent receive fifo %p, comm %p", mbox, &(other_synchro->comm));
358 SIMIX_mbox_push(mbox, this_synchro);
361 XBT_DEBUG("Receive already pushed");
/* a matching receive exists: the synchro created above is not needed */
363 SIMIX_comm_destroy(this_synchro);
364 --smx_total_comms; // this creation was a pure waste
366 other_synchro->state = SIMIX_READY;
367 other_synchro->comm.type = SIMIX_COMM_READY;
/* remember the communication in the sender's own list */
370 xbt_fifo_push(src_proc->comms, other_synchro);
372 /* if the communication synchro is detached then decrease the refcount
373 * by one, so it will be eliminated by the receiver's destroy call */
375 other_synchro->comm.detached = 1;
376 other_synchro->comm.refcount--;
377 other_synchro->comm.clean_fun = clean_fun;
379 other_synchro->comm.clean_fun = NULL;
382 /* Setup the communication synchro */
383 other_synchro->comm.src_proc = src_proc;
384 other_synchro->comm.task_size = task_size;
385 other_synchro->comm.rate = rate;
386 other_synchro->comm.src_buff = src_buff;
387 other_synchro->comm.src_buff_size = src_buff_size;
388 other_synchro->comm.src_data = data;
390 other_synchro->comm.match_fun = match_fun;
391 other_synchro->comm.copy_data_fun = copy_data_fun;
/* with the model checker or replay, the synchro is marked running without calling SIMIX_comm_start() */
394 if (MC_is_active() || MC_record_replay_is_active()) {
395 other_synchro->state = SIMIX_RUNNING;
396 return (detached ? NULL : other_synchro);
399 SIMIX_comm_start(other_synchro);
400 return (detached ? NULL : other_synchro);
403 void simcall_HANDLER_comm_recv(smx_simcall_t simcall, smx_process_t receiver, smx_mailbox_t mbox,
404 void *dst_buff, size_t *dst_buff_size,
405 int (*match_fun)(void *, void *, smx_synchro_t),
406 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
407 void *data, double timeout, double rate)
409 smx_synchro_t comm = SIMIX_comm_irecv(receiver, mbox, dst_buff,
410 dst_buff_size, match_fun, copy_data_fun, data, rate);
411 SIMCALL_SET_MC_VALUE(simcall, 0);
412 simcall_HANDLER_comm_wait(simcall, comm, timeout);
415 smx_synchro_t simcall_HANDLER_comm_irecv(smx_simcall_t simcall, smx_process_t receiver, smx_mailbox_t mbox,
416 void *dst_buff, size_t *dst_buff_size,
417 int (*match_fun)(void *, void *, smx_synchro_t),
418 void (*copy_data_fun)(smx_synchro_t, void*, size_t),
419 void *data, double rate)
421 return SIMIX_comm_irecv(receiver, mbox, dst_buff, dst_buff_size, match_fun, copy_data_fun, data, rate);
/* Posts an asynchronous receive on `mbox` on behalf of `dst_proc`.
 * A RECEIVE synchro describing the caller is created. When the mailbox has a
 * permanent receiver and done_comm_fifo is not empty, a matching SEND is first
 * searched there; the visible code also searches mbox->comm_fifo. When nothing
 * matches, the new synchro is queued in the mailbox. The receiver-side fields
 * (process, buffer, size pointer, user data, match and copy functions) are then
 * stored in the resulting synchro, and its rate is lowered to `rate` when
 * `rate` is set (!= -1.0) and is stricter than the current one. The synchro is
 * finally started with SIMIX_comm_start() unless the model checker or
 * record/replay is active, in which case it is simply marked SIMIX_RUNNING.
 * Returns the synchro that was set up.
 * NOTE(review): this listing appears to have lost some short lines (braces,
 * `else` branches); the comments describe the visible statements only. */
424 smx_synchro_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_mailbox_t mbox, void *dst_buff, size_t *dst_buff_size,
425 int (*match_fun)(void *, void *, smx_synchro_t),
426 void (*copy_data_fun)(smx_synchro_t, void*, size_t), // used to copy data if not default one
427 void *data, double rate)
429 XBT_DEBUG("recv from %p %p", mbox, mbox->comm_fifo);
430 smx_synchro_t this_synchro = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
432 smx_synchro_t other_synchro;
433 //communication already done, get it inside the fifo of completed comms
434 if (mbox->permanent_receiver && xbt_fifo_size(mbox->done_comm_fifo)!=0) {
436 XBT_DEBUG("We have a comm that has probably already been received, trying to match it, to skip the communication");
437 //find a match in the already received fifo
438 other_synchro = SIMIX_fifo_get_comm(mbox->done_comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_synchro);
439 //if not found, assume the receiver came first, register it to the mailbox in the classical way
440 if (!other_synchro) {
441 XBT_DEBUG("We have messages in the permanent receive list, but not the one we are looking for, pushing request into fifo");
442 other_synchro = this_synchro;
443 SIMIX_mbox_push(mbox, this_synchro);
/* a surf action with nothing left to transfer means the eager send already completed */
445 if(other_synchro->comm.surf_comm && SIMIX_comm_get_remains(other_synchro)==0.0) {
446 XBT_DEBUG("comm %p has been already sent, and is finished, destroy it",&(other_synchro->comm));
447 other_synchro->state = SIMIX_DONE;
448 other_synchro->comm.type = SIMIX_COMM_DONE;
449 other_synchro->comm.mbox = NULL;
451 other_synchro->comm.refcount--;
452 SIMIX_comm_destroy(this_synchro);
453 --smx_total_comms; // this creation was a pure waste
456 /* Prepare a synchro describing us, so that it gets passed to the user-provided filter of other side */
458 /* Look for communication synchro matching our needs. We also provide a description of
459 * ourself so that the other side also gets a chance of choosing if it wants to match with us.
461 * If it is not found then push our communication into the rendez-vous point */
462 other_synchro = SIMIX_fifo_get_comm(mbox->comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_synchro);
464 if (!other_synchro) {
465 XBT_DEBUG("Receive pushed first %d", xbt_fifo_size(mbox->comm_fifo));
466 other_synchro = this_synchro;
467 SIMIX_mbox_push(mbox, this_synchro);
469 SIMIX_comm_destroy(this_synchro);
470 --smx_total_comms; // this creation was a pure waste
471 other_synchro->state = SIMIX_READY;
472 other_synchro->comm.type = SIMIX_COMM_READY;
473 //other_synchro->comm.refcount--;
/* remember the communication in the receiver's own list */
475 xbt_fifo_push(dst_proc->comms, other_synchro);
478 /* Setup communication synchro */
479 other_synchro->comm.dst_proc = dst_proc;
480 other_synchro->comm.dst_buff = dst_buff;
481 other_synchro->comm.dst_buff_size = dst_buff_size;
482 other_synchro->comm.dst_data = data;
/* -1.0 stands for "no rate set": keep the stricter of the two rates that are set */
484 if (rate != -1.0 && (other_synchro->comm.rate == -1.0 || rate < other_synchro->comm.rate))
485 other_synchro->comm.rate = rate;
487 other_synchro->comm.match_fun = match_fun;
488 other_synchro->comm.copy_data_fun = copy_data_fun;
/* with the model checker or replay, the synchro is marked running without calling SIMIX_comm_start() */
490 if (MC_is_active() || MC_record_replay_is_active()) {
491 other_synchro->state = SIMIX_RUNNING;
492 return other_synchro;
495 SIMIX_comm_start(other_synchro);
496 return other_synchro;
499 smx_synchro_t simcall_HANDLER_comm_iprobe(smx_simcall_t simcall, smx_mailbox_t mbox,
500 int type, int src, int tag,
501 int (*match_fun)(void *, void *, smx_synchro_t),
503 return SIMIX_comm_iprobe(simcall->issuer, mbox, type, src, tag, match_fun, data);
/* Checks whether a communication matching the request is queued in `mbox`,
 * without withdrawing it.
 * A temporary synchro describing the caller is created, with the opposite
 * direction of the one being looked for (SEND when probing for RECEIVE and
 * vice versa). The fifo of completed comms is probed when the mailbox has a
 * permanent receiver and that fifo is not empty; the regular fifo is probed as
 * well. The reference taken by SIMIX_fifo_probe_comm() on the synchro that was
 * found is given back, and the temporary synchro is destroyed.
 * Returns the synchro that was found, or NULL.
 * NOTE(review): this listing appears to have lost some short lines (the test
 * on `type`, the declaration of `smx_type`, braces); the comments describe the
 * visible statements only. */
506 smx_synchro_t SIMIX_comm_iprobe(smx_process_t dst_proc, smx_mailbox_t mbox, int type, int src,
507 int tag, int (*match_fun)(void *, void *, smx_synchro_t), void *data)
509 XBT_DEBUG("iprobe from %p %p", mbox, mbox->comm_fifo);
510 smx_synchro_t this_synchro;
513 this_synchro=SIMIX_comm_new(SIMIX_COMM_SEND);
514 smx_type = SIMIX_COMM_RECEIVE;
516 this_synchro=SIMIX_comm_new(SIMIX_COMM_RECEIVE);
517 smx_type = SIMIX_COMM_SEND;
519 smx_synchro_t other_synchro=NULL;
520 if(mbox->permanent_receiver && xbt_fifo_size(mbox->done_comm_fifo)!=0){
521 //find a match in the already received fifo
522 XBT_DEBUG("first try in the perm recv mailbox");
524 other_synchro = SIMIX_fifo_probe_comm(
525 mbox->done_comm_fifo, (e_smx_comm_type_t) smx_type,
526 match_fun, data, this_synchro);
530 XBT_DEBUG("try in the normal mailbox");
531 other_synchro = SIMIX_fifo_probe_comm(
532 mbox->comm_fifo, (e_smx_comm_type_t) smx_type,
533 match_fun, data, this_synchro);
/* give back the reference taken by the probe: the caller only peeks */
536 if(other_synchro)other_synchro->comm.refcount--;
538 SIMIX_comm_destroy(this_synchro);
540 return other_synchro;
/* Handler of the wait simcall (also reached from the blocking send and
 * receive handlers).
 * The simcall is queued on the synchro and the issuer records the synchro as
 * the one it is waiting for. With the model checker or record/replay active,
 * the final state is chosen from the MC value of the simcall (done, or a
 * timeout attributed to whichever side the issuer is) and the communication is
 * finished at once. Otherwise, a synchro that is neither waiting nor running
 * is finished immediately, and for any other synchro a surf sleep action
 * lasting `timeout` is created on the issuer's host and stored as the source
 * or destination timeout, depending on which side the issuer is.
 * NOTE(review): this listing appears to have lost some short lines (the
 * declaration of `sleep`, the tests on `idx` and `timeout`, `else`, `return`,
 * braces); the comments describe the visible statements only. */
543 void simcall_HANDLER_comm_wait(smx_simcall_t simcall, smx_synchro_t synchro, double timeout)
545 /* the simcall may be a wait, a send or a recv */
548 /* Associate this simcall to the wait synchro */
549 XBT_DEBUG("simcall_HANDLER_comm_wait, %p", synchro);
551 xbt_fifo_push(synchro->simcalls, simcall);
552 simcall->issuer->waiting_synchro = synchro;
554 if (MC_is_active() || MC_record_replay_is_active()) {
555 int idx = SIMCALL_GET_MC_VALUE(simcall);
557 synchro->state = SIMIX_DONE;
559 /* If we reached this point, the wait simcall must have a timeout */
560 /* Otherwise it shouldn't be enabled and executed by the MC */
/* blame the timeout on the side that issued the wait */
564 if (synchro->comm.src_proc == simcall->issuer)
565 synchro->state = SIMIX_SRC_TIMEOUT;
567 synchro->state = SIMIX_DST_TIMEOUT;
570 SIMIX_comm_finish(synchro);
574 /* If the synchro has already finish perform the error handling, */
575 /* otherwise set up a waiting timeout on the right side */
576 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING) {
577 SIMIX_comm_finish(synchro);
578 } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
579 sleep = surf_host_sleep(simcall->issuer->host, timeout);
580 sleep->setData(synchro);
582 if (simcall->issuer == synchro->comm.src_proc)
583 synchro->comm.src_timeout = sleep;
585 synchro->comm.dst_timeout = sleep;
/* Handler of the test simcall: stores in the simcall result whether the
 * communication is over.
 * With the model checker or record/replay active, "over" means that both a
 * source and a destination process are recorded; the synchro is then marked
 * SIMIX_DONE. Otherwise, "over" means that the synchro is neither waiting nor
 * running. In both modes a positive result queues the simcall on the synchro
 * and lets SIMIX_comm_finish() answer it; SIMIX_simcall_answer() is called
 * directly in the other visible branch.
 * NOTE(review): this listing appears to have lost some short lines (`else`,
 * `return`, braces); the comments describe the visible statements only. */
589 void simcall_HANDLER_comm_test(smx_simcall_t simcall, smx_synchro_t synchro)
591 if(MC_is_active() || MC_record_replay_is_active()){
592 simcall_comm_test__set__result(simcall, synchro->comm.src_proc && synchro->comm.dst_proc);
593 if(simcall_comm_test__get__result(simcall)){
594 synchro->state = SIMIX_DONE;
595 xbt_fifo_push(synchro->simcalls, simcall);
596 SIMIX_comm_finish(synchro);
598 SIMIX_simcall_answer(simcall);
603 simcall_comm_test__set__result(simcall, (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING));
604 if (simcall_comm_test__get__result(simcall)) {
605 xbt_fifo_push(synchro->simcalls, simcall);
606 SIMIX_comm_finish(synchro);
608 SIMIX_simcall_answer(simcall);
/* Handler of the testany simcall: the simcall result is the index of a
 * finished communication of `synchros`, and starts at -1.
 * With the model checker or record/replay active, the index comes from the MC
 * value of the simcall; the synchro at that index is marked SIMIX_DONE and
 * finished. Otherwise, the communications attached to the simcall are scanned
 * for one that is neither waiting nor running; it is then finished with the
 * simcall queued on it. SIMIX_simcall_answer() is called directly on the other
 * visible paths.
 * NOTE(review): this listing appears to have lost some short lines (the
 * declaration of `cursor`, the test on `idx`, `else`, `return`, braces); the
 * comments describe the visible statements only. */
612 void simcall_HANDLER_comm_testany(smx_simcall_t simcall, xbt_dynar_t synchros)
615 smx_synchro_t synchro;
616 simcall_comm_testany__set__result(simcall, -1);
618 if (MC_is_active() || MC_record_replay_is_active()){
619 int idx = SIMCALL_GET_MC_VALUE(simcall);
621 SIMIX_simcall_answer(simcall);
623 synchro = xbt_dynar_get_as(synchros, idx, smx_synchro_t);
624 simcall_comm_testany__set__result(simcall, idx);
625 xbt_fifo_push(synchro->simcalls, simcall);
626 synchro->state = SIMIX_DONE;
627 SIMIX_comm_finish(synchro);
632 xbt_dynar_foreach(simcall_comm_testany__get__comms(simcall), cursor,synchro) {
633 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING) {
634 simcall_comm_testany__set__result(simcall, cursor);
635 xbt_fifo_push(synchro->simcalls, simcall);
636 SIMIX_comm_finish(synchro);
640 SIMIX_simcall_answer(simcall);
/* Handler of the waitany simcall.
 * With the model checker or record/replay active, the MC value of the simcall
 * is the index of the communication to complete: the simcall is queued on that
 * synchro, the index is stored as the simcall result, and the synchro is
 * marked SIMIX_DONE and finished. Otherwise, the simcall is queued on every
 * synchro of `synchros`, and a synchro that is neither waiting nor running is
 * finished right away.
 * NOTE(review): this listing appears to have lost some short lines (`return`,
 * `break`, braces); the comments describe the visible statements only. */
643 void simcall_HANDLER_comm_waitany(smx_simcall_t simcall, xbt_dynar_t synchros)
645 smx_synchro_t synchro;
646 unsigned int cursor = 0;
648 if (MC_is_active() || MC_record_replay_is_active()){
649 int idx = SIMCALL_GET_MC_VALUE(simcall);
650 synchro = xbt_dynar_get_as(synchros, idx, smx_synchro_t);
651 xbt_fifo_push(synchro->simcalls, simcall);
652 simcall_comm_waitany__set__result(simcall, idx);
653 synchro->state = SIMIX_DONE;
654 SIMIX_comm_finish(synchro);
658 xbt_dynar_foreach(synchros, cursor, synchro){
659 /* associate this simcall to the the synchro */
660 xbt_fifo_push(synchro->simcalls, simcall);
662 /* see if the synchro is already finished */
663 if (synchro->state != SIMIX_WAITING && synchro->state != SIMIX_RUNNING){
664 SIMIX_comm_finish(synchro);
670 void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall)
672 smx_synchro_t synchro;
673 unsigned int cursor = 0;
674 xbt_dynar_t synchros = simcall_comm_waitany__get__comms(simcall);
676 xbt_dynar_foreach(synchros, cursor, synchro) {
677 xbt_fifo_remove(synchro->simcalls, simcall);
682 * \brief Starts the simulation of a communication synchro.
683 * \param synchro the communication synchro
/* Starts the simulated transfer of a synchro that is in the SIMIX_READY state;
 * other states are left alone by the visible code.
 * A surf communication action is created from the task size and rate, the
 * synchro is attached to it as user data, and the state becomes SIMIX_RUNNING.
 * An action that is already failed turns the state into SIMIX_LINK_FAILURE and
 * the surf actions are released. When either peer is suspended, the surf
 * action is suspended right after its creation.
 * NOTE(review): this listing appears to have lost some short lines (part of
 * the surf_network_model_communicate() argument list, `else`, braces); the
 * comments describe the visible statements only. */
685 static inline void SIMIX_comm_start(smx_synchro_t synchro)
687 /* If both the sender and the receiver are already there, start the communication */
688 if (synchro->state == SIMIX_READY) {
690 sg_host_t sender = synchro->comm.src_proc->host;
691 sg_host_t receiver = synchro->comm.dst_proc->host;
693 XBT_DEBUG("Starting communication %p from '%s' to '%s'", synchro,
694 sg_host_get_name(sender), sg_host_get_name(receiver));
696 synchro->comm.surf_comm = surf_network_model_communicate(surf_network_model,
698 synchro->comm.task_size, synchro->comm.rate);
/* lets the surf action lead back to this synchro */
700 synchro->comm.surf_comm->setData(synchro);
702 synchro->state = SIMIX_RUNNING;
704 /* If a link is failed, detect it immediately */
705 if (synchro->comm.surf_comm->getState() == simgrid::surf::Action::State::failed) {
706 XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
707 sg_host_get_name(sender), sg_host_get_name(receiver));
708 synchro->state = SIMIX_LINK_FAILURE;
709 SIMIX_comm_destroy_internal_actions(synchro);
712 /* If any of the process is suspend, create the synchro but stop its execution,
713 it will be restarted when the sender process resume */
714 if (SIMIX_process_is_suspended(synchro->comm.src_proc) ||
715 SIMIX_process_is_suspended(synchro->comm.dst_proc)) {
716 /* FIXME: check what should happen with the synchro state */
718 if (SIMIX_process_is_suspended(synchro->comm.src_proc))
719 XBT_DEBUG("The communication is suspended on startup because src (%s:%s) were suspended since it initiated the communication",
720 sg_host_get_name(synchro->comm.src_proc->host), synchro->comm.src_proc->name);
722 XBT_DEBUG("The communication is suspended on startup because dst (%s:%s) were suspended since it initiated the communication",
723 sg_host_get_name(synchro->comm.dst_proc->host), synchro->comm.dst_proc->name);
725 synchro->comm.surf_comm->suspend();
732 * \brief Answers the SIMIX simcalls associated to a communication synchro.
733 * \param synchro a finished communication synchro
/* Answers every simcall queued on a finished communication.
 * For each simcall taken from synchro->simcalls:
 *  - simcalls whose call is SIMCALL_NONE are skipped;
 *  - a waitany simcall is first detached from the other synchros it waits on
 *    and, outside of the model checker / replay, gets the index of this
 *    synchro in its array of communications as result;
 *  - the synchro is withdrawn from its mailbox if it is still queued;
 *  - an issuer whose host is off is flagged to die and receives a host_error
 *    exception; the visible cases of the switch on the synchro state post a
 *    timeout, network or cancel exception to the issuer, or copy the data;
 *  - when an exception is pending for a waitany or testany simcall, the index
 *    of this synchro in the simcall's array is stored as the exception value;
 *  - the issuer stops waiting on the synchro, the synchro is removed from the
 *    issuer's list of communications (and, for a detached communication, from
 *    the peer's list as well), and the simcall is answered.
 * Finally SIMIX_comm_destroy() is called `destroy_count` times.
 * NOTE(review): this listing appears to have lost some short lines (several
 * `case` labels, `break`, `else`, braces and the statement updating
 * `destroy_count`); the comments describe the visible statements only. */
735 void SIMIX_comm_finish(smx_synchro_t synchro)
737 unsigned int destroy_count = 0;
738 smx_simcall_t simcall;
740 while ((simcall = (smx_simcall_t) xbt_fifo_shift(synchro->simcalls))) {
742 /* If a waitany simcall is waiting for this synchro to finish, then remove
743 it from the other synchros in the waitany list. Afterwards, get the
744 position of the actual synchro in the waitany dynar and
745 return it as the result of the simcall */
747 if (simcall->call == SIMCALL_NONE) //FIXME: maybe a better way to handle this case
748 continue; // if process handling comm is killed
749 if (simcall->call == SIMCALL_COMM_WAITANY) {
750 SIMIX_waitany_remove_simcall_from_actions(simcall);
751 if (!MC_is_active() && !MC_record_replay_is_active())
752 simcall_comm_waitany__set__result(simcall, xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro));
755 /* If the synchro is still in a rendez-vous point then remove from it */
756 if (synchro->comm.mbox)
757 SIMIX_mbox_remove(synchro->comm.mbox, synchro);
759 XBT_DEBUG("SIMIX_comm_finish: synchro state = %d", (int)synchro->state);
761 /* Check out for errors */
763 if (simcall->issuer->host->isOff()) {
764 simcall->issuer->context->iwannadie = 1;
765 SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
768 switch (synchro->state) {
771 XBT_DEBUG("Communication %p complete!", synchro);
772 SIMIX_comm_copy_data(synchro);
775 case SIMIX_SRC_TIMEOUT:
776 SMX_EXCEPTION(simcall->issuer, timeout_error, 0,
777 "Communication timeouted because of sender");
780 case SIMIX_DST_TIMEOUT:
781 SMX_EXCEPTION(simcall->issuer, timeout_error, 0,
782 "Communication timeouted because of receiver");
785 case SIMIX_SRC_HOST_FAILURE:
786 if (simcall->issuer == synchro->comm.src_proc)
787 simcall->issuer->context->iwannadie = 1;
788 // SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
790 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
793 case SIMIX_DST_HOST_FAILURE:
794 if (simcall->issuer == synchro->comm.dst_proc)
795 simcall->issuer->context->iwannadie = 1;
796 // SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
798 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
801 case SIMIX_LINK_FAILURE:
/* NOTE(review): a NULL pointer may be passed for the two %s host names below when a peer is missing */
803 XBT_DEBUG("Link failure in synchro %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d",
805 synchro->comm.src_proc ? sg_host_get_name(synchro->comm.src_proc->host) : NULL,
806 synchro->comm.dst_proc ? sg_host_get_name(synchro->comm.dst_proc->host) : NULL,
807 simcall->issuer->name, simcall->issuer, synchro->comm.detached);
808 if (synchro->comm.src_proc == simcall->issuer) {
809 XBT_DEBUG("I'm source");
810 } else if (synchro->comm.dst_proc == simcall->issuer) {
811 XBT_DEBUG("I'm dest");
813 XBT_DEBUG("I'm neither source nor dest");
815 SMX_EXCEPTION(simcall->issuer, network_error, 0, "Link failure");
/* the message names the other side as the one that canceled */
819 if (simcall->issuer == synchro->comm.dst_proc)
820 SMX_EXCEPTION(simcall->issuer, cancel_error, 0,
821 "Communication canceled by the sender");
823 SMX_EXCEPTION(simcall->issuer, cancel_error, 0,
824 "Communication canceled by the receiver");
828 xbt_die("Unexpected synchro state in SIMIX_comm_finish: %d", (int)synchro->state);
831 /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
832 if (simcall->issuer->doexception) {
833 if (simcall->call == SIMCALL_COMM_WAITANY) {
834 simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &synchro);
836 else if (simcall->call == SIMCALL_COMM_TESTANY) {
837 simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_testany__get__comms(simcall), &synchro);
841 if (simcall->issuer->host->isOff()) {
842 simcall->issuer->context->iwannadie = 1;
/* the issuer is done with this communication: forget it on its side */
845 simcall->issuer->waiting_synchro = NULL;
846 xbt_fifo_remove(simcall->issuer->comms, synchro);
/* a detached communication is also removed from the peer's list */
847 if(synchro->comm.detached){
848 if(simcall->issuer == synchro->comm.src_proc){
849 if(synchro->comm.dst_proc)
850 xbt_fifo_remove(synchro->comm.dst_proc->comms, synchro);
852 if(simcall->issuer == synchro->comm.dst_proc){
853 if(synchro->comm.src_proc)
854 xbt_fifo_remove(synchro->comm.src_proc->comms, synchro);
857 SIMIX_simcall_answer(simcall);
861 while (destroy_count-- > 0)
862 SIMIX_comm_destroy(synchro);
866 * \brief This function is called when a Surf communication synchro is finished.
867 * \param synchro the corresponding Simix communication
869 void SIMIX_post_comm(smx_synchro_t synchro)
871 /* Update synchro state */
872 if (synchro->comm.src_timeout &&
873 synchro->comm.src_timeout->getState() == simgrid::surf::Action::State::done)
874 synchro->state = SIMIX_SRC_TIMEOUT;
875 else if (synchro->comm.dst_timeout &&
876 synchro->comm.dst_timeout->getState() == simgrid::surf::Action::State::done)
877 synchro->state = SIMIX_DST_TIMEOUT;
878 else if (synchro->comm.src_timeout &&
879 synchro->comm.src_timeout->getState() == simgrid::surf::Action::State::failed)
880 synchro->state = SIMIX_SRC_HOST_FAILURE;
881 else if (synchro->comm.dst_timeout &&
882 synchro->comm.dst_timeout->getState() == simgrid::surf::Action::State::failed)
883 synchro->state = SIMIX_DST_HOST_FAILURE;
884 else if (synchro->comm.surf_comm &&
885 synchro->comm.surf_comm->getState() == simgrid::surf::Action::State::failed) {
886 XBT_DEBUG("Puta madre. Surf says that the link broke");
887 synchro->state = SIMIX_LINK_FAILURE;
889 synchro->state = SIMIX_DONE;
891 XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
892 synchro, (int)synchro->state, synchro->comm.src_proc, synchro->comm.dst_proc, synchro->comm.detached);
894 /* destroy the surf actions associated with the Simix communication */
895 SIMIX_comm_destroy_internal_actions(synchro);
897 /* if there are simcalls associated with the synchro, then answer them */
898 if (xbt_fifo_size(synchro->simcalls)) {
899 SIMIX_comm_finish(synchro);
903 void SIMIX_comm_cancel(smx_synchro_t synchro)
905 /* if the synchro is a waiting state means that it is still in a mbox */
906 /* so remove from it and delete it */
907 if (synchro->state == SIMIX_WAITING) {
908 SIMIX_mbox_remove(synchro->comm.mbox, synchro);
909 synchro->state = SIMIX_CANCELED;
911 else if (!MC_is_active() /* when running the MC there are no surf actions */
912 && !MC_record_replay_is_active()
913 && (synchro->state == SIMIX_READY || synchro->state == SIMIX_RUNNING)) {
915 synchro->comm.surf_comm->cancel();
919 void SIMIX_comm_suspend(smx_synchro_t synchro)
921 /*FIXME: shall we suspend also the timeout synchro? */
922 if (synchro->comm.surf_comm)
923 synchro->comm.surf_comm->suspend();
924 /* in the other case, the action will be suspended on creation, in SIMIX_comm_start() */
927 void SIMIX_comm_resume(smx_synchro_t synchro)
929 /*FIXME: check what happen with the timeouts */
930 if (synchro->comm.surf_comm)
931 synchro->comm.surf_comm->resume();
932 /* in the other case, the synchro were not really suspended yet, see SIMIX_comm_suspend() and SIMIX_comm_start() */
936 /************* synchro Getters **************/
939 * \brief get the amount remaining from the communication
940 * \param synchro The communication
942 double SIMIX_comm_get_remains(smx_synchro_t synchro)
950 switch (synchro->state) {
953 remains = synchro->comm.surf_comm->getRemains();
958 remains = 0; /*FIXME: check what should be returned */
962 remains = 0; /*FIXME: is this correct? */
968 e_smx_state_t SIMIX_comm_get_state(smx_synchro_t synchro)
970 return synchro->state;
974 * \brief Return the user data associated to the sender of the communication
975 * \param synchro The communication
976 * \return the user data
978 void* SIMIX_comm_get_src_data(smx_synchro_t synchro)
980 return synchro->comm.src_data;
984 * \brief Return the user data associated to the receiver of the communication
985 * \param synchro The communication
986 * \return the user data
988 void* SIMIX_comm_get_dst_data(smx_synchro_t synchro)
990 return synchro->comm.dst_data;
993 smx_process_t SIMIX_comm_get_src_proc(smx_synchro_t synchro)
995 return synchro->comm.src_proc;
998 smx_process_t SIMIX_comm_get_dst_proc(smx_synchro_t synchro)
1000 return synchro->comm.dst_proc;
1003 /******************************************************************************/
1004 /* SIMIX_comm_copy_data callbacks */
1005 /******************************************************************************/
/* Copy routine used by SIMIX_comm_copy_data() for communications that carry no
 * copy_data_fun of their own. It defaults to the pointer-copying callback and
 * is replaced through SIMIX_comm_set_copy_data_callback(). */
1006 static void (*SIMIX_comm_copy_data_callback) (smx_synchro_t, void*, size_t) =
1007 &SIMIX_comm_copy_pointer_callback;
1010 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_synchro_t, void*, size_t))
1012 SIMIX_comm_copy_data_callback = callback;
1015 void SIMIX_comm_copy_pointer_callback(smx_synchro_t comm, void* buff, size_t buff_size)
1017 xbt_assert((buff_size == sizeof(void *)),
1018 "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
1019 *(void **) (comm->comm.dst_buff) = buff;
1022 void SIMIX_comm_copy_buffer_callback(smx_synchro_t comm, void* buff, size_t buff_size)
1024 XBT_DEBUG("Copy the data over");
1025 memcpy(comm->comm.dst_buff, buff, buff_size);
1026 if (comm->comm.detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
1028 comm->comm.src_buff = NULL;
1034 * \brief Copy the communication data from the sender's buffer to the receiver's one
1035 * \param comm The communication
/* Copies the payload of a communication from the sender's buffer to the
 * receiver's one, once.
 * The copy is skipped when either buffer is missing or when the data was
 * already copied. The amount copied is the sender's buffer size, capped by the
 * size announced by the receiver when it announced one; the receiver's size is
 * then updated to the amount actually copied. The copy itself is delegated to
 * the communication's own copy_data_fun when it has one, and the file-wide
 * default callback is called in the other visible branch.
 * NOTE(review): this listing appears to have lost some short lines (`return`,
 * one XBT_DEBUG argument, a condition around the copy, `else`, braces); the
 * comments describe the visible statements only. */
1037 void SIMIX_comm_copy_data(smx_synchro_t comm)
1039 size_t buff_size = comm->comm.src_buff_size;
1040 /* If there is no data to be copy then return */
1041 if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied)
1044 XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
1046 comm->comm.src_proc ? sg_host_get_name(comm->comm.src_proc->host) : "a finished process",
1047 comm->comm.src_buff,
1048 comm->comm.dst_proc ? sg_host_get_name(comm->comm.dst_proc->host) : "a finished process",
1049 comm->comm.dst_buff, buff_size);
1051 /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
1052 if (comm->comm.dst_buff_size)
1053 buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
1055 /* Update the receiver's buffer size to the copied amount */
1056 if (comm->comm.dst_buff_size)
1057 *comm->comm.dst_buff_size = buff_size;
/* a per-communication copy function takes precedence over the file-wide default */
1060 if(comm->comm.copy_data_fun)
1061 comm->comm.copy_data_fun (comm, comm->comm.src_buff, buff_size);
1063 SIMIX_comm_copy_data_callback (comm, comm->comm.src_buff, buff_size);
1067 /* Set the copied flag so we copy data only once */
1068 /* (this function might be called from both communication ends) */
1069 comm->comm.copied = 1;