Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Remove comm from src_proc->comms and dst_proc->comms when finishing it,
[simgrid.git] / src / simix / smx_network.c
1 /* Copyright (c) 2009, 2010. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 #include "smx_private.h"
8 #include "xbt/log.h"
9 #include "mc/mc.h"
10 #include "xbt/dict.h"
11
/* Logging category used by every XBT_DEBUG() call of this file */
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
                                "Logging specific to SIMIX (network)");

/* Named rendez-vous points, indexed by their name. The dictionary is created
 * by SIMIX_network_init() and released by SIMIX_network_exit(). */
static xbt_dict_t rdv_points = NULL;
/* Counter incremented for each action built by SIMIX_comm_new(), and
 * decremented again when such an action turns out to be useless */
XBT_IMPORT_NO_EXPORT(unsigned long int) smx_total_comms = 0;

/* Forward declarations of the file-local helpers */
static void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall);
static void SIMIX_comm_copy_data(smx_action_t comm);
static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
static smx_action_t SIMIX_fifo_probe_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
                                        int (*match_fun)(void *, void *,smx_action_t),
                                        void *user_data, smx_action_t my_action);
static smx_action_t SIMIX_fifo_get_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
                                        int (*match_fun)(void *, void *,smx_action_t),
                                        void *user_data, smx_action_t my_action);
/* Free function handed to the rdv_points dictionary */
static void SIMIX_rdv_free(void *data);
29
30 void SIMIX_network_init(void)
31 {
32   rdv_points = xbt_dict_new_homogeneous(SIMIX_rdv_free);
33   if(MC_is_active())
34     MC_ignore_data_bss(&smx_total_comms, sizeof(smx_total_comms));
35 }
36
37 void SIMIX_network_exit(void)
38 {
39   xbt_dict_free(&rdv_points);
40 }
41
42 /******************************************************************************/
43 /*                           Rendez-Vous Points                               */
44 /******************************************************************************/
45
46 smx_rdv_t SIMIX_pre_rdv_create(smx_simcall_t simcall, const char *name){
47   return SIMIX_rdv_create(name);
48 }
49 smx_rdv_t SIMIX_rdv_create(const char *name)
50 {
51   /* two processes may have pushed the same rdv_create simcall at the same time */
52   smx_rdv_t rdv = name ? xbt_dict_get_or_null(rdv_points, name) : NULL;
53
54   if (!rdv) {
55     rdv = xbt_new0(s_smx_rvpoint_t, 1);
56     rdv->name = name ? xbt_strdup(name) : NULL;
57     rdv->comm_fifo = xbt_fifo_new();
58     rdv->done_comm_fifo = xbt_fifo_new();
59     rdv->permanent_receiver=NULL;
60
61     XBT_DEBUG("Creating a mailbox at %p with name %s\n", rdv, name);
62
63     if (rdv->name)
64       xbt_dict_set(rdv_points, rdv->name, rdv, NULL);
65   }
66   return rdv;
67 }
68
69 void SIMIX_pre_rdv_destroy(smx_simcall_t simcall, smx_rdv_t rdv){
70   return SIMIX_rdv_destroy(rdv);
71 }
72 void SIMIX_rdv_destroy(smx_rdv_t rdv)
73 {
74   if (rdv->name)
75     xbt_dict_remove(rdv_points, rdv->name);
76 }
77
78 void SIMIX_rdv_free(void *data)
79 {
80   XBT_DEBUG("rdv free %p", data);
81   smx_rdv_t rdv = (smx_rdv_t) data;
82   xbt_free(rdv->name);
83   xbt_fifo_free(rdv->comm_fifo);
84   xbt_fifo_free(rdv->done_comm_fifo);
85
86   xbt_free(rdv);  
87 }
88
89 xbt_dict_t SIMIX_get_rdv_points()
90 {
91   return rdv_points;
92 }
93
94 smx_rdv_t SIMIX_pre_rdv_get_by_name(smx_simcall_t simcall, const char *name){
95   return SIMIX_rdv_get_by_name(name);
96 }
97 smx_rdv_t SIMIX_rdv_get_by_name(const char *name)
98 {
99   return xbt_dict_get_or_null(rdv_points, name);
100 }
101
102 int SIMIX_pre_rdv_comm_count_by_host(smx_simcall_t simcall, smx_rdv_t rdv, smx_host_t host){
103   return SIMIX_rdv_comm_count_by_host(rdv, host);
104 }
105 int SIMIX_rdv_comm_count_by_host(smx_rdv_t rdv, smx_host_t host)
106 {
107   smx_action_t comm = NULL;
108   xbt_fifo_item_t item = NULL;
109   int count = 0;
110
111   xbt_fifo_foreach(rdv->comm_fifo, item, comm, smx_action_t) {
112     if (comm->comm.src_proc->smx_host == host)
113       count++;
114   }
115
116   return count;
117 }
118
119 smx_action_t SIMIX_pre_rdv_get_head(smx_simcall_t simcall, smx_rdv_t rdv){
120   return SIMIX_rdv_get_head(rdv);
121 }
122 smx_action_t SIMIX_rdv_get_head(smx_rdv_t rdv)
123 {
124   return xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
125 }
126
127 smx_process_t SIMIX_pre_rdv_get_receiver(smx_simcall_t simcall, smx_rdv_t rdv){
128   return SIMIX_rdv_get_receiver(rdv);
129 }
130 /**
131  *  \brief get the receiver (process associated to the mailbox)
132  *  \param rdv The rendez-vous point
133  *  \return process The receiving process (NULL if not set)
134  */
135 smx_process_t SIMIX_rdv_get_receiver(smx_rdv_t rdv)
136 {
137   return rdv->permanent_receiver;
138 }
139
140 void SIMIX_pre_rdv_set_receiver(smx_simcall_t simcall, smx_rdv_t rdv,
141                             smx_process_t process){
142   SIMIX_rdv_set_receiver(rdv, process);
143 }
144 /**
145  *  \brief set the receiver of the rendez vous point to allow eager sends
146  *  \param rdv The rendez-vous point
147  *  \param process The receiving process
148  */
149 void SIMIX_rdv_set_receiver(smx_rdv_t rdv, smx_process_t process)
150 {
151   rdv->permanent_receiver=process;
152 }
153
154 /**
155  *  \brief Pushes a communication action into a rendez-vous point
156  *  \param rdv The rendez-vous point
157  *  \param comm The communication action
158  */
159 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
160 {
161   xbt_fifo_push(rdv->comm_fifo, comm);
162   comm->comm.rdv = rdv;
163 }
164
165 /**
166  *  \brief Removes a communication action from a rendez-vous point
167  *  \param rdv The rendez-vous point
168  *  \param comm The communication action
169  */
170 XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
171 {
172   xbt_fifo_remove(rdv->comm_fifo, comm);
173   comm->comm.rdv = NULL;
174 }
175
176 /**
177  *  \brief Checks if there is a communication action queued in a fifo matching our needs
178  *  \param type The type of communication we are looking for (comm_send, comm_recv)
179  *  \return The communication action if found, NULL otherwise
180  */
181 smx_action_t SIMIX_fifo_get_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
182                                  int (*match_fun)(void *, void *,smx_action_t),
183                                  void *this_user_data, smx_action_t my_action)
184 {
185   smx_action_t action;
186   xbt_fifo_item_t item;
187   void* other_user_data = NULL;
188
189   xbt_fifo_foreach(fifo, item, action, smx_action_t) {
190     if (action->comm.type == SIMIX_COMM_SEND) {
191       other_user_data = action->comm.src_data;
192     } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
193       other_user_data = action->comm.dst_data;
194     }
195     if (action->comm.type == type &&
196         (!match_fun              ||              match_fun(this_user_data,  other_user_data, action)) &&
197         (!action->comm.match_fun || action->comm.match_fun(other_user_data, this_user_data,  my_action))) {
198       XBT_DEBUG("Found a matching communication action %p", action);
199       xbt_fifo_remove_item(fifo, item);
200       xbt_fifo_free_item(item);
201       action->comm.refcount++;
202       action->comm.rdv = NULL;
203       return action;
204     }
205     XBT_DEBUG("Sorry, communication action %p does not match our needs:"
206               " its type is %d but we are looking for a comm of type %d (or maybe the filtering didn't match)",
207               action, (int)action->comm.type, (int)type);
208   }
209   XBT_DEBUG("No matching communication action found");
210   return NULL;
211 }
212
213
214 /**
215  *  \brief Checks if there is a communication action queued in a fifo matching our needs, but leave it there
216  *  \param type The type of communication we are looking for (comm_send, comm_recv)
217  *  \return The communication action if found, NULL otherwise
218  */
219 smx_action_t SIMIX_fifo_probe_comm(xbt_fifo_t fifo, e_smx_comm_type_t type,
220                                  int (*match_fun)(void *, void *,smx_action_t),
221                                  void *this_user_data, smx_action_t my_action)
222 {
223   smx_action_t action;
224   xbt_fifo_item_t item;
225   void* other_user_data = NULL;
226
227   xbt_fifo_foreach(fifo, item, action, smx_action_t) {
228     if (action->comm.type == SIMIX_COMM_SEND) {
229       other_user_data = action->comm.src_data;
230     } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
231       other_user_data = action->comm.dst_data;
232     }
233     if (action->comm.type == type &&
234         (!match_fun              ||              match_fun(this_user_data,  other_user_data, action)) &&
235         (!action->comm.match_fun || action->comm.match_fun(other_user_data, this_user_data,  my_action))) {
236       XBT_DEBUG("Found a matching communication action %p", action);
237       action->comm.refcount++;
238
239       return action;
240     }
241     XBT_DEBUG("Sorry, communication action %p does not match our needs:"
242               " its type is %d but we are looking for a comm of type %d (or maybe the filtering didn't match)",
243               action, (int)action->comm.type, (int)type);
244   }
245   XBT_DEBUG("No matching communication action found");
246   return NULL;
247 }
248 /******************************************************************************/
249 /*                            Communication Actions                            */
250 /******************************************************************************/
251
252 /**
253  *  \brief Creates a new communicate action
254  *  \param type The direction of communication (comm_send, comm_recv)
255  *  \return The new communicate action
256  */
257 smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
258 {
259   smx_action_t act;
260
261   /* alloc structures */
262   act = xbt_mallocator_get(simix_global->action_mallocator);
263
264   act->type = SIMIX_ACTION_COMMUNICATE;
265   act->state = SIMIX_WAITING;
266
267   /* set communication */
268   act->comm.type = type;
269   act->comm.refcount = 1;
270   act->comm.src_data=NULL;
271   act->comm.dst_data=NULL;
272
273
274 #ifdef HAVE_LATENCY_BOUND_TRACKING
275   //initialize with unknown value
276   act->latency_limited = -1;
277 #endif
278
279 #ifdef HAVE_TRACING
280   act->category = NULL;
281 #endif
282
283   XBT_DEBUG("Create communicate action %p", act);
284   ++smx_total_comms;
285
286   return act;
287 }
288
289 void SIMIX_pre_comm_destroy(smx_simcall_t simcall, smx_action_t action){
290   SIMIX_comm_destroy(action);
291 }
292 /**
293  *  \brief Destroy a communicate action
294  *  \param action The communicate action to be destroyed
295  */
296 void SIMIX_comm_destroy(smx_action_t action)
297 {
298   XBT_DEBUG("Destroy action %p (refcount: %d), state: %d",
299             action, action->comm.refcount, (int)action->state);
300
301   if (action->comm.refcount <= 0) {
302     xbt_backtrace_display_current();
303     xbt_die("The refcount of comm %p is already 0 before decreasing it. "
304             "That's a bug! If you didn't test and/or wait the same communication twice in your code, then the bug is SimGrid's...", action);
305   }
306   action->comm.refcount--;
307   if (action->comm.refcount > 0)
308       return;
309   XBT_DEBUG("Really free communication %p; refcount is now %d", action,
310             action->comm.refcount);
311
312 #ifdef HAVE_LATENCY_BOUND_TRACKING
313   action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
314 #endif
315
316   xbt_free(action->name);
317   SIMIX_comm_destroy_internal_actions(action);
318
319   if (action->comm.detached && action->state != SIMIX_DONE) {
320     /* the communication has failed and was detached:
321      * we have to free the buffer */
322     if (action->comm.clean_fun) {
323       action->comm.clean_fun(action->comm.src_buff);
324     }
325     action->comm.src_buff = NULL;
326   }
327
328   if(action->comm.rdv)
329     SIMIX_rdv_remove(action->comm.rdv, action);
330
331   xbt_mallocator_release(simix_global->action_mallocator, action);
332 }
333
334 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
335 {
336   if (action->comm.surf_comm){
337 #ifdef HAVE_LATENCY_BOUND_TRACKING
338     action->latency_limited = SIMIX_comm_is_latency_bounded(action);
339 #endif
340     action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
341     action->comm.surf_comm = NULL;
342   }
343
344   if (action->comm.src_timeout){
345     action->comm.src_timeout->model_type->action_unref(action->comm.src_timeout);
346     action->comm.src_timeout = NULL;
347   }
348
349   if (action->comm.dst_timeout){
350     action->comm.dst_timeout->model_type->action_unref(action->comm.dst_timeout);
351     action->comm.dst_timeout = NULL;
352   }
353 }
354
355 void SIMIX_pre_comm_send(smx_simcall_t simcall, smx_rdv_t rdv,
356                                   double task_size, double rate,
357                                   void *src_buff, size_t src_buff_size,
358                                   int (*match_fun)(void *, void *,smx_action_t),
359                                   void *data, double timeout){
360   smx_action_t comm = SIMIX_comm_isend(simcall->issuer, rdv, task_size, rate,
361                                        src_buff, src_buff_size, match_fun, NULL,
362                                        data, 0);
363   simcall->mc_value = 0;
364   SIMIX_pre_comm_wait(simcall, comm, timeout);
365 }
366 smx_action_t SIMIX_pre_comm_isend(smx_simcall_t simcall, smx_rdv_t rdv,
367                                   double task_size, double rate,
368                                   void *src_buff, size_t src_buff_size,
369                                   int (*match_fun)(void *, void *,smx_action_t),
370                                   void (*clean_fun)(void *), 
371                                   void *data, int detached){
372   return SIMIX_comm_isend(simcall->issuer, rdv, task_size, rate, src_buff,
373                           src_buff_size, match_fun, clean_fun, data, detached);
374
375 }
376 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
377                               double task_size, double rate,
378                               void *src_buff, size_t src_buff_size,
379                               int (*match_fun)(void *, void *,smx_action_t),
380                               void (*clean_fun)(void *), // used to free the action in case of problem after a detached send
381                               void *data,
382                               int detached)
383 {
384   XBT_DEBUG("send from %p\n", rdv);
385
386   /* Prepare an action describing us, so that it gets passed to the user-provided filter of other side */
387   smx_action_t this_action = SIMIX_comm_new(SIMIX_COMM_SEND);
388
389   /* Look for communication action matching our needs. We also provide a description of
390    * ourself so that the other side also gets a chance of choosing if it wants to match with us.
391    *
392    * If it is not found then push our communication into the rendez-vous point */
393   smx_action_t other_action = SIMIX_fifo_get_comm(rdv->comm_fifo, SIMIX_COMM_RECEIVE, match_fun, data, this_action);
394
395   if (!other_action) {
396     other_action = this_action;
397
398     if (rdv->permanent_receiver!=NULL){
399       //this mailbox is for small messages, which have to be sent right now
400       other_action->state = SIMIX_READY;
401       other_action->comm.dst_proc=rdv->permanent_receiver;
402       other_action->comm.refcount++;
403       other_action->comm.rdv = rdv;
404       xbt_fifo_push(rdv->done_comm_fifo,other_action);
405       other_action->comm.rdv=rdv;
406       XBT_DEBUG("pushing a message into the permanent receive fifo %p, comm %p \n", rdv, &(other_action->comm));
407
408     }else{
409       SIMIX_rdv_push(rdv, this_action);
410     }
411   } else {
412     XBT_DEBUG("Receive already pushed\n");
413
414     SIMIX_comm_destroy(this_action);
415     --smx_total_comms; // this creation was a pure waste
416
417     other_action->state = SIMIX_READY;
418     other_action->comm.type = SIMIX_COMM_READY;
419
420   }
421   xbt_fifo_push(src_proc->comms, other_action);
422
423   /* if the communication action is detached then decrease the refcount
424    * by one, so it will be eliminated by the receiver's destroy call */
425   if (detached) {
426     other_action->comm.detached = 1;
427     other_action->comm.refcount--;
428     other_action->comm.clean_fun = clean_fun;
429   } else {
430     other_action->comm.clean_fun = NULL;
431   }
432
433   /* Setup the communication action */
434   other_action->comm.src_proc = src_proc;
435   other_action->comm.task_size = task_size;
436   other_action->comm.rate = rate;
437   other_action->comm.src_buff = src_buff;
438   other_action->comm.src_buff_size = src_buff_size;
439   other_action->comm.src_data = data;
440
441   other_action->comm.match_fun = match_fun;
442
443   if (MC_is_active()) {
444     other_action->state = SIMIX_RUNNING;
445     return other_action;
446   }
447
448   SIMIX_comm_start(other_action);
449   return (detached ? NULL : other_action);
450 }
451
452 void SIMIX_pre_comm_recv(smx_simcall_t simcall, smx_rdv_t rdv,
453                                   void *dst_buff, size_t *dst_buff_size,
454                                   int (*match_fun)(void *, void *, smx_action_t),
455                                   void *data, double timeout){
456   smx_action_t comm = SIMIX_comm_irecv(simcall->issuer, rdv, dst_buff,
457                                        dst_buff_size, match_fun, data);
458   simcall->mc_value = 0;
459   SIMIX_pre_comm_wait(simcall, comm, timeout);
460 }
461 smx_action_t SIMIX_pre_comm_irecv(smx_simcall_t simcall, smx_rdv_t rdv,
462                                   void *dst_buff, size_t *dst_buff_size,
463                                   int (*match_fun)(void *, void *, smx_action_t),
464                                   void *data){
465   return SIMIX_comm_irecv(simcall->issuer, rdv, dst_buff, dst_buff_size,
466                           match_fun, data);
467 }
/**
 *  \brief Posts an asynchronous receive on a rendez-vous point
 *  \param dst_proc The receiving process
 *  \param rdv The rendez-vous point to receive from
 *  \param dst_buff The buffer in which the data should be received
 *  \param dst_buff_size Pointer to the size of \a dst_buff
 *  \param match_fun Filter used to select the matching send (may be NULL)
 *  \param data User data of the receiver, handed to the filters
 *  \return The communication action: the matching send if one was found,
 *          the newly created receive otherwise
 *
 *  When the rendez-vous point has a permanent receiver and its fifo of done
 *  communications is not empty, the matching send is searched there;
 *  otherwise it is searched among the pending communications. If no send
 *  matches, the new receive action is queued in the rendez-vous point.
 */
smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
                              void *dst_buff, size_t *dst_buff_size,
                              int (*match_fun)(void *, void *, smx_action_t), void *data)
{
  XBT_DEBUG("recv from %p %p\n", rdv, rdv->comm_fifo);
  /* Prepare an action describing us, so that it gets passed to the user-provided filter of other side */
  smx_action_t this_action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);

  smx_action_t other_action;
  //communication already done, get it inside the fifo of completed comms
  //permanent receive v1
  //int already_received=0;
  if(rdv->permanent_receiver && xbt_fifo_size(rdv->done_comm_fifo)!=0){

    XBT_DEBUG("We have a comm that has probably already been received, trying to match it, to skip the communication\n");
    //find a match in the already received fifo
    other_action = SIMIX_fifo_get_comm(rdv->done_comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_action);
    //if not found, assume the receiver came first, register it to the mailbox in the classical way
    if (!other_action)  {
      XBT_DEBUG("We have messages in the permanent receive list, but not the one we are looking for, pushing request into fifo\n");
      other_action = this_action;
      SIMIX_rdv_push(rdv, this_action);
    }else{
      /* the send has a surf communication with nothing left to transfer:
       * mark it as done */
      if(other_action->comm.surf_comm &&        SIMIX_comm_get_remains(other_action)==0.0)
      {
        XBT_DEBUG("comm %p has been already sent, and is finished, destroy it\n",&(other_action->comm));
        other_action->state = SIMIX_DONE;
        other_action->comm.type = SIMIX_COMM_DONE;
        other_action->comm.rdv = NULL;
        //SIMIX_comm_destroy(this_action);
        //--smx_total_comms; // this creation was a pure waste
        //already_received=1;
        //other_action->comm.refcount--;
      }/*else{
         XBT_DEBUG("Not yet finished, we have to wait %d\n", xbt_fifo_size(rdv->comm_fifo));
         }*/
      /* undo the refcount increase done by SIMIX_fifo_get_comm() */
      other_action->comm.refcount--;
      SIMIX_comm_destroy(this_action);
      --smx_total_comms; // this creation was a pure waste
    }
  }else{
    /* Look for communication action matching our needs. We also provide a description of
     * ourself so that the other side also gets a chance of choosing if it wants to match with us.
     *
     * If it is not found then push our communication into the rendez-vous point */
    other_action = SIMIX_fifo_get_comm(rdv->comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_action);

    if (!other_action) {
      XBT_DEBUG("Receive pushed first %d\n", xbt_fifo_size(rdv->comm_fifo));
      other_action = this_action;
      SIMIX_rdv_push(rdv, this_action);
    } else {
      SIMIX_comm_destroy(this_action);
      --smx_total_comms; // this creation was a pure waste
      other_action->state = SIMIX_READY;
      other_action->comm.type = SIMIX_COMM_READY;
      //other_action->comm.refcount--;
    }
    /* NOTE(review): the action is only recorded in dst_proc->comms on this
     * path, not on the permanent receiver one -- confirm this is intended */
    xbt_fifo_push(dst_proc->comms, other_action);
  }

  /* Setup communication action */
  other_action->comm.dst_proc = dst_proc;
  other_action->comm.dst_buff = dst_buff;
  other_action->comm.dst_buff_size = dst_buff_size;
  other_action->comm.dst_data = data;

  other_action->comm.match_fun = match_fun;


  /*if(already_received)//do the actual copy, because the first one after the comm didn't have all the info
    SIMIX_comm_copy_data(other_action);*/


  /* under model checking no surf communication is started */
  if (MC_is_active()) {
    other_action->state = SIMIX_RUNNING;
    return other_action;
  }

  SIMIX_comm_start(other_action);
  return other_action;
}
552
553 smx_action_t SIMIX_pre_comm_iprobe(smx_simcall_t simcall, smx_rdv_t rdv,
554                                    int src, int tag,
555                                    int (*match_fun)(void *, void *, smx_action_t),
556                                    void *data){
557   return SIMIX_comm_iprobe(simcall->issuer, rdv, src, tag, match_fun, data);
558 }
559
560 smx_action_t SIMIX_comm_iprobe(smx_process_t dst_proc, smx_rdv_t rdv, int src,
561                               int tag, int (*match_fun)(void *, void *, smx_action_t), void *data)
562 {
563   XBT_DEBUG("iprobe from %p %p\n", rdv, rdv->comm_fifo);
564   smx_action_t this_action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
565
566   smx_action_t other_action=NULL;
567   if(rdv->permanent_receiver && xbt_fifo_size(rdv->done_comm_fifo)!=0){
568     //find a match in the already received fifo
569       XBT_DEBUG("first try in the perm recv mailbox \n");
570
571     other_action = SIMIX_fifo_probe_comm(rdv->done_comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_action);
572   }
573  // }else{
574     if(!other_action){
575         XBT_DEBUG("second try in the other mailbox");
576         other_action = SIMIX_fifo_probe_comm(rdv->comm_fifo, SIMIX_COMM_SEND, match_fun, data, this_action);
577     }
578 //  }
579   if(other_action)other_action->comm.refcount--;
580
581   SIMIX_comm_destroy(this_action);
582   --smx_total_comms;
583   return other_action;
584 }
585
586 void SIMIX_pre_comm_wait(smx_simcall_t simcall, smx_action_t action, double timeout)
587 {
588   int idx = simcall->mc_value;
589   /* the simcall may be a wait, a send or a recv */
590   surf_action_t sleep;
591
592   /* Associate this simcall to the wait action */
593   XBT_DEBUG("SIMIX_pre_comm_wait, %p", action);
594
595   xbt_fifo_push(action->simcalls, simcall);
596   simcall->issuer->waiting_action = action;
597
598   if (MC_is_active()) {
599     if (idx == 0) {
600       action->state = SIMIX_DONE;
601     } else {
602       /* If we reached this point, the wait simcall must have a timeout */
603       /* Otherwise it shouldn't be enabled and executed by the MC */
604       if (timeout == -1)
605         THROW_IMPOSSIBLE;
606
607       if (action->comm.src_proc == simcall->issuer)
608         action->state = SIMIX_SRC_TIMEOUT;
609       else
610         action->state = SIMIX_DST_TIMEOUT;
611     }
612
613     SIMIX_comm_finish(action);
614     return;
615   }
616
617   /* If the action has already finish perform the error handling, */
618   /* otherwise set up a waiting timeout on the right side         */
619   if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
620     SIMIX_comm_finish(action);
621   } else { /* if (timeout >= 0) { we need a surf sleep action even when there is no timeout, otherwise surf won't tell us when the host fails */
622     sleep = surf_workstation_model->extension.workstation.sleep(simcall->issuer->smx_host->host, timeout);
623     surf_workstation_model->action_data_set(sleep, action);
624
625     if (simcall->issuer == action->comm.src_proc)
626       action->comm.src_timeout = sleep;
627     else
628       action->comm.dst_timeout = sleep;
629   }
630 }
631
632 void SIMIX_pre_comm_test(smx_simcall_t simcall, smx_action_t action)
633 {
634   if(MC_is_active()){
635     simcall_comm_test__set__result(simcall, action->comm.src_proc && action->comm.dst_proc);
636     if(simcall_comm_test__get__result(simcall)){
637       action->state = SIMIX_DONE;
638       xbt_fifo_push(action->simcalls, simcall);
639       SIMIX_comm_finish(action);
640     }else{
641       SIMIX_simcall_answer(simcall);
642     }
643     return;
644   }
645
646   simcall_comm_test__set__result(simcall, (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING));
647   if (simcall_comm_test__get__result(simcall)) {
648     xbt_fifo_push(action->simcalls, simcall);
649     SIMIX_comm_finish(action);
650   } else {
651     SIMIX_simcall_answer(simcall);
652   }
653 }
654
655 void SIMIX_pre_comm_testany(smx_simcall_t simcall, xbt_dynar_t actions)
656 {
657   int idx = simcall->mc_value;
658   unsigned int cursor;
659   smx_action_t action;
660   simcall_comm_testany__set__result(simcall, -1);
661
662   if (MC_is_active()){
663     if(idx == -1){
664       SIMIX_simcall_answer(simcall);
665     }else{
666       action = xbt_dynar_get_as(actions, idx, smx_action_t);
667       simcall_comm_testany__set__result(simcall, idx);
668       xbt_fifo_push(action->simcalls, simcall);
669       action->state = SIMIX_DONE;
670       SIMIX_comm_finish(action);
671     }
672     return;
673   }
674
675   xbt_dynar_foreach(simcall_comm_testany__get__comms(simcall), cursor,action) {
676     if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
677       simcall_comm_testany__set__result(simcall, cursor);
678       xbt_fifo_push(action->simcalls, simcall);
679       SIMIX_comm_finish(action);
680       return;
681     }
682   }
683   SIMIX_simcall_answer(simcall);
684 }
685
686 void SIMIX_pre_comm_waitany(smx_simcall_t simcall, xbt_dynar_t actions)
687 {
688   int idx = simcall->mc_value;
689   smx_action_t action;
690   unsigned int cursor = 0;
691
692   if (MC_is_active()){
693     action = xbt_dynar_get_as(actions, idx, smx_action_t);
694     xbt_fifo_push(action->simcalls, simcall);
695     simcall_comm_waitany__set__result(simcall, idx);
696     action->state = SIMIX_DONE;
697     SIMIX_comm_finish(action);
698     return;
699   }
700
701   xbt_dynar_foreach(actions, cursor, action){
702     /* associate this simcall to the the action */
703     xbt_fifo_push(action->simcalls, simcall);
704
705     /* see if the action is already finished */
706     if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
707       SIMIX_comm_finish(action);
708       break;
709     }
710   }
711 }
712
713 void SIMIX_waitany_remove_simcall_from_actions(smx_simcall_t simcall)
714 {
715   smx_action_t action;
716   unsigned int cursor = 0;
717   xbt_dynar_t actions = simcall_comm_waitany__get__comms(simcall);
718
719   xbt_dynar_foreach(actions, cursor, action) {
720     xbt_fifo_remove(action->simcalls, simcall);
721   }
722 }
723
724 /**
725  *  \brief Starts the simulation of a communication action.
726  *  \param action the communication action
727  */
728 XBT_INLINE void SIMIX_comm_start(smx_action_t action)
729 {
730   /* If both the sender and the receiver are already there, start the communication */
731   if (action->state == SIMIX_READY) {
732
733     smx_host_t sender = action->comm.src_proc->smx_host;
734     smx_host_t receiver = action->comm.dst_proc->smx_host;
735
736     XBT_DEBUG("Starting communication %p from '%s' to '%s'", action,
737               SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
738
739     action->comm.surf_comm = surf_workstation_model->extension.workstation.
740       communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
741
742     surf_workstation_model->action_data_set(action->comm.surf_comm, action);
743
744     action->state = SIMIX_RUNNING;
745
746     /* If a link is failed, detect it immediately */
747     if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
748       XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
749                 SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
750       action->state = SIMIX_LINK_FAILURE;
751       SIMIX_comm_destroy_internal_actions(action);
752     }
753
754     /* If any of the process is suspend, create the action but stop its execution,
755        it will be restarted when the sender process resume */
756     if (SIMIX_process_is_suspended(action->comm.src_proc) ||
757         SIMIX_process_is_suspended(action->comm.dst_proc)) {
758       /* FIXME: check what should happen with the action state */
759
760       if (SIMIX_process_is_suspended(action->comm.src_proc))
761         XBT_DEBUG("The communication is suspended on startup because src (%s:%s) were suspended since it initiated the communication",
762                   SIMIX_host_get_name(action->comm.src_proc->smx_host), action->comm.src_proc->name);
763       else
764         XBT_DEBUG("The communication is suspended on startup because dst (%s:%s) were suspended since it initiated the communication",
765                   SIMIX_host_get_name(action->comm.dst_proc->smx_host), action->comm.dst_proc->name);
766
767       surf_workstation_model->suspend(action->comm.surf_comm);
768
769     }
770   }
771 }
772
773 /**
774  * \brief Answers the SIMIX simcalls associated to a communication action.
775  * \param action a finished communication action
776  */
777 void SIMIX_comm_finish(smx_action_t action)
778 {
779   unsigned int destroy_count = 0;
780   smx_simcall_t simcall;
781
782   while ((simcall = xbt_fifo_shift(action->simcalls))) {
783
784     /* If a waitany simcall is waiting for this action to finish, then remove
785        it from the other actions in the waitany list. Afterwards, get the
786        position of the actual action in the waitany dynar and
787        return it as the result of the simcall */
788     if (simcall->call == SIMCALL_COMM_WAITANY) {
789       SIMIX_waitany_remove_simcall_from_actions(simcall);
790       if (!MC_is_active())
791         simcall_comm_waitany__set__result(simcall, xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &action));
792     }
793
794     /* If the action is still in a rendez-vous point then remove from it */
795     if (action->comm.rdv)
796       SIMIX_rdv_remove(action->comm.rdv, action);
797
798     XBT_DEBUG("SIMIX_comm_finish: action state = %d", (int)action->state);
799
800     /* Check out for errors */
801     switch (action->state) {
802
803     case SIMIX_DONE:
804       XBT_DEBUG("Communication %p complete!", action);
805       SIMIX_comm_copy_data(action);
806       break;
807
808     case SIMIX_SRC_TIMEOUT:
809       SMX_EXCEPTION(simcall->issuer, timeout_error, 0,
810                     "Communication timeouted because of sender");
811       break;
812
813     case SIMIX_DST_TIMEOUT:
814       SMX_EXCEPTION(simcall->issuer, timeout_error, 0,
815                     "Communication timeouted because of receiver");
816       break;
817
818     case SIMIX_SRC_HOST_FAILURE:
819       if (simcall->issuer == action->comm.src_proc)
820         simcall->issuer->context->iwannadie = 1;
821 //          SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
822       else
823         SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
824       break;
825
826     case SIMIX_DST_HOST_FAILURE:
827       if (simcall->issuer == action->comm.dst_proc)
828         simcall->issuer->context->iwannadie = 1;
829 //          SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
830       else
831         SMX_EXCEPTION(simcall->issuer, network_error, 0, "Remote peer failed");
832       break;
833
834     case SIMIX_LINK_FAILURE:
835       XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d",
836                 action,
837                 action->comm.src_proc ? action->comm.src_proc->smx_host->name : NULL,
838                 action->comm.dst_proc ? action->comm.dst_proc->smx_host->name : NULL,
839                 simcall->issuer->name, simcall->issuer, action->comm.detached);
840       if (action->comm.src_proc == simcall->issuer) {
841         XBT_DEBUG("I'm source");
842       } else if (action->comm.dst_proc == simcall->issuer) {
843         XBT_DEBUG("I'm dest");
844       } else {
845         XBT_DEBUG("I'm neither source nor dest");
846       }
847       SMX_EXCEPTION(simcall->issuer, network_error, 0, "Link failure");
848       break;
849
850     case SIMIX_CANCELED:
851       if (simcall->issuer == action->comm.dst_proc)
852         SMX_EXCEPTION(simcall->issuer, cancel_error, 0,
853                       "Communication canceled by the sender");
854       else
855         SMX_EXCEPTION(simcall->issuer, cancel_error, 0,
856                       "Communication canceled by the receiver");
857       break;
858
859     default:
860       xbt_die("Unexpected action state in SIMIX_comm_finish: %d", (int)action->state);
861     }
862
863     /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
864     if (simcall->issuer->doexception) {
865       if (simcall->call == SIMCALL_COMM_WAITANY) {
866         simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_waitany__get__comms(simcall), &action);
867       }
868       else if (simcall->call == SIMCALL_COMM_TESTANY) {
869         simcall->issuer->running_ctx->exception.value = xbt_dynar_search(simcall_comm_testany__get__comms(simcall), &action);
870       }
871     }
872
873     if (surf_workstation_model->extension.
874         workstation.get_state(simcall->issuer->smx_host->host) != SURF_RESOURCE_ON) {
875       simcall->issuer->context->iwannadie = 1;
876     }
877
878     simcall->issuer->waiting_action = NULL;
879     xbt_fifo_remove(simcall->issuer->comms, action);
880     if(action->comm.detached){
881       if(simcall->issuer == action->comm.src_proc){
882         if(action->comm.dst_proc)
883           xbt_fifo_remove(action->comm.dst_proc->comms, action);
884       }
885       if(simcall->issuer == action->comm.dst_proc){
886         if(action->comm.src_proc)
887           xbt_fifo_remove(action->comm.src_proc->comms, action);
888       }
889     }
890     SIMIX_simcall_answer(simcall);
891     destroy_count++;
892   }
893
894   while (destroy_count-- > 0)
895     SIMIX_comm_destroy(action);
896 }
897
898 /**
899  * \brief This function is called when a Surf communication action is finished.
900  * \param action the corresponding Simix communication
901  */
902 void SIMIX_post_comm(smx_action_t action)
903 {
904   /* Update action state */
905   if (action->comm.src_timeout &&
906       surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
907     action->state = SIMIX_SRC_TIMEOUT;
908   else if (action->comm.dst_timeout &&
909            surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
910     action->state = SIMIX_DST_TIMEOUT;
911   else if (action->comm.src_timeout &&
912            surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
913     action->state = SIMIX_SRC_HOST_FAILURE;
914   else if (action->comm.dst_timeout &&
915            surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
916     action->state = SIMIX_DST_HOST_FAILURE;
917   else if (action->comm.surf_comm &&
918            surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
919     XBT_DEBUG("Puta madre. Surf says that the link broke");
920     action->state = SIMIX_LINK_FAILURE;
921   } else
922     action->state = SIMIX_DONE;
923
924   XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
925             action, (int)action->state, action->comm.src_proc, action->comm.dst_proc, action->comm.detached);
926
927   /* destroy the surf actions associated with the Simix communication */
928   SIMIX_comm_destroy_internal_actions(action);
929
930   /* remove the communication action from the list of pending communications
931    * of both processes (if they still exist) */
932   if (action->comm.src_proc) {
933     xbt_fifo_remove(action->comm.src_proc->comms, action);
934   }
935   if (action->comm.dst_proc) {
936     xbt_fifo_remove(action->comm.dst_proc->comms, action);
937   }
938
939   /* if there are simcalls associated with the action, then answer them */
940   if (xbt_fifo_size(action->simcalls)) {
941     SIMIX_comm_finish(action);
942   }
943 }
944
945 void SIMIX_pre_comm_cancel(smx_simcall_t simcall, smx_action_t action){
946   SIMIX_comm_cancel(action);
947 }
948 void SIMIX_comm_cancel(smx_action_t action)
949 {
950   /* if the action is a waiting state means that it is still in a rdv */
951   /* so remove from it and delete it */
952   if (action->state == SIMIX_WAITING) {
953     SIMIX_rdv_remove(action->comm.rdv, action);
954     action->state = SIMIX_CANCELED;
955   }
956   else if (!MC_is_active() /* when running the MC there are no surf actions */
957            && (action->state == SIMIX_READY || action->state == SIMIX_RUNNING)) {
958
959     surf_workstation_model->action_cancel(action->comm.surf_comm);
960   }
961 }
962
963 void SIMIX_comm_suspend(smx_action_t action)
964 {
965   /*FIXME: shall we suspend also the timeout actions? */
966   if (action->comm.surf_comm)
967     surf_workstation_model->suspend(action->comm.surf_comm);
968   /* in the other case, the action will be suspended on creation, in SIMIX_comm_start() */
969 }
970
971 void SIMIX_comm_resume(smx_action_t action)
972 {
973   /*FIXME: check what happen with the timeouts */
974   if (action->comm.surf_comm)
975     surf_workstation_model->resume(action->comm.surf_comm);
976   /* in the other case, the action were not really suspended yet, see SIMIX_comm_suspend() and SIMIX_comm_start() */
977 }
978
979
980 /************* Action Getters **************/
981
982 double SIMIX_pre_comm_get_remains(smx_simcall_t simcall, smx_action_t action){
983   return SIMIX_comm_get_remains(action);
984 }
985 /**
986  *  \brief get the amount remaining from the communication
987  *  \param action The communication
988  */
989 double SIMIX_comm_get_remains(smx_action_t action)
990 {
991   double remains;
992
993   if(!action){
994     return 0;
995   }
996
997   switch (action->state) {
998
999   case SIMIX_RUNNING:
1000     remains = surf_workstation_model->get_remains(action->comm.surf_comm);
1001     break;
1002
1003   case SIMIX_WAITING:
1004   case SIMIX_READY:
1005     remains = 0; /*FIXME: check what should be returned */
1006     break;
1007
1008   default:
1009     remains = 0; /*FIXME: is this correct? */
1010     break;
1011   }
1012   return remains;
1013 }
1014
1015 e_smx_state_t SIMIX_pre_comm_get_state(smx_simcall_t simcall, smx_action_t action){
1016   return SIMIX_comm_get_state(action);
1017 }
1018 e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
1019 {
1020   return action->state;
1021 }
1022
1023 void* SIMIX_pre_comm_get_src_data(smx_simcall_t simcall, smx_action_t action){
1024   return SIMIX_comm_get_src_data(action);
1025 }
1026 /**
1027  *  \brief Return the user data associated to the sender of the communication
1028  *  \param action The communication
1029  *  \return the user data
1030  */
1031 void* SIMIX_comm_get_src_data(smx_action_t action)
1032 {
1033   return action->comm.src_data;
1034 }
1035
1036 void* SIMIX_pre_comm_get_dst_data(smx_simcall_t simcall, smx_action_t action){
1037   return SIMIX_comm_get_dst_data(action);
1038 }
1039 /**
1040  *  \brief Return the user data associated to the receiver of the communication
1041  *  \param action The communication
1042  *  \return the user data
1043  */
1044 void* SIMIX_comm_get_dst_data(smx_action_t action)
1045 {
1046   return action->comm.dst_data;
1047 }
1048
1049 smx_process_t SIMIX_pre_comm_get_src_proc(smx_simcall_t simcall, smx_action_t action){
1050   return SIMIX_comm_get_src_proc(action);
1051 }
1052 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
1053 {
1054   return action->comm.src_proc;
1055 }
1056
1057 smx_process_t SIMIX_pre_comm_get_dst_proc(smx_simcall_t simcall, smx_action_t action){
1058   return SIMIX_comm_get_dst_proc(action);
1059 }
1060 smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
1061 {
1062   return action->comm.dst_proc;
1063 }
1064
#ifdef HAVE_LATENCY_BOUND_TRACKING
/**
 *  \brief Tells whether a communication is latency bounded
 *
 *  When the communication has a Surf action, the latency_limited field of the
 *  Simix action is refreshed from Surf before being returned.
 *
 *  \param action The communication (may be NULL)
 *  \return 0 for a NULL action, the latency_limited field otherwise
 */
XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
{
  if (action == NULL)
    return 0;

  if (action->comm.surf_comm) {
    XBT_DEBUG("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
    action->latency_limited =
        surf_workstation_model->get_latency_limited(action->comm.surf_comm);
    XBT_DEBUG("Action limited is %d", action->latency_limited);
  }

  return action->latency_limited;
}
#endif
1083
1084 /******************************************************************************/
1085 /*                    SIMIX_comm_copy_data callbacks                       */
1086 /******************************************************************************/
1087 static void (*SIMIX_comm_copy_data_callback) (smx_action_t, void*, size_t) =
1088   &SIMIX_comm_copy_pointer_callback;
1089
1090 void
1091 SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, void*, size_t))
1092 {
1093   SIMIX_comm_copy_data_callback = callback;
1094 }
1095
1096 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, void* buff, size_t buff_size)
1097 {
1098   xbt_assert((buff_size == sizeof(void *)),
1099              "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
1100   *(void **) (comm->comm.dst_buff) = buff;
1101 }
1102
1103 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, void* buff, size_t buff_size)
1104 {
1105   XBT_DEBUG("Copy the data over");
1106   memcpy(comm->comm.dst_buff, buff, buff_size);
1107   if (comm->comm.detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
1108     xbt_free(buff);
1109     comm->comm.src_buff = NULL;
1110   }
1111 }
1112
1113
1114 /**
1115  *  \brief Copy the communication data from the sender's buffer to the receiver's one
1116  *  \param comm The communication
1117  */
1118 void SIMIX_comm_copy_data(smx_action_t comm)
1119 {
1120   size_t buff_size = comm->comm.src_buff_size;
1121   /* If there is no data to be copy then return */
1122   if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied)
1123     return;
1124
1125   XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
1126             comm,
1127             comm->comm.src_proc ? comm->comm.src_proc->smx_host->name : "a finished process",
1128             comm->comm.src_buff,
1129             comm->comm.dst_proc ? comm->comm.dst_proc->smx_host->name : "a finished process",
1130             comm->comm.dst_buff, buff_size);
1131
1132   /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
1133   if (comm->comm.dst_buff_size)
1134     buff_size = MIN(buff_size, *(comm->comm.dst_buff_size));
1135
1136   /* Update the receiver's buffer size to the copied amount */
1137   if (comm->comm.dst_buff_size)
1138     *comm->comm.dst_buff_size = buff_size;
1139
1140   if (buff_size > 0)
1141     SIMIX_comm_copy_data_callback (comm, comm->comm.src_buff, buff_size);
1142
1143   /* Set the copied flag so we copy data only once */
1144   /* (this function might be called from both communication ends) */
1145   comm->comm.copied = 1;
1146 }