Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
1f1954d1cf1b11d5148175bf12a32d4e3d1bba5a
[simgrid.git] / src / msg / msg_mailbox.c
1 #include "mailbox.h"
2 #include "msg/private.h"
3
/* Logging sub-category "msg_mailbox" (child of "msg"), used as the default
 * category by the logging macros (DEBUGn, CRITICALn, ...) in this file. */
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(msg_mailbox, msg,
                                "Logging specific to MSG (mailbox)");

/* Dictionary of the mailboxes registered through MSG_mailbox_new(), keyed by
 * alias.  Created by MSG_mailbox_mod_init() and released by
 * MSG_mailbox_mod_exit(). */
static xbt_dict_t msg_mailboxes = NULL;
8
9 void MSG_mailbox_mod_init(void)
10 {
11   msg_mailboxes = xbt_dict_new();
12 }
13
14 void MSG_mailbox_mod_exit(void)
15 {
16   xbt_dict_free(&msg_mailboxes);
17 }
18
19 msg_mailbox_t MSG_mailbox_create(const char *alias)
20 {
21   msg_mailbox_t mailbox = xbt_new0(s_msg_mailbox_t, 1);
22
23   mailbox->tasks = xbt_fifo_new();
24   mailbox->cond = NULL;
25   mailbox->alias = alias ? xbt_strdup(alias) : NULL;
26   mailbox->hostname = NULL;
27
28   return mailbox;
29 }
30
31 msg_mailbox_t MSG_mailbox_new(const char *alias)
32 {
33   msg_mailbox_t mailbox = MSG_mailbox_create(alias);
34
35   /* add the mbox in the dictionary */
36   xbt_dict_set(msg_mailboxes, alias, mailbox, MSG_mailbox_free);
37
38   return mailbox;
39 }
40
41 void MSG_mailbox_free(void *mailbox)
42 {
43   msg_mailbox_t _mailbox = (msg_mailbox_t) mailbox;
44
45   if (_mailbox->hostname)
46     free(_mailbox->hostname);
47
48   xbt_fifo_free(_mailbox->tasks);
49   free(_mailbox->alias);
50
51   free(_mailbox);
52 }
53
54 smx_cond_t MSG_mailbox_get_cond(msg_mailbox_t mailbox)
55 {
56   return mailbox->cond;
57 }
58
59 void MSG_mailbox_remove(msg_mailbox_t mailbox, m_task_t task)
60 {
61   xbt_fifo_remove(mailbox->tasks, task);
62 }
63
64 int MSG_mailbox_is_empty(msg_mailbox_t mailbox)
65 {
66   return (NULL == xbt_fifo_get_first_item(mailbox->tasks));
67 }
68
69 m_task_t MSG_mailbox_pop_head(msg_mailbox_t mailbox)
70 {
71   return (m_task_t) xbt_fifo_shift(mailbox->tasks);
72 }
73
74 m_task_t MSG_mailbox_get_head(msg_mailbox_t mailbox)
75 {
76   xbt_fifo_item_t item;
77
78   if (!(item = xbt_fifo_get_first_item(mailbox->tasks)))
79     return NULL;
80
81   return (m_task_t) xbt_fifo_get_item_content(item);
82 }
83
84
85 m_task_t MSG_mailbox_get_first_host_task(msg_mailbox_t mailbox, m_host_t host)
86 {
87   m_task_t task = NULL;
88   xbt_fifo_item_t item = NULL;
89
90   xbt_fifo_foreach(mailbox->tasks, item, task, m_task_t)
91     if (task->simdata->source == host) {
92     xbt_fifo_remove_item(mailbox->tasks, item);
93     return task;
94   }
95
96   return NULL;
97 }
98
99 int
100 MSG_mailbox_get_count_host_waiting_tasks(msg_mailbox_t mailbox, m_host_t host)
101 {
102   m_task_t task = NULL;
103   xbt_fifo_item_t item = NULL;
104   int count = 0;
105
106   xbt_fifo_foreach(mailbox->tasks, item, task, m_task_t) {
107     if (task->simdata->source == host)
108       count++;
109   }
110
111   return count;
112 }
113
114 void MSG_mailbox_set_cond(msg_mailbox_t mailbox, smx_cond_t cond)
115 {
116   mailbox->cond = cond;
117 }
118
119 const char *MSG_mailbox_get_alias(msg_mailbox_t mailbox)
120 {
121   return mailbox->alias;
122 }
123
124 const char *MSG_mailbox_get_hostname(msg_mailbox_t mailbox)
125 {
126   return mailbox->hostname;
127 }
128
129 void MSG_mailbox_set_hostname(msg_mailbox_t mailbox, const char *hostname)
130 {
131   mailbox->hostname = xbt_strdup(hostname);
132 }
133
134 msg_mailbox_t MSG_mailbox_get_by_alias(const char *alias)
135 {
136
137   msg_mailbox_t mailbox = xbt_dict_get_or_null(msg_mailboxes, alias);
138
139   if (!mailbox) {
140     mailbox = MSG_mailbox_new(alias);
141     MSG_mailbox_set_hostname(mailbox, MSG_host_self()->name);
142   }
143
144   return mailbox;
145 }
146
147 msg_mailbox_t MSG_mailbox_get_by_channel(m_host_t host, m_channel_t channel)
148 {
149   xbt_assert0((host != NULL), "Invalid host");
150   xbt_assert1((channel >= 0)
151               && (channel < msg_global->max_channel), "Invalid channel %d",
152               channel);
153
154   return host->simdata->mailboxes[(size_t) channel];
155 }
156
157 MSG_error_t
158 MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
159                          m_host_t host, double timeout)
160 {
161   m_process_t process = MSG_process_self();
162   m_task_t t = NULL;
163   m_host_t h = NULL;
164   simdata_task_t t_simdata = NULL;
165   simdata_host_t h_simdata = NULL;
166   double start_time = SIMIX_get_clock();
167
168   smx_cond_t cond = NULL;       //conditional wait if the task isn't on the channel yet
169
170   CHECK_HOST();
171
172   /* Sanity check */
173   xbt_assert0(task, "Null pointer for the task storage");
174
175   if (*task)
176     CRITICAL0
177       ("MSG_task_get() was asked to write in a non empty task struct.");
178
179   /* Get the task */
180   h = MSG_host_self();
181   h_simdata = h->simdata;
182
183   SIMIX_mutex_lock(h->simdata->mutex);
184
185   if (MSG_mailbox_get_cond(mailbox)) {
186     CRITICAL1("A process is already blocked on the channel %s",
187               MSG_mailbox_get_alias(mailbox));
188     SIMIX_cond_display_info(MSG_mailbox_get_cond(mailbox));
189     xbt_die("Go fix your code!");
190   }
191
192   while (1) {
193     /* if the mailbox is empty (has no task */
194     if (!MSG_mailbox_is_empty(mailbox)) {
195       if (!host) {
196         /* pop the head of the mailbox */
197         t = MSG_mailbox_pop_head(mailbox);
198         break;
199       } else {
200         /* get the first task of the host */
201         if ((t = MSG_mailbox_get_first_host_task(mailbox, host)))
202           break;
203       }
204     }
205
206     if ((timeout > 0) && (SIMIX_get_clock() - start_time >= timeout)) {
207       SIMIX_mutex_unlock(h->simdata->mutex);
208       MSG_mailbox_set_cond(mailbox, NULL);
209       SIMIX_cond_destroy(cond);
210       MSG_RETURN(MSG_TRANSFER_FAILURE);
211     }
212
213     if (!cond) {
214       cond = SIMIX_cond_init();
215       MSG_mailbox_set_cond(mailbox, cond);
216     }
217
218     if (timeout > 0)
219       SIMIX_cond_wait_timeout(cond, h->simdata->mutex, timeout - start_time);
220     else
221       SIMIX_cond_wait(MSG_mailbox_get_cond(mailbox), h->simdata->mutex);
222
223     if (SIMIX_host_get_state(h_simdata->smx_host) == 0) {
224       SIMIX_mutex_unlock(h->simdata->mutex);
225       MSG_mailbox_set_cond(mailbox, NULL);
226       SIMIX_cond_destroy(cond);
227       MSG_RETURN(MSG_HOST_FAILURE);
228     }
229   }
230
231
232   DEBUG1("OK, got a task (%s)", t->name);
233   /* clean conditional */
234   if (cond) {
235     MSG_mailbox_set_cond(mailbox, NULL);
236     SIMIX_cond_destroy(cond);
237   }
238
239   SIMIX_mutex_unlock(h->simdata->mutex);
240
241   t_simdata = t->simdata;
242   t_simdata->receiver = process;
243   *task = t;
244
245   SIMIX_mutex_lock(t_simdata->mutex);
246
247   /* Transfer */
248   /* create SIMIX action to the communication */
249   t_simdata->comm =
250     SIMIX_action_communicate(t_simdata->sender->simdata->m_host->simdata->
251                              smx_host,
252                              process->simdata->m_host->simdata->smx_host,
253                              t->name, t_simdata->message_size,
254                              t_simdata->rate);
255
256   /* This is a hack. We know that both the receiver and the sender will
257      need to look at the content of t_simdata->comm. And it needs to be
258      destroyed. However, we don't known whether the receiver or the sender
259      will get to it first. So by setting with refcount to 2 we can enforce
260      that things happen correctly. An alternative would be to only do ++ and
261      -- on this refcount and to sprinkle them judiciously throughout the code,
262      which appears perhaps worse? Or perhaps the refcount field of
263      task->simdata can be used for this? At any rate, this will do for now */
264   t_simdata->comm->refcount = 2;
265
266   /* if the process is suspend, create the action but stop its execution, it will be restart when the sender process resume */
267   if (MSG_process_is_suspended(t_simdata->sender)) {
268     DEBUG1("Process sender (%s) suspended", t_simdata->sender->name);
269     SIMIX_action_set_priority(t_simdata->comm, 0);
270   }
271
272   process->simdata->waiting_task = t;
273   SIMIX_register_action_to_condition(t_simdata->comm, t_simdata->cond);
274
275   while (1) {
276     SIMIX_cond_wait(t_simdata->cond, t_simdata->mutex);
277
278     if (SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING)
279       break;
280   }
281
282   SIMIX_unregister_action_to_condition(t_simdata->comm, t_simdata->cond);
283   process->simdata->waiting_task = NULL;
284
285   /* If sender still around (it didn't free the comm yet), note that it's not waiting anymore */
286   if (t_simdata->comm->refcount == 2) {
287     t->simdata->sender->simdata->waiting_task = NULL;
288   }
289
290   /* for this process, don't need to change in get function */
291   SIMIX_mutex_unlock(t_simdata->mutex);
292
293
294   if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
295     if (t_simdata->comm->refcount == 1) {
296       SIMIX_action_destroy(t_simdata->comm);
297       t_simdata->comm = NULL;
298     } else {
299       t_simdata->comm->refcount--;
300     }
301     t_simdata->refcount--;
302     MSG_RETURN(MSG_OK);
303   } else if (SIMIX_host_get_state(h_simdata->smx_host) == 0) {
304     if (t_simdata->comm->refcount == 1) {
305       SIMIX_action_destroy(t_simdata->comm);
306       t_simdata->comm = NULL;
307     } else {
308       t_simdata->comm->refcount--;
309     }
310     t_simdata->refcount--;
311     MSG_RETURN(MSG_HOST_FAILURE);
312   } else {
313     if (t_simdata->comm->refcount == 1) {
314       SIMIX_action_destroy(t_simdata->comm);
315       t_simdata->comm = NULL;
316     } else {
317       t_simdata->comm->refcount--;
318     }
319     t_simdata->refcount--;
320     MSG_RETURN(MSG_TRANSFER_FAILURE);
321   }
322 }
323
324 MSG_error_t
325 MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
326                              double timeout)
327 {
328   m_process_t process = MSG_process_self();
329   const char *hostname;
330   simdata_task_t t_simdata = NULL;
331   m_host_t local_host = NULL;
332   m_host_t remote_host = NULL;
333   smx_cond_t cond = NULL;
334
335   CHECK_HOST();
336
337   t_simdata = task->simdata;
338   t_simdata->sender = process;
339   t_simdata->source = MSG_process_get_host(process);
340
341   xbt_assert0(t_simdata->refcount == 1,
342               "This task is still being used somewhere else. You cannot send it now. Go fix your code!");
343
344   t_simdata->comm = NULL;
345
346   t_simdata->refcount++;
347   local_host = ((simdata_process_t) process->simdata)->m_host;
348   msg_global->sent_msg++;
349
350   /* get the host name containing the mailbox */
351   hostname = MSG_mailbox_get_hostname(mailbox);
352
353   remote_host = MSG_get_host_by_name(hostname);
354
355   if (!remote_host)
356     THROW1(not_found_error, 0, "Host %s not fount", hostname);
357
358
359   DEBUG4("Trying to send a task (%g kB) from %s to %s on the channel %s",
360          t_simdata->message_size / 1000, local_host->name,
361          remote_host->name, MSG_mailbox_get_alias(mailbox));
362
363   SIMIX_mutex_lock(remote_host->simdata->mutex);
364
365   /* put the task in the mailbox */
366   xbt_fifo_push(mailbox->tasks, task);
367
368   if ((cond = MSG_mailbox_get_cond(mailbox))) {
369     DEBUG0("Somebody is listening. Let's wake him up!");
370     SIMIX_cond_signal(cond);
371   }
372
373   SIMIX_mutex_unlock(remote_host->simdata->mutex);
374
375   SIMIX_mutex_lock(t_simdata->mutex);
376
377   process->simdata->waiting_task = task;
378
379   if (timeout > 0) {
380     xbt_ex_t e;
381     double time;
382     double time_elapsed;
383     time = SIMIX_get_clock();
384
385     TRY {
386       /*verify if the action that ends is the correct. Call the wait_timeout with the new time. If the timeout occurs, an exception is raised */
387       while (1) {
388         time_elapsed = SIMIX_get_clock() - time;
389         SIMIX_cond_wait_timeout(t_simdata->cond, t_simdata->mutex,
390                                 timeout - time_elapsed);
391
392         if ((t_simdata->comm != NULL)
393             && (SIMIX_action_get_state(t_simdata->comm) !=
394                 SURF_ACTION_RUNNING))
395           break;
396       }
397     }
398     CATCH(e) {
399       if (e.category == timeout_error) {
400         xbt_ex_free(e);
401         /* verify if the timeout happened and the communication didn't started yet */
402         if (t_simdata->comm == NULL) {
403           process->simdata->waiting_task = NULL;
404
405           /* remove the task from the mailbox */
406           MSG_mailbox_remove(mailbox, task);
407
408           if (t_simdata->receiver && t_simdata->receiver->simdata) {    /* receiver still around */
409             t_simdata->receiver->simdata->waiting_task = NULL;
410           }
411
412           SIMIX_mutex_unlock(t_simdata->mutex);
413           MSG_RETURN(MSG_TRANSFER_FAILURE);
414         }
415       } else {
416         RETHROW;
417       }
418     }
419   } else {
420     while (1) {
421       SIMIX_cond_wait(t_simdata->cond, t_simdata->mutex);
422
423       if (SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING)
424         break;
425     }
426   }
427
428   DEBUG1("Action terminated %s", task->name);
429   process->simdata->waiting_task = NULL;
430
431   if (t_simdata->comm->refcount == 2) { //receiver didn't free it yet: he's still around
432     t_simdata->receiver->simdata->waiting_task = NULL;
433   }
434
435   SIMIX_mutex_unlock(task->simdata->mutex);
436
437   if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
438     if (t_simdata->comm->refcount == 1) {
439       SIMIX_action_destroy(t_simdata->comm);
440       t_simdata->comm = NULL;
441     } else {
442       t_simdata->comm->refcount--;
443     }
444     MSG_RETURN(MSG_OK);
445   } else if (SIMIX_host_get_state(local_host->simdata->smx_host) == 0) {
446     if (t_simdata->comm->refcount == 1) {
447       SIMIX_action_destroy(t_simdata->comm);
448       t_simdata->comm = NULL;
449     } else {
450       t_simdata->comm->refcount--;
451     }
452     MSG_RETURN(MSG_HOST_FAILURE);
453   } else {
454     if (t_simdata->comm->refcount == 1) {
455       SIMIX_action_destroy(t_simdata->comm);
456       t_simdata->comm = NULL;
457     } else {
458       t_simdata->comm->refcount--;
459     }
460     MSG_RETURN(MSG_TRANSFER_FAILURE);
461   }
462 }