Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix issue when a host failed while executing a task (the glass is only half full...
[simgrid.git] / src / msg / msg_mailbox.c
index dda4eb5..997b138 100644 (file)
@@ -6,48 +6,49 @@
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
-#include "mailbox.h"
-#include "msg/private.h"
+#include "msg_mailbox.h"
+#include "msg_private.h"
+
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(msg_mailbox, msg,
                                 "Logging specific to MSG (mailbox)");
 
 msg_mailbox_t MSG_mailbox_new(const char *alias)
 {
-  return SIMIX_req_rdv_create(alias);
+  return simcall_rdv_create(alias);
 }
 
 void MSG_mailbox_free(void *mailbox)
 {
-  SIMIX_req_rdv_destroy((msg_mailbox_t)mailbox);
+  simcall_rdv_destroy((msg_mailbox_t)mailbox);
 }
 
 int MSG_mailbox_is_empty(msg_mailbox_t mailbox)
 {
-  return (NULL == SIMIX_req_rdv_get_head(mailbox));
+  return (NULL == simcall_rdv_get_head(mailbox));
 }
 
 m_task_t MSG_mailbox_get_head(msg_mailbox_t mailbox)
 {
-  smx_action_t comm = SIMIX_req_rdv_get_head(mailbox);
+  smx_action_t comm = simcall_rdv_get_head(mailbox);
 
   if (!comm)
     return NULL;
 
-  return (m_task_t) SIMIX_req_comm_get_data(comm);
+  return (m_task_t) simcall_comm_get_src_data(comm);
 }
 
 int
 MSG_mailbox_get_count_host_waiting_tasks(msg_mailbox_t mailbox,
                                          m_host_t host)
 {
-  return SIMIX_req_rdv_comm_count_by_host(mailbox,
-                                      host->simdata->smx_host);
+  return simcall_rdv_comm_count_by_host(mailbox,
+                                      host->smx_host);
 }
 
 msg_mailbox_t MSG_mailbox_get_by_alias(const char *alias)
 {
 
-  msg_mailbox_t mailbox = SIMIX_req_rdv_get_by_name(alias);
+  msg_mailbox_t mailbox = simcall_rdv_get_by_name(alias);
 
   if (!mailbox)
     mailbox = MSG_mailbox_new(alias);
@@ -55,51 +56,33 @@ msg_mailbox_t MSG_mailbox_get_by_alias(const char *alias)
   return mailbox;
 }
 
-msg_mailbox_t MSG_mailbox_get_by_channel(m_host_t host,
-                                         m_channel_t channel)
-{
-  xbt_assert0((host != NULL), "Invalid host");
-  xbt_assert1((channel >= 0)
-              && (channel < msg_global->max_channel), "Invalid channel %d",
-              channel);
-
-  return host->simdata->mailboxes[(size_t) channel];
-}
-
 MSG_error_t
 MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
                          m_host_t host, double timeout)
 {
   xbt_ex_t e;
   MSG_error_t ret = MSG_OK;
-  smx_action_t comm = NULL;
-#ifdef HAVE_TRACING
-  double start_time = 0;
-#endif
   /* We no longer support getting a task from a specific host */
   if (host)
     THROW_UNIMPLEMENTED;
 
-  CHECK_HOST();
 #ifdef HAVE_TRACING
   TRACE_msg_task_get_start();
-  start_time = MSG_get_clock();
+  volatile double start_time = MSG_get_clock();
 #endif
 
   /* Sanity check */
-  xbt_assert0(task, "Null pointer for the task storage");
+  xbt_assert(task, "Null pointer for the task storage");
 
   if (*task)
-    CRITICAL0
-        ("MSG_task_get() was asked to write in a non empty task struct.");
+    XBT_WARN
+        ("Asked to write the received task in a non empty struct -- proceeding.");
 
   /* Try to receive it by calling SIMIX network layer */
   TRY {
-    comm = SIMIX_req_comm_irecv(mailbox, task, NULL, NULL, NULL);
-    SIMIX_req_comm_wait(comm, timeout);
-    SIMIX_req_comm_destroy(comm);
-    DEBUG2("Got task %s from %p",(*task)->name,mailbox);
-    (*task)->simdata->refcount--;
+    simcall_comm_recv(mailbox, task, NULL, NULL, NULL, timeout);
+    XBT_DEBUG("Got task %s from %p",(*task)->name,mailbox);
+    (*task)->simdata->isused=0;
   }
   CATCH(e) {
     switch (e.category) {
@@ -113,18 +96,18 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
       ret = MSG_TIMEOUT;
       break;
     default:
-       xbt_backtrace_display(&e);
-      xbt_die(bprintf("Unhandled SIMIX network exception: %s", e.msg));
+      RETHROW;
     }
     xbt_ex_free(e);
   }
 
-  if (ret != MSG_HOST_FAILURE &&
-      ret != MSG_TRANSFER_FAILURE && ret != MSG_TIMEOUT) {
 #ifdef HAVE_TRACING
+  if (ret != MSG_HOST_FAILURE &&
+      ret != MSG_TRANSFER_FAILURE &&
+      ret != MSG_TIMEOUT) {
     TRACE_msg_task_get_end(start_time, *task);
-#endif
   }
+#endif
   MSG_RETURN(ret);
 }
 
@@ -136,45 +119,43 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
   MSG_error_t ret = MSG_OK;
   simdata_task_t t_simdata = NULL;
   m_process_t process = MSG_process_self();
-#ifdef HAVE_TRACING
-  int call_end = 0;
-#endif
-  CHECK_HOST();
+  simdata_process_t p_simdata = SIMIX_process_self_get_data(process);
 
 #ifdef HAVE_TRACING
-  call_end = TRACE_msg_task_put_start(task);    //must be after CHECK_HOST()
+  int call_end = TRACE_msg_task_put_start(task);    //must be after CHECK_HOST()
 #endif
 
-
   /* Prepare the task to send */
   t_simdata = task->simdata;
   t_simdata->sender = process;
-  t_simdata->source = MSG_host_self();
+  t_simdata->source = ((simdata_process_t) SIMIX_process_self_get_data(process))->m_host;
 
-  xbt_assert0(t_simdata->refcount == 1,
+  xbt_assert(t_simdata->isused == 0,
               "This task is still being used somewhere else. You cannot send it now. Go fix your code!");
 
-  t_simdata->refcount++;
+  t_simdata->isused=1;
+  t_simdata->comm = NULL;
   msg_global->sent_msg++;
 
-  process->simdata->waiting_task = task;
+
+  p_simdata->waiting_task = task;
 
   /* Try to send it by calling SIMIX network layer */
   TRY {
-    t_simdata->comm = SIMIX_req_comm_isend(mailbox, t_simdata->message_size,
-                       t_simdata->rate, task, sizeof(void *), NULL, task);
+      smx_action_t comm = simcall_comm_isend(mailbox, t_simdata->message_size,
+                                  t_simdata->rate, task, sizeof(void *),
+                                  NULL, NULL, task, 0);
 #ifdef HAVE_TRACING
-    SIMIX_req_set_category(t_simdata->comm, task->category);
+    if (TRACE_is_enabled()) {
+      simcall_set_category(comm, task->category);
+    }
 #endif
-    SIMIX_req_comm_wait(t_simdata->comm, timeout);
-    SIMIX_req_comm_destroy(t_simdata->comm);
+     t_simdata->comm = comm;
+     simcall_comm_wait(comm, timeout);
   }
 
   CATCH(e) {
     switch (e.category) {
-    case host_error:
-      ret = MSG_HOST_FAILURE;
-      break;
     case network_error:
       ret = MSG_TRANSFER_FAILURE;
       break;
@@ -182,18 +163,33 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
       ret = MSG_TIMEOUT;
       break;
     default:
-      xbt_die(bprintf("Unhandled SIMIX network exception: %s", e.msg));
+      RETHROW;
     }
     xbt_ex_free(e);
 
-    /* Decrement the refcount only on failure */
-    t_simdata->refcount--;
+    /* If the send failed, it is not used anymore */
+    t_simdata->isused = 0;
   }
 
-  process->simdata->waiting_task = NULL;
+
+  p_simdata->waiting_task = NULL;
 #ifdef HAVE_TRACING
   if (call_end)
     TRACE_msg_task_put_end();
 #endif
   MSG_RETURN(ret);
 }
+
+#ifdef MSG_USE_DEPRECATED
+msg_mailbox_t MSG_mailbox_get_by_channel(m_host_t host,
+                                         m_channel_t channel)
+{
+  XBT_WARN("DEPRECATED! Now use MSG_mailbox_get_by_alias");
+  xbt_assert((host != NULL), "Invalid host");
+  xbt_assert((channel >= 0)
+              && (channel < msg_global->max_channel), "Invalid channel %d",
+              channel);
+
+  return host->mailboxes[(size_t) channel];
+}
+#endif