Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Change variable waiting_task to waiting_action on msg process control.
[simgrid.git] / src / msg / msg_mailbox.c
index 1f1954d..c285b60 100644 (file)
@@ -180,17 +180,18 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
   h = MSG_host_self();
   h_simdata = h->simdata;
 
-  SIMIX_mutex_lock(h->simdata->mutex);
+  SIMIX_mutex_lock(h_simdata->mutex);   //FIXME: lock the mailbox instead
 
   if (MSG_mailbox_get_cond(mailbox)) {
-    CRITICAL1("A process is already blocked on the channel %s",
-              MSG_mailbox_get_alias(mailbox));
+    CRITICAL1
+      ("A process is already blocked on the channel %s (meaning that someone is already doing a get on this)",
+       MSG_mailbox_get_alias(mailbox));
     SIMIX_cond_display_info(MSG_mailbox_get_cond(mailbox));
     xbt_die("Go fix your code!");
   }
 
   while (1) {
-    /* if the mailbox is empty (has no task */
+    /* if the mailbox is not empty (has a task) */
     if (!MSG_mailbox_is_empty(mailbox)) {
       if (!host) {
         /* pop the head of the mailbox */
@@ -203,8 +204,8 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
       }
     }
 
-    if ((timeout > 0) && (SIMIX_get_clock() - start_time >= timeout)) {
-      SIMIX_mutex_unlock(h->simdata->mutex);
+    if ((timeout > 0) && (SIMIX_get_clock() - start_time >= timeout)) { // Timeout already elapsed
+      SIMIX_mutex_unlock(h_simdata->mutex);
       MSG_mailbox_set_cond(mailbox, NULL);
       SIMIX_cond_destroy(cond);
       MSG_RETURN(MSG_TRANSFER_FAILURE);
@@ -216,12 +217,13 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
     }
 
     if (timeout > 0)
-      SIMIX_cond_wait_timeout(cond, h->simdata->mutex, timeout - start_time);
+      SIMIX_cond_wait_timeout(cond, h_simdata->mutex,
+                              timeout - start_time + SIMIX_get_clock());
     else
-      SIMIX_cond_wait(MSG_mailbox_get_cond(mailbox), h->simdata->mutex);
+      SIMIX_cond_wait(cond, h_simdata->mutex);
 
     if (SIMIX_host_get_state(h_simdata->smx_host) == 0) {
-      SIMIX_mutex_unlock(h->simdata->mutex);
+      SIMIX_mutex_unlock(h_simdata->mutex);
       MSG_mailbox_set_cond(mailbox, NULL);
       SIMIX_cond_destroy(cond);
       MSG_RETURN(MSG_HOST_FAILURE);
@@ -236,7 +238,7 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
     SIMIX_cond_destroy(cond);
   }
 
-  SIMIX_mutex_unlock(h->simdata->mutex);
+  SIMIX_mutex_unlock(h_simdata->mutex);
 
   t_simdata = t->simdata;
   t_simdata->receiver = process;
@@ -247,76 +249,51 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
   /* Transfer */
   /* create SIMIX action to the communication */
   t_simdata->comm =
-    SIMIX_action_communicate(t_simdata->sender->simdata->m_host->simdata->
-                             smx_host,
+    SIMIX_action_communicate(t_simdata->sender->simdata->m_host->
+                             simdata->smx_host,
                              process->simdata->m_host->simdata->smx_host,
                              t->name, t_simdata->message_size,
                              t_simdata->rate);
 
-  /* This is a hack. We know that both the receiver and the sender will
-     need to look at the content of t_simdata->comm. And it needs to be
-     destroyed. However, we don't known whether the receiver or the sender
-     will get to it first. So by setting with refcount to 2 we can enforce
-     that things happen correctly. An alternative would be to only do ++ and
-     -- on this refcount and to sprinkle them judiciously throughout the code,
-     which appears perhaps worse? Or perhaps the refcount field of
-     task->simdata can be used for this? At any rate, this will do for now */
-  t_simdata->comm->refcount = 2;
+  SIMIX_action_use(t_simdata->comm);
 
   /* if the process is suspend, create the action but stop its execution, it will be restart when the sender process resume */
   if (MSG_process_is_suspended(t_simdata->sender)) {
     DEBUG1("Process sender (%s) suspended", t_simdata->sender->name);
     SIMIX_action_set_priority(t_simdata->comm, 0);
   }
-
-  process->simdata->waiting_task = t;
   SIMIX_register_action_to_condition(t_simdata->comm, t_simdata->cond);
+  // breaking point if asynchronous
+  process->simdata->waiting_action = t_simdata->comm;
 
   while (1) {
     SIMIX_cond_wait(t_simdata->cond, t_simdata->mutex);
 
     if (SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING)
       break;
+    if (!SIMIX_host_get_state(h_simdata->smx_host))
+      break;
+    if (!SIMIX_host_get_state(process->simdata->m_host->simdata->smx_host))
+      break;
   }
 
   SIMIX_unregister_action_to_condition(t_simdata->comm, t_simdata->cond);
-  process->simdata->waiting_task = NULL;
-
-  /* If sender still around (it didn't free the comm yet), note that it's not waiting anymore */
-  if (t_simdata->comm->refcount == 2) {
-    t->simdata->sender->simdata->waiting_task = NULL;
-  }
+  process->simdata->waiting_action = NULL;
 
   /* for this process, don't need to change in get function */
   SIMIX_mutex_unlock(t_simdata->mutex);
 
-
   if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
-    if (t_simdata->comm->refcount == 1) {
-      SIMIX_action_destroy(t_simdata->comm);
+    if (SIMIX_action_destroy(t_simdata->comm))
       t_simdata->comm = NULL;
-    } else {
-      t_simdata->comm->refcount--;
-    }
-    t_simdata->refcount--;
     MSG_RETURN(MSG_OK);
   } else if (SIMIX_host_get_state(h_simdata->smx_host) == 0) {
-    if (t_simdata->comm->refcount == 1) {
-      SIMIX_action_destroy(t_simdata->comm);
+    if (SIMIX_action_destroy(t_simdata->comm))
       t_simdata->comm = NULL;
-    } else {
-      t_simdata->comm->refcount--;
-    }
-    t_simdata->refcount--;
     MSG_RETURN(MSG_HOST_FAILURE);
   } else {
-    if (t_simdata->comm->refcount == 1) {
-      SIMIX_action_destroy(t_simdata->comm);
+    if (SIMIX_action_destroy(t_simdata->comm))
       t_simdata->comm = NULL;
-    } else {
-      t_simdata->comm->refcount--;
-    }
-    t_simdata->refcount--;
     MSG_RETURN(MSG_TRANSFER_FAILURE);
   }
 }
@@ -360,7 +337,7 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
          t_simdata->message_size / 1000, local_host->name,
          remote_host->name, MSG_mailbox_get_alias(mailbox));
 
-  SIMIX_mutex_lock(remote_host->simdata->mutex);
+  SIMIX_mutex_lock(remote_host->simdata->mutex);        /* FIXME: lock the mailbox instead */
 
   /* put the task in the mailbox */
   xbt_fifo_push(mailbox->tasks, task);
@@ -374,7 +351,7 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
 
   SIMIX_mutex_lock(t_simdata->mutex);
 
-  process->simdata->waiting_task = task;
+  process->simdata->waiting_action = t_simdata->comm;   // for debugging and status displaying purpose
 
   if (timeout > 0) {
     xbt_ex_t e;
@@ -389,9 +366,14 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
         SIMIX_cond_wait_timeout(t_simdata->cond, t_simdata->mutex,
                                 timeout - time_elapsed);
 
-        if ((t_simdata->comm != NULL)
-            && (SIMIX_action_get_state(t_simdata->comm) !=
-                SURF_ACTION_RUNNING))
+        if (t_simdata->comm)
+          SIMIX_action_use(t_simdata->comm);
+        if (t_simdata->comm && (SIMIX_action_get_state(t_simdata->comm) !=
+                                SURF_ACTION_RUNNING))
+          break;
+        if (!SIMIX_host_get_state(local_host->simdata->smx_host))
+          break;
+        if (!SIMIX_host_get_state(remote_host->simdata->smx_host))
           break;
       }
     }
@@ -400,14 +382,15 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
         xbt_ex_free(e);
         /* verify if the timeout happened and the communication didn't started yet */
         if (t_simdata->comm == NULL) {
-          process->simdata->waiting_task = NULL;
+          DEBUG1("Action terminated %s (there was a timeout)", task->name);
+          process->simdata->waiting_action = NULL;
 
           /* remove the task from the mailbox */
           MSG_mailbox_remove(mailbox, task);
 
-          if (t_simdata->receiver && t_simdata->receiver->simdata) {    /* receiver still around */
-            t_simdata->receiver->simdata->waiting_task = NULL;
-          }
+/*           if (t_simdata->receiver && t_simdata->receiver->simdata) {    /\* receiver still around *\/ */
+/*             t_simdata->receiver->simdata->waiting_task = NULL; */
+/*           } */
 
           SIMIX_mutex_unlock(t_simdata->mutex);
           MSG_RETURN(MSG_TRANSFER_FAILURE);
@@ -417,46 +400,42 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
       }
     }
   } else {
-    while (1) {
+    while (1) {                 //FIXME: factorize with the code right above
       SIMIX_cond_wait(t_simdata->cond, t_simdata->mutex);
 
-      if (SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING)
+      if (t_simdata->comm)
+        SIMIX_action_use(t_simdata->comm);
+      if (t_simdata->comm
+          && SIMIX_action_get_state(t_simdata->comm) != SURF_ACTION_RUNNING)
+        break;
+      if (!SIMIX_host_get_state(local_host->simdata->smx_host))
+        break;
+      if (!SIMIX_host_get_state(remote_host->simdata->smx_host))
         break;
     }
   }
 
   DEBUG1("Action terminated %s", task->name);
-  process->simdata->waiting_task = NULL;
-
-  if (t_simdata->comm->refcount == 2) { //receiver didn't free it yet: he's still around
-    t_simdata->receiver->simdata->waiting_task = NULL;
-  }
+  process->simdata->waiting_action = NULL;
+/*   if (t_simdata->receiver && t_simdata->receiver->simdata) {    /\* receiver still around *\/ */
+/*     t_simdata->receiver->simdata->waiting_task = NULL; */
+/*   } */
 
-  SIMIX_mutex_unlock(task->simdata->mutex);
+  SIMIX_mutex_unlock(t_simdata->mutex);
 
-  if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
-    if (t_simdata->comm->refcount == 1) {
-      SIMIX_action_destroy(t_simdata->comm);
+  if (t_simdata->comm
+      && SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
+    if (SIMIX_action_destroy(t_simdata->comm))
       t_simdata->comm = NULL;
-    } else {
-      t_simdata->comm->refcount--;
-    }
+    t_simdata->refcount--;
     MSG_RETURN(MSG_OK);
   } else if (SIMIX_host_get_state(local_host->simdata->smx_host) == 0) {
-    if (t_simdata->comm->refcount == 1) {
-      SIMIX_action_destroy(t_simdata->comm);
+    if (t_simdata->comm && SIMIX_action_destroy(t_simdata->comm))
       t_simdata->comm = NULL;
-    } else {
-      t_simdata->comm->refcount--;
-    }
     MSG_RETURN(MSG_HOST_FAILURE);
   } else {
-    if (t_simdata->comm->refcount == 1) {
-      SIMIX_action_destroy(t_simdata->comm);
+    if (t_simdata->comm && SIMIX_action_destroy(t_simdata->comm))
       t_simdata->comm = NULL;
-    } else {
-      t_simdata->comm->refcount--;
-    }
     MSG_RETURN(MSG_TRANSFER_FAILURE);
   }
 }