Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix possible crashes and leaks with detached sends (dsend) during process cleanup
[simgrid.git] / src / simix / smx_process.c
index 0b150c8..f2f550c 100644 (file)
@@ -46,28 +46,38 @@ void SIMIX_process_cleanup(smx_process_t process)
     SIMIX_comm_cancel(action);
 
     if (action->comm.src_proc == process) {
-      XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d",
-          action, action->comm.detached, action->state);
+      XBT_DEBUG("Found an unfinished send comm %p (detached = %d), state %d, src = %p, dst = %p",
+          action, action->comm.detached, action->state, action->comm.src_proc, action->comm.dst_proc);
       action->comm.src_proc = NULL;
 
       if (action->comm.detached) {
-        /* the receiver was supposed to destroy the comm after completion,
-         * but the comm will actually never finish */
-        action->comm.refcount++;
+         if (action->comm.refcount == 0) {
+           /* I'm not supposed to destroy a detached comm from the sender side,
+            * unless there is no receiver matching the rdv */
+           action->comm.refcount++;
+           SIMIX_comm_destroy(action);
+         }
+      }
+      else {
+        SIMIX_comm_destroy(action);
       }
     }
     else if (action->comm.dst_proc == process){
-      XBT_DEBUG("Found an unfinished recv comm %p, state %d", action, action->state);
+      XBT_DEBUG("Found an unfinished recv comm %p, state %d, src = %p, dst = %p",
+          action, action->state, action->comm.src_proc, action->comm.dst_proc);
       action->comm.dst_proc = NULL;
+
+      if (action->comm.detached && action->comm.refcount == 1
+          && action->comm.src_proc != NULL) {
+        /* the comm will be freed right now, remove it from the sender */
+        xbt_fifo_remove(action->comm.src_proc->comms, action);
+      }
+      SIMIX_comm_destroy(action);
     }
     else {
-      XBT_DEBUG("Strange, I'm not in comm %p, state = %d, src = %p, dst = %p", action,
-          action->state, action->comm.src_proc, action->comm.dst_proc);
-      THROW_IMPOSSIBLE;
+      xbt_die("Communication action %p is in my list but I'm not the sender "
+          "or the receiver", action);
     }
-
-    /* FIXME uncommenting this instruction crashes complex simulations
-    SIMIX_comm_destroy(action); */
   }
 
   /*xbt_swag_remove(process, simix_global->process_to_run);*/
@@ -214,7 +224,8 @@ void SIMIX_process_create(smx_process_t *process,
  */
 void SIMIX_process_runall(void)
 {
-  SIMIX_context_runall(simix_global->process_to_run);
+  SIMIX_context_runall();
+
   xbt_dynar_t tmp = simix_global->process_that_ran;
   simix_global->process_that_ran = simix_global->process_to_run;
   simix_global->process_to_run = tmp;
@@ -316,10 +327,17 @@ void SIMIX_pre_process_suspend(smx_req_t req)
 
 void SIMIX_process_suspend(smx_process_t process, smx_process_t issuer)
 {
+  xbt_assert((process != NULL), "Invalid parameters");
+
+  if (process->suspended) {
+    XBT_DEBUG("Process '%s' is already suspended", process->name);
+    return;
+  }
+
   process->suspended = 1;
 
   /* If we are suspending another process, and it is waiting on an action,
-     suspend it's action. */
+     suspend its action. */
   if (process != issuer) {
 
     if (process->waiting_action) {
@@ -340,7 +358,8 @@ void SIMIX_process_suspend(smx_process_t process, smx_process_t issuer)
           break;
 
         default:
-          THROW_IMPOSSIBLE;
+          xbt_die("Internal error in SIMIX_process_suspend: unexpected action type %d",
+              process->waiting_action->type);
       }
     }
   }
@@ -350,6 +369,11 @@ void SIMIX_process_resume(smx_process_t process, smx_process_t issuer)
 {
   xbt_assert((process != NULL), "Invalid parameters");
 
+  if (!process->suspended) {
+    XBT_DEBUG("Process '%s' is not suspended", process->name);
+    return;
+  }
+
   process->suspended = 0;
 
   /* If we are resuming another process, resume the action it was waiting for
@@ -374,7 +398,8 @@ void SIMIX_process_resume(smx_process_t process, smx_process_t issuer)
           break;
 
         default:
-          THROW_IMPOSSIBLE;
+          xbt_die("Internal error in SIMIX_process_resume: unexpected action type %d",
+              process->waiting_action->type);
       }
     }
     else {
@@ -560,7 +585,7 @@ void SIMIX_process_yield(void)
   SIMIX_context_suspend(self->context);
 
   /* Ok, maestro returned control to us */
-  XBT_DEBUG("Maestro returned control to me: '%s'", self->name);
+  XBT_DEBUG("Control returned to me: '%s'", self->name);
 
   if (self->context->iwannadie){
     XBT_DEBUG("I wanna die!");