Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
don't destroy detached comm from the sender side during process cleanup
[simgrid.git] / src / simix / smx_process.c
index 0f70809..f816973 100644 (file)
@@ -49,6 +49,8 @@ void SIMIX_process_cleanup(smx_process_t process)
   XBT_DEBUG("Cleanup process %s (%p), waiting action %p",
       process->name, process, process->waiting_action);
 
+  SIMIX_process_on_exit_runall(process);
+
   /* cancel non-blocking communications */
   smx_action_t action;
   while ((action = xbt_fifo_pop(process->comms))) {
@@ -62,20 +64,12 @@ void SIMIX_process_cleanup(smx_process_t process)
           action, action->comm.detached, (int)action->state, action->comm.src_proc, action->comm.dst_proc);
       action->comm.src_proc = NULL;
 
-      if (action->comm.detached) {
-         if (action->comm.refcount == 0) {
-           XBT_DEBUG("Increase the refcount before destroying it since it's detached");
-           /* I'm not supposed to destroy a detached comm from the sender side,
-            * unless there is no receiver matching the rdv */
-           action->comm.refcount++;
-           SIMIX_comm_destroy(action);
-         }
-         else {
-           XBT_DEBUG("Don't destroy it since its refcount is %d", action->comm.refcount);
-         }
-      } else {
+      /* I'm not supposed to destroy a detached comm from the sender side. */
+      if (!action->comm.detached)
         SIMIX_comm_destroy(action);
-      }
+      else
+        XBT_DEBUG("Don't destroy it since it's a detached comm");
+      
     }
     else if (action->comm.dst_proc == process){
       XBT_DEBUG("Found an unfinished recv comm %p, state %d, src = %p, dst = %p",
@@ -101,7 +95,7 @@ void SIMIX_process_cleanup(smx_process_t process)
   process->context->iwannadie = 0;
 }
 
-/** 
+/**
  * Garbage collection
  *
  * Should be called some time to time to free the memory allocated for processes
@@ -137,18 +131,12 @@ void SIMIX_create_maestro_process()
   /* Create maestro process and intilialize it */
   maestro = xbt_new0(s_smx_process_t, 1);
   maestro->pid = simix_process_maxpid++;
+  maestro->ppid = -1;
   maestro->name = (char *) "";
   maestro->running_ctx = xbt_new(xbt_running_ctx_t, 1);
   XBT_RUNNING_CTX_INITIALIZE(maestro->running_ctx);
   maestro->context = SIMIX_context_new(NULL, 0, NULL, NULL, maestro);
   maestro->simcall.issuer = maestro;
-
-  if (SIMIX_process_self()) {
-    maestro->ppid = SIMIX_process_get_PID(SIMIX_process_self());
-  } else {
-    maestro->ppid = -1;
-  }
-
   simix_global->maestro_process = maestro;
   return;
 }
@@ -185,17 +173,17 @@ void SIMIX_process_stop(smx_process_t arg) {
 smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args) {
 
   smx_process_t process;
-  simix_global->create_process_function(
-      &process,
-      args->name,
-      args->code,
-      args->data,
-      args->hostname,
-      args->kill_time,
-      args->argc,
-      args->argv,
-      args->properties,
-      args->auto_restart);
+  simix_global->create_process_function(&process,
+                                        args->name,
+                                        args->code,
+                                        args->data,
+                                        args->hostname,
+                                        args->kill_time,
+                                        args->argc,
+                                        args->argv,
+                                        args->properties,
+                                        args->auto_restart,
+                                        NULL);
   xbt_free(args);
   return process;
 }
@@ -211,9 +199,9 @@ void SIMIX_pre_process_create(smx_simcall_t simcall,
                           int argc, char **argv,
                           xbt_dict_t properties,
                           int auto_restart){
-  SIMIX_process_create_with_parent(process, name, code, data, hostname,
-                              kill_time, argc, argv, properties, auto_restart,
-                              simcall->issuer);
+  SIMIX_process_create(process, name, code, data, hostname,
+                       kill_time, argc, argv, properties, auto_restart,
+                       simcall->issuer);
 }
 /**
  * \brief Internal function to create a process.
@@ -232,21 +220,9 @@ void SIMIX_process_create(smx_process_t *process,
                           double kill_time,
                           int argc, char **argv,
                           xbt_dict_t properties,
-                          int auto_restart) {
-  SIMIX_process_create_with_parent(process, name, code, data, hostname,
-                                   kill_time, argc, argv, properties, auto_restart, NULL);
-}
-
-void SIMIX_process_create_with_parent(smx_process_t *process,
-                                 const char *name,
-                                 xbt_main_func_t code,
-                                 void *data,
-                                 const char *hostname,
-                                 double kill_time,
-                                 int argc, char **argv,
-                                 xbt_dict_t properties,
-                                 int auto_restart,
-                                 smx_process_t parent_process) {
+                          int auto_restart,
+                          smx_process_t parent_process)
+{
   *process = NULL;
   smx_host_t host = SIMIX_host_get_by_name(hostname);
 
@@ -271,7 +247,7 @@ void SIMIX_process_create_with_parent(smx_process_t *process,
     (*process)->data = data;
     (*process)->comms = xbt_fifo_new();
     (*process)->simcall.issuer = *process;
-    
+
      if (parent_process) {
        (*process)->ppid = SIMIX_process_get_PID(parent_process);
      } else {
@@ -309,10 +285,8 @@ void SIMIX_process_create_with_parent(smx_process_t *process,
     xbt_swag_insert(*process, simix_global->process_list);
     XBT_DEBUG("Inserting %s(%s) in the to_run list", (*process)->name, sg_host_name(host));
     xbt_dynar_push_as(simix_global->process_to_run, smx_process_t, *process);
-  }
 
-  if (kill_time > SIMIX_get_clock()) {
-    if (simix_global->kill_process_function) {
+    if (kill_time > SIMIX_get_clock() && simix_global->kill_process_function) {
       XBT_DEBUG("Process %s(%s) will be kill at time %f", (*process)->name,
           sg_host_name((*process)->smx_host), kill_time);
       SIMIX_timer_set(kill_time, simix_global->kill_process_function, *process);
@@ -380,6 +354,10 @@ void SIMIX_process_kill(smx_process_t process, smx_process_t issuer) {
       SIMIX_process_sleep_destroy(process->waiting_action);
       break;
 
+    case SIMIX_ACTION_JOIN:
+      SIMIX_process_sleep_destroy(process->waiting_action);
+      break;
+
     case SIMIX_ACTION_SYNCHRO:
       SIMIX_synchro_stop_waiting(process, &process->simcall);
       SIMIX_synchro_destroy(process->waiting_action);
@@ -536,7 +514,7 @@ void SIMIX_process_resume(smx_process_t process, smx_process_t issuer)
 
       switch (process->waiting_action->type) {
 
-        case SIMIX_ACTION_EXECUTE:          
+        case SIMIX_ACTION_EXECUTE:
         case SIMIX_ACTION_PARALLEL_EXECUTE:
           SIMIX_host_execution_resume(process->waiting_action);
           break;
@@ -599,7 +577,7 @@ int SIMIX_process_get_PPID(smx_process_t self){
 }
 
 void* SIMIX_pre_process_self_get_data(smx_simcall_t simcall, smx_process_t self){
-  return SIMIX_process_self_get_data(self);    
+  return SIMIX_process_self_get_data(self);
 }
 
 void* SIMIX_process_self_get_data(smx_process_t self)
@@ -691,6 +669,44 @@ xbt_dict_t SIMIX_process_get_properties(smx_process_t process)
   return process->properties;
 }
 
+void SIMIX_pre_process_join(smx_simcall_t simcall, smx_process_t process, double timeout)
+{
+  smx_action_t action = SIMIX_process_join(simcall->issuer, process, timeout);
+  xbt_fifo_push(action->simcalls, simcall);
+  simcall->issuer->waiting_action = action;
+}
+
+static int SIMIX_process_join_finish(smx_process_exit_status_t status, smx_action_t action){
+  if (action->sleep.surf_sleep) {
+    surf_action_cancel(action->sleep.surf_sleep);
+
+    smx_simcall_t simcall;
+    while ((simcall = xbt_fifo_shift(action->simcalls))) {
+      simcall_process_sleep__set__result(simcall, SIMIX_DONE);
+      simcall->issuer->waiting_action = NULL;
+      if (simcall->issuer->suspended) {
+        XBT_DEBUG("Wait! This process is suspended and can't wake up now.");
+        simcall->issuer->suspended = 0;
+        SIMIX_pre_process_suspend(simcall, simcall->issuer);
+      } else {
+        SIMIX_simcall_answer(simcall);
+      }
+    }
+    surf_action_unref(action->sleep.surf_sleep);
+    action->sleep.surf_sleep = NULL;
+  }
+  xbt_mallocator_release(simix_global->action_mallocator, action);
+  return 0;
+}
+
+smx_action_t SIMIX_process_join(smx_process_t issuer, smx_process_t process, double timeout)
+{
+  smx_action_t res = SIMIX_process_sleep(issuer, timeout);
+  res->type = SIMIX_ACTION_JOIN;
+  SIMIX_process_on_exit(process, (int_f_pvoid_pvoid_t)SIMIX_process_join_finish, res);
+  return res;
+}
+
 void SIMIX_pre_process_sleep(smx_simcall_t simcall, double duration)
 {
   if (MC_is_active()) {
@@ -736,7 +752,7 @@ void SIMIX_post_process_sleep(smx_action_t action)
 {
   smx_simcall_t simcall;
   e_smx_state_t state;
-  xbt_assert(action->type == SIMIX_ACTION_SLEEP);
+  xbt_assert(action->type == SIMIX_ACTION_SLEEP || action->type == SIMIX_ACTION_JOIN);
 
   while ((simcall = xbt_fifo_shift(action->simcalls))) {
 
@@ -760,7 +776,13 @@ void SIMIX_post_process_sleep(smx_action_t action)
     }
     simcall_process_sleep__set__result(simcall, state);
     simcall->issuer->waiting_action = NULL;
-    SIMIX_simcall_answer(simcall);
+    if (simcall->issuer->suspended) {
+      XBT_DEBUG("Wait! This process is suspended and can't wake up now.");
+      simcall->issuer->suspended = 0;
+      SIMIX_pre_process_suspend(simcall, simcall->issuer);
+    } else {
+      SIMIX_simcall_answer(simcall);
+    }
   }
 
   SIMIX_process_sleep_destroy(action);
@@ -769,11 +791,14 @@ void SIMIX_post_process_sleep(smx_action_t action)
 void SIMIX_process_sleep_destroy(smx_action_t action)
 {
   XBT_DEBUG("Destroy action %p", action);
-  xbt_assert(action->type == SIMIX_ACTION_SLEEP);
+  xbt_assert(action->type == SIMIX_ACTION_SLEEP || action->type == SIMIX_ACTION_JOIN);
 
-  if (action->sleep.surf_sleep)
+  if (action->sleep.surf_sleep) {
     surf_action_unref(action->sleep.surf_sleep);
-  xbt_mallocator_release(simix_global->action_mallocator, action);
+    action->sleep.surf_sleep = NULL;
+  }
+  if (action->type == SIMIX_ACTION_SLEEP)
+    xbt_mallocator_release(simix_global->action_mallocator, action);
 }
 
 void SIMIX_process_sleep_suspend(smx_action_t action)
@@ -784,11 +809,12 @@ void SIMIX_process_sleep_suspend(smx_action_t action)
 
 void SIMIX_process_sleep_resume(smx_action_t action)
 {
+  XBT_DEBUG("Action state is %d on process_sleep_resume.", action->state);
   xbt_assert(action->type == SIMIX_ACTION_SLEEP);
   surf_action_resume(action->sleep.surf_sleep);
 }
 
-/** 
+/**
  * \brief Calling this function makes the process to yield.
  *
  * Only the current process can call this function, giving back the control to
@@ -816,10 +842,11 @@ void SIMIX_process_yield(smx_process_t self)
     SIMIX_process_stop(self);
   }
 
-  if(self->suspended) {
+  if (self->suspended) {
+    XBT_DEBUG("Hey! I'm suspended.");
     xbt_assert(!self->doexception, "Gloups! This exception may be lost by subsequent calls.");
     self->suspended = 0;
-    SIMIX_process_suspend(self,self);
+    SIMIX_process_suspend(self, self);
   }
 
   if (self->doexception) {
@@ -886,19 +913,20 @@ xbt_dynar_t SIMIX_processes_as_dynar(void) {
 
 void SIMIX_process_on_exit_runall(smx_process_t process) {
   s_smx_process_exit_fun_t exit_fun;
-
+  smx_process_exit_status_t exit_status = (process->context->iwannadie) ?
+                                         SMX_EXIT_FAILURE : SMX_EXIT_SUCCESS;
   while (!xbt_dynar_is_empty(process->on_exit)) {
     exit_fun = xbt_dynar_pop_as(process->on_exit,s_smx_process_exit_fun_t);
-    (exit_fun.fun)(exit_fun.arg);
+    (exit_fun.fun)((void*)exit_status, exit_fun.arg);
   }
 }
 
 void SIMIX_pre_process_on_exit(smx_simcall_t simcall, smx_process_t process,
-                              int_f_pvoid_t fun, void *data) {
+                              int_f_pvoid_pvoid_t fun, void *data) {
   SIMIX_process_on_exit(process, fun, data);
 }
 
-void SIMIX_process_on_exit(smx_process_t process, int_f_pvoid_t fun, void *data) {
+void SIMIX_process_on_exit(smx_process_t process, int_f_pvoid_pvoid_t fun, void *data) {
   xbt_assert(process, "current process not found: are you in maestro context ?");
 
   if (!process->on_exit) {
@@ -912,7 +940,7 @@ void SIMIX_process_on_exit(smx_process_t process, int_f_pvoid_t fun, void *data)
 
 void SIMIX_pre_process_auto_restart_set(smx_simcall_t simcall, smx_process_t process,
                                        int auto_restart) {
-  SIMIX_process_auto_restart_set(process, auto_restart);       
+  SIMIX_process_auto_restart_set(process, auto_restart);
 }
 /**
  * \brief Sets the auto-restart status of the process.
@@ -924,7 +952,7 @@ void SIMIX_process_auto_restart_set(smx_process_t process, int auto_restart) {
 }
 
 smx_process_t SIMIX_pre_process_restart(smx_simcall_t simcall, smx_process_t process) {
-  return SIMIX_process_restart(process, simcall->issuer);      
+  return SIMIX_process_restart(process, simcall->issuer);
 }
 /**
  * \brief Restart a process.
@@ -962,19 +990,19 @@ smx_process_t SIMIX_process_restart(smx_process_t process, smx_process_t issuer)
                                           arg.argc,
                                           arg.argv,
                                           arg.properties,
-                                          arg.auto_restart);
-  }
-  else {
+                                          arg.auto_restart,
+                                          NULL);
+  else {
     simcall_process_create(&new_process,
-                                          arg.argv[0],
-                                          arg.code,
-                                          arg.data,
-                                          arg.hostname,
-                                          arg.kill_time,
-                                          arg.argc,
-                                          arg.argv,
-                                          arg.properties,
-                                          arg.auto_restart);
+                           arg.argv[0],
+                           arg.code,
+                           arg.data,
+                           arg.hostname,
+                           arg.kill_time,
+                           arg.argc,
+                           arg.argv,
+                           arg.properties,
+                           arg.auto_restart);
 
   }
   return new_process;