AND (Algorithmique Numérique Distribuée)

Public GIT Repository
Fix issue when a host failed while executing a task (the glass is only half full...
[simgrid.git] / src / simix / smx_host.c
index d3be3ca..815d947 100644 (file)
@@ -90,7 +90,7 @@ xbt_dict_t SIMIX_host_get_dict(void)
   void **host = NULL;
 
   xbt_lib_foreach(host_lib, cursor, name, host){
-         if(host[SIMIX_HOST_LEVEL])
+    if(host[SIMIX_HOST_LEVEL])
             xbt_dict_set(host_dict,name,host[SIMIX_HOST_LEVEL], NULL);
   }
   return host_dict;
@@ -204,7 +204,7 @@ smx_action_t SIMIX_host_execute(const char *name, smx_host_t host,
   if (!MC_IS_ENABLED) {
     action->execution.surf_exec =
       surf_workstation_model->extension.workstation.execute(host->host,
-         computation_amount);
+    computation_amount);
     surf_workstation_model->action_data_set(action->execution.surf_exec, action);
     surf_workstation_model->set_priority(action->execution.surf_exec, priority);
   }
@@ -243,7 +243,7 @@ smx_action_t SIMIX_host_parallel_execute( const char *name,
     action->execution.surf_exec =
       surf_workstation_model->extension.workstation.
       execute_parallel_task(host_nb, workstation_list, computation_amount,
-                           communication_amount, amount, rate);
+                      communication_amount, rate);
 
     surf_workstation_model->action_data_set(action->execution.surf_exec, action);
   }
@@ -254,16 +254,19 @@ smx_action_t SIMIX_host_parallel_execute( const char *name,
 
 void SIMIX_host_execution_destroy(smx_action_t action)
 {
+  int destroyed=0;
   XBT_DEBUG("Destroy action %p", action);
 
-  xbt_free(action->name);
 
   if (action->execution.surf_exec) {
-    surf_workstation_model->action_unref(action->execution.surf_exec);
+    destroyed = surf_workstation_model->action_unref(action->execution.surf_exec);
     action->execution.surf_exec = NULL;
   }
 
-  xbt_mallocator_release(simix_global->action_mallocator, action);
+  if (destroyed) {
+    xbt_free(action->name);
+    xbt_mallocator_release(simix_global->action_mallocator, action);
+  }
 }
 
 void SIMIX_host_execution_cancel(smx_action_t action)
@@ -340,7 +343,7 @@ void SIMIX_execution_finish(smx_action_t action)
 
       case SIMIX_DONE:
         /* do nothing, action done */
-       XBT_DEBUG("SIMIX_execution_finished: execution successful");
+  XBT_DEBUG("SIMIX_execution_finished: execution successful");
         break;
 
       case SIMIX_FAILED:
@@ -368,14 +371,15 @@ void SIMIX_execution_finish(smx_action_t action)
 
 void SIMIX_post_host_execute(smx_action_t action)
 {
-  /* FIXME: check if the host running the action failed or not*/
-  /*if(surf_workstation_model->extension.workstation.get_state(action->host->host))*/
-
-  /* If the host running the action didn't fail, then the action was canceled */
-  if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED)
+  if (surf_workstation_model->extension.workstation.get_state(action->execution.host->host)==SURF_RESOURCE_OFF) {
+    /* if the host running the action failed, notice it so that the asking process can be killed if it runs on that host itself */
+    action->state = SIMIX_FAILED;
+  } else if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED) {
+    /* If the host running the action didn't fail, then the action was canceled */
      action->state = SIMIX_CANCELED;
-  else
+  } else {
      action->state = SIMIX_DONE;
+  }
 
   if (action->execution.surf_exec) {
     surf_workstation_model->action_unref(action->execution.surf_exec);