Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Create a dict with all hosts with state SURF_RESOURCE_OFF during a SIMIX_process_stop.
[simgrid.git] / src / simix / smx_host.c
index c54c9fb..b771e85 100644 (file)
@@ -13,6 +13,7 @@
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_host, simix,
                                 "Logging specific to SIMIX (hosts)");
 
+xbt_dict_t watched_hosts_lib;
 
 static void SIMIX_execution_finish(smx_action_t action);
 
@@ -217,6 +218,11 @@ void SIMIX_host_add_auto_restart_process(smx_host_t host,
   arg->properties = properties;
   arg->auto_restart = auto_restart;
 
+  if( SIMIX_host_get_state(host) == SURF_RESOURCE_OFF
+      && !xbt_dict_get_or_null(watched_hosts_lib,host->name)){
+    xbt_dict_set(watched_hosts_lib,host->name,host,NULL);
+    XBT_DEBUG("Have push host %s to watched_hosts_lib because state == SURF_RESOURCE_OFF",host->name);
+  }
   xbt_dynar_push_as(host->auto_restart_processes,smx_process_arg_t,arg);
 }
 
@@ -422,14 +428,18 @@ void SIMIX_execution_finish(smx_action_t action)
 
 void SIMIX_post_host_execute(smx_action_t action)
 {
-  if (surf_workstation_model->extension.workstation.get_state(action->execution.host->host)==SURF_RESOURCE_OFF) {
-    /* if the host running the action failed, notice it so that the asking process can be killed if it runs on that host itself */
+  if (action->type == SIMIX_ACTION_EXECUTE && /* FIXME: handle resource failure
+                                               * for parallel tasks too */
+      surf_workstation_model->extension.workstation.get_state(action->execution.host->host) == SURF_RESOURCE_OFF) {
+    /* If the host running the action failed, notice it so that the asking
+     * process can be killed if it runs on that host itself */
     action->state = SIMIX_FAILED;
   } else if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED) {
-    /* If the host running the action didn't fail, then the action was canceled */
-     action->state = SIMIX_CANCELED;
+    /* If the host running the action didn't fail, then the action was
+     * canceled */
+    action->state = SIMIX_CANCELED;
   } else {
-     action->state = SIMIX_DONE;
+    action->state = SIMIX_DONE;
   }
 
   if (action->execution.surf_exec) {