X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/272ccad1b68b6d9c17069f3c934886925bb15b5d..9d7e857328377861f81e95fbc3138032b51b7da9:/src/simix/smx_host.c diff --git a/src/simix/smx_host.c b/src/simix/smx_host.c index 299864a5f9..f663ea260a 100644 --- a/src/simix/smx_host.c +++ b/src/simix/smx_host.c @@ -67,7 +67,7 @@ void SIMIX_host_destroy(void *h) SIMIX_display_process_status(); THROWF(arg_error, 0, "%s", msg); } - + xbt_dynar_free(&host->auto_restart_processes); xbt_swag_free(host->process_list); /* Clean host structure */ @@ -176,6 +176,50 @@ void* SIMIX_host_get_data(smx_host_t host) return host->data; } +void _SIMIX_host_free_process_arg(void *); +void _SIMIX_host_free_process_arg(void *data) { + smx_process_arg_t arg = *(void**)data; + int i; + xbt_free(arg->name); + for (i = 0; i < arg->argc; i++) { + xbt_free(arg->argv[i]); + } + xbt_free(arg->argv); + xbt_free(arg); +} +void SIMIX_host_add_auto_restart_process(smx_host_t host, + const char *name, + xbt_main_func_t code, + void *data, + const char *hostname, + double kill_time, + int argc, char **argv, + xbt_dict_t properties, + int auto_restart) { + if (!host->auto_restart_processes) { + host->auto_restart_processes = xbt_dynar_new(sizeof(smx_process_arg_t),_SIMIX_host_free_process_arg); + } + smx_process_arg_t arg = xbt_new(s_smx_process_arg_t,1); + + arg->name = xbt_strdup(name); + arg->code = code; + arg->data = data; + arg->hostname = hostname; + arg->kill_time = kill_time; + arg->argc = argc; + arg->argv = xbt_new(char*,argc + 1); + + int i; + for (i = 0; i < argc; i++) { + arg->argv[i] = xbt_strdup(argv[i]); + } + + arg->properties = properties; + arg->auto_restart = auto_restart; + + xbt_dynar_push_as(host->auto_restart_processes,smx_process_arg_t,arg); +} + void SIMIX_host_set_data(smx_host_t host, void *data) { @@ -348,13 +392,8 @@ void SIMIX_execution_finish(smx_action_t action) case SIMIX_FAILED: XBT_DEBUG("SIMIX_execution_finished: host '%s' failed", simcall->issuer->smx_host->name); - if (simcall->issuer->smx_host == action->execution.host) { - // add a reference to the action that will be destroyed when the killed process is cleaned up, and by the end of the current function - surf_action_ref(action->execution.surf_exec); - SIMIX_process_kill(simcall->issuer); - } else { - SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); - } + simcall->issuer->context->iwannadie = 1; + //SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); break; case SIMIX_CANCELED: @@ -366,7 +405,13 @@ void SIMIX_execution_finish(smx_action_t action) xbt_die("Internal error in SIMIX_execution_finish: unexpected action state %d", (int)action->state); } - simcall->issuer->waiting_action = NULL; + /* check if the host is down */ + if (surf_workstation_model->extension. + workstation.get_state(simcall->issuer->smx_host->host) != SURF_RESOURCE_ON) { + simcall->issuer->context->iwannadie = 1; + } + + simcall->issuer->waiting_action = NULL; simcall->host_execution_wait.result = action->state; SIMIX_simcall_answer(simcall); } @@ -377,14 +422,18 @@ void SIMIX_execution_finish(smx_action_t action) void SIMIX_post_host_execute(smx_action_t action) { - if (surf_workstation_model->extension.workstation.get_state(action->execution.host->host)==SURF_RESOURCE_OFF) { - /* if the host running the action failed, notice it so that the asking process can be killed if it runs on that host itself */ + if (action->type == SIMIX_ACTION_EXECUTE && /* FIMXE: handle resource failure + * for parallel tasks too */ + surf_workstation_model->extension.workstation.get_state(action->execution.host->host) == SURF_RESOURCE_OFF) { + /* If the host running the action failed, notice it so that the asking + * process can be killed if it runs on that host itself */ action->state = SIMIX_FAILED; } else if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED) { - /* If the host running the action didn't fail, then the action was canceled */ - action->state = SIMIX_CANCELED; + /* If the host running the action didn't fail, then the action was + * canceled */ + action->state = SIMIX_CANCELED; } else { - action->state = SIMIX_DONE; + action->state = SIMIX_DONE; } if (action->execution.surf_exec) {