SIMIX_display_process_status();
THROWF(arg_error, 0, "%s", msg);
}
-
+ xbt_dynar_free(&host->auto_restart_processes);
xbt_swag_free(host->process_list);
/* Clean host structure */
void **host = NULL;
xbt_lib_foreach(host_lib, cursor, name, host){
- if(host[SIMIX_HOST_LEVEL])
+ if(host[SIMIX_HOST_LEVEL])
xbt_dict_set(host_dict,name,host[SIMIX_HOST_LEVEL], NULL);
}
return host_dict;
return host->data;
}
+void _SIMIX_host_free_process_arg(void *);
+/*
+ * Element destructor for a dynar of smx_process_arg_t.
+ *
+ * `data` is the address of the stored element, so it is dereferenced once to
+ * get the process-argument record. Releases the record's name, each of its
+ * argc argument strings, the argument vector and finally the record itself.
+ * The other fields of the record are not touched here.
+ */
+void _SIMIX_host_free_process_arg(void *data) {
+  smx_process_arg_t process_arg = *(void**)data;
+  int idx = 0;
+
+  xbt_free(process_arg->name);
+
+  /* Release every duplicated argument string before the vector holding them. */
+  while (idx < process_arg->argc) {
+    xbt_free(process_arg->argv[idx]);
+    idx++;
+  }
+  xbt_free(process_arg->argv);
+
+  xbt_free(process_arg);
+}
+/*
+ * Record the description of a process in the auto-restart list of `host`.
+ *
+ * The list (host->auto_restart_processes) is created on first use, with
+ * _SIMIX_host_free_process_arg() registered as its element destructor.
+ *
+ * Ownership: `name` and every string of `argv` are duplicated, and the copied
+ * argument vector is NULL-terminated. `data`, `hostname` and `properties` are
+ * stored as given, without copy, so the caller has to keep them valid for as
+ * long as the recorded entry may be used.
+ *
+ * host         host whose auto-restart list receives the entry
+ * name         process name (copied)
+ * code         main function of the process
+ * data         user data pointer, stored as is
+ * hostname     host name, stored as is (not copied)
+ * kill_time    kill time stored in the entry
+ * argc, argv   argument count and vector (the argc strings are copied)
+ * properties   properties dictionary, stored as is
+ * auto_restart auto-restart flag stored in the entry
+ */
+void SIMIX_host_add_auto_restart_process(smx_host_t host,
+                                         const char *name,
+                                         xbt_main_func_t code,
+                                         void *data,
+                                         const char *hostname,
+                                         double kill_time,
+                                         int argc, char **argv,
+                                         xbt_dict_t properties,
+                                         int auto_restart) {
+  if (!host->auto_restart_processes) {
+    host->auto_restart_processes = xbt_dynar_new(sizeof(smx_process_arg_t),_SIMIX_host_free_process_arg);
+  }
+  smx_process_arg_t arg = xbt_new(s_smx_process_arg_t,1);
+
+  arg->name = xbt_strdup(name);
+  arg->code = code;
+  arg->data = data;
+  arg->hostname = hostname;
+  arg->kill_time = kill_time;
+  arg->argc = argc;
+  arg->argv = xbt_new(char*,argc + 1);
+
+  int i;
+  for (i = 0; i < argc; i++) {
+    arg->argv[i] = xbt_strdup(argv[i]);
+  }
+  /* xbt_new() does not zero the memory: terminate the vector explicitly so
+   * that the extra slot allocated above is never read uninitialised. */
+  arg->argv[argc] = NULL;
+
+  arg->properties = properties;
+  arg->auto_restart = auto_restart;
+
+  xbt_dynar_push_as(host->auto_restart_processes,smx_process_arg_t,arg);
+}
+
void SIMIX_host_set_data(smx_host_t host, void *data)
{
if (!MC_IS_ENABLED) {
action->execution.surf_exec =
surf_workstation_model->extension.workstation.execute(host->host,
- computation_amount);
+ computation_amount);
surf_workstation_model->action_data_set(action->execution.surf_exec, action);
surf_workstation_model->set_priority(action->execution.surf_exec, priority);
}
action->execution.surf_exec =
surf_workstation_model->extension.workstation.
execute_parallel_task(host_nb, workstation_list, computation_amount,
- communication_amount, amount, rate);
+ communication_amount, rate);
surf_workstation_model->action_data_set(action->execution.surf_exec, action);
}
void SIMIX_host_execution_destroy(smx_action_t action)
{
+ int destroyed=0; /* set from action_unref() below; stays 0 when the action has no surf execution */
XBT_DEBUG("Destroy action %p", action);
- xbt_free(action->name);
if (action->execution.surf_exec) {
- surf_workstation_model->action_unref(action->execution.surf_exec);
+ destroyed = surf_workstation_model->action_unref(action->execution.surf_exec); /* presumably non-zero when the surf action was really freed - TODO confirm */
action->execution.surf_exec = NULL;
}
- xbt_mallocator_release(simix_global->action_mallocator, action);
+ if (destroyed) { /* NOTE(review): if surf_exec was already NULL, the name and the action are not released by this function - confirm they are freed elsewhere */
+ xbt_free(action->name);
+ xbt_mallocator_release(simix_global->action_mallocator, action);
+ }
}
void SIMIX_host_execution_cancel(smx_action_t action)
void SIMIX_host_execution_resume(smx_action_t action)
{
if(action->execution.surf_exec)
- surf_workstation_model->suspend(action->execution.surf_exec);
+ surf_workstation_model->resume(action->execution.surf_exec); /* resumes the underlying surf execution; skipped when the action has none */
}
void SIMIX_execution_finish(smx_action_t action)
case SIMIX_DONE:
/* do nothing, action done */
- XBT_DEBUG("SIMIX_execution_finished: execution successful");
+ XBT_DEBUG("SIMIX_execution_finished: execution successful");
break;
case SIMIX_FAILED:
XBT_DEBUG("SIMIX_execution_finished: host '%s' failed", simcall->issuer->smx_host->name);
- SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
+ simcall->issuer->context->iwannadie = 1;
+ //SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
break;
case SIMIX_CANCELED:
xbt_die("Internal error in SIMIX_execution_finish: unexpected action state %d",
(int)action->state);
}
- simcall->issuer->waiting_action = NULL;
+ /* check if the host is down */
+ if (surf_workstation_model->extension.
+ workstation.get_state(simcall->issuer->smx_host->host) != SURF_RESOURCE_ON) {
+ simcall->issuer->context->iwannadie = 1;
+ }
+
+ simcall->issuer->waiting_action = NULL;
simcall->host_execution_wait.result = action->state;
SIMIX_simcall_answer(simcall);
}
void SIMIX_post_host_execute(smx_action_t action)
{
- /* FIXME: check if the host running the action failed or not*/
- /*if(surf_workstation_model->extension.workstation.get_state(action->host->host))*/
-
- /* If the host running the action didn't fail, then the action was canceled */
- if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED)
- action->state = SIMIX_CANCELED;
- else
- action->state = SIMIX_DONE;
+ if (action->type == SIMIX_ACTION_EXECUTE && /* FIXME: handle resource failure
+ * for parallel tasks too */
+ surf_workstation_model->extension.workstation.get_state(action->execution.host->host) == SURF_RESOURCE_OFF) {
+ /* If the host running the action failed, notice it so that the asking
+ * process can be killed if it runs on that host itself */
+ action->state = SIMIX_FAILED;
+ } else if (surf_workstation_model->action_state_get(action->execution.surf_exec) == SURF_ACTION_FAILED) {
+ /* If the host running the action didn't fail, then the action was
+ * canceled */
+ action->state = SIMIX_CANCELED;
+ } else {
+ action->state = SIMIX_DONE;
+ }
if (action->execution.surf_exec) {
surf_workstation_model->action_unref(action->execution.surf_exec);