From e631fc4ebfccaf24508e14216a9138669a635c6a Mon Sep 17 00:00:00 2001 From: Samuel Lepetit Date: Wed, 27 Jun 2012 16:29:56 +0200 Subject: [PATCH 1/1] First bricks for auto_restart support in SIMIX/MSG. --- .../masterslave/deployment_masterslave.xml | 10 +-- include/msg/msg.h | 1 + include/simgrid/simix.h | 8 ++- src/gras/Virtu/sg_process.c | 2 +- src/msg/msg_private.h | 2 +- src/msg/msg_process.c | 18 +++++- src/simix/smx_context_base.c | 1 - src/simix/smx_deployment.c | 9 ++- src/simix/smx_host.c | 46 ++++++++++++- src/simix/smx_host_private.h | 12 ++++ src/simix/smx_process.c | 64 +++++++++++++++---- src/simix/smx_process_private.h | 31 ++++++--- src/simix/smx_smurf.c | 7 +- src/simix/smx_smurf_private.h | 7 ++ src/simix/smx_user.c | 21 +++++- src/xbt/xbt_sg_synchro.c | 2 +- 16 files changed, 197 insertions(+), 44 deletions(-) diff --git a/examples/msg/masterslave/deployment_masterslave.xml b/examples/msg/masterslave/deployment_masterslave.xml index 992d8f00cc..18b7e5d1cc 100644 --- a/examples/msg/masterslave/deployment_masterslave.xml +++ b/examples/msg/masterslave/deployment_masterslave.xml @@ -13,9 +13,9 @@ - - - - - + + + + + diff --git a/include/msg/msg.h b/include/msg/msg.h index 8194d19eb3..4d256962ab 100644 --- a/include/msg/msg.h +++ b/include/msg/msg.h @@ -156,6 +156,7 @@ XBT_PUBLIC(MSG_error_t) MSG_process_suspend(m_process_t process); XBT_PUBLIC(MSG_error_t) MSG_process_resume(m_process_t process); XBT_PUBLIC(int) MSG_process_is_suspended(m_process_t process); XBT_PUBLIC(void) MSG_process_on_exit(int_f_pvoid_t fun, void *data); +XBT_PUBLIC(void) MSG_process_auto_restart_set(m_process_t process, int auto_restart); /************************** Task handling ************************************/ XBT_PUBLIC(m_task_t) MSG_task_create(const char *name, diff --git a/include/simgrid/simix.h b/include/simgrid/simix.h index bdfa009e86..d61c83375c 100644 --- a/include/simgrid/simix.h +++ b/include/simgrid/simix.h @@ -96,7 +96,8 @@ typedef void 
(*smx_creation_func_t) ( /* process */ smx_process_t*, /* kill_time */ double, /* argc */ int, /* argv */ char**, - /* props */ xbt_dict_t); + /* props */ xbt_dict_t, + /* auto_restart */ int); /******************************* Networking ***********************************/ @@ -314,7 +315,8 @@ XBT_PUBLIC(void) simcall_process_create(smx_process_t *process, const char *hostname, double kill_time, int argc, char **argv, - xbt_dict_t properties); + xbt_dict_t properties, + int auto_restart); XBT_PUBLIC(void) simcall_process_kill(smx_process_t process); XBT_PUBLIC(void) simcall_process_killall(void); @@ -336,7 +338,7 @@ XBT_PUBLIC(int) simcall_process_is_suspended(smx_process_t process); XBT_PUBLIC(xbt_dict_t) simcall_process_get_properties(smx_process_t host); XBT_PUBLIC(void) simcall_process_set_kill_time(smx_process_t process, double kill_time); XBT_PUBLIC(void) simcall_process_on_exit(smx_process_t process, int_f_pvoid_t fun, void *data); - +XBT_PUBLIC(void) simcall_process_auto_restart_set(smx_process_t process, int auto_restart); /* Sleep control */ XBT_PUBLIC(e_smx_state_t) simcall_process_sleep(double duration); diff --git a/src/gras/Virtu/sg_process.c b/src/gras/Virtu/sg_process.c index b5244179d0..4dfee8349b 100644 --- a/src/gras/Virtu/sg_process.c +++ b/src/gras/Virtu/sg_process.c @@ -29,7 +29,7 @@ void gras_agent_spawn(const char *name, smx_process_t process; simcall_process_create(&process, name, code, NULL, - gras_os_myname(), -1.0, argc, argv, properties); + gras_os_myname(), -1.0, argc, argv, properties, 0); } /* ************************************************************************** diff --git a/src/msg/msg_private.h b/src/msg/msg_private.h index 7cf96e6e22..b9aaf07325 100644 --- a/src/msg/msg_private.h +++ b/src/msg/msg_private.h @@ -147,7 +147,7 @@ void MSG_process_cleanup_from_SIMIX(smx_process_t smx_proc); void MSG_process_create_from_SIMIX(smx_process_t *process, const char *name, xbt_main_func_t code, void *data, const char *hostname, double 
kill_time, int argc, - char **argv, xbt_dict_t properties); + char **argv, xbt_dict_t properties, int auto_restart); void MSG_comm_copy_data_from_SIMIX(smx_action_t comm, void* buff, size_t buff_size); void _MSG_action_init(void); diff --git a/src/msg/msg_process.c b/src/msg/msg_process.c index 5d3c7d42e1..8da8fac972 100644 --- a/src/msg/msg_process.c +++ b/src/msg/msg_process.c @@ -65,13 +65,16 @@ void MSG_process_cleanup_from_SIMIX(smx_process_t smx_proc) void MSG_process_create_from_SIMIX(smx_process_t* process, const char *name, xbt_main_func_t code, void *data, const char *hostname, double kill_time, int argc, char **argv, - xbt_dict_t properties) + xbt_dict_t properties, int auto_restart) { m_host_t host = MSG_get_host_by_name(hostname); m_process_t p = MSG_process_create_with_environment(name, code, data, host, argc, argv, properties); - MSG_process_set_kill_time(p,kill_time); + if (p) { + MSG_process_set_kill_time(p,kill_time); + MSG_process_auto_restart_set(p,auto_restart); + } *((m_process_t*) process) = p; } @@ -181,7 +184,7 @@ m_process_t MSG_process_create_with_environment(const char *name, /* Let's create the process: SIMIX may decide to start it right now, * even before returning the flow control to us */ simcall_process_create(&process, name, code, simdata, SIMIX_host_get_name(host->smx_host), -1, - argc, argv, properties); + argc, argv, properties,0); if (!process) { /* Undo everything we have just changed */ @@ -488,3 +491,12 @@ smx_context_t MSG_process_get_smx_ctx(m_process_t process) { void MSG_process_on_exit(int_f_pvoid_t fun, void *data) { simcall_process_on_exit(MSG_process_self(),fun,data); } +/** + * \ingroup m_process_management + * \brief Sets the "auto-restart" flag of the process. + * If the flag is set to 1, the process will be automatically restarted when + * its host comes back up. 
+ */ +XBT_PUBLIC(void) MSG_process_auto_restart_set(m_process_t process, int auto_restart) { + simcall_process_auto_restart_set(process,auto_restart); +} diff --git a/src/simix/smx_context_base.c b/src/simix/smx_context_base.c index 944dd7a1b5..56efcf1403 100644 --- a/src/simix/smx_context_base.c +++ b/src/simix/smx_context_base.c @@ -83,7 +83,6 @@ void smx_ctx_base_free(smx_context_t context) void smx_ctx_base_stop(smx_context_t context) { - SIMIX_process_on_exit_runall(context->data); if (context->cleanup_func) context->cleanup_func(context->data); context->iwannadie = 0; diff --git a/src/simix/smx_deployment.c b/src/simix/smx_deployment.c index c7e943cf2b..a25155acca 100644 --- a/src/simix/smx_deployment.c +++ b/src/simix/smx_deployment.c @@ -19,6 +19,8 @@ static char *parse_host = NULL; static double start_time = 0.0; static double kill_time = -1.0; +static int auto_restart = 0; + extern int surf_parse_lineno; static void parse_process_init(void) @@ -35,8 +37,8 @@ static void parse_process_init(void) parse_argc = 1; start_time = surf_parse_get_double(A_surfxml_process_start_time); kill_time = surf_parse_get_double(A_surfxml_process_kill_time); + auto_restart = A_surfxml_process_on_failure == A_surfxml_process_on_failure_DIE ? 
0 : 1;
 }
-
 static void parse_argument(void)
 {
   parse_argv = xbt_realloc(parse_argv, (parse_argc + 2) * sizeof(char *));
@@ -75,10 +77,11 @@ static void parse_process_finalize(void)
                                        kill_time,
                                        parse_argc,
                                        parse_argv,
-                                       current_property_set);
+                                       current_property_set,
+                                       auto_restart);
   else
     simcall_process_create(&process, parse_argv[0], parse_code, NULL, parse_host, kill_time, parse_argc, parse_argv,
-                           current_property_set);
+                           current_property_set,auto_restart);
 
   /* verify if process has been created (won't be the case if the host is currently dead, but that's fine) */
   if (!process) {
diff --git a/src/simix/smx_host.c b/src/simix/smx_host.c
index 6e6a93f64c..c54c9fbb1e 100644
--- a/src/simix/smx_host.c
+++ b/src/simix/smx_host.c
@@ -67,7 +67,7 @@ void SIMIX_host_destroy(void *h)
     SIMIX_display_process_status();
     THROWF(arg_error, 0, "%s", msg);
   }
-
+  xbt_dynar_free(&host->auto_restart_processes);
   xbt_swag_free(host->process_list);
 
   /* Clean host structure */
@@ -176,6 +176,80 @@ void* SIMIX_host_get_data(smx_host_t host)
   return host->data;
 }
 
+void _SIMIX_host_free_process_arg(void *);
+/**
+ * \brief Free function of the host's auto_restart_processes dynar.
+ *
+ * Releases one restart record: its duplicated name, each duplicated argv
+ * string, the argv array and the record itself. code, data, hostname and
+ * properties are only referenced by the record and are left untouched.
+ *
+ * \param data address of the dynar slot that holds the smx_process_arg_t
+ */
+void _SIMIX_host_free_process_arg(void *data) {
+  smx_process_arg_t arg = *(void**)data;
+  int i;
+  xbt_free(arg->name);
+  for (i = 0; i < arg->argc; i++) {
+    xbt_free(arg->argv[i]);
+  }
+  xbt_free(arg->argv);
+  xbt_free(arg);
+}
+/**
+ * \brief Records the creation arguments of a process so that it can be
+ *        created again later from them.
+ *
+ * The record is pushed on host->auto_restart_processes; that dynar is
+ * created on first use. name and the argv strings are duplicated; code, data,
+ * hostname and properties are stored as the pointers received.
+ *
+ * \param host host whose restart list receives the record
+ * \param name process name (duplicated)
+ * \param code main function of the process
+ * \param data user data pointer (stored as is)
+ * \param hostname host name (pointer stored as is, not duplicated)
+ * \param kill_time kill time stored in the record
+ * \param argc number of strings in argv
+ * \param argv argument strings (each one duplicated)
+ * \param properties process properties (stored as is)
+ * \param auto_restart auto-restart flag stored in the record
+ */
+void SIMIX_host_add_auto_restart_process(smx_host_t host,
+                                         const char *name,
+                                         xbt_main_func_t code,
+                                         void *data,
+                                         const char *hostname,
+                                         double kill_time,
+                                         int argc, char **argv,
+                                         xbt_dict_t properties,
+                                         int auto_restart) {
+  if (!host->auto_restart_processes) {
+    host->auto_restart_processes = xbt_dynar_new(sizeof(smx_process_arg_t),_SIMIX_host_free_process_arg);
+  }
+  smx_process_arg_t arg = xbt_new(s_smx_process_arg_t,1);
+
+  arg->name = xbt_strdup(name);
+  arg->code = code;
+  arg->data = data;
+  arg->hostname = hostname;
+  arg->kill_time = kill_time;
+  arg->argc = argc;
+  arg->argv = xbt_new(char*,argc + 1);
+
+  int i;
+  for (i = 0; i < argc; i++) {
+    arg->argv[i] = xbt_strdup(argv[i]);
+  }
+  /* argc + 1 slots were allocated: fill the last one so the copy is NULL-terminated */
+  arg->argv[argc] = NULL;
+
+  arg->properties = properties;
+  arg->auto_restart = auto_restart;
+
+  xbt_dynar_push_as(host->auto_restart_processes,smx_process_arg_t,arg);
+}
+
 void SIMIX_host_set_data(smx_host_t host, void *data)
 {
 
diff --git a/src/simix/smx_host_private.h b/src/simix/smx_host_private.h
index 66d587e5a5..ad195d8562 100644
--- a/src/simix/smx_host_private.h
+++ b/src/simix/smx_host_private.h
@@ -15,6 +15,7 @@ typedef struct s_smx_host {
   char *name;              /**< @brief host name if any */
   void *host;                   /* SURF modeling */
   xbt_swag_t process_list;
+  xbt_dynar_t auto_restart_processes;
   void *data;              /**< @brief user data */
 } s_smx_host_t;
 
@@ -22,6 +23,17 @@ smx_host_t SIMIX_host_create(const char *name, void *workstation, void *data);
 void SIMIX_host_destroy(void *host);
 void SIMIX_host_set_data(smx_host_t host, void *data);
 void* SIMIX_host_get_data(smx_host_t host);
+
+void SIMIX_host_add_auto_restart_process(smx_host_t host,
+                                         const char *name,
+                                         xbt_main_func_t code,
+                                         void *data,
+                                         const char *hostname,
+                                         double kill_time,
+                                         int argc, char **argv,
+                                         xbt_dict_t properties,
+                                         int auto_restart);
+
 xbt_dict_t SIMIX_host_get_properties(smx_host_t host);
 double SIMIX_host_get_speed(smx_host_t host);
 double SIMIX_host_get_available_speed(smx_host_t host);
diff --git a/src/simix/smx_process.c b/src/simix/smx_process.c
index 7c7dbb6457..b3e2a37163 100644
--- a/src/simix/smx_process.c
+++ b/src/simix/smx_process.c
@@ -144,6 +144,27 @@ void SIMIX_create_maestro_process()
   simix_global->maestro_process = maestro;
   return;
 }
+/**
+ * \brief Stops a process.
+ * Stops the process, execute all the registered on_exit functions
+ * and stops its context.
+ */ +void SIMIX_process_stop(smx_process_t arg) { + /* execute the on_exit functions */ + SIMIX_process_on_exit_runall(arg); + /* Add the process to the list of process to restart, only if + * the host is down + */ + if (arg->auto_restart && !SIMIX_host_get_state(arg->smx_host)) { + SIMIX_host_add_auto_restart_process(arg->smx_host,arg->name,arg->code, arg->data, + arg->smx_host->name, + arg->kill_time, + arg->argc,arg->argv,arg->properties, + arg->auto_restart); + } + /* stop the context */ + SIMIX_context_stop(arg->context); +} /** * \brief Same as SIMIX_process_create() but with only one argument (used by timers). @@ -162,7 +183,8 @@ smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args) { args->kill_time, args->argc, args->argv, - args->properties); + args->properties, + args->auto_restart); xbt_free(args); return process; } @@ -183,7 +205,8 @@ void SIMIX_process_create(smx_process_t *process, const char *hostname, double kill_time, int argc, char **argv, - xbt_dict_t properties) { + xbt_dict_t properties, + int auto_restart) { *process = NULL; smx_host_t host = SIMIX_host_get_by_name(hostname); @@ -206,6 +229,13 @@ void SIMIX_process_create(smx_process_t *process, (*process)->data = data; (*process)->comms = xbt_fifo_new(); (*process)->simcall.issuer = *process; + /* Process data for auto-restart */ + (*process)->auto_restart = auto_restart; + (*process)->code = code; + (*process)->argc = argc; + (*process)->argv = argv; + (*process)->kill_time = kill_time; + XBT_VERB("Create context %s", (*process)->name); (*process)->context = SIMIX_context_new(code, argc, argv, @@ -288,18 +318,18 @@ void SIMIX_process_kill(smx_process_t process) { SIMIX_comm_destroy(process->waiting_action); break; - case SIMIX_ACTION_SLEEP: - SIMIX_process_sleep_destroy(process->waiting_action); - break; + case SIMIX_ACTION_SLEEP: + SIMIX_process_sleep_destroy(process->waiting_action); + break; - case SIMIX_ACTION_SYNCHRO: - SIMIX_synchro_stop_waiting(process, 
&process->simcall);
+        SIMIX_synchro_destroy(process->waiting_action);
+        break;
 
-	case SIMIX_ACTION_IO:
-	  SIMIX_io_destroy(process->waiting_action);
-	  break;
+      case SIMIX_ACTION_IO:
+        SIMIX_io_destroy(process->waiting_action);
+        break;
     }
   }
   if(!xbt_dynar_member(simix_global->process_to_run, &(process)))
@@ -652,7 +682,7 @@ void SIMIX_process_yield(smx_process_t self)
 
   if (self->context->iwannadie){
     XBT_DEBUG("I wanna die!");
-    SIMIX_context_stop(self->context);
+    SIMIX_process_stop(self);
   }
 
   if(self->suspended) {
@@ -739,3 +769,11 @@ void SIMIX_process_on_exit(smx_process_t process, int_f_pvoid_t fun, void *data)
 
   xbt_dynar_push_as(process->on_exit,s_smx_process_exit_fun_t,exit_fun);
 }
+/**
+ * \brief Sets the auto-restart status of the process.
+ * Any non-zero value requests an automatic restart when the host comes back;
+ * it is normalized to 0/1 because the flag is stored in a 1-bit field.
+ */
+void SIMIX_process_auto_restart_set(smx_process_t process, int auto_restart) {
+  process->auto_restart = auto_restart != 0;
+}
diff --git a/src/simix/smx_process_private.h b/src/simix/smx_process_private.h
index c6ac97a90f..a87613c8e8 100644
--- a/src/simix/smx_process_private.h
+++ b/src/simix/smx_process_private.h
@@ -15,6 +15,19 @@ typedef struct s_smx_process_exit_fun {
   void *arg;
 } s_smx_process_exit_fun_t, *smx_process_exit_fun_t;
 
+typedef struct s_smx_process_arg {
+  char *name;
+  xbt_main_func_t code;
+  void *data;
+  const char *hostname;
+  int argc;
+  char **argv;
+  double kill_time;
+  xbt_dict_t properties;
+  unsigned auto_restart:1;
+} s_smx_process_arg_t, *smx_process_arg_t;
+
+
 /** @brief Process datatype */
 typedef struct s_smx_process {
   s_xbt_swag_hookup_t process_hookup;
@@ -30,6 +43,8 @@ typedef struct s_smx_process {
   unsigned doexception:1;
   unsigned blocked:1;
   unsigned suspended:1;
+  unsigned auto_restart:1;
+
   smx_host_t new_host;          /* if not null, the host on which the process
must migrate to */ smx_action_t waiting_action; /* the current blocking action if any */ xbt_fifo_t comms; /* the current non-blocking communication actions */ @@ -37,18 +52,14 @@ typedef struct s_smx_process { s_smx_simcall_t simcall; void *data; /* kept for compatibility, it should be replaced with moddata */ xbt_dynar_t on_exit; /* list of functions executed when the process dies */ -} s_smx_process_t; -typedef struct s_smx_process_arg { - const char *name; xbt_main_func_t code; - void *data; - char *hostname; int argc; char **argv; double kill_time; - xbt_dict_t properties; -} s_smx_process_arg_t, *smx_process_arg_t; + +} s_smx_process_t; + void SIMIX_process_create(smx_process_t *process, const char *name, @@ -57,12 +68,14 @@ void SIMIX_process_create(smx_process_t *process, const char *hostname, double kill_time, int argc, char **argv, - xbt_dict_t properties); + xbt_dict_t properties, + int auto_restart); void SIMIX_process_runall(void); void SIMIX_process_kill(smx_process_t process); void SIMIX_process_killall(smx_process_t issuer); smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args); void SIMIX_create_maestro_process(void); +void SIMIX_process_stop(smx_process_t arg); void SIMIX_process_cleanup(smx_process_t arg); void SIMIX_process_empty_trash(void); void SIMIX_process_yield(smx_process_t self); @@ -90,5 +103,7 @@ void SIMIX_post_process_sleep(smx_action_t action); void SIMIX_process_sleep_suspend(smx_action_t action); void SIMIX_process_sleep_resume(smx_action_t action); void SIMIX_process_sleep_destroy(smx_action_t action); +void SIMIX_process_auto_restart_set(smx_process_t process, int auto_restart); + #endif diff --git a/src/simix/smx_smurf.c b/src/simix/smx_smurf.c index 0e7ac6ce92..8ade58f3ef 100644 --- a/src/simix/smx_smurf.c +++ b/src/simix/smx_smurf.c @@ -312,7 +312,8 @@ void SIMIX_simcall_pre(smx_simcall_t simcall, int value) simcall->process_create.kill_time, simcall->process_create.argc, simcall->process_create.argv, - 
simcall->process_create.properties); + simcall->process_create.properties, + simcall->process_create.auto_restart); SIMIX_simcall_answer(simcall); break; @@ -363,6 +364,10 @@ void SIMIX_simcall_pre(smx_simcall_t simcall, int value) simcall->process_on_exit.data); SIMIX_simcall_answer(simcall); break; + case SIMCALL_PROCESS_AUTO_RESTART_SET: + SIMIX_process_auto_restart_set(simcall->process_auto_restart.process,simcall->process_auto_restart.auto_restart); + SIMIX_simcall_answer(simcall); + break; case SIMCALL_PROCESS_SET_DATA: SIMIX_process_set_data( simcall->process_set_data.process, diff --git a/src/simix/smx_smurf_private.h b/src/simix/smx_smurf_private.h index 6bc53914be..3fe6281247 100644 --- a/src/simix/smx_smurf_private.h +++ b/src/simix/smx_smurf_private.h @@ -46,6 +46,7 @@ SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_IS_SUSPENDED),\ SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_GET_PROPERTIES),\ SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_SLEEP),\ SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_ON_EXIT),\ +SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_AUTO_RESTART_SET),\ SIMCALL_ENUM_ELEMENT(SIMCALL_RDV_CREATE),\ SIMCALL_ENUM_ELEMENT(SIMCALL_RDV_DESTROY),\ SIMCALL_ENUM_ELEMENT(SIMCALL_RDV_GEY_BY_NAME),\ @@ -228,6 +229,7 @@ typedef struct s_smx_simcall { int argc; char **argv; xbt_dict_t properties; + int auto_restart; } process_create; struct { @@ -296,6 +298,11 @@ typedef struct s_smx_simcall { void *data; } process_on_exit; + struct { + smx_process_t process; + int auto_restart; + } process_auto_restart; + struct { const char *name; smx_rdv_t result; diff --git a/src/simix/smx_user.c b/src/simix/smx_user.c index 803a6e6580..3ea1f92f17 100644 --- a/src/simix/smx_user.c +++ b/src/simix/smx_user.c @@ -354,7 +354,8 @@ void simcall_process_create(smx_process_t *process, const char *name, const char *hostname, double kill_time, int argc, char **argv, - xbt_dict_t properties) + xbt_dict_t properties, + int auto_restart) { smx_simcall_t simcall = SIMIX_simcall_mine(); @@ -368,6 +369,7 @@ void 
simcall_process_create(smx_process_t *process, const char *name, simcall->process_create.argc = argc; simcall->process_create.argv = argv; simcall->process_create.properties = properties; + simcall->process_create.auto_restart = auto_restart; SIMIX_simcall_push(simcall->issuer); } @@ -523,7 +525,6 @@ void simcall_process_set_data(smx_process_t process, void *data) /** * \brief Set the kill time of a process. - * * \param process a process * \param kill_time a double */ @@ -609,7 +610,7 @@ xbt_dict_t simcall_process_get_properties(smx_process_t process) SIMIX_simcall_push(simcall->issuer); return simcall->process_get_properties.result; } -/** \ingroup m_process_management +/** * \brief Add an on_exit function * Add an on_exit function which will be executed when the process exits/is killed. */ @@ -623,6 +624,20 @@ XBT_PUBLIC(void) simcall_process_on_exit(smx_process_t process, int_f_pvoid_t fu SIMIX_simcall_push(simcall->issuer); } +/** + * \brief Sets the process to be auto-restarted or not by SIMIX when its host comes back up. + * Will restart the process when the host comes back up if auto_restart is set to 1. + */ + +XBT_PUBLIC(void) simcall_process_auto_restart_set(smx_process_t process, int auto_restart) { + smx_simcall_t simcall = SIMIX_simcall_mine(); + + simcall->call = SIMCALL_PROCESS_AUTO_RESTART_SET; + simcall->process_auto_restart.process = process; + simcall->process_auto_restart.auto_restart = auto_restart; + + SIMIX_simcall_push(simcall->issuer); +} /** \brief Creates a new sleep SIMIX action. 
* * This function creates a SURF action and allocates the data necessary diff --git a/src/xbt/xbt_sg_synchro.c b/src/xbt/xbt_sg_synchro.c index e9bbaeba23..7e3bcba31d 100644 --- a/src/xbt/xbt_sg_synchro.c +++ b/src/xbt/xbt_sg_synchro.c @@ -66,7 +66,7 @@ xbt_thread_t xbt_thread_create(const char *name, void_f_pvoid_t code, simcall_process_create(&res->s_process, name, xbt_thread_create_wrapper, res, SIMIX_host_self_get_name(), -1.0, 0, NULL, - /*props */ NULL); + /*props */ NULL,0); res->joinable = joinable; res->done = 0; res->cond = xbt_cond_init(); -- 2.20.1