Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
First bricks for auto_restart support in SIMIX/MSG.
author: Samuel Lepetit <samuel.lepetit@inria.fr>
Wed, 27 Jun 2012 14:29:56 +0000 (16:29 +0200)
committer: Samuel Lepetit <samuel.lepetit@inria.fr>
Wed, 27 Jun 2012 14:31:16 +0000 (16:31 +0200)
16 files changed:
examples/msg/masterslave/deployment_masterslave.xml
include/msg/msg.h
include/simgrid/simix.h
src/gras/Virtu/sg_process.c
src/msg/msg_private.h
src/msg/msg_process.c
src/simix/smx_context_base.c
src/simix/smx_deployment.c
src/simix/smx_host.c
src/simix/smx_host_private.h
src/simix/smx_process.c
src/simix/smx_process_private.h
src/simix/smx_smurf.c
src/simix/smx_smurf_private.h
src/simix/smx_user.c
src/xbt/xbt_sg_synchro.c

index 992d8f0..18b7e5d 100644 (file)
@@ -13,9 +13,9 @@
      <argument value="Tremblay"/> <!-- Me! I can work too! -->
   </process>
   <!-- The slave process (with no argument) -->
-  <process host="Tremblay" function="slave"/>
-  <process host="Jupiter" function="slave"/>
-  <process host="Fafard" function="slave"/>
-  <process host="Ginette" function="slave"/>
-  <process host="Bourassa" function="slave"/>
+  <process host="Tremblay" function="slave" on_failure="RESTART"/>
+  <process host="Jupiter" function="slave" on_failure="RESTART"/>
+  <process host="Fafard" function="slave" on_failure="RESTART"/>
+  <process host="Ginette" function="slave" on_failure="RESTART"/>
+  <process host="Bourassa" function="slave" on_failure="RESTART"/>
 </platform>
index 8194d19..4d25696 100644 (file)
@@ -156,6 +156,7 @@ XBT_PUBLIC(MSG_error_t) MSG_process_suspend(m_process_t process);
 XBT_PUBLIC(MSG_error_t) MSG_process_resume(m_process_t process);
 XBT_PUBLIC(int) MSG_process_is_suspended(m_process_t process);
 XBT_PUBLIC(void) MSG_process_on_exit(int_f_pvoid_t fun, void *data);
+XBT_PUBLIC(void) MSG_process_auto_restart_set(m_process_t process, int auto_restart);
 
 /************************** Task handling ************************************/
 XBT_PUBLIC(m_task_t) MSG_task_create(const char *name,
index bdfa009..d61c833 100644 (file)
@@ -96,7 +96,8 @@ typedef void (*smx_creation_func_t) ( /* process */ smx_process_t*,
                                       /* kill_time */ double,
                                       /* argc */ int,
                                       /* argv */ char**,
-                                      /* props */ xbt_dict_t);
+                                      /* props */ xbt_dict_t,
+                                      /* auto_restart */ int);
 
 
 /******************************* Networking ***********************************/
@@ -314,7 +315,8 @@ XBT_PUBLIC(void) simcall_process_create(smx_process_t *process,
                                           const char *hostname,
                                           double kill_time,
                                           int argc, char **argv,
-                                          xbt_dict_t properties);
+                                          xbt_dict_t properties,
+                                          int auto_restart);
 
 XBT_PUBLIC(void) simcall_process_kill(smx_process_t process);
 XBT_PUBLIC(void) simcall_process_killall(void);
@@ -336,7 +338,7 @@ XBT_PUBLIC(int) simcall_process_is_suspended(smx_process_t process);
 XBT_PUBLIC(xbt_dict_t) simcall_process_get_properties(smx_process_t host);
 XBT_PUBLIC(void) simcall_process_set_kill_time(smx_process_t process, double kill_time);
 XBT_PUBLIC(void) simcall_process_on_exit(smx_process_t process, int_f_pvoid_t fun, void *data);
-
+XBT_PUBLIC(void) simcall_process_auto_restart_set(smx_process_t process, int auto_restart);
 /* Sleep control */
 XBT_PUBLIC(e_smx_state_t) simcall_process_sleep(double duration);
 
index b524417..4dfee83 100644 (file)
@@ -29,7 +29,7 @@ void gras_agent_spawn(const char *name,
 
   smx_process_t process;
   simcall_process_create(&process, name, code, NULL,
-                           gras_os_myname(), -1.0, argc, argv, properties);
+                           gras_os_myname(), -1.0, argc, argv, properties, 0);
 }
 
 /* **************************************************************************
index 7cf96e6..b9aaf07 100644 (file)
@@ -147,7 +147,7 @@ void MSG_process_cleanup_from_SIMIX(smx_process_t smx_proc);
 void MSG_process_create_from_SIMIX(smx_process_t *process, const char *name,
                                    xbt_main_func_t code, void *data,
                                    const char *hostname, double kill_time,  int argc,
-                                   char **argv, xbt_dict_t properties);
+                                   char **argv, xbt_dict_t properties, int auto_restart);
 void MSG_comm_copy_data_from_SIMIX(smx_action_t comm, void* buff, size_t buff_size);
 
 void _MSG_action_init(void);
index 5d3c7d4..8da8fac 100644 (file)
@@ -65,13 +65,16 @@ void MSG_process_cleanup_from_SIMIX(smx_process_t smx_proc)
 void MSG_process_create_from_SIMIX(smx_process_t* process, const char *name,
                                     xbt_main_func_t code, void *data,
                                     const char *hostname, double kill_time, int argc, char **argv,
-                                    xbt_dict_t properties)
+                                    xbt_dict_t properties, int auto_restart)
 {
   m_host_t host = MSG_get_host_by_name(hostname);
   m_process_t p = MSG_process_create_with_environment(name, code, data,
                                                       host, argc, argv,
                                                       properties);
-  MSG_process_set_kill_time(p,kill_time);
+  if (p) {
+    MSG_process_set_kill_time(p,kill_time);
+    MSG_process_auto_restart_set(p,auto_restart);
+  }
   *((m_process_t*) process) = p;
 }
 
@@ -181,7 +184,7 @@ m_process_t MSG_process_create_with_environment(const char *name,
   /* Let's create the process: SIMIX may decide to start it right now,
    * even before returning the flow control to us */
   simcall_process_create(&process, name, code, simdata, SIMIX_host_get_name(host->smx_host), -1,
-                           argc, argv, properties);
+                           argc, argv, properties,0);
 
   if (!process) {
     /* Undo everything we have just changed */
@@ -488,3 +491,12 @@ smx_context_t MSG_process_get_smx_ctx(m_process_t process) {
 void MSG_process_on_exit(int_f_pvoid_t fun, void *data) {
   simcall_process_on_exit(MSG_process_self(),fun,data);
 }
+/**
+ * \ingroup m_process_management
+ * \brief Sets the "auto-restart" flag of the process.
+ * If the flag is set to 1, the process will be automatically restarted when
+ * its host comes back up.
+ */
+XBT_PUBLIC(void) MSG_process_auto_restart_set(m_process_t process, int auto_restart) {
+  simcall_process_auto_restart_set(process,auto_restart);
+}
index 944dd7a..56efcf1 100644 (file)
@@ -83,7 +83,6 @@ void smx_ctx_base_free(smx_context_t context)
 
 void smx_ctx_base_stop(smx_context_t context)
 {
-  SIMIX_process_on_exit_runall(context->data);
   if (context->cleanup_func)
     context->cleanup_func(context->data);
   context->iwannadie = 0;
index c7e943c..a25155a 100644 (file)
@@ -19,6 +19,8 @@ static char *parse_host = NULL;
 static double start_time = 0.0;
 static double kill_time = -1.0;
 
+static int auto_restart = 0;
+
 extern int surf_parse_lineno;
 
 static void parse_process_init(void)
@@ -35,8 +37,8 @@ static void parse_process_init(void)
   parse_argc = 1;
   start_time = surf_parse_get_double(A_surfxml_process_start_time);
   kill_time  = surf_parse_get_double(A_surfxml_process_kill_time);
+  auto_restart = A_surfxml_process_on_failure == A_surfxml_process_on_failure_DIE ? 0 : 1;
 }
-
 static void parse_argument(void)
 {
   parse_argv = xbt_realloc(parse_argv, (parse_argc + 2) * sizeof(char *));
@@ -75,10 +77,11 @@ static void parse_process_finalize(void)
                                             kill_time,
                                             parse_argc,
                                             parse_argv,
-                                            current_property_set);
+                                            current_property_set,
+                                            auto_restart);
     else
       simcall_process_create(&process, parse_argv[0], parse_code, NULL, parse_host, kill_time, parse_argc, parse_argv,
-                               current_property_set);
+                               current_property_set,auto_restart);
 
     /* verify if process has been created (won't be the case if the host is currently dead, but that's fine) */
     if (!process) {
index 6e6a93f..c54c9fb 100644 (file)
@@ -67,7 +67,7 @@ void SIMIX_host_destroy(void *h)
     SIMIX_display_process_status();
     THROWF(arg_error, 0, "%s", msg);
   }
-
+  xbt_dynar_free(&host->auto_restart_processes);
   xbt_swag_free(host->process_list);
 
   /* Clean host structure */
@@ -176,6 +176,50 @@ void* SIMIX_host_get_data(smx_host_t host)
 
   return host->data;
 }
+void _SIMIX_host_free_process_arg(void *);
+void _SIMIX_host_free_process_arg(void *data) {
+  smx_process_arg_t arg = *(void**)data;
+  int i;
+  xbt_free(arg->name);
+  for (i = 0; i < arg->argc; i++) {
+    xbt_free(arg->argv[i]);
+  }
+  xbt_free(arg->argv);
+  xbt_free(arg);
+}
+void SIMIX_host_add_auto_restart_process(smx_host_t host,
+                                         const char *name,
+                                         xbt_main_func_t code,
+                                         void *data,
+                                         const char *hostname,
+                                         double kill_time,
+                                         int argc, char **argv,
+                                         xbt_dict_t properties,
+                                         int auto_restart) {
+  if (!host->auto_restart_processes) {
+    host->auto_restart_processes = xbt_dynar_new(sizeof(smx_process_arg_t),_SIMIX_host_free_process_arg);
+  }
+  smx_process_arg_t arg = xbt_new(s_smx_process_arg_t,1);
+
+  arg->name = xbt_strdup(name);
+  arg->code = code;
+  arg->data = data;
+  arg->hostname = hostname;
+  arg->kill_time = kill_time;
+  arg->argc = argc;
+  arg->argv = xbt_new(char*,argc + 1);
+
+  int i;
+  for (i = 0; i < argc; i++) {
+    arg->argv[i] = xbt_strdup(argv[i]);
+  }
+
+  arg->properties = properties;
+  arg->auto_restart = auto_restart;
+
+  xbt_dynar_push_as(host->auto_restart_processes,smx_process_arg_t,arg);
+}
+
 
 void SIMIX_host_set_data(smx_host_t host, void *data)
 {
index 66d587e..ad195d8 100644 (file)
@@ -15,6 +15,7 @@ typedef struct s_smx_host {
   char *name;              /**< @brief host name if any */
   void *host;                   /* SURF modeling */
   xbt_swag_t process_list;
+  xbt_dynar_t auto_restart_processes;
   void *data;              /**< @brief user data */
 } s_smx_host_t;
 
@@ -22,6 +23,17 @@ smx_host_t SIMIX_host_create(const char *name, void *workstation, void *data);
 void SIMIX_host_destroy(void *host);
 void SIMIX_host_set_data(smx_host_t host, void *data);
 void* SIMIX_host_get_data(smx_host_t host);
+
+void SIMIX_host_add_auto_restart_process(smx_host_t host,
+                                         const char *name,
+                                         xbt_main_func_t code,
+                                         void *data,
+                                         const char *hostname,
+                                         double kill_time,
+                                         int argc, char **argv,
+                                         xbt_dict_t properties,
+                                         int auto_restart);
+
 xbt_dict_t SIMIX_host_get_properties(smx_host_t host);
 double SIMIX_host_get_speed(smx_host_t host);
 double SIMIX_host_get_available_speed(smx_host_t host);
index 7c7dbb6..b3e2a37 100644 (file)
@@ -144,6 +144,27 @@ void SIMIX_create_maestro_process()
   simix_global->maestro_process = maestro;
   return;
 }
+/**
+ * \brief Stops a process.
+ * Stops the process, execute all the registered on_exit functions
+ * and stops its context.
+ */
+void SIMIX_process_stop(smx_process_t arg) {
+  /* execute the on_exit functions */
+  SIMIX_process_on_exit_runall(arg);
+  /* Add the process to the list of process to restart, only if
+   * the host is down
+   */
+  if (arg->auto_restart && !SIMIX_host_get_state(arg->smx_host)) {
+    SIMIX_host_add_auto_restart_process(arg->smx_host,arg->name,arg->code, arg->data,
+                                        arg->smx_host->name,
+                                        arg->kill_time,
+                                        arg->argc,arg->argv,arg->properties,
+                                        arg->auto_restart);
+  }
+  /* stop the context */
+  SIMIX_context_stop(arg->context);
+}
 
 /**
  * \brief Same as SIMIX_process_create() but with only one argument (used by timers).
@@ -162,7 +183,8 @@ smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args) {
       args->kill_time,
       args->argc,
       args->argv,
-      args->properties);
+      args->properties,
+      args->auto_restart);
   xbt_free(args);
   return process;
 }
@@ -183,7 +205,8 @@ void SIMIX_process_create(smx_process_t *process,
                           const char *hostname,
                           double kill_time,
                           int argc, char **argv,
-                          xbt_dict_t properties) {
+                          xbt_dict_t properties,
+                          int auto_restart) {
 
   *process = NULL;
   smx_host_t host = SIMIX_host_get_by_name(hostname);
@@ -206,6 +229,13 @@ void SIMIX_process_create(smx_process_t *process,
     (*process)->data = data;
     (*process)->comms = xbt_fifo_new();
     (*process)->simcall.issuer = *process;
+    /* Process data for auto-restart */
+    (*process)->auto_restart = auto_restart;
+    (*process)->code = code;
+    (*process)->argc = argc;
+    (*process)->argv = argv;
+    (*process)->kill_time = kill_time;
+
 
     XBT_VERB("Create context %s", (*process)->name);
     (*process)->context = SIMIX_context_new(code, argc, argv,
@@ -288,18 +318,18 @@ void SIMIX_process_kill(smx_process_t process) {
         SIMIX_comm_destroy(process->waiting_action);
         break;
 
-      case SIMIX_ACTION_SLEEP:
-  SIMIX_process_sleep_destroy(process->waiting_action);
-  break;
+        case SIMIX_ACTION_SLEEP:
+          SIMIX_process_sleep_destroy(process->waiting_action);
+          break;
 
-      case SIMIX_ACTION_SYNCHRO:
-  SIMIX_synchro_stop_waiting(process, &process->simcall);
-  SIMIX_synchro_destroy(process->waiting_action);
-  break;
+        case SIMIX_ACTION_SYNCHRO:
+          SIMIX_synchro_stop_waiting(process, &process->simcall);
+          SIMIX_synchro_destroy(process->waiting_action);
+          break;
 
-      case SIMIX_ACTION_IO:
-        SIMIX_io_destroy(process->waiting_action);
-        break;
+        case SIMIX_ACTION_IO:
+          SIMIX_io_destroy(process->waiting_action);
+          break;
     }
   }
   if(!xbt_dynar_member(simix_global->process_to_run, &(process)))
@@ -652,7 +682,7 @@ void SIMIX_process_yield(smx_process_t self)
 
   if (self->context->iwannadie){
     XBT_DEBUG("I wanna die!");
-    SIMIX_context_stop(self->context);
+    SIMIX_process_stop(self);
   }
 
   if(self->suspended) {
@@ -739,3 +769,11 @@ void SIMIX_process_on_exit(smx_process_t process, int_f_pvoid_t fun, void *data)
 
   xbt_dynar_push_as(process->on_exit,s_smx_process_exit_fun_t,exit_fun);
 }
+/**
+ * \brief Sets the auto-restart status of the process.
+ * If set to 1, the process will be automatically restarted when its host
+ * comes back.
+ */
+void SIMIX_process_auto_restart_set(smx_process_t process, int auto_restart) {
+  process->auto_restart = auto_restart;
+}
index c6ac97a..a87613c 100644 (file)
@@ -15,6 +15,19 @@ typedef struct s_smx_process_exit_fun {
   void *arg;
 } s_smx_process_exit_fun_t, *smx_process_exit_fun_t;
 
+typedef struct s_smx_process_arg {
+  char *name;
+  xbt_main_func_t code;
+  void *data;
+  const char *hostname;
+  int argc;
+  char **argv;
+  double kill_time;
+  xbt_dict_t properties;
+  unsigned auto_restart:1;
+} s_smx_process_arg_t, *smx_process_arg_t;
+
+
 /** @brief Process datatype */
 typedef struct s_smx_process {
   s_xbt_swag_hookup_t process_hookup;
@@ -30,6 +43,8 @@ typedef struct s_smx_process {
   unsigned doexception:1;
   unsigned blocked:1;
   unsigned suspended:1;
+  unsigned auto_restart:1;
+
   smx_host_t new_host;          /* if not null, the host on which the process must migrate to */
   smx_action_t waiting_action;  /* the current blocking action if any */
   xbt_fifo_t comms;       /* the current non-blocking communication actions */
@@ -37,18 +52,14 @@ typedef struct s_smx_process {
   s_smx_simcall_t simcall;
   void *data;                   /* kept for compatibility, it should be replaced with moddata */
   xbt_dynar_t on_exit;     /* list of functions executed when the process dies */
-} s_smx_process_t;
 
-typedef struct s_smx_process_arg {
-  const char *name;
   xbt_main_func_t code;
-  void *data;
-  char *hostname;
   int argc;
   char **argv;
   double kill_time;
-  xbt_dict_t properties;
-} s_smx_process_arg_t, *smx_process_arg_t;
+
+} s_smx_process_t;
+
 
 void SIMIX_process_create(smx_process_t *process,
                           const char *name,
@@ -57,12 +68,14 @@ void SIMIX_process_create(smx_process_t *process,
                           const char *hostname,
                           double kill_time,
                           int argc, char **argv,
-                          xbt_dict_t properties);
+                          xbt_dict_t properties,
+                          int auto_restart);
 void SIMIX_process_runall(void);
 void SIMIX_process_kill(smx_process_t process);
 void SIMIX_process_killall(smx_process_t issuer);
 smx_process_t SIMIX_process_create_from_wrapper(smx_process_arg_t args);
 void SIMIX_create_maestro_process(void);
+void SIMIX_process_stop(smx_process_t arg);
 void SIMIX_process_cleanup(smx_process_t arg);
 void SIMIX_process_empty_trash(void);
 void SIMIX_process_yield(smx_process_t self);
@@ -90,5 +103,7 @@ void SIMIX_post_process_sleep(smx_action_t action);
 void SIMIX_process_sleep_suspend(smx_action_t action);
 void SIMIX_process_sleep_resume(smx_action_t action);
 void SIMIX_process_sleep_destroy(smx_action_t action);
+void SIMIX_process_auto_restart_set(smx_process_t process, int auto_restart);
+
 
 #endif
index 0e7ac6c..8ade58f 100644 (file)
@@ -312,7 +312,8 @@ void SIMIX_simcall_pre(smx_simcall_t simcall, int value)
     simcall->process_create.kill_time,
     simcall->process_create.argc,
     simcall->process_create.argv,
-    simcall->process_create.properties);
+    simcall->process_create.properties,
+    simcall->process_create.auto_restart);
       SIMIX_simcall_answer(simcall);
       break;
 
@@ -363,6 +364,10 @@ void SIMIX_simcall_pre(smx_simcall_t simcall, int value)
                             simcall->process_on_exit.data);
       SIMIX_simcall_answer(simcall);
     break;
+    case SIMCALL_PROCESS_AUTO_RESTART_SET:
+      SIMIX_process_auto_restart_set(simcall->process_auto_restart.process,simcall->process_auto_restart.auto_restart);
+      SIMIX_simcall_answer(simcall);
+    break;
     case SIMCALL_PROCESS_SET_DATA:
       SIMIX_process_set_data(
     simcall->process_set_data.process,
index 6bc5391..3fe6281 100644 (file)
@@ -46,6 +46,7 @@ SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_IS_SUSPENDED),\
 SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_GET_PROPERTIES),\
 SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_SLEEP),\
 SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_ON_EXIT),\
+SIMCALL_ENUM_ELEMENT(SIMCALL_PROCESS_AUTO_RESTART_SET),\
 SIMCALL_ENUM_ELEMENT(SIMCALL_RDV_CREATE),\
 SIMCALL_ENUM_ELEMENT(SIMCALL_RDV_DESTROY),\
 SIMCALL_ENUM_ELEMENT(SIMCALL_RDV_GEY_BY_NAME),\
@@ -228,6 +229,7 @@ typedef struct s_smx_simcall {
       int argc;
       char **argv;
       xbt_dict_t properties;
+      int auto_restart;
     } process_create;
 
     struct {
@@ -296,6 +298,11 @@ typedef struct s_smx_simcall {
       void *data;
     } process_on_exit;
 
+    struct {
+      smx_process_t process;
+      int auto_restart;
+    } process_auto_restart;
+
     struct {
       const char *name;
       smx_rdv_t result;
index 803a6e6..3ea1f92 100644 (file)
@@ -354,7 +354,8 @@ void simcall_process_create(smx_process_t *process, const char *name,
                               const char *hostname,
                               double kill_time,
                               int argc, char **argv,
-                              xbt_dict_t properties)
+                              xbt_dict_t properties,
+                              int auto_restart)
 {
   smx_simcall_t simcall = SIMIX_simcall_mine();
 
@@ -368,6 +369,7 @@ void simcall_process_create(smx_process_t *process, const char *name,
   simcall->process_create.argc = argc;
   simcall->process_create.argv = argv;
   simcall->process_create.properties = properties;
+  simcall->process_create.auto_restart = auto_restart;
   SIMIX_simcall_push(simcall->issuer);
 }
 
@@ -523,7 +525,6 @@ void simcall_process_set_data(smx_process_t process, void *data)
 
 /**
  * \brief Set the kill time of a process.
- *
  * \param process a process
  * \param kill_time a double
  */
@@ -609,7 +610,7 @@ xbt_dict_t simcall_process_get_properties(smx_process_t process)
   SIMIX_simcall_push(simcall->issuer);
   return simcall->process_get_properties.result;
 }
-/** \ingroup m_process_management
+/**
  * \brief Add an on_exit function
  * Add an on_exit function which will be executed when the process exits/is killed.
  */
@@ -623,6 +624,20 @@ XBT_PUBLIC(void) simcall_process_on_exit(smx_process_t process, int_f_pvoid_t fu
 
   SIMIX_simcall_push(simcall->issuer);
 }
+/**
+ * \brief Sets the process to be auto-restarted or not by SIMIX when its host comes back up.
+ * Will restart the process when the host comes back up if auto_restart is set to 1.
+ */
+
+XBT_PUBLIC(void) simcall_process_auto_restart_set(smx_process_t process, int auto_restart) {
+  smx_simcall_t simcall = SIMIX_simcall_mine();
+
+  simcall->call = SIMCALL_PROCESS_AUTO_RESTART_SET;
+  simcall->process_auto_restart.process = process;
+  simcall->process_auto_restart.auto_restart = auto_restart;
+
+  SIMIX_simcall_push(simcall->issuer);
+}
 /** \brief Creates a new sleep SIMIX action.
  *
  * This function creates a SURF action and allocates the data necessary
index e9bbaeb..7e3bcba 100644 (file)
@@ -66,7 +66,7 @@ xbt_thread_t xbt_thread_create(const char *name, void_f_pvoid_t code,
   simcall_process_create(&res->s_process, name,
                            xbt_thread_create_wrapper, res,
                            SIMIX_host_self_get_name(), -1.0, 0, NULL,
-                           /*props */ NULL);
+                           /*props */ NULL,0);
   res->joinable = joinable;
   res->done = 0;
   res->cond = xbt_cond_init();