Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
avoid double-free issues when a host fails while a process is running on it
author: Martin Quinson <martin.quinson@loria.fr>
Tue, 5 Jun 2012 13:08:12 +0000 (15:08 +0200)
committer: Martin Quinson <martin.quinson@loria.fr>
Tue, 5 Jun 2012 13:08:12 +0000 (15:08 +0200)
src/include/surf/surf.h
src/simix/smx_host.c

index 23ab220..887d92e 100644 (file)
@@ -299,7 +299,7 @@ typedef struct surf_model {
 
   double (*action_get_start_time) (surf_action_t action);     /**< Return the start time of an action */
   double (*action_get_finish_time) (surf_action_t action);     /**< Return the finish time of an action */
-  int (*action_unref) (surf_action_t action);     /**< Specify that we don't use that action anymore */
+  int (*action_unref) (surf_action_t action);     /**< Specify that we don't use that action anymore. Returns true if the action was destroyed and false if someone still holds a reference to it. */
   void (*action_cancel) (surf_action_t action);     /**< Cancel a running action */
   void (*action_recycle) (surf_action_t action);     /**< Recycle an action */
   void (*action_data_set) (surf_action_t action, void *data);     /**< Set the user data of an action */
index 9fea490..8a708bf 100644 (file)
@@ -254,16 +254,19 @@ smx_action_t SIMIX_host_parallel_execute( const char *name,
 
 void SIMIX_host_execution_destroy(smx_action_t action)
 {
+  int destroyed=0;
   XBT_DEBUG("Destroy action %p", action);
 
-  xbt_free(action->name);
 
   if (action->execution.surf_exec) {
-    surf_workstation_model->action_unref(action->execution.surf_exec);
+    destroyed = surf_workstation_model->action_unref(action->execution.surf_exec);
     action->execution.surf_exec = NULL;
   }
 
-  xbt_mallocator_release(simix_global->action_mallocator, action);
+  if (destroyed) {
+    xbt_free(action->name);
+    xbt_mallocator_release(simix_global->action_mallocator, action);
+  }
 }
 
 void SIMIX_host_execution_cancel(smx_action_t action)
@@ -345,11 +348,13 @@ void SIMIX_execution_finish(smx_action_t action)
 
       case SIMIX_FAILED:
         XBT_DEBUG("SIMIX_execution_finished: host '%s' failed", simcall->issuer->smx_host->name);
-        if (simcall->issuer->smx_host == action->execution.host)
+        if (simcall->issuer->smx_host == action->execution.host) {
+          // take an extra reference on the action, since it will be destroyed both when the killed process is cleaned up and at the end of the current function
+          surf_action_ref(action->execution.surf_exec);
           SIMIX_process_kill(simcall->issuer);
-//          simcall->issuer->context->iwannadie = 1; // Bye bye, little process.
-        else
+        } else {
           SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed");
+        }
         break;
 
       case SIMIX_CANCELED: