AND — Algorithmique Numérique Distribuée

Public GIT Repository
Simix: clean unfinished comms when terminating a process
[simgrid.git] / src / simix / smx_network.c
index f2b2258..3c4440e 100644 (file)
@@ -63,8 +63,7 @@ void SIMIX_rdv_destroy(smx_rdv_t rdv)
 void SIMIX_rdv_free(void *data)
 {
   smx_rdv_t rdv = (smx_rdv_t) data;
-  if (rdv->name)
-    xbt_free(rdv->name);
+  xbt_free(rdv->name);
   xbt_fifo_free(rdv->comm_fifo);
   xbt_free(rdv);  
 }
@@ -258,10 +257,6 @@ void SIMIX_comm_destroy(smx_action_t action)
     action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
 #endif
 
-#ifdef HAVE_TRACING
-  TRACE_smx_action_destroy(action);
-#endif
-
   xbt_free(action->name);
   SIMIX_comm_destroy_internal_actions(action);
 
@@ -314,9 +309,10 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
     action->state = SIMIX_READY;
     action->comm.type = SIMIX_COMM_READY;
   }
+  xbt_fifo_push(src_proc->comms, action);
 
-  /* If the communication action is detached then decrease the refcount
-   * by one, so it will be eliminated by the receivers destroy call */
+  /* if the communication action is detached then decrease the refcount
+   * by one, so it will be eliminated by the receiver's destroy call */
   if (detached) {
     action->comm.detached = 1;
     action->comm.refcount--;
@@ -357,6 +353,7 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
     action->state = SIMIX_READY;
     action->comm.type = SIMIX_COMM_READY;
   }
+  xbt_fifo_push(dst_proc->comms, action);
 
   /* Setup communication request */
   action->comm.dst_proc = dst_proc;
@@ -517,6 +514,7 @@ XBT_INLINE void SIMIX_comm_start(smx_action_t action)
 {
   /* If both the sender and the receiver are already there, start the communication */
   if (action->state == SIMIX_READY) {
+
     smx_host_t sender = action->comm.src_proc->smx_host;
     smx_host_t receiver = action->comm.dst_proc->smx_host;
 
@@ -530,10 +528,6 @@ XBT_INLINE void SIMIX_comm_start(smx_action_t action)
 
     action->state = SIMIX_RUNNING;
 
-#ifdef HAVE_TRACING
-    TRACE_smx_action_communicate(action, action->comm.src_proc);
-#endif
-
     /* If a link is failed, detect it immediately */
     if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
       XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
@@ -585,7 +579,7 @@ void SIMIX_comm_finish(smx_action_t action)
 
       case SIMIX_SRC_TIMEOUT:
         TRY {
-          THROW0(timeout_error, 0, "Communication timeouted because of sender");
+          THROWF(timeout_error, 0, "Communication timeouted because of sender");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -594,7 +588,7 @@ void SIMIX_comm_finish(smx_action_t action)
 
       case SIMIX_DST_TIMEOUT:
         TRY {
-          THROW0(timeout_error, 0, "Communication timeouted because of receiver");
+          THROWF(timeout_error, 0, "Communication timeouted because of receiver");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -604,9 +598,9 @@ void SIMIX_comm_finish(smx_action_t action)
       case SIMIX_SRC_HOST_FAILURE:
         TRY {
           if (req->issuer == action->comm.src_proc)
-            THROW0(host_error, 0, "Host failed");
+            THROWF(host_error, 0, "Host failed");
           else
-            THROW0(network_error, 0, "Remote peer failed");
+            THROWF(network_error, 0, "Remote peer failed");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -616,9 +610,9 @@ void SIMIX_comm_finish(smx_action_t action)
       case SIMIX_DST_HOST_FAILURE:
         TRY {
           if (req->issuer == action->comm.dst_proc)
-            THROW0(host_error, 0, "Host failed");
+            THROWF(host_error, 0, "Host failed");
           else
-            THROW0(network_error, 0, "Remote peer failed");
+            THROWF(network_error, 0, "Remote peer failed");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -630,7 +624,7 @@ void SIMIX_comm_finish(smx_action_t action)
          XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
              action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
              req->issuer->name, req->issuer);
-          THROW0(network_error, 0, "Link failure");
+          THROWF(network_error, 0, "Link failure");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -681,13 +675,21 @@ void SIMIX_post_comm(smx_action_t action)
   else
     action->state = SIMIX_DONE;
 
-  XBT_DEBUG("SIMIX_post_comm: action state = %d", action->state);
+  XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
+      action, action->state, action->comm.src_proc, action->comm.dst_proc, action->comm.detached);
+
+  /* remove the action from pending communications of both processes (if they still exist) */
+  if (action->comm.src_proc) {
+    xbt_fifo_remove(action->comm.src_proc->comms, action);
+  }
+  if (action->comm.dst_proc) {
+    xbt_fifo_remove(action->comm.dst_proc->comms, action);
+  }
 
-  /* After this point the surf actions associated with the simix communicate
-     action are no longer needed, thus we delete them. */
+  /* destroy the surf actions associated with the Simix communication */
   SIMIX_comm_destroy_internal_actions(action);
 
-  /* If there are requests associated with the action, then answer them */
+  /* if there are requests associated with the action, then answer them */
   if (xbt_fifo_size(action->request_list))
     SIMIX_comm_finish(action);
 }
@@ -729,6 +731,10 @@ double SIMIX_comm_get_remains(smx_action_t action)
 {
   double remains;
 
+  if(!action){
+      return 0;
+  }
+
   switch (action->state) {
 
     case SIMIX_RUNNING:
@@ -789,6 +795,9 @@ smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
  */
 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
 {
+  if(!action){
+      return 0;
+  }
   if (action->comm.surf_comm){
       XBT_DEBUG("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
       action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
@@ -812,7 +821,7 @@ SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
 
 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
 {
-  xbt_assert1((buff_size == sizeof(void *)),
+  xbt_assert((buff_size == sizeof(void *)),
               "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
   *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
 }