Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
cleanup the debug messages
[simgrid.git] / src / simix / smx_network.c
index c3265f6..e10c1cd 100644 (file)
@@ -26,7 +26,7 @@ static void SIMIX_rdv_free(void *data);
 
 void SIMIX_network_init(void)
 {
-  rdv_points = xbt_dict_new();
+  rdv_points = xbt_dict_new_homogeneous(SIMIX_rdv_free);
 }
 
 void SIMIX_network_exit(void)
@@ -49,7 +49,7 @@ smx_rdv_t SIMIX_rdv_create(const char *name)
     rdv->comm_fifo = xbt_fifo_new();
 
     if (rdv->name)
-      xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
+      xbt_dict_set(rdv_points, rdv->name, rdv, NULL);
   }
   return rdv;
 }
@@ -245,10 +245,11 @@ void SIMIX_comm_destroy(smx_action_t action)
   XBT_DEBUG("Destroy action %p (refcount: %d), state: %d",
       action, action->comm.refcount, action->state);
 
-  xbt_assert(action->comm.refcount > 0,
-      "The refcount of comm %p is already 0 before decreasing it. "
-      "That's a bug!", action);
-
+  if (action->comm.refcount <= 0) {
+       xbt_backtrace_display_current();
+    xbt_die("the refcount of comm %p is already 0 before decreasing it. "
+            "That's a bug!", action);
+  }
   action->comm.refcount--;
   if (action->comm.refcount > 0)
     return;
@@ -265,7 +266,8 @@ void SIMIX_comm_destroy(smx_action_t action)
   if (action->comm.detached && action->state != SIMIX_DONE) {
     /* the communication has failed and was detached:
      * we have to free the buffer */
-    ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
+    action->comm.clean_fun(action->comm.src_buff);
+    action->comm.src_buff = NULL;
   }
 
   xbt_mallocator_release(simix_global->action_mallocator, action);
@@ -295,7 +297,9 @@ void SIMIX_comm_destroy_internal_actions(smx_action_t action)
 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
                               double task_size, double rate,
                               void *src_buff, size_t src_buff_size,
-                              int (*match_fun)(void *, void *), void *data,
+                              int (*match_fun)(void *, void *),
+                              void (*clean_fun)(void *), // frees the source buffer (not the action) of a detached send, e.g. when the comm fails
+                              void *data,
                               int detached)
 {
   smx_action_t action;
@@ -318,6 +322,9 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
   if (detached) {
     action->comm.detached = 1;
     action->comm.refcount--;
+    action->comm.clean_fun = clean_fun;
+  } else {
+    action->comm.clean_fun = NULL;
   }
 
   /* Setup the communication request */
@@ -334,7 +341,7 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
   }
 
   SIMIX_comm_start(action);
-  return action;
+  return (detached ? NULL : action);
 }
 
 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
@@ -374,6 +381,7 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
 
 void SIMIX_pre_comm_wait(smx_req_t req, smx_action_t action, double timeout, int idx)
 {
+
   /* the request may be a wait, a send or a recv */
   surf_action_t sleep;
 
@@ -554,7 +562,7 @@ XBT_INLINE void SIMIX_comm_start(smx_action_t action)
  */
 void SIMIX_comm_finish(smx_action_t action)
 {
-  unsigned int destroy_count = 0;
+  volatile unsigned int destroy_count = 0;
   smx_req_t req;
 
   while ((req = xbt_fifo_shift(action->request_list))) {
@@ -627,11 +635,18 @@ void SIMIX_comm_finish(smx_action_t action)
 
       case SIMIX_LINK_FAILURE:
         TRY {
-         XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
+         XBT_INFO("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d",
              action,
              action->comm.src_proc ? action->comm.src_proc->smx_host->name : NULL,
              action->comm.dst_proc ? action->comm.dst_proc->smx_host->name : NULL,
-             req->issuer->name, req->issuer);
+             req->issuer->name, req->issuer,action->comm.detached);
+         if (action->comm.src_proc == req->issuer) {
+                 XBT_INFO("I'm source");
+         } else if (action->comm.dst_proc == req->issuer) {
+                 XBT_INFO("I'm dest");
+         } else {
+                 XBT_INFO("I'm neither source nor dest");
+         }
           THROWF(network_error, 0, "Link failure");
         }
        CATCH(req->issuer->running_ctx->exception) {
@@ -697,9 +712,10 @@ void SIMIX_post_comm(smx_action_t action)
           surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
      action->state = SIMIX_DST_HOST_FAILURE;
   else if (action->comm.surf_comm &&
-          surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
+          surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
+         XBT_INFO("Puta madre. Surf says that the link broke");
      action->state = SIMIX_LINK_FAILURE;
-  else
+  } else
     action->state = SIMIX_DONE;
 
   XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
@@ -858,7 +874,18 @@ void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
 
 void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
 {
+  XBT_DEBUG("Copy the data over");
+  memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
+}
+
+void smpi_comm_copy_data_callback(smx_action_t comm, size_t buff_size)
+{
+  XBT_DEBUG("Copy the data over");
   memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
+  if (comm->comm.detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
+         comm->comm.clean_fun(comm->comm.src_buff);
+         comm->comm.src_buff = NULL;
+  }
 }
 
 /**
@@ -885,10 +912,8 @@ void SIMIX_comm_copy_data(smx_action_t comm)
   if (comm->comm.dst_buff_size)
     *comm->comm.dst_buff_size = buff_size;
 
-  if (buff_size == 0)
-    return;
-
-  (*SIMIX_comm_copy_data_callback) (comm, buff_size);
+  if (buff_size > 0)
+    SIMIX_comm_copy_data_callback (comm, buff_size);
 
   /* Set the copied flag so we copy data only once */
   /* (this function might be called from both communication ends) */