Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Change the prototype of copy data callbacks to add the source buffer
[simgrid.git] / src / simix / smx_network.c
index 6b4922f..0ee16e8 100644 (file)
@@ -18,16 +18,14 @@ unsigned long int smx_total_comms = 0;
 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
 static void SIMIX_comm_copy_data(smx_action_t comm);
 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
-static void SIMIX_comm_remove_from_processes(smx_action_t action);
 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
-static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
                                          int (*match_fun)(void *, void *), void *);
 static void SIMIX_rdv_free(void *data);
 
 void SIMIX_network_init(void)
 {
-  rdv_points = xbt_dict_new();
+  rdv_points = xbt_dict_new_homogeneous(SIMIX_rdv_free);
 }
 
 void SIMIX_network_exit(void)
@@ -50,7 +48,7 @@ smx_rdv_t SIMIX_rdv_create(const char *name)
     rdv->comm_fifo = xbt_fifo_new();
 
     if (rdv->name)
-      xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
+      xbt_dict_set(rdv_points, rdv->name, rdv, NULL);
   }
   return rdv;
 }
@@ -109,7 +107,7 @@ static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm)
  *  \param rdv The rendez-vous point
  *  \param comm The communication request
  */
-static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
+XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
 {
   xbt_fifo_remove(rdv->comm_fifo, comm);
   comm->comm.rdv = NULL;
@@ -135,7 +133,7 @@ smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
   xbt_fifo_item_t item;
   void* req_data = NULL;
 
-  xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
+  xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t) {
     if (action->comm.type == SIMIX_COMM_SEND) {
       req_data = action->comm.src_data;
     } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
@@ -188,7 +186,7 @@ int SIMIX_comm_has_recv_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), voi
   smx_action_t action;
   xbt_fifo_item_t item;
 
-  xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
+  xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t) {
     if (action->comm.type == SIMIX_COMM_RECEIVE
         && (!match_fun || match_fun(data, action->comm.dst_data))) {
       XBT_DEBUG("Found a matching communication action %p", action);
@@ -214,6 +212,7 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
 
   /* alloc structures */
   act = xbt_mallocator_get(simix_global->action_mallocator);
+
   act->type = SIMIX_ACTION_COMMUNICATE;
   act->state = SIMIX_WAITING;
 
@@ -242,12 +241,14 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
  */
 void SIMIX_comm_destroy(smx_action_t action)
 {
-  XBT_DEBUG("Destroy action %p (refcount:%d)", action, action->comm.refcount);
+  XBT_DEBUG("Destroy action %p (refcount: %d), state: %d",
+      action, action->comm.refcount, action->state);
 
-  if (action->comm.refcount <= 0)
+  if (action->comm.refcount <= 0) {
+       xbt_backtrace_display_current();
     xbt_die("the refcount of comm %p is already 0 before decreasing it. "
             "That's a bug!", action);
-
+  }
   action->comm.refcount--;
   if (action->comm.refcount > 0)
     return;
@@ -264,7 +265,8 @@ void SIMIX_comm_destroy(smx_action_t action)
   if (action->comm.detached && action->state != SIMIX_DONE) {
     /* the communication has failed and was detached:
      * we have to free the buffer */
-    ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
+    action->comm.clean_fun(action->comm.src_buff);
+    action->comm.src_buff = NULL;
   }
 
   xbt_mallocator_release(simix_global->action_mallocator, action);
@@ -294,7 +296,9 @@ void SIMIX_comm_destroy_internal_actions(smx_action_t action)
 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
                               double task_size, double rate,
                               void *src_buff, size_t src_buff_size,
-                              int (*match_fun)(void *, void *), void *data,
+                              int (*match_fun)(void *, void *),
+                              void (*clean_fun)(void *), // used to free the source buffer if a detached send fails
+                              void *data,
                               int detached)
 {
   smx_action_t action;
@@ -317,6 +321,9 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
   if (detached) {
     action->comm.detached = 1;
     action->comm.refcount--;
+    action->comm.clean_fun = clean_fun;
+  } else {
+    action->comm.clean_fun = NULL;
   }
 
   /* Setup the communication request */
@@ -333,7 +340,7 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
   }
 
   SIMIX_comm_start(action);
-  return action;
+  return (detached ? NULL : action);
 }
 
 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
@@ -373,6 +380,7 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
 
 void SIMIX_pre_comm_wait(smx_req_t req, smx_action_t action, double timeout, int idx)
 {
+
   /* the request may be a wait, a send or a recv */
   surf_action_t sleep;
 
@@ -486,8 +494,10 @@ void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
   }
 
   xbt_dynar_foreach(actions, cursor, action){
-    /* Associate this request to the action */
+    /* associate this request with the action */
     xbt_fifo_push(action->request_list, req);
+
+    /* see if the action is already finished */
     if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
       SIMIX_comm_finish(action);
       break;
@@ -547,13 +557,15 @@ XBT_INLINE void SIMIX_comm_start(smx_action_t action)
   }
 }
 
+/**
+ * \brief Answers the SIMIX requests associated to a communication action.
+ * \param action a finished communication action
+ */
 void SIMIX_comm_finish(smx_action_t action)
 {
-  unsigned int destroy_count = 0;
+  volatile unsigned int destroy_count = 0;
   smx_req_t req;
 
-  SIMIX_comm_remove_from_processes(action);
-
   while ((req = xbt_fifo_shift(action->request_list))) {
 
     /* If a waitany request is waiting for this action to finish, then remove
@@ -624,11 +636,18 @@ void SIMIX_comm_finish(smx_action_t action)
 
       case SIMIX_LINK_FAILURE:
         TRY {
-         XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
-             action,
-             action->comm.src_proc ? action->comm.src_proc->smx_host->name : NULL,
-             action->comm.dst_proc ? action->comm.dst_proc->smx_host->name : NULL,
-             req->issuer->name, req->issuer);
+          XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p) detached:%d",
+              action,
+              action->comm.src_proc ? action->comm.src_proc->smx_host->name : NULL,
+              action->comm.dst_proc ? action->comm.dst_proc->smx_host->name : NULL,
+              req->issuer->name, req->issuer, action->comm.detached);
+          if (action->comm.src_proc == req->issuer) {
+            XBT_DEBUG("I'm source");
+          } else if (action->comm.dst_proc == req->issuer) {
+            XBT_DEBUG("I'm dest");
+          } else {
+            XBT_DEBUG("I'm neither source nor dest");
+          }
           THROWF(network_error, 0, "Link failure");
         }
        CATCH(req->issuer->running_ctx->exception) {
@@ -636,8 +655,22 @@ void SIMIX_comm_finish(smx_action_t action)
         }
         break;
 
+      case SIMIX_CANCELED:
+        TRY {
+          if (req->issuer == action->comm.dst_proc) {
+            THROWF(cancel_error, 0, "Communication canceled by the sender");
+          }
+          else {
+            THROWF(cancel_error, 0, "Communication canceled by the receiver");
+          }
+        }
+        CATCH(req->issuer->running_ctx->exception) {
+          req->issuer->doexception = 1;
+        }
+        break;
+
       default:
-        THROW_IMPOSSIBLE;
+        xbt_die("Unexpected action state in SIMIX_comm_finish: %d", action->state);
     }
 
     /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
@@ -651,6 +684,7 @@ void SIMIX_comm_finish(smx_action_t action)
     }
 
     req->issuer->waiting_action = NULL;
+    xbt_fifo_remove(req->issuer->comms, action);
     SIMIX_request_answer(req);
     destroy_count++;
   }
@@ -659,6 +693,10 @@ void SIMIX_comm_finish(smx_action_t action)
     SIMIX_comm_destroy(action);
 }
 
+/**
+ * \brief This function is called when a Surf communication action is finished.
+ * \param action the corresponding Simix communication
+ */
 void SIMIX_post_comm(smx_action_t action)
 {
   /* Update action state */
@@ -675,9 +713,10 @@ void SIMIX_post_comm(smx_action_t action)
           surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
      action->state = SIMIX_DST_HOST_FAILURE;
   else if (action->comm.surf_comm &&
-          surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
+          surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
+         XBT_DEBUG("Puta madre. Surf says that the link broke");
      action->state = SIMIX_LINK_FAILURE;
-  else
+  } else
     action->state = SIMIX_DONE;
 
   XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
@@ -686,28 +725,19 @@ void SIMIX_post_comm(smx_action_t action)
   /* destroy the surf actions associated with the Simix communication */
   SIMIX_comm_destroy_internal_actions(action);
 
-  /* if there are requests associated with the action, then answer them */
-  if (xbt_fifo_size(action->request_list)) {
-    SIMIX_comm_finish(action);
-  }
-  else {
-    SIMIX_comm_remove_from_processes(action);
-  }
-}
-
-/**
- * \brief Removes a communication action from the list of pending communications
- * of both processes (if they still exist)
- * \param action a communication action
- */
-static void SIMIX_comm_remove_from_processes(smx_action_t action) {
-
+  /* remove the communication action from the list of pending communications
+   * of both processes (if they still exist) */
   if (action->comm.src_proc) {
     xbt_fifo_remove(action->comm.src_proc->comms, action);
   }
   if (action->comm.dst_proc) {
     xbt_fifo_remove(action->comm.dst_proc->comms, action);
   }
+
+  /* if there are requests associated with the action, then answer them */
+  if (xbt_fifo_size(action->request_list)) {
+    SIMIX_comm_finish(action);
+  }
 }
 
 void SIMIX_comm_cancel(smx_action_t action)
@@ -716,12 +746,11 @@ void SIMIX_comm_cancel(smx_action_t action)
   /* so remove from it and delete it */
   if (action->state == SIMIX_WAITING) {
     SIMIX_rdv_remove(action->comm.rdv, action);
-    action->state = SIMIX_FAILED;
+    action->state = SIMIX_CANCELED;
   }
-  else if (!MC_IS_ENABLED
+  else if (!MC_IS_ENABLED /* when running the MC there are no surf actions */
       && (action->state == SIMIX_READY || action->state == SIMIX_RUNNING)) {
 
-    /* when running the MC there are no surf actions */
     surf_workstation_model->action_cancel(action->comm.surf_comm);
   }
 }
@@ -828,25 +857,36 @@ XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
 /******************************************************************************/
 /*                    SIMIX_comm_copy_data callbacks                       */
 /******************************************************************************/
-static void (*SIMIX_comm_copy_data_callback) (smx_action_t, size_t) =
+static void (*SIMIX_comm_copy_data_callback) (smx_action_t, void*, size_t) =
     &SIMIX_comm_copy_pointer_callback;
 
 void
-SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
+SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, void*, size_t))
 {
   SIMIX_comm_copy_data_callback = callback;
 }
 
-void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
+void SIMIX_comm_copy_pointer_callback(smx_action_t comm, void* buff, size_t buff_size)
 {
   xbt_assert((buff_size == sizeof(void *)),
               "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
-  *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
+  *(void **) (comm->comm.dst_buff) = buff;
+}
+
+void SIMIX_comm_copy_buffer_callback(smx_action_t comm, void* buff, size_t buff_size)
+{
+  XBT_DEBUG("Copy the data over");
+  memcpy(comm->comm.dst_buff, buff, buff_size);
 }
 
-void SIMIX_comm_copy_buffer_callback(smx_action_t comm, size_t buff_size)
+void smpi_comm_copy_data_callback(smx_action_t comm, void* buff, size_t buff_size)
 {
-  memcpy(comm->comm.dst_buff, comm->comm.src_buff, buff_size);
+  XBT_DEBUG("Copy the data over");
+  memcpy(comm->comm.dst_buff, buff, buff_size);
+  if (comm->comm.detached) { // if this is a detached send, the source buffer was duplicated by SMPI sender to make the original buffer available to the application ASAP
+    xbt_free(buff);
+    comm->comm.src_buff = NULL;
+  }
 }
 
 /**
@@ -862,8 +902,10 @@ void SIMIX_comm_copy_data(smx_action_t comm)
 
   XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
          comm,
-         comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
-         comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);
+         comm->comm.src_proc ? comm->comm.src_proc->smx_host->name : "a finished process",
+         comm->comm.src_buff,
+         comm->comm.dst_proc ? comm->comm.dst_proc->smx_host->name : "a finished process",
+         comm->comm.dst_buff, buff_size);
 
   /* Copy at most dst_buff_size bytes of the message to receiver's buffer */
   if (comm->comm.dst_buff_size)
@@ -873,10 +915,8 @@ void SIMIX_comm_copy_data(smx_action_t comm)
   if (comm->comm.dst_buff_size)
     *comm->comm.dst_buff_size = buff_size;
 
-  if (buff_size == 0)
-    return;
-
-  (*SIMIX_comm_copy_data_callback) (comm, buff_size);
+  if (buff_size > 0)
+    SIMIX_comm_copy_data_callback (comm, comm->comm.src_buff, buff_size);
 
   /* Set the copied flag so we copy data only once */
   /* (this function might be called from both communication ends) */