Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
SIMIX_CANCELED is a more appropriate state in SIMIX_comm_cancel()
[simgrid.git] / src / simix / smx_network.c
index d1c83d3..d5d4631 100644 (file)
@@ -13,16 +13,16 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(simix_network, simix,
                                 "Logging specific to SIMIX (network)");
 
 static xbt_dict_t rdv_points = NULL;
+unsigned long int smx_total_comms = 0;
 
-static XBT_INLINE void SIMIX_comm_start(smx_action_t action);
-static void SIMIX_comm_finish(smx_action_t action);
 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
 static void SIMIX_comm_copy_data(smx_action_t comm);
 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
+static void SIMIX_comm_remove_from_processes(smx_action_t action);
 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
 static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
-                                                                                 int (*match_fun)(void *, void *), void *);
+                                         int (*match_fun)(void *, void *), void *);
 static void SIMIX_rdv_free(void *data);
 
 void SIMIX_network_init(void)
@@ -64,8 +64,7 @@ void SIMIX_rdv_destroy(smx_rdv_t rdv)
 void SIMIX_rdv_free(void *data)
 {
   smx_rdv_t rdv = (smx_rdv_t) data;
-  if (rdv->name)
-    xbt_free(rdv->name);
+  xbt_free(rdv->name);
   xbt_fifo_free(rdv->comm_fifo);
   xbt_free(rdv);  
 }
@@ -116,6 +115,13 @@ static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
   comm->comm.rdv = NULL;
 }
 
+/**
+ *  \brief Wrapper to SIMIX_rdv_get_request
+ */
+smx_action_t SIMIX_comm_get_send_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), void* data) {
+   return SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
+}
+
 /**
  *  \brief Checks if there is a communication action queued in a rendez-vous matching our needs
  *  \param type The type of communication we are looking for (comm_send, comm_recv)
@@ -124,6 +130,7 @@ static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
 smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
                                    int (*match_fun)(void *, void *), void *data)
 {
+  // FIXME rewrite this function by using SIMIX_rdv_has_send/recv_match
   smx_action_t action;
   xbt_fifo_item_t item;
   void* req_data = NULL;
@@ -135,21 +142,63 @@ smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
       req_data = action->comm.dst_data;
     }
     if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
-      DEBUG1("Found a matching communication action %p", action);
+      XBT_DEBUG("Found a matching communication action %p", action);
       xbt_fifo_remove_item(rdv->comm_fifo, item);
       xbt_fifo_free_item(item);
       action->comm.refcount++;
       action->comm.rdv = NULL;
       return action;
     }
-    DEBUG3("Sorry, communication action %p does not match our needs:"
+    XBT_DEBUG("Sorry, communication action %p does not match our needs:"
            " its type is %d but we are looking for a comm of type %d",
            action, action->comm.type, type);
   }
-  DEBUG0("No matching communication action found");
+  XBT_DEBUG("No matching communication action found");
   return NULL;
 }
 
+/**
+ *  \brief Checks if there is a send communication action
+ *  queued in a rendez-vous matching our needs.
+ *  \return 1 if found, 0 otherwise
+ */
+int SIMIX_comm_has_send_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), void* data) {
+
+  smx_action_t action;
+  xbt_fifo_item_t item;
+
+  xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
+    if (action->comm.type == SIMIX_COMM_SEND
+        && (!match_fun || match_fun(data, action->comm.src_data))) {
+      XBT_DEBUG("Found a matching communication action %p", action);
+      return 1;
+    }
+  }
+  XBT_DEBUG("No matching communication action found");
+  return 0;
+}
+
+/**
+ *  \brief Checks if there is a recv communication action
+ *  queued in a rendez-vous matching our needs.
+ *  \return 1 if found, 0 otherwise
+ */
+int SIMIX_comm_has_recv_match(smx_rdv_t rdv, int (*match_fun)(void*, void*), void* data) {
+
+  smx_action_t action;
+  xbt_fifo_item_t item;
+
+  xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
+    if (action->comm.type == SIMIX_COMM_RECEIVE
+        && (!match_fun || match_fun(data, action->comm.dst_data))) {
+      XBT_DEBUG("Found a matching communication action %p", action);
+      return 1;
+    }
+  }
+  XBT_DEBUG("No matching communication action found");
+  return 0;
+}
+
 /******************************************************************************/
 /*                            Comunication Actions                            */
 /******************************************************************************/
@@ -164,10 +213,9 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
   smx_action_t act;
 
   /* alloc structures */
-  act = xbt_new0(s_smx_action_t, 1);
+  act = xbt_mallocator_get(simix_global->action_mallocator);
   act->type = SIMIX_ACTION_COMMUNICATE;
   act->state = SIMIX_WAITING;
-  act->request_list = xbt_fifo_new();
 
   /* set communication */
   act->comm.type = type;
@@ -182,7 +230,8 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
   act->category = NULL;
 #endif
 
-  DEBUG1("Create communicate action %p", act);
+  XBT_DEBUG("Create communicate action %p", act);
+  ++smx_total_comms;
 
   return act;
 }
@@ -193,33 +242,32 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
  */
 void SIMIX_comm_destroy(smx_action_t action)
 {
-  DEBUG2("Destroy action %p (refcount:%d)", action, action->comm.refcount);
+  XBT_DEBUG("Destroy action %p (refcount:%d)", action, action->comm.refcount);
 
   if (action->comm.refcount <= 0)
-    xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action));
+    xbt_die("the refcount of comm %p is already 0 before decreasing it. "
+            "That's a bug!", action);
 
   action->comm.refcount--;
   if (action->comm.refcount > 0)
     return;
-  DEBUG2("Really free communication %p; refcount is now %d", action,
+  XBT_DEBUG("Really free communication %p; refcount is now %d", action,
         action->comm.refcount);
 
 #ifdef HAVE_LATENCY_BOUND_TRACKING
     action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
 #endif
 
-#ifdef HAVE_TRACING
-  TRACE_smx_action_destroy(action);
-#endif
-
-  if (action->name)
-    xbt_free(action->name);
-
-  xbt_fifo_free(action->request_list);
-
+  xbt_free(action->name);
   SIMIX_comm_destroy_internal_actions(action);
 
-  xbt_free(action);
+  if (action->comm.detached && action->state != SIMIX_DONE) {
+    /* the communication has failed and was detached:
+     * we have to free the buffer */
+    ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
+  }
+
+  xbt_mallocator_release(simix_global->action_mallocator, action);
 }
 
 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
@@ -246,7 +294,8 @@ void SIMIX_comm_destroy_internal_actions(smx_action_t action)
 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
                               double task_size, double rate,
                               void *src_buff, size_t src_buff_size,
-                              int (*match_fun)(void *, void *), void *data)
+                              int (*match_fun)(void *, void *), void *data,
+                              int detached)
 {
   smx_action_t action;
 
@@ -261,6 +310,14 @@ smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
     action->state = SIMIX_READY;
     action->comm.type = SIMIX_COMM_READY;
   }
+  xbt_fifo_push(src_proc->comms, action);
+
+  /* if the communication action is detached then decrease the refcount
+   * by one, so it will be eliminated by the receiver's destroy call */
+  if (detached) {
+    action->comm.detached = 1;
+    action->comm.refcount--;
+  }
 
   /* Setup the communication request */
   action->comm.src_proc = src_proc;
@@ -297,6 +354,7 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
     action->state = SIMIX_READY;
     action->comm.type = SIMIX_COMM_READY;
   }
+  xbt_fifo_push(dst_proc->comms, action);
 
   /* Setup communication request */
   action->comm.dst_proc = dst_proc;
@@ -313,26 +371,25 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
   return action;
 }
 
-void SIMIX_pre_comm_wait(smx_req_t req, int idx)
+void SIMIX_pre_comm_wait(smx_req_t req, smx_action_t action, double timeout, int idx)
 {
-  smx_action_t action = req->comm_wait.comm;
-  double timeout = req->comm_wait.timeout;
+  /* the request may be a wait, a send or a recv */
   surf_action_t sleep;
 
   /* Associate this request to the action */
   xbt_fifo_push(action->request_list, req);
   req->issuer->waiting_action = action;
 
-  if (MC_IS_ENABLED){
-    if(idx == 0){
+  if (MC_IS_ENABLED) {
+    if (idx == 0) {
       action->state = SIMIX_DONE;
-    }else{
+    } else {
       /* If we reached this point, the wait request must have a timeout */
       /* Otherwise it shouldn't be enabled and executed by the MC */
-      if(timeout == -1)
+      if (timeout == -1)
         THROW_IMPOSSIBLE;
 
-      if(action->comm.src_proc == req->issuer)
+      if (action->comm.src_proc == req->issuer)
         action->state = SIMIX_SRC_TIMEOUT;
       else
         action->state = SIMIX_DST_TIMEOUT;
@@ -453,14 +510,16 @@ void SIMIX_waitany_req_remove_from_actions(smx_req_t req)
  *  \brief Start the simulation of a communication request
  *  \param action The communication action
  */
-static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
+
+XBT_INLINE void SIMIX_comm_start(smx_action_t action)
 {
   /* If both the sender and the receiver are already there, start the communication */
   if (action->state == SIMIX_READY) {
+
     smx_host_t sender = action->comm.src_proc->smx_host;
     smx_host_t receiver = action->comm.dst_proc->smx_host;
 
-    DEBUG3("Starting communication %p from '%s' to '%s'", action,
+    XBT_DEBUG("Starting communication %p from '%s' to '%s'", action,
            SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
 
     action->comm.surf_comm = surf_workstation_model->extension.workstation.
@@ -470,13 +529,9 @@ static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
 
     action->state = SIMIX_RUNNING;
 
-#ifdef HAVE_TRACING
-    TRACE_smx_action_communicate(action, action->comm.src_proc);
-#endif
-
     /* If a link is failed, detect it immediately */
     if (surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED) {
-      DEBUG2("Communication from '%s' to '%s' failed to start because of a link failure",
+      XBT_DEBUG("Communication from '%s' to '%s' failed to start because of a link failure",
          SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
       action->state = SIMIX_LINK_FAILURE;
       SIMIX_comm_destroy_internal_actions(action);
@@ -494,8 +549,11 @@ static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
 
 void SIMIX_comm_finish(smx_action_t action)
 {
+  unsigned int destroy_count = 0;
   smx_req_t req;
 
+  SIMIX_comm_remove_from_processes(action);
+
   while ((req = xbt_fifo_shift(action->request_list))) {
 
     /* If a waitany request is waiting for this action to finish, then remove
@@ -504,7 +562,7 @@ void SIMIX_comm_finish(smx_action_t action)
        return it as the result of the call */
     if (req->call == REQ_COMM_WAITANY) {
       SIMIX_waitany_req_remove_from_actions(req);
-      if(!MC_IS_ENABLED)
+      if (!MC_IS_ENABLED)
         req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
     }
 
@@ -512,19 +570,19 @@ void SIMIX_comm_finish(smx_action_t action)
     if (action->comm.rdv)
       SIMIX_rdv_remove(action->comm.rdv, action);
 
-    DEBUG1("SIMIX_comm_finish: action state = %d", action->state);
+    XBT_DEBUG("SIMIX_comm_finish: action state = %d", action->state);
 
     /* Check out for errors */
     switch (action->state) {
 
       case SIMIX_DONE:
-        DEBUG1("Communication %p complete!", action);
+        XBT_DEBUG("Communication %p complete!", action);
         SIMIX_comm_copy_data(action);
         break;
 
       case SIMIX_SRC_TIMEOUT:
         TRY {
-          THROW0(timeout_error, 0, "Communication timeouted because of sender");
+          THROWF(timeout_error, 0, "Communication timeouted because of sender");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -533,7 +591,7 @@ void SIMIX_comm_finish(smx_action_t action)
 
       case SIMIX_DST_TIMEOUT:
         TRY {
-          THROW0(timeout_error, 0, "Communication timeouted because of receiver");
+          THROWF(timeout_error, 0, "Communication timeouted because of receiver");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -543,9 +601,9 @@ void SIMIX_comm_finish(smx_action_t action)
       case SIMIX_SRC_HOST_FAILURE:
         TRY {
           if (req->issuer == action->comm.src_proc)
-            THROW0(host_error, 0, "Host failed");
+            THROWF(host_error, 0, "Host failed");
           else
-            THROW0(network_error, 0, "Remote peer failed");
+            THROWF(network_error, 0, "Remote peer failed");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -555,9 +613,9 @@ void SIMIX_comm_finish(smx_action_t action)
       case SIMIX_DST_HOST_FAILURE:
         TRY {
           if (req->issuer == action->comm.dst_proc)
-            THROW0(host_error, 0, "Host failed");
+            THROWF(host_error, 0, "Host failed");
           else
-            THROW0(network_error, 0, "Remote peer failed");
+            THROWF(network_error, 0, "Remote peer failed");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -566,10 +624,12 @@ void SIMIX_comm_finish(smx_action_t action)
 
       case SIMIX_LINK_FAILURE:
         TRY {
-         DEBUG5("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
-             action, action->comm.src_proc->smx_host->name, action->comm.dst_proc->smx_host->name,
+         XBT_DEBUG("Link failure in action %p between '%s' and '%s': posting an exception to the issuer: %s (%p)",
+             action,
+             action->comm.src_proc ? action->comm.src_proc->smx_host->name : NULL,
+             action->comm.dst_proc ? action->comm.dst_proc->smx_host->name : NULL,
              req->issuer->name, req->issuer);
-          THROW0(network_error, 0, "Link failure");
+          THROWF(network_error, 0, "Link failure");
         }
        CATCH(req->issuer->running_ctx->exception) {
           req->issuer->doexception = 1;
@@ -592,7 +652,11 @@ void SIMIX_comm_finish(smx_action_t action)
 
     req->issuer->waiting_action = NULL;
     SIMIX_request_answer(req);
+    destroy_count++;
   }
+
+  while (destroy_count-- > 0)
+    SIMIX_comm_destroy(action);
 }
 
 void SIMIX_post_comm(smx_action_t action)
@@ -616,28 +680,49 @@ void SIMIX_post_comm(smx_action_t action)
   else
     action->state = SIMIX_DONE;
 
-  DEBUG1("SIMIX_post_comm: action state = %d", action->state);
+  XBT_DEBUG("SIMIX_post_comm: comm %p, state %d, src_proc %p, dst_proc %p, detached: %d",
+      action, action->state, action->comm.src_proc, action->comm.dst_proc, action->comm.detached);
 
-  /* After this point the surf actions associated with the simix communicate
-     action are no longer needed, thus we delete them. */
+  /* destroy the surf actions associated with the Simix communication */
   SIMIX_comm_destroy_internal_actions(action);
 
-  /* If there are requests associated with the action, then answer them */
-  if (xbt_fifo_size(action->request_list))
+  /* if there are requests associated with the action, then answer them */
+  if (xbt_fifo_size(action->request_list)) {
     SIMIX_comm_finish(action);
+  }
+  else {
+    SIMIX_comm_remove_from_processes(action);
+  }
+}
+
+/**
+ * \brief Removes a communication action from the list of pending communications
+ * of both processes (if they still exist)
+ * \param action a communication action
+ */
+static void SIMIX_comm_remove_from_processes(smx_action_t action) {
+
+  if (action->comm.src_proc) {
+    xbt_fifo_remove(action->comm.src_proc->comms, action);
+  }
+  if (action->comm.dst_proc) {
+    xbt_fifo_remove(action->comm.dst_proc->comms, action);
+  }
 }
 
 void SIMIX_comm_cancel(smx_action_t action)
 {
-  /* If the action is a waiting state means that it is still in a rdv */
+  /* if the action is a waiting state means that it is still in a rdv */
   /* so remove from it and delete it */
   if (action->state == SIMIX_WAITING) {
     SIMIX_rdv_remove(action->comm.rdv, action);
-    action->state = SIMIX_FAILED;
-  } else {
-    /* When running the MC there are no surf actions */
-    if(!MC_IS_ENABLED)
-      surf_workstation_model->action_cancel(action->comm.surf_comm);
+    action->state = SIMIX_CANCELED;
+  }
+  else if (!MC_IS_ENABLED
+      && (action->state == SIMIX_READY || action->state == SIMIX_RUNNING)) {
+
+    /* when running the MC there are no surf actions */
+    surf_workstation_model->action_cancel(action->comm.surf_comm);
   }
 }
 
@@ -664,6 +749,10 @@ double SIMIX_comm_get_remains(smx_action_t action)
 {
   double remains;
 
+  if(!action){
+      return 0;
+  }
+
   switch (action->state) {
 
     case SIMIX_RUNNING:
@@ -707,33 +796,6 @@ void* SIMIX_comm_get_dst_data(smx_action_t action)
   return action->comm.dst_data;
 }
 
-void* SIMIX_comm_get_src_buff(smx_action_t action)
-{
-  return action->comm.src_buff;
-}
-
-void* SIMIX_comm_get_dst_buff(smx_action_t action)
-{
-  return action->comm.dst_buff;
-}
-
-size_t SIMIX_comm_get_src_buff_size(smx_action_t action)
-{
-  return action->comm.src_buff_size;
-}
-
-size_t SIMIX_comm_get_dst_buff_size(smx_action_t action)
-{
-  size_t buff_size;
-
-  if (action->comm.dst_buff_size)
-    buff_size = *(action->comm.dst_buff_size);
-  else
-    buff_size = 0;
-
-  return buff_size;
-}
-
 smx_process_t SIMIX_comm_get_src_proc(smx_action_t action)
 {
   return action->comm.src_proc;
@@ -751,10 +813,13 @@ smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
  */
 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
 {
+  if(!action){
+      return 0;
+  }
   if (action->comm.surf_comm){
-      DEBUG1("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
+      XBT_DEBUG("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
       action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
-      DEBUG1("Action limited is %d", action->latency_limited);
+      XBT_DEBUG("Action limited is %d", action->latency_limited);
   }
   return action->latency_limited;
 }
@@ -774,7 +839,7 @@ SIMIX_comm_set_copy_data_callback(void (*callback) (smx_action_t, size_t))
 
 void SIMIX_comm_copy_pointer_callback(smx_action_t comm, size_t buff_size)
 {
-  xbt_assert1((buff_size == sizeof(void *)),
+  xbt_assert((buff_size == sizeof(void *)),
               "Cannot copy %zu bytes: must be sizeof(void*)", buff_size);
   *(void **) (comm->comm.dst_buff) = comm->comm.src_buff;
 }
@@ -795,7 +860,7 @@ void SIMIX_comm_copy_data(smx_action_t comm)
   if (!comm->comm.src_buff || !comm->comm.dst_buff || comm->comm.copied == 1)
     return;
 
-  DEBUG6("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
+  XBT_DEBUG("Copying comm %p data from %s (%p) -> %s (%p) (%zu bytes)",
          comm,
          comm->comm.src_proc->smx_host->name, comm->comm.src_buff,
          comm->comm.dst_proc->smx_host->name, comm->comm.dst_buff, buff_size);