Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Make SIMIX_req_comm(test/wait/testany/waitany) requests to destroy the comm actions.
[simgrid.git] / src / simix / smx_network.c
index 1a7e1cd..a5868d1 100644 (file)
@@ -19,11 +19,10 @@ static void SIMIX_comm_finish(smx_action_t action);
 static void SIMIX_waitany_req_remove_from_actions(smx_req_t req);
 static void SIMIX_comm_copy_data(smx_action_t comm);
 static smx_action_t SIMIX_comm_new(e_smx_comm_type_t type);
-static XBT_INLINE void SIMIX_comm_wait_for_completion(smx_action_t comm,
-                                                      double timeout);
 static XBT_INLINE void SIMIX_rdv_push(smx_rdv_t rdv, smx_action_t comm);
 static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm);
-static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type);
+static smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
+                                                                                 int (*match_fun)(void *, void *), void *);
 static void SIMIX_rdv_free(void *data);
 
 void SIMIX_network_init(void)
@@ -50,8 +49,8 @@ smx_rdv_t SIMIX_rdv_create(const char *name)
     rdv->name = name ? xbt_strdup(name) : NULL;
     rdv->comm_fifo = xbt_fifo_new();
 
-    if (name)
-      xbt_dict_set(rdv_points, name, rdv, SIMIX_rdv_free);
+    if (rdv->name)
+      xbt_dict_set(rdv_points, rdv->name, rdv, SIMIX_rdv_free);
   }
   return rdv;
 }
@@ -59,7 +58,7 @@ smx_rdv_t SIMIX_rdv_create(const char *name)
 void SIMIX_rdv_destroy(smx_rdv_t rdv)
 {
   if (rdv->name)
-    xbt_dict_remove(rdv_points, rdv->name); 
+    xbt_dict_remove(rdv_points, rdv->name);
 }
 
 void SIMIX_rdv_free(void *data)
@@ -118,24 +117,36 @@ static XBT_INLINE void SIMIX_rdv_remove(smx_rdv_t rdv, smx_action_t comm)
 }
 
 /**
- *  \brief Checks if there is a communication request queued in a rendez-vous matching our needs
+ *  \brief Checks if there is a communication action queued in a rendez-vous matching our needs
  *  \param type The type of communication we are looking for (comm_send, comm_recv)
- *  \return The communication request if found, NULL otherwise
+ *  \return The communication action if found, NULL otherwise
  */
-smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type)
+smx_action_t SIMIX_rdv_get_request(smx_rdv_t rdv, e_smx_comm_type_t type,
+                                   int (*match_fun)(void *, void *), void *data)
 {
-  smx_action_t comm = (smx_action_t)
-      xbt_fifo_get_item_content(xbt_fifo_get_first_item(rdv->comm_fifo));
-
-  if (comm && comm->comm.type == type) {
-    DEBUG0("Communication request found!");
-    xbt_fifo_shift(rdv->comm_fifo);
-    comm->comm.refcount++;
-    comm->comm.rdv = NULL;
-    return comm;
+  smx_action_t action;
+  xbt_fifo_item_t item;
+  void* req_data = NULL;
+
+  xbt_fifo_foreach(rdv->comm_fifo, item, action, smx_action_t){
+    if (action->comm.type == SIMIX_COMM_SEND) {
+      req_data = action->comm.src_data;
+    } else if (action->comm.type == SIMIX_COMM_RECEIVE) {
+      req_data = action->comm.dst_data;
+    }
+    if (action->comm.type == type && (!match_fun || match_fun(data, req_data))) {
+      DEBUG1("Found a matching communication action %p", action);
+      xbt_fifo_remove_item(rdv->comm_fifo, item);
+      xbt_fifo_free_item(item);
+      action->comm.refcount++;
+      action->comm.rdv = NULL;
+      return action;
+    }
+    DEBUG3("Sorry, communication action %p does not match our needs:"
+           " its type is %d but we are looking for a comm of type %d",
+           action, action->comm.type, type);
   }
-
-  DEBUG0("Communication request not found");
+  DEBUG0("No matching communication action found");
   return NULL;
 }
 
@@ -162,12 +173,17 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
   act->comm.type = type;
   act->comm.refcount = 1;
 
+#ifdef HAVE_LATENCY_BOUND_TRACKING
+  //initialize with unknown value
+  act->latency_limited = -1;
+#endif
+
 #ifdef HAVE_TRACING
   act->category = NULL;
 #endif
 
   DEBUG1("Create communicate action %p", act);
-  
+
   return act;
 }
 
@@ -177,27 +193,21 @@ smx_action_t SIMIX_comm_new(e_smx_comm_type_t type)
  */
 void SIMIX_comm_destroy(smx_action_t action)
 {
-  DEBUG1("Destroy action %p", action);
-
-  if(!(action->comm.refcount > 0))
-         xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action));
+  DEBUG2("Destroy action %p (refcount:%d)", action, action->comm.refcount);
 
-#ifdef HAVE_LATENCY_BOUND_TRACKING
-  //save is latency limited flag to use afterwards
-  if (action->comm.surf_comm) {
-    DEBUG2("adding key %p with latency limited value %d to the dict", action,
-           SIMIX_comm_is_latency_bounded(action));
-    xbt_dicti_set(simix_global->latency_limited_dict, (uintptr_t) action,
-                  SIMIX_comm_is_latency_bounded(action));
-  }
-#endif
+  if (action->comm.refcount <= 0)
+    xbt_die(bprintf("the refcount of comm %p is already 0 before decreasing it. That's a bug!",action));
 
   action->comm.refcount--;
   if (action->comm.refcount > 0)
     return;
-  VERB2("Really free communication %p; refcount is now %d", action,
+  DEBUG2("Really free communication %p; refcount is now %d", action,
         action->comm.refcount);
 
+#ifdef HAVE_LATENCY_BOUND_TRACKING
+    action->latency_limited = SIMIX_comm_is_latency_bounded( action ) ;
+#endif
+
 #ifdef HAVE_TRACING
   TRACE_smx_action_destroy(action);
 #endif
@@ -209,12 +219,21 @@ void SIMIX_comm_destroy(smx_action_t action)
 
   SIMIX_comm_destroy_internal_actions(action);
 
+  if (action->comm.detached && action->state != SIMIX_DONE) {
+    /* the communication has failed and was detached:
+     * we have to free the buffer */
+    ((void_f_pvoid_t) action->comm.src_data)(action->comm.src_buff);
+  }
+
   xbt_free(action);
 }
 
 void SIMIX_comm_destroy_internal_actions(smx_action_t action)
 {
   if (action->comm.surf_comm){
+#ifdef HAVE_LATENCY_BOUND_TRACKING
+    action->latency_limited = SIMIX_comm_is_latency_bounded(action);
+#endif
     action->comm.surf_comm->model_type->action_unref(action->comm.surf_comm);
     action->comm.surf_comm = NULL;
   }
@@ -232,48 +251,58 @@ void SIMIX_comm_destroy_internal_actions(smx_action_t action)
 
 smx_action_t SIMIX_comm_isend(smx_process_t src_proc, smx_rdv_t rdv,
                               double task_size, double rate,
-                              void *src_buff, size_t src_buff_size, void *data)
+                              void *src_buff, size_t src_buff_size,
+                              int (*match_fun)(void *, void *), void *data,
+                              int detached)
 {
   smx_action_t action;
 
   /* Look for communication request matching our needs.
      If it is not found then create it and push it into the rendez-vous point */
-  action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE);
+  action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_RECEIVE, match_fun, data);
 
   if (!action) {
     action = SIMIX_comm_new(SIMIX_COMM_SEND);
     SIMIX_rdv_push(rdv, action);
-  }else{
+  } else {
     action->state = SIMIX_READY;
     action->comm.type = SIMIX_COMM_READY;
   }
 
+  /* If the communication action is detached then decrease the refcount
+   * by one, so it will be eliminated by the receivers destroy call */
+  if(detached){
+    action->comm.detached = 1;
+    action->comm.refcount--;
+  }
+
   /* Setup the communication request */
   action->comm.src_proc = src_proc;
   action->comm.task_size = task_size;
   action->comm.rate = rate;
   action->comm.src_buff = src_buff;
   action->comm.src_buff_size = src_buff_size;
-  action->comm.data = data;
-#ifdef HAVE_MC
-  if(_surf_do_model_check){
+  action->comm.src_data = data;
+
+  if (MC_IS_ENABLED) {
     action->state = SIMIX_RUNNING;
     return action;
   }
-#endif
+
   SIMIX_comm_start(action);
   return action;
 }
 
 smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
-                      void *dst_buff, size_t *dst_buff_size)
+                      void *dst_buff, size_t *dst_buff_size,
+                      int (*match_fun)(void *, void *), void *data)
 {
   smx_action_t action;
 
   /* Look for communication request matching our needs.
    * If it is not found then create it and push it into the rendez-vous point
    */
-  action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND);
+  action = SIMIX_rdv_get_request(rdv, SIMIX_COMM_SEND, match_fun, data);
 
   if (!action) {
     action = SIMIX_comm_new(SIMIX_COMM_RECEIVE);
@@ -287,19 +316,18 @@ smx_action_t SIMIX_comm_irecv(smx_process_t dst_proc, smx_rdv_t rdv,
   action->comm.dst_proc = dst_proc;
   action->comm.dst_buff = dst_buff;
   action->comm.dst_buff_size = dst_buff_size;
+  action->comm.dst_data = data;
 
-#ifdef HAVE_MC
-  if(_surf_do_model_check){
+  if (MC_IS_ENABLED) {
     action->state = SIMIX_RUNNING;
     return action;
   }
-#endif
 
   SIMIX_comm_start(action);
   return action;
 }
 
-void SIMIX_pre_comm_wait(smx_req_t req)
+void SIMIX_pre_comm_wait(smx_req_t req, int idx)
 {
   smx_action_t action = req->comm_wait.comm;
   double timeout = req->comm_wait.timeout;
@@ -309,12 +337,24 @@ void SIMIX_pre_comm_wait(smx_req_t req)
   xbt_fifo_push(action->request_list, req);
   req->issuer->waiting_action = action;
 
-#ifdef HAVE_MC
-  if(_surf_do_model_check){
-    action->state = SIMIX_DONE;
+  if (MC_IS_ENABLED){
+    if(idx == 0){
+      action->state = SIMIX_DONE;
+    }else{
+      /* If we reached this point, the wait request must have a timeout */
+      /* Otherwise it shouldn't be enabled and executed by the MC */
+      if(timeout == -1)
+        THROW_IMPOSSIBLE;
+
+      if(action->comm.src_proc == req->issuer)
+        action->state = SIMIX_SRC_TIMEOUT;
+      else
+        action->state = SIMIX_DST_TIMEOUT;
+    }
+
     SIMIX_comm_finish(action);
+    return;
   }
-#endif
 
   /* If the action has already finish perform the error handling, */
   /* otherwise set up a waiting timeout on the right side         */
@@ -334,26 +374,78 @@ void SIMIX_pre_comm_wait(smx_req_t req)
 void SIMIX_pre_comm_test(smx_req_t req)
 {
   smx_action_t action = req->comm_test.comm;
-  req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
 
+  if(MC_IS_ENABLED){
+    req->comm_test.result = action->comm.src_proc && action->comm.dst_proc;
+    if(req->comm_test.result){
+      action->state = SIMIX_DONE;
+      xbt_fifo_push(action->request_list, req);
+      SIMIX_comm_finish(action);
+    }else{
+      SIMIX_request_answer(req);
+    }
+    return;
+  }
+
+  req->comm_test.result = (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING);
   if (req->comm_test.result) {
     xbt_fifo_push(action->request_list, req);
     SIMIX_comm_finish(action);
-  }
-  else {
+  } else {
     SIMIX_request_answer(req);
   }
 }
 
-void SIMIX_pre_comm_waitany(smx_req_t req)
+void SIMIX_pre_comm_testany(smx_req_t req, int idx)
+{
+  unsigned int cursor;
+  smx_action_t action;
+  xbt_dynar_t actions = req->comm_testany.comms;
+  req->comm_testany.result = -1;
+
+  if (MC_IS_ENABLED){
+    if(idx == -1){
+      SIMIX_request_answer(req);
+    }else{
+      action = xbt_dynar_get_as(actions, idx, smx_action_t);
+      req->comm_testany.result = idx;
+      xbt_fifo_push(action->request_list, req);
+      action->state = SIMIX_DONE;
+      SIMIX_comm_finish(action);
+    }
+    return;
+  }
+
+  xbt_dynar_foreach(req->comm_testany.comms,cursor,action) {
+    if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING) {
+      req->comm_testany.result = cursor;
+      xbt_fifo_push(action->request_list, req);
+      SIMIX_comm_finish(action);
+      return;
+    }
+  }
+  SIMIX_request_answer(req);
+}
+
+void SIMIX_pre_comm_waitany(smx_req_t req, int idx)
 {
   smx_action_t action;
   unsigned int cursor = 0;
   xbt_dynar_t actions = req->comm_waitany.comms;
+
+  if (MC_IS_ENABLED){
+    action = xbt_dynar_get_as(actions, idx, smx_action_t);
+    xbt_fifo_push(action->request_list, req);
+    req->comm_waitany.result = idx;
+    action->state = SIMIX_DONE;
+    SIMIX_comm_finish(action);
+    return;
+  }
+
   xbt_dynar_foreach(actions, cursor, action){
     /* Associate this request to the action */
     xbt_fifo_push(action->request_list, req);
-    if(action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
+    if (action->state != SIMIX_WAITING && action->state != SIMIX_RUNNING){
       SIMIX_comm_finish(action);
       break;
     }
@@ -383,11 +475,10 @@ static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
     smx_host_t receiver = action->comm.dst_proc->smx_host;
 
     DEBUG3("Starting communication %p from '%s' to '%s'", action,
-         SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
+           SIMIX_host_get_name(sender), SIMIX_host_get_name(receiver));
 
-    action->comm.surf_comm =
-      surf_workstation_model->extension.workstation.
-      communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
+    action->comm.surf_comm = surf_workstation_model->extension.workstation.
+        communicate(sender->host, receiver->host, action->comm.task_size, action->comm.rate);
 
     surf_workstation_model->action_data_set(action->comm.surf_comm, action);
 
@@ -395,7 +486,6 @@ static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
 
 #ifdef HAVE_TRACING
     TRACE_smx_action_communicate(action, action->comm.src_proc);
-    TRACE_surf_action(action->comm.surf_comm, action->category);
 #endif
 
     /* If a link is failed, detect it immediately */
@@ -418,21 +508,23 @@ static XBT_INLINE void SIMIX_comm_start(smx_action_t action)
 
 void SIMIX_comm_finish(smx_action_t action)
 {
+  unsigned int destroy_count = 0;
   smx_req_t req;
 
-  while((req = xbt_fifo_shift(action->request_list))){
+  while ((req = xbt_fifo_shift(action->request_list))) {
 
     /* If a waitany request is waiting for this action to finish, then remove
        it from the other actions in the waitany list. Afterwards, get the
        position of the actual action in the waitany request's actions dynar and
        return it as the result of the call */
-    if(req->call == REQ_COMM_WAITANY){
+    if (req->call == REQ_COMM_WAITANY) {
       SIMIX_waitany_req_remove_from_actions(req);
-      req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
+      if(!MC_IS_ENABLED)
+        req->comm_waitany.result = xbt_dynar_search(req->comm_waitany.comms, &action);
     }
 
     /* If the action is still in a rendez-vous point then remove from it */
-    if(action->comm.rdv)
+    if (action->comm.rdv)
       SIMIX_rdv_remove(action->comm.rdv, action);
 
     DEBUG1("SIMIX_comm_finish: action state = %d", action->state);
@@ -465,7 +557,7 @@ void SIMIX_comm_finish(smx_action_t action)
 
       case SIMIX_SRC_HOST_FAILURE:
         TRY {
-          if(req->issuer == action->comm.src_proc)
+          if (req->issuer == action->comm.src_proc)
             THROW0(host_error, 0, "Host failed");
           else
             THROW0(network_error, 0, "Remote peer failed");
@@ -502,27 +594,42 @@ void SIMIX_comm_finish(smx_action_t action)
       default:
         THROW_IMPOSSIBLE;
     }
+
+    /* if there is an exception during a waitany or a testany, indicate the position of the failed communication */
+    if (req->issuer->doexception) {
+      if (req->call == REQ_COMM_WAITANY) {
+        req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_waitany.comms, &action);
+      }
+      else if (req->call == REQ_COMM_TESTANY) {
+        req->issuer->running_ctx->exception.value = xbt_dynar_search(req->comm_testany.comms, &action);
+      }
+    }
+
     req->issuer->waiting_action = NULL;
     SIMIX_request_answer(req);
+    destroy_count++;
   }
+
+  while(destroy_count-- > 0)
+    SIMIX_comm_destroy(action);
 }
 
 void SIMIX_post_comm(smx_action_t action)
 {
   /* Update action state */
-  if(action->comm.src_timeout &&
+  if (action->comm.src_timeout &&
      surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_DONE)
      action->state = SIMIX_SRC_TIMEOUT;
-  else if(action->comm.dst_timeout &&
+  else if (action->comm.dst_timeout &&
           surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_DONE)
      action->state = SIMIX_DST_TIMEOUT;
-  else if(action->comm.src_timeout &&
+  else if (action->comm.src_timeout &&
           surf_workstation_model->action_state_get(action->comm.src_timeout) == SURF_ACTION_FAILED)
      action->state = SIMIX_SRC_HOST_FAILURE;
-  else if(action->comm.dst_timeout &&
+  else if (action->comm.dst_timeout &&
           surf_workstation_model->action_state_get(action->comm.dst_timeout) == SURF_ACTION_FAILED)
      action->state = SIMIX_DST_HOST_FAILURE;
-  else if(action->comm.surf_comm &&
+  else if (action->comm.surf_comm &&
           surf_workstation_model->action_state_get(action->comm.surf_comm) == SURF_ACTION_FAILED)
      action->state = SIMIX_LINK_FAILURE;
   else
@@ -535,7 +642,7 @@ void SIMIX_post_comm(smx_action_t action)
   SIMIX_comm_destroy_internal_actions(action);
 
   /* If there are requests associated with the action, then answer them */
-  if(xbt_fifo_size(action->request_list))
+  if (xbt_fifo_size(action->request_list))
     SIMIX_comm_finish(action);
 }
 
@@ -547,7 +654,9 @@ void SIMIX_comm_cancel(smx_action_t action)
     SIMIX_rdv_remove(action->comm.rdv, action);
     action->state = SIMIX_FAILED;
   } else {
-    surf_workstation_model->action_cancel(action->comm.surf_comm);
+    /* When running the MC there are no surf actions */
+    if(!MC_IS_ENABLED)
+      surf_workstation_model->action_cancel(action->comm.surf_comm);
   }
 }
 
@@ -598,13 +707,23 @@ e_smx_state_t SIMIX_comm_get_state(smx_action_t action)
 }
 
 /**
- *  \brief Return the user data associated to the communication
+ *  \brief Return the user data associated to the sender of the communication
  *  \param action The communication
  *  \return the user data
  */
-void* SIMIX_comm_get_data(smx_action_t action)
+void* SIMIX_comm_get_src_data(smx_action_t action)
 {
-  return action->comm.data;
+  return action->comm.src_data;
+}
+
+/**
+ *  \brief Return the user data associated to the receiver of the communication
+ *  \param action The communication
+ *  \return the user data
+ */
+void* SIMIX_comm_get_dst_data(smx_action_t action)
+{
+  return action->comm.dst_data;
 }
 
 void* SIMIX_comm_get_src_buff(smx_action_t action)
@@ -651,21 +770,12 @@ smx_process_t SIMIX_comm_get_dst_proc(smx_action_t action)
  */
 XBT_INLINE int SIMIX_comm_is_latency_bounded(smx_action_t action)
 {
-  //try to find comm on the list of finished flows
-  uintptr_t key = 0;
-  uintptr_t data = 0;
-  xbt_dict_cursor_t cursor;
-  xbt_dict_foreach(simix_global->latency_limited_dict, cursor, key, data) {
-    DEBUG2("comparing key=%p with comm=%p", (void *) key, (void *) action);
-    if ((void *) action == (void *) key) {
-      DEBUG2("key %p found, return value latency limited value %d",
-             (void *) key, (int) data);
-      xbt_dict_cursor_free(&cursor);
-      return (int) data;
-    }
+  if (action->comm.surf_comm){
+      DEBUG1("Getting latency limited for surf_action (%p)", action->comm.surf_comm);
+      action->latency_limited = surf_workstation_model->get_latency_limited(action->comm.surf_comm);
+      DEBUG1("Action limited is %d", action->latency_limited);
   }
-
-  return surf_workstation_model->get_latency_limited(action->comm.surf_comm);
+  return action->latency_limited;
 }
 #endif
 
@@ -719,6 +829,7 @@ void SIMIX_comm_copy_data(smx_action_t comm)
 
   if (buff_size == 0)
     return;
+
   (*SIMIX_comm_copy_data_callback) (comm, buff_size);
 
   /* Set the copied flag so we copy data only once */