Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Register get_route_latency in global_routing, and use it.
[simgrid.git] / src / surf / network.c
index 51a46e1..b294a4c 100644 (file)
@@ -15,6 +15,7 @@ surf_model_t surf_network_model = NULL;
 static lmm_system_t network_maxmin_system = NULL;
 static void (*network_solve) (lmm_system_t) = NULL;
 
+double sg_sender_gap = 0.0;     /* lower bound on the gap inserted between consecutive sends on the same link; gap bookkeeping only runs when > 0 */
 double sg_latency_factor = 1.0; /* default value; can be set by model or from command line */
 double sg_bandwidth_factor = 1.0;       /* default value; can be set by model or from command line */
 double sg_weight_S_parameter = 0.0;     /* default value; can be set by model or from command line */
@@ -22,6 +23,66 @@ double sg_weight_S_parameter = 0.0;     /* default value; can be set by model or
 double sg_tcp_gamma = 0.0;
 int sg_network_fullduplex = 0;
 
+xbt_dict_t gap_lookup = NULL;   /* link name -> xbt_fifo_t of the actions registered by gap_append(); created lazily there */
+
+static double net_get_link_bandwidth(const void *link);   /* forward declaration: gap_append() calls it */
+
+/**
+ * Register `action` as the most recent send going through `link` and, when
+ * sender-gap modelling is enabled (sg_sender_gap > 0), delay it behind the
+ * previous send still registered for that link.
+ *
+ * @param size    stored in action->sender.size; the next send registered on
+ *                the same link divides it by the link bandwidth to derive
+ *                its own gap
+ * @param link    link whose resource name keys the per-link FIFO kept in
+ *                gap_lookup
+ * @param action  action to register; its sender.* fields are written and its
+ *                latency is increased by the computed gap
+ *
+ * When sender-gap modelling is disabled the sender bookkeeping is reset to
+ * neutral values, so later readers (the debug log in the caller, gap_remove())
+ * never observe stale fields.
+ */
+static void gap_append(double size, const link_CM02_t link, surf_action_network_CM02_t action) {
+   const char* src = link->lmm_resource.generic_resource.name;
+   xbt_fifo_t fifo;
+   surf_action_network_CM02_t last_action;
+   double bw;
+
+   if(!(sg_sender_gap > 0.0)) {
+      /* Feature disabled: leave well-defined, neutral bookkeeping behind. */
+      action->sender.gap = 0.0;
+      action->sender.link_name = NULL;
+      action->sender.fifo_item = NULL;
+      action->sender.size = 0.0;
+      return;
+   }
+
+   if(!gap_lookup) {
+      gap_lookup = xbt_dict_new();
+   }
+
+   /* One lookup is enough: the FIFO is read and inserted under the same key. */
+   fifo = (xbt_fifo_t)xbt_dict_get_or_null(gap_lookup, src);
+   action->sender.gap = 0.0;
+   if(fifo && xbt_fifo_size(fifo) > 0) {
+      /* Compute gap from last send */
+      last_action = (surf_action_network_CM02_t)xbt_fifo_get_item_content(xbt_fifo_get_last_item(fifo));
+      bw = net_get_link_bandwidth(link);
+      action->sender.gap = last_action->sender.gap + max(sg_sender_gap, last_action->sender.size / bw);
+      action->latency += action->sender.gap;
+   }
+
+   /* Append action as last send */
+   action->sender.link_name = link->lmm_resource.generic_resource.name;
+   if(!fifo) {
+      fifo = xbt_fifo_new();
+      xbt_dict_set(gap_lookup, action->sender.link_name, fifo, NULL);
+   }
+   action->sender.fifo_item = xbt_fifo_push(fifo, action);
+   action->sender.size = size;
+}
+
+/*
+ * Clear the sender-gap bookkeeping of an action that is not attached to any
+ * per-link FIFO, so that gap_remove() finds nothing to undo for it.
+ */
+static void gap_unknown(surf_action_network_CM02_t action)
+{
+   action->sender.fifo_item = NULL;
+   action->sender.link_name = NULL;
+   action->sender.size = 0.0;
+   action->sender.gap = 0.0;
+}
+
+/**
+ * Unregister `action` from the per-link FIFO it was pushed onto by
+ * gap_append(). The FIFO is released once it becomes empty, and gap_lookup
+ * itself is released once it holds no FIFO any more.
+ *
+ * Does nothing when sender-gap modelling is disabled or when the action was
+ * never registered (sender.link_name / sender.fifo_item unset). Calling it
+ * again for the same action is harmless because the item handle is cleared.
+ */
+static void gap_remove(surf_action_network_CM02_t action) {
+   xbt_fifo_t fifo;
+
+   if(sg_sender_gap > 0.0 && gap_lookup && action->sender.link_name && action->sender.fifo_item) {
+      fifo = (xbt_fifo_t)xbt_dict_get_or_null(gap_lookup, action->sender.link_name);
+      if(fifo) {
+         xbt_fifo_remove_item(fifo, action->sender.fifo_item);
+         /* xbt_fifo_remove_item() only unlinks the item: release it explicitly */
+         xbt_fifo_free_item(action->sender.fifo_item);
+         if(xbt_fifo_size(fifo) == 0) {
+            xbt_fifo_free(fifo);
+            xbt_dict_remove(gap_lookup, action->sender.link_name);
+            /* xbt_dict_length() is the element count; xbt_dict_size() is the
+             * table size and never drops to 0 for a live dict */
+            if(xbt_dict_length(gap_lookup) == 0) {
+               xbt_dict_free(&gap_lookup);
+            }
+         }
+      }
+      /* The handle is gone either way: never leave it dangling */
+      action->sender.fifo_item = NULL;
+   }
+}
 
 /******************************************************************************/
 /*                           Factors callbacks                                */
@@ -112,8 +173,7 @@ static link_CM02_t net_link_new(char *name,
                             state_initial, state_trace,
                             bw_initial, bw_trace);
 
-  xbt_assert1(!xbt_dict_get_or_null
-              (surf_network_model->resource_set, name),
+  xbt_assert(!xbt_lib_get_or_null(link_lib, name, SURF_LINK_LEVEL),
               "Link '%s' declared several times in the platform file.",
               name);
 
@@ -125,11 +185,7 @@ static link_CM02_t net_link_new(char *name,
   if (policy == SURF_LINK_FATPIPE)
     lmm_constraint_shared(nw_link->lmm_resource.constraint);
 
-  xbt_dict_set(surf_network_model->resource_set, name, nw_link,
-               surf_resource_free);
-#ifdef HAVE_TRACING
-  TRACE_surf_link_declaration(nw_link, name, bw_initial, lat_initial);
-#endif
+  xbt_lib_set(link_lib, name, SURF_LINK_LEVEL, nw_link);
 
   return nw_link;
 }
@@ -144,14 +200,14 @@ static void net_parse_link_init(void)
   e_surf_resource_state_t state_initial_link = SURF_RESOURCE_ON;
   e_surf_link_sharing_policy_t policy_initial_link = SURF_LINK_SHARED;
   tmgr_trace_t state_trace;
-  DEBUG0("link_CM02");
+  XBT_DEBUG("link_CM02");
   name_link = xbt_strdup(A_surfxml_link_id);
   surf_parse_get_double(&bw_initial, A_surfxml_link_bandwidth);
   bw_trace = tmgr_trace_new(A_surfxml_link_bandwidth_file);
   surf_parse_get_double(&lat_initial, A_surfxml_link_latency);
   lat_trace = tmgr_trace_new(A_surfxml_link_latency_file);
 
-  xbt_assert0((A_surfxml_link_state == A_surfxml_link_state_ON)
+  xbt_assert((A_surfxml_link_state == A_surfxml_link_state_ON)
               || (A_surfxml_link_state ==
                   A_surfxml_link_state_OFF), "Invalid state");
   if (A_surfxml_link_state == A_surfxml_link_state_ON)
@@ -219,11 +275,11 @@ static void net_add_traces(void)
   xbt_dict_foreach(trace_connect_list_link_avail, cursor, trace_name, elm) {
     tmgr_trace_t trace = xbt_dict_get_or_null(traces_set_list, trace_name);
     link_CM02_t link =
-        xbt_dict_get_or_null(surf_network_model->resource_set, elm);
+        xbt_lib_get_or_null(link_lib, elm, SURF_LINK_LEVEL);
 
-    xbt_assert2(link, "Cannot connect trace %s to link %s: link undefined",
+    xbt_assert(link, "Cannot connect trace %s to link %s: link undefined",
                 trace_name, elm);
-    xbt_assert2(trace,
+    xbt_assert(trace,
                 "Cannot connect trace %s to link %s: trace undefined",
                 trace_name, elm);
 
@@ -234,11 +290,11 @@ static void net_add_traces(void)
   xbt_dict_foreach(trace_connect_list_bandwidth, cursor, trace_name, elm) {
     tmgr_trace_t trace = xbt_dict_get_or_null(traces_set_list, trace_name);
     link_CM02_t link =
-        xbt_dict_get_or_null(surf_network_model->resource_set, elm);
+               xbt_lib_get_or_null(link_lib, elm, SURF_LINK_LEVEL);
 
-    xbt_assert2(link, "Cannot connect trace %s to link %s: link undefined",
+    xbt_assert(link, "Cannot connect trace %s to link %s: link undefined",
                 trace_name, elm);
-    xbt_assert2(trace,
+    xbt_assert(trace,
                 "Cannot connect trace %s to link %s: trace undefined",
                 trace_name, elm);
 
@@ -249,11 +305,11 @@ static void net_add_traces(void)
   xbt_dict_foreach(trace_connect_list_latency, cursor, trace_name, elm) {
     tmgr_trace_t trace = xbt_dict_get_or_null(traces_set_list, trace_name);
     link_CM02_t link =
-        xbt_dict_get_or_null(surf_network_model->resource_set, elm);
+               xbt_lib_get_or_null(link_lib, elm, SURF_LINK_LEVEL);
 
-    xbt_assert2(link, "Cannot connect trace %s to link %s: link undefined",
+    xbt_assert(link, "Cannot connect trace %s to link %s: link undefined",
                 trace_name, elm);
-    xbt_assert2(trace,
+    xbt_assert(trace,
                 "Cannot connect trace %s to link %s: trace undefined",
                 trace_name, elm);
 
@@ -289,7 +345,7 @@ static int net_action_unref(surf_action_t action)
     if (action->category)
       xbt_free(action->category);
 #endif
-    free(action);
+    surf_action_free(&action);
     return 1;
   }
   return 0;
@@ -349,6 +405,8 @@ static double net_share_resources(double now)
     }
   }
 
+  XBT_DEBUG("Min of share resources %f", min);
+
   return min;
 }
 
@@ -379,19 +437,29 @@ static void net_update_actions_state(double now, double delta)
                                    action->weight);
     }
 #ifdef HAVE_TRACING
-    xbt_dynar_t route =
-        global_routing->get_route(action->src_name, action->dst_name);
-    link_CM02_t link;
-    unsigned int i;
-    xbt_dynar_foreach(route, i, link) {
-      TRACE_surf_link_set_utilization(link,
-                                      action->generic_action.data,
-                                      (surf_action_t) action,
-                                      lmm_variable_getvalue
-                                      (action->variable), now - delta,
-                                      delta);
+    if (TRACE_is_active()) {
+      xbt_dynar_t route = global_routing->get_route(action->src_name,
+                                                    action->dst_name);
+      link_CM02_t link;
+      unsigned int i;
+      xbt_dynar_foreach(route, i, link) {
+        TRACE_surf_link_set_utilization(link->lmm_resource.generic_resource.name,
+                                        action->generic_action.data,
+                                        (surf_action_t) action,
+                                        lmm_variable_getvalue
+                                        (action->variable), now - delta,
+                                        delta);
+      }
     }
 #endif
+    if(!lmm_get_number_of_cnst_from_var(network_maxmin_system, action->variable)) {
+                               /* There is actually no link used, hence an infinite bandwidth.
+                                * This happens often when using models like vivaldi.
+                                * In such case, just make sure that the action completes immediately.
+                                */
+       double_update(&(action->generic_action.remains),
+                       action->generic_action.remains);
+    }
     double_update(&(action->generic_action.remains),
                   lmm_variable_getvalue(action->variable) * deltap);
     if (action->generic_action.max_duration != NO_MAX_DURATION)
@@ -402,11 +470,13 @@ static void net_update_actions_state(double now, double delta)
       action->generic_action.finish = surf_get_clock();
       surf_network_model->action_state_set((surf_action_t) action,
                                            SURF_ACTION_DONE);
+      gap_remove(action);
     } else if ((action->generic_action.max_duration != NO_MAX_DURATION)
                && (action->generic_action.max_duration <= 0)) {
       action->generic_action.finish = surf_get_clock();
       surf_network_model->action_state_set((surf_action_t) action,
                                            SURF_ACTION_DONE);
+      gap_remove(action);
     }
   }
 
@@ -438,7 +508,7 @@ static void net_update_resource_state(void *id,
                                 (nw_link->lmm_resource.power.peak *
                                  nw_link->lmm_resource.power.scale));
 #ifdef HAVE_TRACING
-    TRACE_surf_link_set_bandwidth(date, nw_link,
+    TRACE_surf_link_set_bandwidth(date, nw_link->lmm_resource.generic_resource.name,
                                   sg_bandwidth_factor *
                                   (nw_link->lmm_resource.power.peak *
                                    nw_link->lmm_resource.power.scale));
@@ -480,9 +550,9 @@ static void net_update_resource_state(void *id,
                                                       action->lat_current)));
 
         if (action->rate < sg_tcp_gamma / (2.0 * action->lat_current)) {
-          INFO0("Flow is limited BYBANDWIDTH");
+          XBT_INFO("Flow is limited BYBANDWIDTH");
         } else {
-          INFO1("Flow is limited BYLATENCY, latency of flow is %f",
+          XBT_INFO("Flow is limited BYLATENCY, latency of flow is %f",
                 action->lat_current);
         }
       }
@@ -516,7 +586,7 @@ static void net_update_resource_state(void *id,
     if (tmgr_trace_event_free(event_type))
       nw_link->lmm_resource.state_event = NULL;
   } else {
-    CRITICAL0("Unknown event ! \n");
+    XBT_CRITICAL("Unknown event ! \n");
     xbt_abort();
   }
 
@@ -533,26 +603,29 @@ static surf_action_t net_communicate(const char *src_name,
   int failed = 0;
   surf_action_network_CM02_t action = NULL;
   double bandwidth_bound;
+  double latency=0.0;
   /* LARGE PLATFORMS HACK:
      Add a link_CM02_t *link and a int link_nb to network_card_CM02_t. It will represent local links for this node
      Use the cluster_id for ->id */
 
-  xbt_dynar_t route = global_routing->get_route(src_name, dst_name);
   xbt_dynar_t back_route = NULL;
   int constraints_per_variable = 0;
+  xbt_dynar_t route;
+  // I will need this route for some time so require for no cleanup
+  global_routing->get_route_latency(src_name, dst_name, &route, &latency, 0);
 
   if (sg_network_fullduplex == 1) {
-    back_route = global_routing->get_route(src_name, dst_name);
+    back_route = global_routing->get_route(dst_name, src_name);
   }
 
   /* LARGE PLATFORMS HACK:
      total_route_size = route_size + src->link_nb + dst->nb */
 
-  XBT_IN4("(%s,%s,%g,%g)", src_name, dst_name, size, rate);
+  XBT_IN("(%s,%s,%g,%g)", src_name, dst_name, size, rate);
   /* LARGE PLATFORMS HACK:
      assert on total_route_size */
-  xbt_assert2(xbt_dynar_length(route),
-              "You're trying to send data from %s to %s but there is no connection between these two hosts.",
+  xbt_assert(xbt_dynar_length(route) || latency,
+              "You're trying to send data from %s to %s but there is no connection at all between these two hosts.",
               src_name, dst_name);
 
   xbt_dynar_foreach(route, i, link) {
@@ -567,17 +640,15 @@ static surf_action_t net_communicate(const char *src_name,
 #ifdef HAVE_LATENCY_BOUND_TRACKING
   (action->generic_action).latency_limited = 0;
 #endif
+  action->weight = action->latency = latency;
 
   xbt_swag_insert(action, action->generic_action.state_set);
   action->rate = rate;
 
-  action->latency = 0.0;
-  action->weight = 0.0;
   bandwidth_bound = -1.0;
+
   xbt_dynar_foreach(route, i, link) {
-    action->latency += link->lat_current;
     action->weight +=
-        link->lat_current +
         sg_weight_S_parameter /
         (link->lmm_resource.power.peak * link->lmm_resource.power.scale);
     if (bandwidth_bound < 0.0)
@@ -599,6 +670,16 @@ static surf_action_t net_communicate(const char *src_name,
       (*bandwidth_constraint_callback) (action->rate, bandwidth_bound,
                                         size);
 
+  if(xbt_dynar_length(route) > 0) {
+    link = *(link_CM02_t*)xbt_dynar_get_ptr(route, 0);
+    gap_append(size, link, action);
+    XBT_DEBUG("Comm %p: %s -> %s gap=%f (lat=%f)",
+           action, src_name, dst_name, action->sender.gap, action->latency);
+  } else {
+    gap_unknown(action);
+  }
+
+
   /* LARGE PLATFORMS HACK:
      lmm_variable_new(..., total_route_size) */
   if (back_route != NULL) {
@@ -642,7 +723,7 @@ static surf_action_t net_communicate(const char *src_name,
   }
 
   if (sg_network_fullduplex == 1) {
-    DEBUG1("Fullduplex active adding backward flow using 5%c", '%');
+    XBT_DEBUG("Fullduplex active adding backward flow using 5%c", '%');
     xbt_dynar_foreach(back_route, i, link) {
       lmm_expand(network_maxmin_system, link->lmm_resource.constraint,
                  action->variable, .05);
@@ -651,14 +732,16 @@ static surf_action_t net_communicate(const char *src_name,
   /* LARGE PLATFORMS HACK:
      expand also with src->link and dst->link */
 #ifdef HAVE_TRACING
-  action->src_name = xbt_new(char, strlen(src_name) + 1);
-  strncpy(action->src_name, src_name, strlen(src_name) + 1);
-
-  action->dst_name = xbt_new(char, strlen(dst_name) + 1);
-  strncpy(action->dst_name, dst_name, strlen(dst_name) + 1);
+  if (TRACE_is_active()) {
+    action->src_name = xbt_strdup(src_name);
+    action->dst_name = xbt_strdup(dst_name);
+  } else {
+    action->src_name = action->dst_name = NULL;
+  }
 #endif
 
-  XBT_OUT;
+  xbt_dynar_free(&route);
+  XBT_OUT();
 
   return (surf_action_t) action;
 }
@@ -715,6 +798,13 @@ void net_action_set_max_duration(surf_action_t action, double duration)
   action->max_duration = duration;
 }
 
+#ifdef HAVE_TRACING
+/**
+ * Attach a tracing category to `action`, storing a private copy of `category`.
+ * A category attached earlier is released, so repeated calls do not leak.
+ */
+static void net_action_set_category(surf_action_t action, const char *category)
+{
+  /* Duplicate before freeing: `category` may alias the string being replaced. */
+  char *copy = xbt_strdup (category);
+
+  xbt_free(action->category);
+  action->category = copy;
+}
+#endif
+
 static void net_finalize(void)
 {
   surf_model_exit(surf_network_model);
@@ -750,7 +840,10 @@ static void surf_network_model_init_internal(void)
   surf_network_model->suspend = net_action_suspend;
   surf_network_model->resume = net_action_resume;
   surf_network_model->is_suspended = net_action_is_suspended;
-  surf_cpu_model->set_max_duration = net_action_set_max_duration;
+  surf_network_model->set_max_duration = net_action_set_max_duration;
+#ifdef HAVE_TRACING
+  surf_network_model->set_category = net_action_set_category;
+#endif
 
   surf_network_model->extension.network.communicate = net_communicate;
   surf_network_model->extension.network.get_route = net_get_route;
@@ -770,7 +863,8 @@ static void surf_network_model_init_internal(void)
                        net_link_new(xbt_strdup("__loopback__"),
                                     498000000, NULL, 0.000015, NULL,
                                     SURF_RESOURCE_ON, NULL,
-                                    SURF_LINK_FATPIPE, NULL));
+                                    SURF_LINK_FATPIPE, NULL),
+                      net_get_link_latency);
 }
 
 
@@ -791,6 +885,7 @@ void surf_network_model_init_SMPI(const char *filename)
   xbt_dynar_push(model_list, &surf_network_model);
   network_solve = lmm_solve;
 
+  xbt_cfg_setdefault_double(_surf_cfg_set, "network/sender_gap", 10e-6);
   xbt_cfg_setdefault_double(_surf_cfg_set, "network/weight_S", 8775);
 
   update_model_description(surf_network_model_description,