Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Dynamic factors: implement and test
author Bruno Donassolo <bruno.donassolo@inria.fr>
Mon, 17 May 2021 17:26:18 +0000 (19:26 +0200)
committer Bruno Donassolo <bruno.donassolo@inria.fr>
Mon, 24 May 2021 07:37:16 +0000 (09:37 +0200)
Fully implements the dynamic factors through the NetworkModelIntf API.

Added an example showing its usage.

Added UT and doc.

MANIFEST.in
examples/cpp/CMakeLists.txt
examples/cpp/network-factors/s4u-network-factors.cpp [new file with mode: 0644]
examples/cpp/network-factors/s4u-network-factors.tesh [new file with mode: 0644]
include/simgrid/kernel/resource/NetworkModelIntf.hpp
src/kernel/resource/NetworkModelIntf_test.cpp [new file with mode: 0644]
src/surf/network_cm02.cpp
tools/cmake/Tests.cmake

index c264244..c73898a 100644 (file)
@@ -274,6 +274,8 @@ include examples/cpp/mc-electric-fence/s4u-mc-electric-fence.cpp
 include examples/cpp/mc-electric-fence/s4u-mc-electric-fence.tesh
 include examples/cpp/mc-failing-assert/s4u-mc-failing-assert.cpp
 include examples/cpp/mc-failing-assert/s4u-mc-failing-assert.tesh
+include examples/cpp/network-factors/s4u-network-factors.cpp
+include examples/cpp/network-factors/s4u-network-factors.tesh
 include examples/cpp/network-ns3-wifi/s4u-network-ns3-wifi.cpp
 include examples/cpp/network-ns3-wifi/s4u-network-ns3-wifi.tesh
 include examples/cpp/network-ns3/3hosts_2links_d.xml
@@ -2203,6 +2205,7 @@ include src/kernel/resource/Action.cpp
 include src/kernel/resource/DiskImpl.cpp
 include src/kernel/resource/DiskImpl.hpp
 include src/kernel/resource/Model.cpp
+include src/kernel/resource/NetworkModelIntf_test.cpp
 include src/kernel/resource/profile/DatedValue.cpp
 include src/kernel/resource/profile/DatedValue.hpp
 include src/kernel/resource/profile/Event.hpp
index 1c58818..0260d9c 100644 (file)
@@ -80,7 +80,7 @@ foreach (example actor-create actor-daemon actor-exiting actor-join actor-kill
                  replay-comm replay-io
                  routing-get-clusters
                  synchro-barrier synchro-condition-variable synchro-condition-variable-waituntil synchro-mutex synchro-semaphore
-                 clusters-multicpu)
+                 clusters-multicpu network-factors)
 
   # Use default source file unless specified otherwise
   if(NOT DEFINED _${example}_sources)
diff --git a/examples/cpp/network-factors/s4u-network-factors.cpp b/examples/cpp/network-factors/s4u-network-factors.cpp
new file mode 100644 (file)
index 0000000..a5a5c9a
--- /dev/null
@@ -0,0 +1,234 @@
+/* Copyright (c) 2010-2021. The SimGrid Team. All rights reserved.          */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* This example shows how to set customized communication factors
+ *
+ * It uses the interface provided by NetworkModelIntf to register 2 callbacks that
+ * are called every time a communication occurs.
+ *
+ * These factors are used to change the communication time depending on the message size
+ * and destination.
+ *
+ * This example uses factors obtained by some experiments on dahu cluster in Grid'5000.
+ * You must change the values according to the calibration of your environment.
+ */
+
+#include <map>
+#include <simgrid/kernel/resource/NetworkModelIntf.hpp>
+#include <simgrid/s4u.hpp>
+namespace sg4 = simgrid::s4u;
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_network_factors, "Messages specific for this s4u example");
+
+/* Factors used in this platform for remote and local communications, obtained from the dahu cluster.
+ * Each map associates a message-size threshold (key) with the factor (value) that get_factor_from_map()
+ * returns for sizes at or above it. Note: example values only; recalibrate them for your environment. */
+static const std::map<double, double> REMOTE_BW_FACTOR = {
+    {0, 1.0000000000000002},         {8000, 1.0000000000000002},     {15798, 0.07435006650635523},
+    {64000, 0.3163352696348148},     {6000000, 0.13003278960133288}, {42672591, 0.10354740223279707},
+    {160097505, 0.40258935729656503}};
+static const std::map<double, double> LOCAL_BW_FACTOR = {{0, 0.17591906192813994},
+                                                         {16000, 0.12119203247138953},
+                                                         {6000000, 0.07551057012803415},
+                                                         {36900419, 0.04281516758309203},
+                                                         {160097505, 0.17440518795992602}};
+
+static const std::map<double, double> REMOTE_LAT_FACTOR = {{0, 0.0},
+                                                           {8000, 1731.7102918851567},
+                                                           {15798, 1441.073993161278},
+                                                           {64000, 1761.4784830658123},
+                                                           {6000000, 0.0},
+                                                           {42672591, 0.0},
+                                                           {160097505, 970913.4558162984}};
+static const std::map<double, double> LOCAL_LAT_FACTOR  = {
+    {0, 0.0}, {16000, 650.2212383180362}, {6000000, 0.0}, {36900419, 0.0}, {160097505, 1017885.3518765072}};
+
+/* Nominal link bandwidths and latency used when building the platform in load_platform() */
+constexpr static double BW_REMOTE = 12.5e9; // per-host up/down links (drawn as "12.5GBps links" in load_platform)
+constexpr static double BW_LOCAL  = 25e9;   // per-host loopback link
+constexpr static double LATENCY   = .1e-6;  // latency set on every created link; presumably seconds -- TODO confirm
+
+/*************************************************************************************************/
+/** @brief Create a star zone "dahu" with 32 hosts, each owning an up, a down and a loopback link (e is unused) */
+static void load_platform(const sg4::Engine& e)
+{
+  /**
+   * Inspired by the dahu cluster in Grenoble
+   *     ________________
+   *     |               |
+   *     |     dahu      |
+   *     |_______________|
+   *     / /   | |    \ \
+   *    / /    | |     \ \     <-- 12.5GBps links
+   *   / /     | |      \ \
+   * host1     ...      hostN
+   */
+
+  auto* root         = sg4::create_star_zone("dahu");
+  std::string prefix = "dahu-", suffix = ".grid5000.fr";
+
+  for (int id = 0; id < 32; id++) { // hosts are named dahu-0.grid5000.fr ... dahu-31.grid5000.fr
+    std::string hostname = prefix + std::to_string(id) + suffix;
+    /* create a 32-core host; the second create_host argument is presumably its speed -- TODO confirm */
+    sg4::Host* host = root->create_host(hostname, 1)->set_core_count(32)->seal();
+    /* create the UP and DOWN links of this host, both with BW_REMOTE bandwidth and LATENCY latency */
+    sg4::Link* l_up   = root->create_link(hostname + "_up", BW_REMOTE)->set_latency(LATENCY)->seal();
+    sg4::Link* l_down = root->create_link(hostname + "_down", BW_REMOTE)->set_latency(LATENCY)->seal();
+
+    /* UP link: route leaving the host; DOWN link: route reaching it (trailing false: presumably non-symmetric) */
+    root->add_route(host->get_netpoint(), nullptr, nullptr, nullptr, std::vector<sg4::Link*>{l_up}, false);
+    root->add_route(nullptr, host->get_netpoint(), nullptr, nullptr, std::vector<sg4::Link*>{l_down}, false);
+
+    sg4::Link* loopback = root->create_link(hostname + "_loopback", BW_LOCAL)->set_latency(LATENCY)->seal();
+    root->add_route(host->get_netpoint(), host->get_netpoint(), nullptr, nullptr, std::vector<sg4::Link*>{loopback}); // host-to-itself route
+  }
+
+  root->seal(); // seal the zone once all hosts, links and routes are declared
+}
+
+/*************************************************************************************************/
+/** @brief Return the value of the largest key of @p factors that is <= @p size, or 1.0 if there is none */
+static double get_factor_from_map(const std::map<double, double>& factors, double size)
+{
+  double factor = 1.0; // default, kept when the map is empty or size is below every key
+  for (auto const& fact : factors) { // std::map iterates in increasing key order
+    if (size < fact.first) {
+      break; // first threshold above size: the factor recorded so far is the answer
+    } else {
+      factor = fact.second; // threshold reached: remember its factor and keep scanning
+    }
+  }
+  return factor;
+}
+
+/**
+ * @brief Callback computing the latency factor of a communication
+ *
+ * Uses LOCAL_LAT_FACTOR when source and destination have the same host name (loopback communication)
+ * and REMOTE_LAT_FACTOR otherwise; the links and netzones arguments are ignored.
+ * The signature is imposed by NetworkModelIntf::NetworkFactorCb.
+ * @param size Message size, used as lookup key by get_factor_from_map()
+ * @param src Host origin
+ * @param dst Host destination
+ * @return Factor found for this size */
+static double latency_factor_cb(double size, const sg4::Host* src, const sg4::Host* dst,
+                                const std::vector<sg4::Link*>& /*links*/,
+                                const std::unordered_set<sg4::NetZone*>& /*netzones*/)
+{
+  if (src->get_name() == dst->get_name()) {
+    /* local communication factors */
+    return get_factor_from_map(LOCAL_LAT_FACTOR, size);
+  } else {
+    return get_factor_from_map(REMOTE_LAT_FACTOR, size); // different hosts: remote communication factors
+  }
+}
+
+/**
+ * @brief Callback computing the bandwidth factor of a communication
+ *
+ * Uses LOCAL_BW_FACTOR when source and destination have the same host name (loopback communication)
+ * and REMOTE_BW_FACTOR otherwise; the links and netzones arguments are ignored.
+ * The signature is imposed by NetworkModelIntf::NetworkFactorCb.
+ * @param size Message size, used as lookup key by get_factor_from_map()
+ * @param src Host origin
+ * @param dst Host destination
+ * @return Factor found for this size */
+static double bandwidth_factor_cb(double size, const sg4::Host* src, const sg4::Host* dst,
+                                  const std::vector<sg4::Link*>& /*links*/,
+                                  const std::unordered_set<sg4::NetZone*>& /*netzones*/)
+{
+  if (src->get_name() == dst->get_name()) {
+    /* local communication factors */
+    return get_factor_from_map(LOCAL_BW_FACTOR, size);
+  } else {
+    return get_factor_from_map(REMOTE_BW_FACTOR, size); // different hosts: remote communication factors
+  }
+}
+
+/*************************************************************************************************/
+class Sender { // Sender actor: sends one message of each size to every host of hosts_, then a "finalize" message
+  std::vector<sg4::Host*> hosts_; // destination hosts; each one is reached through the mailbox named after it
+
+public:
+  explicit Sender(const std::vector<sg4::Host*>& hosts) : hosts_{hosts} {}
+  void operator()() const
+  {
+    const std::vector<double> msg_sizes = {64e3, 64e6, 64e9}; // 64KB, 64MB, 64GB
+
+    for (double size : msg_sizes) {
+      for (const auto* host : hosts_) {
+        std::string msg;
+        /* compute the estimated completion date from the message size and the destination (local or remote) */
+        if (host->get_name() == sg4::this_actor::get_host()->get_name()) {
+          double lat_factor = get_factor_from_map(LOCAL_LAT_FACTOR, size);
+          double bw_factor  = get_factor_from_map(LOCAL_BW_FACTOR, size);
+          double est_time   = sg4::Engine::get_clock() + size / (BW_LOCAL * bw_factor) + LATENCY * lat_factor; // now + transfer + latency
+
+          msg = "Local communication: size=" + std::to_string(size) + ". Use bw_factor=" + std::to_string(bw_factor) +
+                " lat_factor=" + std::to_string(lat_factor) + ". Estimated finished time=" + std::to_string(est_time);
+        } else {
+          double lat_factor = get_factor_from_map(REMOTE_LAT_FACTOR, size);
+          double bw_factor  = get_factor_from_map(REMOTE_BW_FACTOR, size);
+          double est_time   = sg4::Engine::get_clock() + (size / (BW_REMOTE * bw_factor)) + LATENCY * lat_factor * 2; // x2: presumably one up and one down link on the route -- TODO confirm
+          msg = "Remote communication: size=" + std::to_string(size) + ". Use bw_factor=" + std::to_string(bw_factor) +
+                " lat_factor=" + std::to_string(lat_factor) + ". Estimated finished time=" + std::to_string(est_time);
+        }
+
+        /* Send the description as payload of a communication whose simulated size is `size` */
+        auto mbox     = sg4::Mailbox::by_name(host->get_name());
+        auto* payload = new std::string(msg); // heap-allocated; Receiver retrieves it with get_unique<std::string>()
+        mbox->put(payload, size);
+      }
+    }
+
+    XBT_INFO("Done dispatching all messages");
+    /* sending message to stop receivers */
+    for (const auto* host : hosts_) {
+      auto mbox = sg4::Mailbox::by_name(host->get_name());
+      mbox->put(new std::string("finalize"), 0); // "finalize" is the stop marker tested by Receiver
+    }
+  }
+};
+
+/* Receiver actor: logs every message received on the mailbox named after its host until it gets "finalize" */
+class Receiver {
+public:
+  void operator()() const
+  {
+    auto mbox = sg4::Mailbox::by_name(sg4::this_actor::get_host()->get_name());
+    // Keep receiving and logging messages; the "finalize" message is logged too, then the loop ends
+    for (bool cont = true; cont;) {
+      auto received = mbox->get_unique<std::string>();
+      XBT_INFO("I got a '%s'.", received->c_str());
+      cont = (*received != "finalize"); // If it's a finalize message, we're done
+    }
+  }
+};
+
+/*************************************************************************************************/
+int main(int argc, char* argv[]) // Builds the platform, registers the factor callbacks, starts 3 actors and runs
+{
+  sg4::Engine e(&argc, argv);
+  /* explicitly select the CM02 network model */
+  e.set_config("network/model:CM02");
+
+  /* create platform */
+  load_platform(e);
+  /* register the latency and bandwidth factor callbacks on the root netzone's network model */
+  simgrid::kernel::resource::NetworkModelIntf* model = e.get_netzone_root()->get_network_model();
+  model->set_lat_factor_cb(latency_factor_cb);
+  model->set_bw_factor_cb(bandwidth_factor_cb);
+
+  sg4::Host* host        = e.host_by_name("dahu-1.grid5000.fr"); // hosts both the sender and the local receiver
+  sg4::Host* host_remote = e.host_by_name("dahu-10.grid5000.fr"); // hosts the remote receiver
+  sg4::Actor::create(std::string("receiver-local"), host, Receiver());
+  sg4::Actor::create(std::string("receiver-remote"), host_remote, Receiver());
+  sg4::Actor::create(std::string("sender") + std::string(host->get_name()), host, Sender({host, host_remote})); // sends to both hosts
+
+  /* runs the simulation */
+  e.run();
+
+  return 0;
+}
diff --git a/examples/cpp/network-factors/s4u-network-factors.tesh b/examples/cpp/network-factors/s4u-network-factors.tesh
new file mode 100644 (file)
index 0000000..86d7a07
--- /dev/null
@@ -0,0 +1,11 @@
+$ ${bindir:=.}/s4u-network-factors "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
+> [  0.000000] (0:maestro@) Configuration change: Set 'network/model' to 'CM02'
+> [  0.000086] (1:receiver-local@dahu-1.grid5000.fr) I got a 'Local communication: size=64000.000000. Use bw_factor=0.121192 lat_factor=650.221238. Estimated finished time=0.000086'.
+> [  0.000455] (2:receiver-remote@dahu-10.grid5000.fr) I got a 'Remote communication: size=64000.000000. Use bw_factor=0.316335 lat_factor=1761.478483. Estimated finished time=0.000455'.
+> [  0.060247] (1:receiver-local@dahu-1.grid5000.fr) I got a 'Local communication: size=64000000.000000. Use bw_factor=0.042815 lat_factor=0.000000. Estimated finished time=0.060247'.
+> [  0.109692] (2:receiver-remote@dahu-10.grid5000.fr) I got a 'Remote communication: size=64000000.000000. Use bw_factor=0.103547 lat_factor=0.000000. Estimated finished time=0.109692'.
+> [ 14.889943] (1:receiver-local@dahu-1.grid5000.fr) I got a 'Local communication: size=64000000000.000000. Use bw_factor=0.174405 lat_factor=1017885.351877. Estimated finished time=14.889943'.
+> [ 27.801800] (2:receiver-remote@dahu-10.grid5000.fr) I got a 'Remote communication: size=64000000000.000000. Use bw_factor=0.402589 lat_factor=970913.455816. Estimated finished time=27.801800'.
+> [ 27.801800] (3:senderdahu-1.grid5000.fr@dahu-1.grid5000.fr) Done dispatching all messages
+> [ 27.801800] (1:receiver-local@dahu-1.grid5000.fr) I got a 'finalize'.
+> [ 27.801800] (2:receiver-remote@dahu-10.grid5000.fr) I got a 'finalize'.
index bafd98e..129009f 100644 (file)
@@ -6,8 +6,7 @@
 #ifndef SIMGRID_KERNEL_RESOURCE_NETWORKMODELINTF_HPP
 #define SIMGRID_KERNEL_RESOURCE_NETWORKMODELINTF_HPP
 
-#include <simgrid/s4u/Link.hpp>
-#include <simgrid/s4u/NetZone.hpp>
+#include <simgrid/forward.h>
 
 #include <unordered_set>
 #include <vector>
@@ -18,14 +17,30 @@ namespace resource {
 
 /** @ingroup SURF_interface
  * @brief Network Model interface class
- * @details Defines the methods that a Network model must implement
  */
 class XBT_PUBLIC NetworkModelIntf {
 public:
-  using NetworkFactorCb     = double(double size, const std::vector<s4u::Link*>& links,
+  /**
+   * @brief Callback to set the bandwidth and latency factors used in a communication
+   *
+   * This callback offers more flexibility when setting the network factors.
+   * It is an alternative to SimGrid's configs, such as network/latency-factors
+   * and network/bandwidth-factors.
+   *
+   * @param size Communication size in bytes
+   * @param src Source host
+   * @param dst Destination host
+   * @param links Vector with the links used in this communication
+   * @param netzones Set with NetZones involved in the comm
+   * @return Multiplicative factor
+   */
+  using NetworkFactorCb = double(double size, const s4u::Host* src, const s4u::Host* dst,
+                                 const std::vector<s4u::Link*>& links,
                                  const std::unordered_set<s4u::NetZone*>& netzones);
-  virtual void set_lat_factor_cb(const std::function<NetworkFactorCb>& cb)        = 0;
-  virtual void set_bw_factor_cb(const std::function<NetworkFactorCb>& cb)         = 0;
+  /** @brief Configure the latency factor callback */
+  virtual void set_lat_factor_cb(const std::function<NetworkFactorCb>& cb) = 0;
+  /** @brief Configure the bandwidth factor callback */
+  virtual void set_bw_factor_cb(const std::function<NetworkFactorCb>& cb) = 0;
 };
 
 } // namespace resource
diff --git a/src/kernel/resource/NetworkModelIntf_test.cpp b/src/kernel/resource/NetworkModelIntf_test.cpp
new file mode 100644 (file)
index 0000000..b4eb9f3
--- /dev/null
@@ -0,0 +1,73 @@
+/* Copyright (c) 2017-2021. The SimGrid Team. All rights reserved.               */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "catch.hpp"
+
+#include "simgrid/kernel/resource/NetworkModelIntf.hpp"
+#include "simgrid/s4u/Engine.hpp"
+#include "simgrid/sg_config.hpp"
+
+static double factor_cb(double, const simgrid::s4u::Host*, const simgrid::s4u::Host*,
+                        const std::vector<simgrid::s4u::Link*>&, const std::unordered_set<simgrid::s4u::NetZone*>&)
+{ // Non-empty callback matching NetworkFactorCb, used by the tests below; ignores all its arguments
+  return 1.0;
+}
+
+TEST_CASE("kernel::resource::NetworkModelIntf: Factors invalid callbacks: exception", "")
+{ // For each listed network model, setting an empty or null factor callback must throw std::invalid_argument
+  for (const auto& model : std::vector<std::string>{"LV08", "SMPI", "IB", "CM02"}) {
+    _sg_cfg_init_status = 0; /* HACK: clear config global to be able to do set_config in UTs */
+    simgrid::s4u::Engine e("test");
+    e.set_config("network/model:" + model);
+    simgrid::s4u::create_full_zone("root"); // a root zone is needed for get_netzone_root() below
+
+    SECTION("Model: " + model)
+    {
+      simgrid::kernel::resource::NetworkModelIntf* model = e.get_netzone_root()->get_network_model(); // NOTE(review): shadows the loop's string `model`
+      REQUIRE_THROWS_AS(model->set_lat_factor_cb({}), std::invalid_argument); // empty std::function
+      REQUIRE_THROWS_AS(model->set_lat_factor_cb(nullptr), std::invalid_argument); // null callback
+      REQUIRE_THROWS_AS(model->set_bw_factor_cb({}), std::invalid_argument);
+      REQUIRE_THROWS_AS(model->set_bw_factor_cb(nullptr), std::invalid_argument);
+    }
+  }
+}
+
+TEST_CASE("kernel::resource::NetworkModelIntf: Invalid network/latency-factor and network/bandwidth-factor", "")
+{ // With network/latency-factor and network/bandwidth-factor configured, setting a callback must throw
+  for (const auto& model : std::vector<std::string>{"LV08", "CM02"}) {
+    _sg_cfg_init_status = 0; /* HACK: clear config global to be able to do set_config in UTs */
+    simgrid::s4u::Engine e("test");
+    e.set_config("network/model:" + model);
+    e.set_config("network/latency-factor:10"); // factor given through the configuration ...
+    e.set_config("network/bandwidth-factor:0.3"); // ... which the callbacks set below are expected to conflict with
+    simgrid::s4u::create_full_zone("root"); // a root zone is needed for get_netzone_root() below
+
+    SECTION("Model: " + model)
+    {
+      simgrid::kernel::resource::NetworkModelIntf* model = e.get_netzone_root()->get_network_model(); // NOTE(review): shadows the loop's string `model`
+      REQUIRE_THROWS_AS(model->set_lat_factor_cb(factor_cb), std::invalid_argument); // even a valid callback is rejected
+      REQUIRE_THROWS_AS(model->set_bw_factor_cb(factor_cb), std::invalid_argument);
+    }
+  }
+}
+
+TEST_CASE("kernel::resource::NetworkModelIntf: Invalid smpi/lat-factor and smpi/bw-factor", "")
+{ // With smpi/lat-factor and smpi/bw-factor configured, setting a callback must throw
+  for (const auto& model : std::vector<std::string>{"SMPI", "IB"}) {
+    _sg_cfg_init_status = 0; /* HACK: clear config global to be able to do set_config in UTs */
+    simgrid::s4u::Engine e("test");
+    e.set_config("network/model:" + model);
+    e.set_config("smpi/lat-factor:65472:0.940694;15424:0.697866;9376:0.58729;5776:1.08739;3484:0.77493"); // factors given through the configuration ...
+    e.set_config("smpi/bw-factor:65472:11.6436;15424:3.48845"); // ... which the callbacks set below are expected to conflict with
+    simgrid::s4u::create_full_zone("root"); // a root zone is needed for get_netzone_root() below
+
+    SECTION("Model: " + model)
+    {
+      simgrid::kernel::resource::NetworkModelIntf* model = e.get_netzone_root()->get_network_model(); // NOTE(review): shadows the loop's string `model`
+      REQUIRE_THROWS_AS(model->set_lat_factor_cb(factor_cb), std::invalid_argument); // even a valid callback is rejected
+      REQUIRE_THROWS_AS(model->set_bw_factor_cb(factor_cb), std::invalid_argument);
+    }
+  }
+}
index 08bd06a..9c88a3f 100644 (file)
@@ -204,7 +204,9 @@ Action* NetworkCm02Model::communicate(s4u::Host* src, s4u::Host* dst, double siz
   double latency = 0.0;
   std::vector<LinkImpl*> back_route;
   std::vector<LinkImpl*> route;
+  std::vector<s4u::Link*> s4u_route;
   std::unordered_set<kernel::routing::NetZoneImpl*> netzones;
+  std::unordered_set<s4u::NetZone*> s4u_netzones;
 
   XBT_IN("(%s,%s,%g,%g)", src->get_cname(), dst->get_cname(), size, rate);
 
@@ -268,14 +270,37 @@ Action* NetworkCm02Model::communicate(s4u::Host* src, s4u::Host* dst, double siz
         });
   }
 
-  double bandwidth_bound = route.empty() ? -1.0 : get_bandwidth_factor(size) * route.front()->get_bandwidth();
+  /* transform data to user structures if necessary */
+  if (lat_factor_cb_ || bw_factor_cb_) {
+    std::for_each(route.begin(), route.end(), [&s4u_route](LinkImpl* l) { s4u_route.push_back(l->get_iface()); });
+    std::for_each(netzones.begin(), netzones.end(),
+                  [&s4u_netzones](kernel::routing::NetZoneImpl* n) { s4u_netzones.insert(n->get_iface()); });
+  }
+  double bw_factor;
+  if (bw_factor_cb_) {
+    bw_factor = bw_factor_cb_(size, src, dst, s4u_route, s4u_netzones);
+  } else {
+    bw_factor = get_bandwidth_factor(size);
+  }
+
+  double bandwidth_bound = route.empty() ? -1.0 : bw_factor * route.front()->get_bandwidth();
 
   for (auto const& link : route)
-    bandwidth_bound = std::min(bandwidth_bound, get_bandwidth_factor(size) * link->get_bandwidth());
+    bandwidth_bound = std::min(bandwidth_bound, bw_factor * link->get_bandwidth());
 
   action->lat_current_ = action->latency_;
-  action->latency_ *= get_latency_factor(size);
-  action->set_user_bound(get_bandwidth_constraint(action->get_user_bound(), bandwidth_bound, size));
+  if (lat_factor_cb_) {
+    action->latency_ *= lat_factor_cb_(size, src, dst, s4u_route, s4u_netzones);
+  } else {
+    action->latency_ *= get_latency_factor(size);
+  }
+
+  if (bw_constraint_cb_) {
+    action->set_user_bound(
+        bw_constraint_cb_(action->get_user_bound(), bandwidth_bound, size, src, dst, s4u_route, s4u_netzones));
+  } else {
+    action->set_user_bound(get_bandwidth_constraint(action->get_user_bound(), bandwidth_bound, size));
+  }
 
   size_t constraints_per_variable = route.size();
   constraints_per_variable += back_route.size();
index b732dc3..2c6ea4c 100644 (file)
@@ -123,6 +123,7 @@ ENDIF()
 
 # New tests should use the Catch Framework
 set(UNIT_TESTS  src/xbt/unit-tests_main.cpp
+                src/kernel/resource/NetworkModelIntf_test.cpp
                 src/kernel/resource/profile/Profile_test.cpp
                 src/kernel/routing/DijkstraZone_test.cpp
                 src/kernel/routing/DragonflyZone_test.cpp