Fix multicore ptask with communications

author Bruno Donassolo <bruno.donassolo@inria.fr>

Fri, 24 Dec 2021 18:28:52 +0000 (19:28 +0100)

committer Bruno Donassolo <bruno.donassolo@inria.fr>

Fri, 24 Dec 2021 18:45:31 +0000 (19:45 +0100)
author Bruno Donassolo <bruno.donassolo@inria.fr>
Fri, 24 Dec 2021 18:28:52 +0000 (19:28 +0100)
committer Bruno Donassolo <bruno.donassolo@inria.fr>
Fri, 24 Dec 2021 18:45:31 +0000 (19:45 +0100)
diff --git a/src/surf/ptask_L07.cpp b/src/surf/ptask_L07.cpp

index 60f1625..fcdc0eb 100644 (file)
--- a/src/surf/ptask_L07.cpp
+++ b/src/surf/ptask_L07.cpp
@@ -191,16 +191,10 @@ L07Action::L07Action(Model* model, const std::vector<s4u::Host*>& host_list, con
  
    /* Expand it for the CPUs even if there is nothing to compute, to make sure that it gets expended even if there is no
     * communication either */
-  double bound = std::numeric_limits<double>::max();
    for (size_t i = 0; i < host_list.size(); i++) {
      model->get_maxmin_system()->expand(host_list[i]->get_cpu()->get_constraint(), get_variable(),
                                         (flops_amount == nullptr ? 0.0 : flops_amount[i]));
-    if (flops_amount && flops_amount[i] > 0)
-      bound = std::min(bound, host_list[i]->get_cpu()->get_speed(1.0) * host_list[i]->get_cpu()->get_speed_ratio() /
-                                  flops_amount[i]);
    }
-  if (bound < std::numeric_limits<double>::max())
-    model->get_maxmin_system()->update_variable_bound(get_variable(), bound);
  
    if (bytes_amount != nullptr) {
      for (size_t k = 0; k < host_list.size() * host_list.size(); k++) {
@@ -218,6 +212,8 @@ L07Action::L07Action(Model* model, const std::vector<s4u::Host*>& host_list, con
      this->set_cost(1.0);
      this->set_remains(0.0);
    }
+  /* finally calculate the initial bound value */
+  updateBound();
  }
  
  Action* NetworkL07Model::communicate(s4u::Host* src, s4u::Host* dst, double size, double rate)
@@ -285,10 +281,10 @@ void CpuL07::on_speed_change()
    const lmm::Element* elem = nullptr;
  
    get_model()->get_maxmin_system()->update_constraint_bound(get_constraint(), get_core_count() * speed_.peak * speed_.scale);
-  while (const auto* var = get_constraint()->get_variable(&elem)) {
-    const Action* action = var->get_id();
  
-    get_model()->get_maxmin_system()->update_variable_bound(action->get_variable(), speed_.scale * speed_.peak);
+  while (const auto* var = get_constraint()->get_variable(&elem)) {
+    auto* action = static_cast<L07Action*>(var->get_id());
+    action->updateBound();
    }
  
    CpuImpl::on_speed_change();
@@ -378,9 +374,10 @@ L07Action::~L07Action()
    }
  }
  
-void L07Action::updateBound()
+double L07Action::calculateNetworkBound()
  {
    double lat_current = 0.0;
+  double lat_bound   = std::numeric_limits<double>::max();
  
    size_t host_count = hostList_.size();
  
@@ -397,13 +394,36 @@ void L07Action::updateBound()
        }
      }
    }
-  double lat_bound = NetworkModel::cfg_tcp_gamma / (2.0 * lat_current);
-  XBT_DEBUG("action (%p) : lat_bound = %g", this, lat_bound);
-  if ((latency_ <= 0.0) && is_running()) {
+  if (lat_current > 0) {
+    lat_bound = NetworkModel::cfg_tcp_gamma / (2.0 * lat_current);
+  }
+  return lat_bound;
+}
+
+double L07Action::calculateCpuBound()
+{
+  double cpu_bound = std::numeric_limits<double>::max();
+  for (size_t i = 0; i < hostList_.size(); i++) {
+    if (computationAmount_ && computationAmount_[i] > 0) {
+      cpu_bound = std::min(cpu_bound, hostList_[i]->get_cpu()->get_speed(1.0) *
+                                          hostList_[i]->get_cpu()->get_speed_ratio() / computationAmount_[i]);
+    }
+  }
+  return cpu_bound;
+}
+
+void L07Action::updateBound()
+{
+  double bound = std::min(calculateNetworkBound(), calculateCpuBound());
+
+  XBT_DEBUG("action (%p) : bound = %g", this, bound);
+
+  /* The latency has been paid (or there is none): apply the tighter of the network and CPU (multicore) bounds */
+  if ((bound < std::numeric_limits<double>::max()) && (latency_ <= 0.0)) {
      if (rate_ < 0)
-      get_model()->get_maxmin_system()->update_variable_bound(get_variable(), lat_bound);
+      get_model()->get_maxmin_system()->update_variable_bound(get_variable(), bound);
      else
-      get_model()->get_maxmin_system()->update_variable_bound(get_variable(), std::min(rate_, lat_bound));
+      get_model()->get_maxmin_system()->update_variable_bound(get_variable(), std::min(rate_, bound));
    }
  }
  
diff --git a/src/surf/ptask_L07.hpp b/src/surf/ptask_L07.hpp

index 7187117..7e39862 100644 (file)
--- a/src/surf/ptask_L07.hpp
+++ b/src/surf/ptask_L07.hpp
@@ -131,6 +131,21 @@ class L07Action : public CpuAction {
    friend CpuAction* HostL07Model::execute_parallel(const std::vector<s4u::Host*>& host_list, const double* flops_amount,
                                                     const double* bytes_amount, double rate);
    friend Action* NetworkL07Model::communicate(s4u::Host* src, s4u::Host* dst, double size, double rate);
+  /**
+   * @brief Calculate the CPU bound for the parallel task
+   *
+   * The task is bounded by the slowest CPU running the ptask, considering the current pstate of each CPU.
+   * Returns std::numeric_limits<double>::max() if the ptask has no computation.
+   */
+  double calculateCpuBound();
+
+  /**
+   * @brief Calculate the network bound for the parallel task
+   *
+   * The network bound depends on the largest latency among the communications in the ptask.
+   * Returns std::numeric_limits<double>::max() if the latency is 0 (or the ptask doesn't have any communication).
+   */
+  double calculateNetworkBound();
  
  public:
    L07Action() = delete;
author	Bruno Donassolo <bruno.donassolo@inria.fr>
	Fri, 24 Dec 2021 18:28:52 +0000 (19:28 +0100)
committer	Bruno Donassolo <bruno.donassolo@inria.fr>
	Fri, 24 Dec 2021 18:45:31 +0000 (19:45 +0100)
src/surf/ptask_L07.cpp		patch \| blob \| history
src/surf/ptask_L07.hpp		patch \| blob \| history