Implement Comm::wait_all_for (wait_all with a timeout).

author Arnaud Giersch <arnaud.giersch@univ-fcomte.fr>

Mon, 21 Jun 2021 15:04:20 +0000 (17:04 +0200)

committer Arnaud Giersch <arnaud.giersch@univ-fcomte.fr>

Tue, 22 Jun 2021 19:58:22 +0000 (21:58 +0200)
author Arnaud Giersch <arnaud.giersch@univ-fcomte.fr>
Mon, 21 Jun 2021 15:04:20 +0000 (17:04 +0200)
committer Arnaud Giersch <arnaud.giersch@univ-fcomte.fr>
Tue, 22 Jun 2021 19:58:22 +0000 (21:58 +0200)
diff --git a/ChangeLog b/ChangeLog

index dee30f0..1d8b498 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -8,6 +8,7 @@ New features:
     associated example (examples/cpp/plugin-prodcons).
  
  S4U:
     associated example (examples/cpp/plugin-prodcons).
  
  S4U:
+ - New: simgrid::s4u::Comm::wait_all_for() (like Comm::wait_all, but with a timeout).
   - Fixed a bug where Activity::wait_for() killed the activity on timeout.
     Explicitly cancel the activity to get back to previous behavior.
  
   - Fixed a bug where Activity::wait_for() killed the activity on timeout.
     Explicitly cancel the activity to get back to previous behavior.
  
@@ -32,7 +33,7 @@ SMPI:
      It should now work with smpi/auto-shared-malloc-thresh.
    - Improve error handling and reporting in multiple places
    - Improve correctness checks on the MPI code.(MPI_Op and MPI_Datatype
      It should now work with smpi/auto-shared-malloc-thresh.
    - Improve error handling and reporting in multiple places
    - Improve correctness checks on the MPI code.(MPI_Op and MPI_Datatype
-    validity checks, truncated messages are now an error, return errors 
+    validity checks, truncated messages are now an error, return errors
      when explicitely deleted handles are reused, ...)
    - RMA: multiple fixes and stability improvements.
    - analysis (-analyze flag in smpirun):
      when explicitely deleted handles are reused, ...)
    - RMA: multiple fixes and stability improvements.
    - analysis (-analyze flag in smpirun):
diff --git a/include/simgrid/comm.h b/include/simgrid/comm.h

index 540cab4..093cf13 100644 (file)
--- a/include/simgrid/comm.h
+++ b/include/simgrid/comm.h
@@ -17,6 +17,7 @@ XBT_PUBLIC int sg_comm_test(sg_comm_t comm);
  XBT_PUBLIC sg_error_t sg_comm_wait(sg_comm_t comm);
  XBT_PUBLIC sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout);
  XBT_PUBLIC void sg_comm_wait_all(sg_comm_t* comms, size_t count);
  XBT_PUBLIC sg_error_t sg_comm_wait(sg_comm_t comm);
  XBT_PUBLIC sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout);
  XBT_PUBLIC void sg_comm_wait_all(sg_comm_t* comms, size_t count);
+XBT_PUBLIC size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout);
  XBT_PUBLIC int sg_comm_wait_any_for(sg_comm_t* comms, size_t count, double timeout);
  XBT_PUBLIC int sg_comm_wait_any(sg_comm_t* comms, size_t count);
  XBT_PUBLIC void sg_comm_unref(sg_comm_t comm);
  XBT_PUBLIC int sg_comm_wait_any_for(sg_comm_t* comms, size_t count, double timeout);
  XBT_PUBLIC int sg_comm_wait_any(sg_comm_t* comms, size_t count);
  XBT_PUBLIC void sg_comm_unref(sg_comm_t comm);
diff --git a/include/simgrid/s4u/Comm.hpp b/include/simgrid/s4u/Comm.hpp

index 9fb2288..946b175 100644 (file)
--- a/include/simgrid/s4u/Comm.hpp
+++ b/include/simgrid/s4u/Comm.hpp
@@ -75,6 +75,9 @@ public:
  
    /*! take a vector s4u::CommPtr and return when all of them is finished. */
    static void wait_all(const std::vector<CommPtr>* comms);
  
    /*! take a vector s4u::CommPtr and return when all of them is finished. */
    static void wait_all(const std::vector<CommPtr>* comms);
+  /*! Same as wait_all, but with a timeout. Return the number of terminated comms (less than comms->size() if the
+   * timeout occurs). */
+  static size_t wait_all_for(const std::vector<CommPtr>* comms, double timeout);
    /*! take a vector s4u::CommPtr and return the rank of the first finished one (or -1 if none is done). */
    static int test_any(const std::vector<CommPtr>* comms);
  
    /*! take a vector s4u::CommPtr and return the rank of the first finished one (or -1 if none is done). */
    static int test_any(const std::vector<CommPtr>* comms);
  
diff --git a/src/s4u/s4u_Comm.cpp b/src/s4u/s4u_Comm.cpp

index 079626e..32d11e9 100644 (file)
--- a/src/s4u/s4u_Comm.cpp
+++ b/src/s4u/s4u_Comm.cpp
@@ -8,6 +8,7 @@
  
  #include "simgrid/Exception.hpp"
  #include "simgrid/s4u/Comm.hpp"
  
  #include "simgrid/Exception.hpp"
  #include "simgrid/s4u/Comm.hpp"
+#include "simgrid/s4u/Engine.hpp"
  #include "simgrid/s4u/Mailbox.hpp"
  
  #include <simgrid/comm.h>
  #include "simgrid/s4u/Mailbox.hpp"
  
  #include <simgrid/comm.h>
@@ -52,11 +53,31 @@ int Comm::wait_any_for(const std::vector<CommPtr>* comms, double timeout)
  void Comm::wait_all(const std::vector<CommPtr>* comms)
  {
    // TODO: this should be a simcall or something
  void Comm::wait_all(const std::vector<CommPtr>* comms)
  {
    // TODO: this should be a simcall or something
-  // TODO: we are missing a version with timeout
-  for (CommPtr comm : *comms)
+  for (auto& comm : *comms)
      comm->wait();
  }
  
      comm->wait();
  }
  
+size_t Comm::wait_all_for(const std::vector<CommPtr>* comms, double timeout)
+{
+  if (timeout < 0.0) {
+    wait_all(comms);
+    return comms->size();
+  }
+
+  double deadline = Engine::get_clock() + timeout;
+  std::vector<CommPtr> waited_comm(1, nullptr);
+  for (size_t i = 0; i < comms->size(); i++) {
+    double wait_timeout = std::max(0.0, deadline - Engine::get_clock());
+    waited_comm[0]      = (*comms)[i];
+    // Using wait_any_for() here (and not wait_for) because we don't want comms to be invalidated on timeout
+    if (wait_any_for(&waited_comm, wait_timeout) == -1) {
+      XBT_DEBUG("Timeout (%g): i = %zu", wait_timeout, i);
+      return i;
+    }
+  }
+  return comms->size();
+}
+
  CommPtr Comm::set_rate(double rate)
  {
    xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
  CommPtr Comm::set_rate(double rate)
  {
    xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)",
@@ -319,12 +340,20 @@ sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout)
  }
  
  void sg_comm_wait_all(sg_comm_t* comms, size_t count)
  }
  
  void sg_comm_wait_all(sg_comm_t* comms, size_t count)
+{
+  sg_comm_wait_all_for(comms, count, -1);
+}
+
+size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout)
  {
    std::vector<simgrid::s4u::CommPtr> s4u_comms;
  {
    std::vector<simgrid::s4u::CommPtr> s4u_comms;
-  for (unsigned int i = 0; i < count; i++)
+  for (size_t i = 0; i < count; i++)
      s4u_comms.emplace_back(comms[i], false);
  
      s4u_comms.emplace_back(comms[i], false);
  
-  simgrid::s4u::Comm::wait_all(&s4u_comms);
+  size_t pos = simgrid::s4u::Comm::wait_all_for(&s4u_comms, timeout);
+  for (size_t i = pos; i < count; i++)
+    s4u_comms[i]->add_ref();
+  return pos;
  }
  
  int sg_comm_wait_any(sg_comm_t* comms, size_t count)
  }
  
  int sg_comm_wait_any(sg_comm_t* comms, size_t count)
author	Arnaud Giersch <arnaud.giersch@univ-fcomte.fr>
	Mon, 21 Jun 2021 15:04:20 +0000 (17:04 +0200)
committer	Arnaud Giersch <arnaud.giersch@univ-fcomte.fr>
	Tue, 22 Jun 2021 19:58:22 +0000 (21:58 +0200)
ChangeLog		patch \| blob \| history
include/simgrid/comm.h		patch \| blob \| history
include/simgrid/s4u/Comm.hpp		patch \| blob \| history
src/s4u/s4u_Comm.cpp		patch \| blob \| history