From: Arnaud Giersch Date: Mon, 21 Jun 2021 15:04:20 +0000 (+0200) Subject: Implement Comm::wait_all_for (wait_all with a timeout). X-Git-Tag: v3.28~79 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/e39b010eca568061812e3981dd439ca98fa0b9b4 Implement Comm::wait_all_for (wait_all with a timeout). --- diff --git a/ChangeLog b/ChangeLog index dee30f0f5e..1d8b498cc6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,7 @@ New features: associated example (examples/cpp/plugin-prodcons). S4U: + - New: simgrid::s4u::Comm::wait_all_for() (like Comm::wait_all, but with a timeout). - Fixed a bug where Activity::wait_for() killed the activity on timeout. Explicitly cancel the activity to get back to previous behavior. @@ -32,7 +33,7 @@ SMPI: It should now work with smpi/auto-shared-malloc-thresh. - Improve error handling and reporting in multiple places - Improve correctness checks on the MPI code.(MPI_Op and MPI_Datatype - validity checks, truncated messages are now an error, return errors + validity checks, truncated messages are now an error, return errors when explicitly deleted handles are reused, ...) - RMA: multiple fixes and stability improvements.
- analysis (-analyze flag in smpirun): diff --git a/include/simgrid/comm.h b/include/simgrid/comm.h index 540cab4b07..093cf1306d 100644 --- a/include/simgrid/comm.h +++ b/include/simgrid/comm.h @@ -17,6 +17,7 @@ XBT_PUBLIC int sg_comm_test(sg_comm_t comm); XBT_PUBLIC sg_error_t sg_comm_wait(sg_comm_t comm); XBT_PUBLIC sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout); XBT_PUBLIC void sg_comm_wait_all(sg_comm_t* comms, size_t count); +XBT_PUBLIC size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout); XBT_PUBLIC int sg_comm_wait_any_for(sg_comm_t* comms, size_t count, double timeout); XBT_PUBLIC int sg_comm_wait_any(sg_comm_t* comms, size_t count); XBT_PUBLIC void sg_comm_unref(sg_comm_t comm); diff --git a/include/simgrid/s4u/Comm.hpp b/include/simgrid/s4u/Comm.hpp index 9fb2288747..946b175274 100644 --- a/include/simgrid/s4u/Comm.hpp +++ b/include/simgrid/s4u/Comm.hpp @@ -75,6 +75,9 @@ public: /*! take a vector of s4u::CommPtr and return when all of them are finished. */ static void wait_all(const std::vector<CommPtr>* comms); + /*! Same as wait_all, but with a timeout. Return the number of terminated comms (less than comms.size() if the timeout + * occurs). */ + static size_t wait_all_for(const std::vector<CommPtr>* comms, double timeout); /*! take a vector of s4u::CommPtr and return the rank of the first finished one (or -1 if none is done).
*/ static int test_any(const std::vector<CommPtr>* comms); diff --git a/src/s4u/s4u_Comm.cpp b/src/s4u/s4u_Comm.cpp index 079626e2cd..32d11e9d20 100644 --- a/src/s4u/s4u_Comm.cpp +++ b/src/s4u/s4u_Comm.cpp @@ -8,6 +8,7 @@ #include "simgrid/Exception.hpp" #include "simgrid/s4u/Comm.hpp" +#include "simgrid/s4u/Engine.hpp" #include "simgrid/s4u/Mailbox.hpp" #include @@ -52,11 +53,31 @@ int Comm::wait_any_for(const std::vector<CommPtr>* comms, double timeout) void Comm::wait_all(const std::vector<CommPtr>* comms) { // TODO: this should be a simcall or something - // TODO: we are missing a version with timeout - for (CommPtr comm : *comms) + for (auto& comm : *comms) comm->wait(); } +size_t Comm::wait_all_for(const std::vector<CommPtr>* comms, double timeout) +{ + if (timeout < 0.0) { + wait_all(comms); + return comms->size(); + } + + double deadline = Engine::get_clock() + timeout; + std::vector<CommPtr> waited_comm(1, nullptr); + for (size_t i = 0; i < comms->size(); i++) { + double wait_timeout = std::max(0.0, deadline - Engine::get_clock()); + waited_comm[0] = (*comms)[i]; + // Using wait_any_for() here (and not wait_for) because we don't want comms to be invalidated on timeout + if (wait_any_for(&waited_comm, wait_timeout) == -1) { + XBT_DEBUG("Timeout (%g): i = %zu", wait_timeout, i); + return i; + } + } + return comms->size(); +} + CommPtr Comm::set_rate(double rate) { xbt_assert(state_ == State::INITED, "You cannot use %s() once your communication started (not implemented)", @@ -319,12 +340,20 @@ sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout) } void sg_comm_wait_all(sg_comm_t* comms, size_t count) +{ + sg_comm_wait_all_for(comms, count, -1); +} + +size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout) { std::vector<simgrid::s4u::CommPtr> s4u_comms; - for (unsigned int i = 0; i < count; i++) + for (size_t i = 0; i < count; i++) s4u_comms.emplace_back(comms[i], false); - simgrid::s4u::Comm::wait_all(&s4u_comms); + size_t pos = simgrid::s4u::Comm::wait_all_for(&s4u_comms, timeout); + for (size_t i
= pos; i < count; i++) + s4u_comms[i]->add_ref(); + return pos; } int sg_comm_wait_any(sg_comm_t* comms, size_t count)