- Make the host_load plugin available from Python. See examples/python/plugin-host-load
- Mailbox::get_async() does not return a pair anymore. Use comm.get_payload() instead.
- Comm::waitall() is gone. Please use ActivitySet() instead.
+ - Comm::waitallfor() is gone too. Its semantics were unclear on timeout anyway.
----------------------------------------------------------------------------
include examples/python/comm-throttling/comm-throttling.tesh
include examples/python/comm-wait/comm-wait.py
include examples/python/comm-wait/comm-wait.tesh
-include examples/python/comm-waitall/comm-waitall.py
-include examples/python/comm-waitall/comm-waitall.tesh
-include examples/python/comm-waitallfor/comm-waitallfor.py
-include examples/python/comm-waitallfor/comm-waitallfor.tesh
include examples/python/comm-waitany/comm-waitany.py
include examples/python/comm-waitany/comm-waitany.tesh
include examples/python/comm-waituntil/comm-waituntil.py
include teshsuite/s4u/vm-live-migration/vm-live-migration.tesh
include teshsuite/s4u/vm-suicide/vm-suicide.cpp
include teshsuite/s4u/vm-suicide/vm-suicide.tesh
-include teshsuite/s4u/wait-all-for/wait-all-for.cpp
-include teshsuite/s4u/wait-all-for/wait-all-for.tesh
include teshsuite/s4u/wait-any-for/wait-any-for.cpp
include teshsuite/s4u/wait-any-for/wait-any-for.tesh
include teshsuite/smpi/MBI/CollArgGenerator.py
foreach(example actor-create actor-daemon actor-join actor-kill actor-migrate actor-suspend actor-yield actor-lifetime
activityset-testany activityset-waitall activityset-waitallfor activityset-waitany
app-masterworkers
- comm-wait comm-waitallfor comm-waitany comm-failure comm-host2host comm-pingpong
- comm-ready comm-suspend comm-testany comm-throttling comm-waitallfor comm-waituntil
+ comm-wait comm-waitany comm-failure comm-host2host comm-pingpong
+ comm-ready comm-suspend comm-testany comm-throttling comm-waituntil
exec-async exec-basic exec-dvfs exec-remote exec-ptask
task-io task-simple task-switch-host task-variable-load
platform-comm-serialize platform-profile platform-failures
+++ /dev/null
-# Copyright (c) 2010-2023. The SimGrid Team. All rights reserved.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the license (GNU LGPL) which comes with this package.
-
-"""
-This example implements the following scenario:
-- Multiple workers consume jobs (Job) from a shared mailbox (worker)
-- A client first dispatches several jobs (with a simulated 'cost' - i.e. time the worker will 'process' the job)
-- The client then waits for all job results for a maximum time set by the 'wait timeout' (Comm.wait_all_for)
-- The client then displays the status of individual jobs.
-"""
-
-
-from argparse import ArgumentParser
-from dataclasses import dataclass
-from typing import List
-from uuid import uuid4
-import sys
-
-from simgrid import Actor, Comm, Engine, Host, Mailbox, this_actor
-
-
-SIMULATED_JOB_SIZE_BYTES = 1024
-SIMULATED_RESULT_SIZE_BYTES = 1024 * 1024
-
-
-def parse_requests(requests_str: str) -> List[float]:
- return [float(item.strip()) for item in requests_str.split(",")]
-
-
-def create_parser() -> ArgumentParser:
- parser = ArgumentParser()
- parser.add_argument(
- '--platform',
- type=str,
- required=True,
- help='path to the platform description'
- )
- parser.add_argument(
- "--workers",
- type=int,
- default=1,
- help="number of worker actors to start"
- )
- parser.add_argument(
- "--jobs",
- type=parse_requests,
- default="1,2,3,4,5",
- help="duration of individual jobs sent to the workers by the client"
- )
- parser.add_argument(
- "--wait-timeout",
- type=float,
- default=5.0,
- help="number of seconds before the client gives up waiting for results and aborts the simulation"
- )
- return parser
-
-
-@dataclass
-class Job:
- job_id: str
- duration: float
- result_mailbox: Mailbox
-
-
-def worker(worker_id: str):
- this_actor.info(f"{worker_id} started")
- mailbox: Mailbox = Mailbox.by_name("worker")
- while True:
- job: Job = mailbox.get()
- this_actor.info(f"{worker_id} working on {job.job_id} (will take {job.duration}s to complete)")
- this_actor.sleep_for(job.duration)
- job.result_mailbox.put(f"{worker_id}", SIMULATED_RESULT_SIZE_BYTES)
-
-
-@dataclass
-class AsyncJobResult:
- job: Job
- comm: Comm
-
- @property
- def complete(self) -> bool:
- return self.comm.test()
-
- @property
- def status(self) -> str:
- return "complete" if self.complete else "pending"
-
-
-def client(client_id: str, jobs: List[float], wait_timeout: float):
- worker_mailbox: Mailbox = Mailbox.by_name("worker")
- this_actor.info(f"{client_id} started")
- async_job_results: list[AsyncJobResult] = []
- for job_idx, job_duration in enumerate(jobs):
- result_mailbox: Mailbox = Mailbox.by_name(str(uuid4()))
- job = Job(job_id=f"job-{job_idx}", duration=job_duration, result_mailbox=result_mailbox)
- out_comm = worker_mailbox.put_init(Job(
- job_id=f"job-{job_idx}",
- duration=job_duration,
- result_mailbox=result_mailbox
- ), SIMULATED_JOB_SIZE_BYTES)
- out_comm.detach()
- result_comm = result_mailbox.get_async()
- async_job_results.append(AsyncJobResult(
- job=job,
- comm=result_comm
- ))
- this_actor.info(f"awaiting results for all jobs (timeout={wait_timeout}s)")
- completed_comms = Comm.wait_all_for([entry.comm for entry in async_job_results], wait_timeout)
- logger = this_actor.warning if completed_comms < len(async_job_results) else this_actor.info
- logger(f"received {completed_comms}/{len(async_job_results)} results")
- for result in async_job_results:
- this_actor.info(f"{result.job.job_id}"
- f" status={result.status}"
- f" result_payload={result.comm.get_payload() if result.complete else ''}")
-
-
-def main():
- settings = create_parser().parse_known_args()[0]
- e = Engine(sys.argv)
- e.load_platform(settings.platform)
- Actor.create("client", Host.by_name("Tremblay"), client, "client", settings.jobs, settings.wait_timeout)
- for worker_idx in range(settings.workers):
- Actor.create("worker", Host.by_name("Ruby"), worker, f"worker-{worker_idx}").daemonize()
- e.run()
-
-
-if __name__ == "__main__":
- main()
+++ /dev/null
-#!/usr/bin/env tesh
-
-p Testing Comm.wait_all_for()
-
-$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${bindir:=.}/comm-waitallfor.py --platform ${platfdir}/small_platform_fatpipe.xml --workers 1 --wait-timeout 1 --jobs 1,2,3,4,5 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
->[ 0.000000] (2:worker@Ruby) worker-0 started
->[ 0.000000] (1:client@Tremblay) client started
->[ 0.000000] (1:client@Tremblay) awaiting results for all jobs (timeout=1.0s)
->[ 0.001954] (2:worker@Ruby) worker-0 working on job-0 (will take 1.0s to complete)
->[ 1.000000] (1:client@Tremblay) received 0/5 results
->[ 1.000000] (1:client@Tremblay) job-0 status=pending result_payload=
->[ 1.000000] (1:client@Tremblay) job-1 status=pending result_payload=
->[ 1.000000] (1:client@Tremblay) job-2 status=pending result_payload=
->[ 1.000000] (1:client@Tremblay) job-3 status=pending result_payload=
->[ 1.000000] (1:client@Tremblay) job-4 status=pending result_payload=
-
-$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${bindir:=.}/comm-waitallfor.py --platform ${platfdir}/small_platform_fatpipe.xml --workers 1 --wait-timeout 5 --jobs 1,2,3,4,5 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
->[ 0.000000] (2:worker@Ruby) worker-0 started
->[ 0.000000] (1:client@Tremblay) client started
->[ 0.000000] (1:client@Tremblay) awaiting results for all jobs (timeout=5.0s)
->[ 0.001954] (2:worker@Ruby) worker-0 working on job-0 (will take 1.0s to complete)
->[ 1.008029] (2:worker@Ruby) worker-0 working on job-1 (will take 2.0s to complete)
->[ 3.014105] (2:worker@Ruby) worker-0 working on job-2 (will take 3.0s to complete)
->[ 5.000000] (1:client@Tremblay) received 2/5 results
->[ 5.000000] (1:client@Tremblay) job-0 status=complete result_payload=worker-0
->[ 5.000000] (1:client@Tremblay) job-1 status=complete result_payload=worker-0
->[ 5.000000] (1:client@Tremblay) job-2 status=pending result_payload=
->[ 5.000000] (1:client@Tremblay) job-3 status=pending result_payload=
->[ 5.000000] (1:client@Tremblay) job-4 status=pending result_payload=
-
-$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${bindir:=.}/comm-waitallfor.py --platform ${platfdir}/small_platform_fatpipe.xml --workers 1 --wait-timeout -1 --jobs 1,2,3,4,5 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
->[ 0.000000] (2:worker@Ruby) worker-0 started
->[ 0.000000] (1:client@Tremblay) client started
->[ 0.000000] (1:client@Tremblay) awaiting results for all jobs (timeout=-1.0s)
->[ 0.001954] (2:worker@Ruby) worker-0 working on job-0 (will take 1.0s to complete)
->[ 1.008029] (2:worker@Ruby) worker-0 working on job-1 (will take 2.0s to complete)
->[ 3.014105] (2:worker@Ruby) worker-0 working on job-2 (will take 3.0s to complete)
->[ 6.020181] (2:worker@Ruby) worker-0 working on job-3 (will take 4.0s to complete)
->[ 10.026257] (2:worker@Ruby) worker-0 working on job-4 (will take 5.0s to complete)
->[ 15.030379] (1:client@Tremblay) received 5/5 results
->[ 15.030379] (1:client@Tremblay) job-0 status=complete result_payload=worker-0
->[ 15.030379] (1:client@Tremblay) job-1 status=complete result_payload=worker-0
->[ 15.030379] (1:client@Tremblay) job-2 status=complete result_payload=worker-0
->[ 15.030379] (1:client@Tremblay) job-3 status=complete result_payload=worker-0
->[ 15.030379] (1:client@Tremblay) job-4 status=complete result_payload=worker-0
-
-$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${bindir:=.}/comm-waitallfor.py --platform ${platfdir}/small_platform_fatpipe.xml --workers 5 --wait-timeout 3 --jobs 1,2,3,4,5 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
->[ 0.000000] (2:worker@Ruby) worker-0 started
->[ 0.000000] (3:worker@Ruby) worker-1 started
->[ 0.000000] (4:worker@Ruby) worker-2 started
->[ 0.000000] (5:worker@Ruby) worker-3 started
->[ 0.000000] (6:worker@Ruby) worker-4 started
->[ 0.000000] (1:client@Tremblay) client started
->[ 0.000000] (1:client@Tremblay) awaiting results for all jobs (timeout=3.0s)
->[ 0.001954] (6:worker@Ruby) worker-4 working on job-4 (will take 5.0s to complete)
->[ 0.001954] (5:worker@Ruby) worker-3 working on job-3 (will take 4.0s to complete)
->[ 0.001954] (4:worker@Ruby) worker-2 working on job-2 (will take 3.0s to complete)
->[ 0.001954] (3:worker@Ruby) worker-1 working on job-1 (will take 2.0s to complete)
->[ 0.001954] (2:worker@Ruby) worker-0 working on job-0 (will take 1.0s to complete)
->[ 3.000000] (1:client@Tremblay) received 2/5 results
->[ 3.000000] (1:client@Tremblay) job-0 status=complete result_payload=worker-0
->[ 3.000000] (1:client@Tremblay) job-1 status=complete result_payload=worker-1
->[ 3.000000] (1:client@Tremblay) job-2 status=pending result_payload=
->[ 3.000000] (1:client@Tremblay) job-3 status=pending result_payload=
->[ 3.000000] (1:client@Tremblay) job-4 status=pending result_payload=
-
-$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${bindir:=.}/comm-waitallfor.py --platform ${platfdir}/small_platform_fatpipe.xml --workers 5 --wait-timeout -1 --jobs 5,10,5,20,5,40,5,80,5,160 "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
->[ 0.000000] (2:worker@Ruby) worker-0 started
->[ 0.000000] (3:worker@Ruby) worker-1 started
->[ 0.000000] (4:worker@Ruby) worker-2 started
->[ 0.000000] (5:worker@Ruby) worker-3 started
->[ 0.000000] (6:worker@Ruby) worker-4 started
->[ 0.000000] (1:client@Tremblay) client started
->[ 0.000000] (1:client@Tremblay) awaiting results for all jobs (timeout=-1.0s)
->[ 0.001954] (6:worker@Ruby) worker-4 working on job-4 (will take 5.0s to complete)
->[ 0.001954] (5:worker@Ruby) worker-3 working on job-3 (will take 20.0s to complete)
->[ 0.001954] (4:worker@Ruby) worker-2 working on job-2 (will take 5.0s to complete)
->[ 0.001954] (3:worker@Ruby) worker-1 working on job-1 (will take 10.0s to complete)
->[ 0.001954] (2:worker@Ruby) worker-0 working on job-0 (will take 5.0s to complete)
->[ 5.008029] (2:worker@Ruby) worker-0 working on job-7 (will take 80.0s to complete)
->[ 5.008029] (4:worker@Ruby) worker-2 working on job-6 (will take 5.0s to complete)
->[ 5.008029] (6:worker@Ruby) worker-4 working on job-5 (will take 40.0s to complete)
->[ 10.008029] (3:worker@Ruby) worker-1 working on job-8 (will take 5.0s to complete)
->[ 10.014105] (4:worker@Ruby) worker-2 working on job-9 (will take 160.0s to complete)
->[170.018227] (1:client@Tremblay) received 10/10 results
->[170.018227] (1:client@Tremblay) job-0 status=complete result_payload=worker-0
->[170.018227] (1:client@Tremblay) job-1 status=complete result_payload=worker-1
->[170.018227] (1:client@Tremblay) job-2 status=complete result_payload=worker-2
->[170.018227] (1:client@Tremblay) job-3 status=complete result_payload=worker-3
->[170.018227] (1:client@Tremblay) job-4 status=complete result_payload=worker-4
->[170.018227] (1:client@Tremblay) job-5 status=complete result_payload=worker-4
->[170.018227] (1:client@Tremblay) job-6 status=complete result_payload=worker-2
->[170.018227] (1:client@Tremblay) job-7 status=complete result_payload=worker-0
->[170.018227] (1:client@Tremblay) job-8 status=complete result_payload=worker-1
->[170.018227] (1:client@Tremblay) job-9 status=complete result_payload=worker-2
XBT_PUBLIC sg_error_t sg_comm_wait(sg_comm_t comm);
XBT_PUBLIC sg_error_t sg_comm_wait_for(sg_comm_t comm, double timeout);
XBT_PUBLIC void sg_comm_wait_all(sg_comm_t* comms, size_t count);
-XBT_PUBLIC size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout);
XBT_PUBLIC ssize_t sg_comm_wait_any_for(sg_comm_t* comms, size_t count, double timeout);
XBT_PUBLIC ssize_t sg_comm_wait_any(sg_comm_t* comms, size_t count);
XBT_PUBLIC void sg_comm_unref(sg_comm_t comm);
/*! \static Same as wait_any, but with a timeout. Return -1 if the timeout occurs.*/
static ssize_t wait_any_for(const std::vector<CommPtr>& comms, double timeout);
- /*! \static Same as wait_all, but with a timeout. Return the number of terminated comm (less than comms.size() if
- * the timeout occurs). */
- static size_t wait_all_for(const std::vector<CommPtr>& comms, double timeout);
-
#ifndef DOXYGEN
XBT_ATTRIB_DEPRECATED_v339("Please use ActivitySet instead") static void wait_all(const std::vector<CommPtr>& comms);
+ XBT_ATTRIB_DEPRECATED_v339("Please use ActivitySet instead") static size_t
+ wait_all_for(const std::vector<CommPtr>& comms, double timeout);
#endif
};
} // namespace simgrid::s4u
"In particular, the actor does not have to be on one of the involved hosts.")
.def_static("test_any", &Comm::test_any, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
"take a vector s4u::CommPtr and return the rank of the first finished one (or -1 if none is done)")
- .def_static("wait_all_for", &Comm::wait_all_for, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
- py::arg("timeout"),
- "Block until the completion of all communications in the list, or raises TimeoutException after "
- "the specified timeout.")
.def_static("wait_any", &Comm::wait_any, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
"Block until the completion of any communication in the list and return the index of the "
"terminated one.")
comm->wait();
}
-size_t Comm::wait_all_for(const std::vector<CommPtr>& comms, double timeout)
+size_t Comm::wait_all_for(const std::vector<CommPtr>& comms, double timeout) // XBT_ATTRIB_DEPRECATED_v339
{
if (timeout < 0.0) {
for (const auto& comm : comms)
void sg_comm_wait_all(sg_comm_t* comms, size_t count)
{
- sg_comm_wait_all_for(comms, count, -1);
-}
-
-size_t sg_comm_wait_all_for(sg_comm_t* comms, size_t count, double timeout)
-{
+ simgrid::s4u::ActivitySet as;
std::vector<simgrid::s4u::CommPtr> s4u_comms;
for (size_t i = 0; i < count; i++)
- s4u_comms.emplace_back(comms[i], false);
+ as.push(comms[i]);
- size_t pos = simgrid::s4u::Comm::wait_all_for(s4u_comms, timeout);
- for (size_t i = pos; i < count; i++)
- s4u_comms[i]->add_ref();
- return pos;
+ as.wait_all();
}
ssize_t sg_comm_wait_any(sg_comm_t* comms, size_t count)
foreach(x actor actor-autorestart actor-suspend
activity-lifecycle
- comm-get-sender comm-pt2pt comm-fault-scenarios wait-all-for wait-any-for
+ comm-get-sender comm-pt2pt comm-fault-scenarios wait-any-for
cloud-interrupt-migration cloud-two-execs
monkey-masterworkers monkey-semaphore
concurrent_rw
## Add the tests.
## Some need to be run with all factories, some don't need tesh to run
-foreach(x actor actor-autorestart actor-suspend activity-lifecycle comm-get-sender wait-all-for wait-any-for
+foreach(x actor actor-autorestart actor-suspend activity-lifecycle comm-get-sender wait-any-for
cloud-interrupt-migration cloud-two-execs concurrent_rw dag-incomplete-simulation dependencies io-set-bw io-stream
vm-live-migration vm-suicide)
set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh)
+++ /dev/null
-/* Copyright (c) 2019-2023. The SimGrid Team. All rights reserved. */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-
-#include <cstdlib>
-#include <iostream>
-#include <simgrid/s4u.hpp>
-#include <string>
-
-XBT_LOG_NEW_DEFAULT_CATEGORY(meh, "meh");
-
-static void worker()
-{
- auto* mbox = simgrid::s4u::Mailbox::by_name("meh");
- int input1 = 42;
- int input2 = 51;
-
- XBT_INFO("Sending and receiving %d and %d asynchronously", input1, input2);
-
- auto put1 = mbox->put_async(&input1, 1000 * 1000 * 500);
- auto put2 = mbox->put_async(&input2, 1000 * 1000 * 1000);
-
- int* out1;
- auto get1 = mbox->get_async<int>(&out1);
-
- int* out2;
- auto get2 = mbox->get_async<int>(&out2);
-
- XBT_INFO("All comms have started");
- std::vector<simgrid::s4u::CommPtr> comms = {put1, put2, get1, get2};
-
- while (not comms.empty()) {
- size_t index = simgrid::s4u::Comm::wait_all_for(comms, 0.5);
- if (index < comms.size())
- XBT_INFO("wait_all_for: Timeout reached");
- XBT_INFO("wait_all_for: %zu comms finished (#comms=%zu)", index, comms.size());
- comms.erase(comms.begin(), comms.begin() + index);
- }
-
- XBT_INFO("All comms have finished");
- XBT_INFO("Got %d and %d", *out1, *out2);
-}
-
-int main(int argc, char* argv[])
-
-{
- simgrid::s4u::Engine e(&argc, argv);
- e.load_platform(argv[1]);
- simgrid::s4u::Actor::create("worker", e.host_by_name("Tremblay"), worker);
- e.run();
- return 0;
-}
+++ /dev/null
-#!/usr/bin/env tesh
-
-p Testing the wait_all_for feature of S4U
-
-! output sort 19
-$ ${bindir:=.}/wait-all-for ${platfdir:=.}/small_platform.xml "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
-> [ 0.000000] (1:worker@Tremblay) Sending and receiving 42 and 51 asynchronously
-> [ 0.000000] (1:worker@Tremblay) All comms have started
-> [ 0.500000] (1:worker@Tremblay) wait_all_for: Timeout reached
-> [ 0.500000] (1:worker@Tremblay) wait_all_for: 0 comms finished (#comms=4)
-> [ 1.000000] (1:worker@Tremblay) wait_all_for: Timeout reached
-> [ 1.000000] (1:worker@Tremblay) wait_all_for: 0 comms finished (#comms=4)
-> [ 1.500000] (1:worker@Tremblay) wait_all_for: Timeout reached
-> [ 1.500000] (1:worker@Tremblay) wait_all_for: 1 comms finished (#comms=4)
-> [ 2.000000] (1:worker@Tremblay) wait_all_for: Timeout reached
-> [ 2.000000] (1:worker@Tremblay) wait_all_for: 0 comms finished (#comms=3)
-> [ 2.070331] (1:worker@Tremblay) wait_all_for: 3 comms finished (#comms=3)
-> [ 2.070331] (1:worker@Tremblay) All comms have finished
-> [ 2.070331] (1:worker@Tremblay) Got 42 and 51