1 /* Copyright (c) 2007-2022. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
/* This is a version of the masterworkers that (hopefully) survives the chaos monkey.
 * It tests synchronous send/receive as well as synchronous computations.
 *
 * It is not written to be pleasant to read, but instead to resist the aggressions of the monkey:
 * - Workers keep going until the global variable `todo` reaches 0.
 * - The master is a daemon that just sends tasks forever
 *   (SimGrid simulations stop as soon as all non-daemon actors are done).
 * - The platform is created programmatically to remove path issues and control the problem size.
 *
 * Command-line configuration items:
 * - host-count: how many actors to start (including the master)
 * - task-count: initial value of the `todo` global
 * - deadline: time at which the simulation is known to be failed (to detect infinite loops).
 *
 * See the simgrid-monkey script for more information.
 */
23 #include <simgrid/s4u.hpp>
24 #include <xbt/config.hpp>
25 #include <xbt/string.hpp>
27 namespace sg4 = simgrid::s4u;
29 XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_test, "Messages specific for this s4u example");
31 static simgrid::config::Flag<int> cfg_host_count{"host-count", "Host count (master on one, workers on the others)", 2};
32 static simgrid::config::Flag<double> cfg_deadline{"deadline", "When to fail the simulation (infinite loop detection)",
34 static simgrid::config::Flag<int> cfg_task_count{"task-count", "Amount of tasks that must be executed to succeed", 1};
36 int todo; // remaining amount of tasks to execute, a global variable
37 sg4::Mailbox* mailbox; // as a global to reduce the amount of simcalls during actor reboot
39 static void master(double comp_size, long comm_size)
41 XBT_INFO("Master booting");
42 sg4::Actor::self()->daemonize();
43 sg4::this_actor::on_exit(
44 [](bool forcefully) { XBT_INFO("Master dying %s.", forcefully ? "forcefully" : "peacefully"); });
46 while (true) { // This is a daemon
47 xbt_assert(sg4::Engine::get_clock() < cfg_deadline,
48 "Failed to run all tasks in less than %d seconds. Is this an infinite loop?", (int)cfg_deadline);
50 auto* payload = new double(comp_size);
52 XBT_INFO("Try to send a message");
53 mailbox->put(payload, comm_size, 10.0);
54 } catch (const simgrid::TimeoutException&) {
56 XBT_INFO("Timeouted while sending a task");
57 } catch (const simgrid::NetworkFailureException&) {
59 XBT_INFO("Network error while sending a task");
65 static void worker(int id)
67 XBT_INFO("Worker booting");
68 sg4::this_actor::on_exit(
69 [id](bool forcefully) { XBT_INFO("worker %d dying %s.", id, forcefully ? "forcefully" : "peacefully"); });
72 xbt_assert(sg4::Engine::get_clock() < cfg_deadline,
73 "Failed to run all tasks in less than %d seconds. Is this an infinite loop?", (int)cfg_deadline);
75 XBT_INFO("Waiting a message on %s", mailbox->get_cname());
76 auto payload = mailbox->get_unique<double>(10);
77 xbt_assert(payload != nullptr, "mailbox->get() failed");
78 double comp_size = *payload;
79 if (comp_size < 0) { /* - Exit when -1.0 is received */
80 XBT_INFO("I'm done. See you!");
83 /* - Otherwise, process the task */
84 XBT_INFO("Start execution...");
85 sg4::this_actor::execute(comp_size);
86 XBT_INFO("Execution complete.");
88 } catch (const simgrid::TimeoutException&) {
89 XBT_INFO("Timeouted while getting a task.");
91 } catch (const simgrid::NetworkFailureException&) {
92 XBT_INFO("Mmh. Something went wrong. Nevermind. Let's keep going!");
97 int main(int argc, char* argv[])
99 sg4::Engine e(&argc, argv);
101 XBT_INFO("host count: %d ", (int)cfg_host_count);
103 auto* rootzone = sg4::create_full_zone("root");
104 sg4::Host* main; // First host created, where the master will stay
105 std::vector<sg4::Host*> worker_hosts;
106 for (int i = 0; i < cfg_host_count; i++) {
107 auto hostname = std::string("lilibeth ") + std::to_string(i);
108 auto* host = rootzone->create_host(hostname, 1e15);
112 sg4::LinkInRoute link(rootzone->create_link(hostname, "1MBps")->set_latency("24us")->seal());
113 rootzone->add_route(main->get_netpoint(), host->get_netpoint(), nullptr, nullptr, {link}, true);
114 worker_hosts.push_back(host);
118 sg4::Engine::get_instance()->on_platform_created(); // FIXME this should not be necessary
120 sg4::Actor::create("master", main, master, 50000000, 1000000)->set_auto_restart(true);
122 for (auto* h : worker_hosts)
123 sg4::Actor::create("worker", h, worker, id++)->set_auto_restart(true);
125 todo = cfg_task_count;
126 xbt_assert(todo > 0, "Please give more than %d tasks to run", todo);
127 mailbox = sg4::Mailbox::by_name("mailbox");
131 XBT_INFO("WE SURVIVED!");