From ec8c7e3af0345da0f8386c572a10c69e16d9fe74 Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Wed, 22 Dec 2021 23:15:53 +0100 Subject: [PATCH] Extend the platform-failures example to show how to create a host with its trace programmatically --- .../s4u-platform-failures.cpp | 40 ++++++++--- .../s4u-platform-failures.tesh | 68 ++++++++++++------- 2 files changed, 75 insertions(+), 33 deletions(-) diff --git a/examples/cpp/platform-failures/s4u-platform-failures.cpp b/examples/cpp/platform-failures/s4u-platform-failures.cpp index 4a5cc0f6e0..b47d78eb91 100644 --- a/examples/cpp/platform-failures/s4u-platform-failures.cpp +++ b/examples/cpp/platform-failures/s4u-platform-failures.cpp @@ -21,13 +21,15 @@ #include "simgrid/kernel/ProfileBuilder.hpp" #include "simgrid/s4u.hpp" +namespace sg4 = simgrid::s4u; + XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_test, "Messages specific for this s4u example"); static void master(std::vector args) { xbt_assert(args.size() == 5, "Expecting one parameter"); - simgrid::s4u::Mailbox* mailbox; + sg4::Mailbox* mailbox; long number_of_tasks = std::stol(args[1]); double comp_size = std::stod(args[2]); long comm_size = std::stol(args[3]); @@ -36,7 +38,7 @@ static void master(std::vector args) XBT_INFO("Got %ld workers and %ld tasks to process", workers_count, number_of_tasks); for (int i = 0; i < number_of_tasks; i++) { - mailbox = simgrid::s4u::Mailbox::by_name(std::string("worker-") + std::to_string(i % workers_count)); + mailbox = sg4::Mailbox::by_name(std::string("worker-") + std::to_string(i % workers_count)); auto* payload = new double(comp_size); try { XBT_INFO("Send a message to %s", mailbox->get_cname()); @@ -54,7 +56,7 @@ static void master(std::vector args) XBT_INFO("All tasks have been dispatched. 
Let's tell everybody the computation is over."); for (int i = 0; i < workers_count; i++) { /* - Eventually tell all the workers to stop by sending a "finalize" task */ - mailbox = simgrid::s4u::Mailbox::by_name(std::string("worker-") + std::to_string(i)); + mailbox = sg4::Mailbox::by_name(std::string("worker-") + std::to_string(i)); auto* payload = new double(-1.0); try { mailbox->put(payload, 0, 1.0); @@ -73,8 +75,8 @@ static void master(std::vector args) static void worker(std::vector args) { xbt_assert(args.size() == 2, "Expecting one parameter"); - long id = std::stol(args[1]); - simgrid::s4u::Mailbox* mailbox = simgrid::s4u::Mailbox::by_name(std::string("worker-") + std::to_string(id)); + long id = std::stol(args[1]); + sg4::Mailbox* mailbox = sg4::Mailbox::by_name(std::string("worker-") + std::to_string(id)); while (true) { try { XBT_INFO("Waiting a message on %s", mailbox->get_cname()); @@ -87,7 +89,7 @@ static void worker(std::vector args) } /* - Otherwise, process the task */ XBT_INFO("Start execution..."); - simgrid::s4u::this_actor::execute(comp_size); + sg4::this_actor::execute(comp_size); XBT_INFO("Execution complete."); } catch (const simgrid::NetworkFailureException&) { XBT_INFO("Mmh. Something went wrong. Nevermind. Let's keep going!"); @@ -97,12 +99,12 @@ static void worker(std::vector args) int main(int argc, char* argv[]) { - simgrid::s4u::Engine e(&argc, argv); + sg4::Engine e(&argc, argv); // This is how to attach a profile to an host that is created from the XML file. // This should be done before calling load_platform(), as the on_creation() event is fired when loading the platform. // You can never set a new profile to a resource that already have one. 
- simgrid::s4u::Host::on_creation.connect([](simgrid::s4u::Host& h) { + sg4::Host::on_creation.connect([](sg4::Host& h) { if (h.get_name() == "Bourrassa") { h.set_state_profile(simgrid::kernel::profile::ProfileBuilder::from_string("bourassa_profile", "67 0\n70 1\n", 0)); } @@ -113,8 +115,28 @@ int main(int argc, char* argv[]) e.register_function("worker", worker); e.load_deployment(argv[2]); + // Add a new host programmatically, and attach a state profile to it + auto* root = e.get_netzone_root(); + auto* lilibeth = root->create_host("Lilibeth", 1e15); + sg4::LinkInRoute link = sg4::LinkInRoute(e.link_by_name("10")); + root->add_route(e.host_by_name("Tremblay")->get_netpoint(), lilibeth->get_netpoint(), nullptr, nullptr, {link}, true); + lilibeth->set_state_profile(simgrid::kernel::profile::ProfileBuilder::from_string("lilibeth_profile", R"( +4 0 +5 1 +)", + 10)); + lilibeth->seal(); + + // Create an actor on that new host, to monitor its own state + auto actor = sg4::Actor::create("sleeper", lilibeth, []() { + XBT_INFO("Start sleeping..."); + sg4::this_actor::sleep_for(1); + XBT_INFO("done sleeping."); + }); + actor->set_auto_restart(true); + e.run(); - XBT_INFO("Simulation time %g", simgrid::s4u::Engine::get_clock()); + XBT_INFO("Simulation time %g", sg4::Engine::get_clock()); return 0; } diff --git a/examples/cpp/platform-failures/s4u-platform-failures.tesh b/examples/cpp/platform-failures/s4u-platform-failures.tesh index 47d7d5dafb..eb0906d506 100644 --- a/examples/cpp/platform-failures/s4u-platform-failures.tesh +++ b/examples/cpp/platform-failures/s4u-platform-failures.tesh @@ -8,6 +8,7 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 0.000000] (0:maestro@) Deployment includes some initially turned off Hosts ... nevermind. > [ 0.000000] (1:master@Tremblay) Got 5 workers and 20 tasks to process > [ 0.000000] (1:master@Tremblay) Send a message to worker-0 +> [ 0.000000] (7:sleeper@Lilibeth) Start sleeping... 
> [ 0.010309] (1:master@Tremblay) Send to worker-0 completed > [ 0.010309] (2:worker@Tremblay) Start execution... > [ 0.000000] (2:worker@Tremblay) Waiting a message on worker-0 @@ -16,13 +17,13 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 0.000000] (6:worker@Bourassa) Waiting a message on worker-4 > [ 0.010309] (1:master@Tremblay) Send a message to worker-1 > [ 1.000000] (0:maestro@) Restart actors on host Fafard -> [ 1.000000] (7:worker@Fafard) Waiting a message on worker-2 +> [ 1.000000] (8:worker@Fafard) Waiting a message on worker-2 > [ 1.000000] (1:master@Tremblay) Mmh. The communication with 'worker-1' failed. Nevermind. Let's keep going! > [ 1.000000] (1:master@Tremblay) Send a message to worker-2 > [ 2.000000] (1:master@Tremblay) Mmh. The communication with 'worker-2' failed. Nevermind. Let's keep going! > [ 2.000000] (0:maestro@) Restart actors on host Jupiter > [ 2.000000] (1:master@Tremblay) Send a message to worker-3 -> [ 2.000000] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 2.000000] (9:worker@Jupiter) Waiting a message on worker-1 > [ 2.010309] (2:worker@Tremblay) Execution complete. > [ 2.010309] (2:worker@Tremblay) Waiting a message on worker-0 > [ 3.030928] (1:master@Tremblay) Send to worker-3 completed @@ -34,17 +35,20 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 4.072165] (1:master@Tremblay) Send to worker-0 completed > [ 4.072165] (1:master@Tremblay) Send a message to worker-1 > [ 4.072165] (2:worker@Tremblay) Start execution... +> [ 5.000000] (0:maestro@) Restart actors on host Lilibeth +> [ 5.000000] (10:sleeper@Lilibeth) Start sleeping... > [ 5.030928] (5:worker@Ginette) Execution complete. > [ 5.030928] (5:worker@Ginette) Waiting a message on worker-3 > [ 5.103093] (1:master@Tremblay) Send to worker-1 completed > [ 5.103093] (1:master@Tremblay) Send a message to worker-2 -> [ 5.103093] (8:worker@Jupiter) Start execution... 
+> [ 5.103093] (9:worker@Jupiter) Start execution... +> [ 6.000000] (10:sleeper@Lilibeth) done sleeping. > [ 6.061856] (6:worker@Bourassa) Execution complete. > [ 6.061856] (6:worker@Bourassa) Waiting a message on worker-4 > [ 6.072165] (2:worker@Tremblay) Execution complete. > [ 6.072165] (2:worker@Tremblay) Waiting a message on worker-0 -> [ 7.103093] (8:worker@Jupiter) Execution complete. -> [ 7.103093] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 7.103093] (9:worker@Jupiter) Execution complete. +> [ 7.103093] (9:worker@Jupiter) Waiting a message on worker-1 > [ 15.103093] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going! > [ 15.103093] (1:master@Tremblay) Send a message to worker-3 > [ 15.103093] (1:master@Tremblay) Mmh. The communication with 'worker-3' failed. Nevermind. Let's keep going! @@ -59,13 +63,16 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 16.144330] (2:worker@Tremblay) Start execution... > [ 17.175258] (1:master@Tremblay) Send to worker-1 completed > [ 17.175258] (1:master@Tremblay) Send a message to worker-2 -> [ 17.175258] (8:worker@Jupiter) Start execution... +> [ 17.175258] (9:worker@Jupiter) Start execution... > [ 18.134021] (6:worker@Bourassa) Execution complete. > [ 18.134021] (6:worker@Bourassa) Waiting a message on worker-4 > [ 18.144330] (2:worker@Tremblay) Execution complete. > [ 18.144330] (2:worker@Tremblay) Waiting a message on worker-0 -> [ 19.175258] (8:worker@Jupiter) Execution complete. -> [ 19.175258] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 19.175258] (9:worker@Jupiter) Execution complete. +> [ 19.175258] (9:worker@Jupiter) Waiting a message on worker-1 +> [ 20.000000] (0:maestro@) Restart actors on host Lilibeth +> [ 20.000000] (11:sleeper@Lilibeth) Start sleeping... +> [ 21.000000] (11:sleeper@Lilibeth) done sleeping. > [ 27.175258] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. 
Let's keep going! > [ 27.175258] (1:master@Tremblay) Send a message to worker-3 > [ 28.206186] (1:master@Tremblay) Send to worker-3 completed @@ -80,13 +87,16 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 28.216495] (2:worker@Tremblay) Start execution... > [ 29.247423] (1:master@Tremblay) Send to worker-1 completed > [ 29.247423] (1:master@Tremblay) Send a message to worker-2 -> [ 29.247423] (8:worker@Jupiter) Start execution... +> [ 29.247423] (9:worker@Jupiter) Start execution... > [ 30.206186] (5:worker@Ginette) Execution complete. > [ 30.206186] (5:worker@Ginette) Waiting a message on worker-3 > [ 30.216495] (2:worker@Tremblay) Execution complete. > [ 30.216495] (2:worker@Tremblay) Waiting a message on worker-0 -> [ 31.247423] (8:worker@Jupiter) Execution complete. -> [ 31.247423] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 31.247423] (9:worker@Jupiter) Execution complete. +> [ 31.247423] (9:worker@Jupiter) Waiting a message on worker-1 +> [ 35.000000] (0:maestro@) Restart actors on host Lilibeth +> [ 35.000000] (12:sleeper@Lilibeth) Start sleeping... +> [ 36.000000] (12:sleeper@Lilibeth) done sleeping. > [ 39.247423] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going! > [ 39.247423] (1:master@Tremblay) Send a message to worker-3 > [ 40.278351] (1:master@Tremblay) Send to worker-3 completed @@ -96,7 +106,7 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 41.309278] (1:master@Tremblay) All tasks have been dispatched. Let's tell everybody the computation is over. > [ 41.309278] (2:worker@Tremblay) I'm done. See you! > [ 41.309278] (6:worker@Bourassa) Start execution... -> [ 41.309278] (8:worker@Jupiter) I'm done. See you! +> [ 41.309278] (9:worker@Jupiter) I'm done. See you! > [ 42.309278] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going! 
> [ 43.309278] (0:maestro@) Simulation time 43.3093 > [ 43.309278] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-3'. Nevermind. Let's keep going! @@ -117,15 +127,16 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 0.000000] (3:worker@Jupiter) Waiting a message on worker-1 > [ 0.000000] (5:worker@Ginette) Waiting a message on worker-3 > [ 0.000000] (6:worker@Bourassa) Waiting a message on worker-4 +> [ 0.000000] (7:sleeper@Lilibeth) Start sleeping... > [ 0.010825] (2:worker@Tremblay) Start execution... > [ 0.010825] (1:master@Tremblay) Send to worker-0 completed > [ 0.010825] (1:master@Tremblay) Send a message to worker-1 > [ 1.000000] (0:maestro@) Restart actors on host Fafard -> [ 1.000000] (7:worker@Fafard) Waiting a message on worker-2 +> [ 1.000000] (8:worker@Fafard) Waiting a message on worker-2 > [ 1.000000] (1:master@Tremblay) Mmh. The communication with 'worker-1' failed. Nevermind. Let's keep going! > [ 1.000000] (1:master@Tremblay) Send a message to worker-2 > [ 2.000000] (0:maestro@) Restart actors on host Jupiter -> [ 2.000000] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 2.000000] (9:worker@Jupiter) Waiting a message on worker-1 > [ 2.000000] (1:master@Tremblay) Mmh. The communication with 'worker-2' failed. Nevermind. Let's keep going! > [ 2.000000] (1:master@Tremblay) Send a message to worker-3 > [ 2.010825] (2:worker@Tremblay) Execution complete. @@ -139,17 +150,20 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 4.175773] (2:worker@Tremblay) Start execution... > [ 4.175773] (1:master@Tremblay) Send to worker-0 completed > [ 4.175773] (1:master@Tremblay) Send a message to worker-1 +> [ 5.000000] (0:maestro@) Restart actors on host Lilibeth +> [ 5.000000] (10:sleeper@Lilibeth) Start sleeping... > [ 5.082474] (5:worker@Ginette) Execution complete. 
> [ 5.082474] (5:worker@Ginette) Waiting a message on worker-3 -> [ 5.258247] (8:worker@Jupiter) Start execution... +> [ 5.258247] (9:worker@Jupiter) Start execution... > [ 5.258247] (1:master@Tremblay) Send to worker-1 completed > [ 5.258247] (1:master@Tremblay) Send a message to worker-2 +> [ 6.000000] (10:sleeper@Lilibeth) done sleeping. > [ 6.164948] (6:worker@Bourassa) Execution complete. > [ 6.164948] (6:worker@Bourassa) Waiting a message on worker-4 > [ 6.175773] (2:worker@Tremblay) Execution complete. > [ 6.175773] (2:worker@Tremblay) Waiting a message on worker-0 -> [ 7.258247] (8:worker@Jupiter) Execution complete. -> [ 7.258247] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 7.258247] (9:worker@Jupiter) Execution complete. +> [ 7.258247] (9:worker@Jupiter) Waiting a message on worker-1 > [ 15.258247] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going! > [ 15.258247] (1:master@Tremblay) Send a message to worker-3 > [ 15.258247] (5:worker@Ginette) Mmh. Something went wrong. Nevermind. Let's keep going! @@ -162,15 +176,18 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 16.351546] (2:worker@Tremblay) Start execution... > [ 16.351546] (1:master@Tremblay) Send to worker-0 completed > [ 16.351546] (1:master@Tremblay) Send a message to worker-1 -> [ 17.434021] (8:worker@Jupiter) Start execution... +> [ 17.434021] (9:worker@Jupiter) Start execution... > [ 17.434021] (1:master@Tremblay) Send to worker-1 completed > [ 17.434021] (1:master@Tremblay) Send a message to worker-2 > [ 18.340722] (6:worker@Bourassa) Execution complete. > [ 18.340722] (6:worker@Bourassa) Waiting a message on worker-4 > [ 18.351546] (2:worker@Tremblay) Execution complete. > [ 18.351546] (2:worker@Tremblay) Waiting a message on worker-0 -> [ 19.434021] (8:worker@Jupiter) Execution complete. 
-> [ 19.434021] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 19.434021] (9:worker@Jupiter) Execution complete. +> [ 19.434021] (9:worker@Jupiter) Waiting a message on worker-1 +> [ 20.000000] (0:maestro@) Restart actors on host Lilibeth +> [ 20.000000] (11:sleeper@Lilibeth) Start sleeping... +> [ 21.000000] (11:sleeper@Lilibeth) done sleeping. > [ 27.434021] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going! > [ 27.434021] (1:master@Tremblay) Send a message to worker-3 > [ 28.516495] (5:worker@Ginette) Start execution... @@ -183,15 +200,18 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 28.527320] (2:worker@Tremblay) Start execution... > [ 28.527320] (1:master@Tremblay) Send to worker-0 completed > [ 28.527320] (1:master@Tremblay) Send a message to worker-1 -> [ 29.609794] (8:worker@Jupiter) Start execution... +> [ 29.609794] (9:worker@Jupiter) Start execution... > [ 29.609794] (1:master@Tremblay) Send to worker-1 completed > [ 29.609794] (1:master@Tremblay) Send a message to worker-2 > [ 30.516495] (5:worker@Ginette) Execution complete. > [ 30.516495] (5:worker@Ginette) Waiting a message on worker-3 > [ 30.527320] (2:worker@Tremblay) Execution complete. > [ 30.527320] (2:worker@Tremblay) Waiting a message on worker-0 -> [ 31.609794] (8:worker@Jupiter) Execution complete. -> [ 31.609794] (8:worker@Jupiter) Waiting a message on worker-1 +> [ 31.609794] (9:worker@Jupiter) Execution complete. +> [ 31.609794] (9:worker@Jupiter) Waiting a message on worker-1 +> [ 35.000000] (0:maestro@) Restart actors on host Lilibeth +> [ 35.000000] (12:sleeper@Lilibeth) Start sleeping... +> [ 36.000000] (12:sleeper@Lilibeth) done sleeping. > [ 39.609794] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going! > [ 39.609794] (1:master@Tremblay) Send a message to worker-3 > [ 40.692268] (5:worker@Ginette) Start execution... 
@@ -201,7 +221,7 @@ $ ${bindir:=.}/s4u-platform-failures --log=xbt_cfg.thres:critical --log=no_loc $ > [ 41.774742] (1:master@Tremblay) Send to worker-4 completed > [ 41.774742] (1:master@Tremblay) All tasks have been dispatched. Let's tell everybody the computation is over. > [ 41.774742] (2:worker@Tremblay) I'm done. See you! -> [ 41.774742] (8:worker@Jupiter) I'm done. See you! +> [ 41.774742] (9:worker@Jupiter) I'm done. See you! > [ 42.774742] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going! > [ 43.774742] (6:worker@Bourassa) Execution complete. > [ 43.774742] (6:worker@Bourassa) Waiting a message on worker-4 -- 2.20.1