X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/67c723e593aeb7d51a39a70bd75bef02199aae60..4c753f8d4cabd4104f3f7109823f16be2ebdcce3:/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp diff --git a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp index 67a1c104ec..9554fc3e3e 100644 --- a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp +++ b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp @@ -28,7 +28,7 @@ namespace sg4 = simgrid::s4u; XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_test, "Messages specific for this s4u example"); -static simgrid::config::Flag cfg_host_count{"host-count", "Host count (master on one, workers on the others)", 2}; +static simgrid::config::Flag cfg_host_count{"host-count", "Host count (master on one, workers on the others)", 3}; static simgrid::config::Flag cfg_deadline{"deadline", "When to fail the simulation (infinite loop detection)", 120}; static simgrid::config::Flag cfg_task_count{"task-count", "Amount of tasks that must be executed to succeed", 1}; @@ -36,27 +36,31 @@ static simgrid::config::Flag cfg_task_count{"task-count", "Amount of tasks int todo; // remaining amount of tasks to execute, a global variable sg4::Mailbox* mailbox; // as a global to reduce the amount of simcalls during actor reboot -static void master(double comp_size, long comm_size) +XBT_ATTRIB_NORETURN static void master() { - XBT_INFO("Master booting"); - sg4::Actor::self()->daemonize(); - sg4::this_actor::on_exit( - [](bool forcefully) { XBT_INFO("Master dying %s.", forcefully ? "forcefully" : "peacefully"); }); + double comp_size = 1e6; + long comm_size = 1e6; + bool rebooting = sg4::Actor::self()->get_restart_count() > 0; + + XBT_INFO("Master %s", rebooting ? "rebooting" : "booting"); + if (not rebooting) // Starting for the first time + sg4::this_actor::on_exit( + [](bool forcefully) { XBT_INFO("Master dying %s.", forcefully ? "forcefully" : "peacefully"); }); while (true) { // This is a daemon xbt_assert(sg4::Engine::get_clock() < cfg_deadline, "Failed to run all tasks in less than %d seconds. Is this an infinite loop?", (int)cfg_deadline); - auto* payload = new double(comp_size); + auto payload = std::make_unique(comp_size); try { XBT_INFO("Try to send a message"); - mailbox->put(payload, comm_size, 10.0); + mailbox->put(payload.get(), comm_size, 10.0); + payload.release(); } catch (const simgrid::TimeoutException&) { - delete payload; XBT_INFO("Timeouted while sending a task"); } catch (const simgrid::NetworkFailureException&) { - delete payload; - XBT_INFO("Network error while sending a task"); + XBT_INFO("Got a NetworkFailureException. Wait a second before starting again."); + sg4::this_actor::sleep_for(1); } } THROW_IMPOSSIBLE; @@ -64,9 +68,12 @@ static void master(double comp_size, long comm_size) static void worker(int id) { - XBT_INFO("Worker booting"); - sg4::this_actor::on_exit( - [id](bool forcefully) { XBT_INFO("worker %d dying %s.", id, forcefully ? "forcefully" : "peacefully"); }); + bool rebooting = sg4::Actor::self()->get_restart_count() > 0; + + XBT_INFO("Worker %s", rebooting ? "rebooting" : "booting"); + if (not rebooting) // Starting for the first time + sg4::this_actor::on_exit( + [id](bool forcefully) { XBT_INFO("worker %d dying %s.", id, forcefully ? "forcefully" : "peacefully"); }); while (todo > 0) { xbt_assert(sg4::Engine::get_clock() < cfg_deadline, @@ -76,20 +83,16 @@ static void worker(int id) auto payload = mailbox->get_unique(10); xbt_assert(payload != nullptr, "mailbox->get() failed"); double comp_size = *payload; - if (comp_size < 0) { /* - Exit when -1.0 is received */ - XBT_INFO("I'm done. See you!"); - break; - } - /* - Otherwise, process the task */ + XBT_INFO("Start execution..."); sg4::this_actor::execute(comp_size); XBT_INFO("Execution complete."); todo--; } catch (const simgrid::TimeoutException&) { XBT_INFO("Timeouted while getting a task."); - } catch (const simgrid::NetworkFailureException&) { - XBT_INFO("Mmh. Something went wrong. Nevermind. Let's keep going!"); + XBT_INFO("Got a NetworkFailureException. Wait a second before starting again."); + sg4::this_actor::sleep_for(1); } } } @@ -98,29 +101,27 @@ int main(int argc, char* argv[]) { sg4::Engine e(&argc, argv); - XBT_INFO("host count: %d ", (int)cfg_host_count); - auto* rootzone = sg4::create_full_zone("root"); - sg4::Host* main; // First host created, where the master will stay std::vector worker_hosts; - for (int i = 0; i < cfg_host_count; i++) { - auto hostname = std::string("lilibeth ") + std::to_string(i); - auto* host = rootzone->create_host(hostname, 1e15); - if (i == 0) { - main = host; - } else { - sg4::LinkInRoute link(rootzone->create_link(hostname, "1MBps")->set_latency("24us")->seal()); - rootzone->add_route(main->get_netpoint(), host->get_netpoint(), nullptr, nullptr, {link}, true); - worker_hosts.push_back(host); - } + + xbt_assert(cfg_host_count > 2, "You need at least 2 workers (i.e., 3 hosts) or the master will be auto-killed when " + "the only worker gets killed."); + sg4::Host* master_host = rootzone->create_host("lilibeth 0", 1e9); // Host where the master will stay + for (int i = 1; i < cfg_host_count; i++) { + auto hostname = "lilibeth " + std::to_string(i); + auto* host = rootzone->create_host(hostname, 1e9); + sg4::LinkInRoute link(rootzone->create_link(hostname, "1MBps")->set_latency("24us")->seal()); + rootzone->add_route(master_host->get_netpoint(), host->get_netpoint(), nullptr, nullptr, {link}, true); + worker_hosts.push_back(host); } rootzone->seal(); - sg4::Engine::get_instance()->on_platform_created(); // FIXME this should not be necessary - sg4::Actor::create("master", main, master, 50000000, 1000000)->set_auto_restart(true); + sg4::Actor::create("master", master_host, master)->daemonize()->set_auto_restart(true); int id = 0; - for (auto* h : worker_hosts) - sg4::Actor::create("worker", h, worker, id++)->set_auto_restart(true); + for (auto* h : worker_hosts) { + sg4::Actor::create("worker", h, worker, id)->set_auto_restart(true); + id++; + } todo = cfg_task_count; xbt_assert(todo > 0, "Please give more than %d tasks to run", todo);