From: Frederic Suter
Date: Mon, 16 Jul 2018 14:53:18 +0000 (+0200)
Subject: MSG to S4U conversion.
X-Git-Tag: v3_21~421
X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/7bb44227a9dadf58237d2cfda064b157f1e821b9

MSG to S4U conversion.

This example is currently broken, hence not tested
---

diff --git a/examples/s4u/CMakeLists.txt b/examples/s4u/CMakeLists.txt
index 7b24fc1462..7d21680842 100644
--- a/examples/s4u/CMakeLists.txt
+++ b/examples/s4u/CMakeLists.txt
@@ -10,7 +10,7 @@ foreach (example actor-create actor-daemon actor-join actor-kill
                  exec-async exec-basic exec-dvfs exec-monitor exec-ptask exec-remote
                  io-file-system io-file-remote io-storage-raw
                  mutex
-                 platform-properties plugin-hostload
+                 platform-failures platform-properties plugin-hostload
                  replay-comm replay-storage
                  routing-get-clusters
                  trace-platform)
diff --git a/examples/s4u/platform-failures/s4u-platform-failures.cpp b/examples/s4u/platform-failures/s4u-platform-failures.cpp
new file mode 100644
index 0000000000..d0b33fde9b
--- /dev/null
+++ b/examples/s4u/platform-failures/s4u-platform-failures.cpp
@@ -0,0 +1,153 @@
+/* Copyright (c) 2007-2018. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* Master/workers example running on a platform whose hosts and links can fail.
+ *
+ * The master sends one computation size per task to the "worker-<i>" mailboxes in
+ * round-robin order, then sends -1.0 to every worker to tell it to stop. Each worker
+ * executes what it receives until it gets a negative size. */
+
+#include "simgrid/s4u.hpp"
+#include "xbt/ex.hpp"
+#include "xbt/str.h"
+
+#include <string>
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_test, "Messages specific for this s4u example");
+
+/* Logs why a send to mailbox_name failed.
+ * Returns true when the sender can keep going (network error or timeout), false when
+ * the sender's own host failed. Any other error category aborts the simulation. */
+static bool report_send_failure(const xbt_ex& e, const char* mailbox_name)
+{
+  bool can_continue = true;
+  switch (e.category) {
+    case host_error:
+      XBT_INFO("Gloups. The cpu on which I'm running just turned off!. See you!");
+      can_continue = false;
+      break;
+    case network_error:
+      XBT_INFO("Mmh. Something went wrong with '%s'. Nevermind. Let's keep going!", mailbox_name);
+      break;
+    case timeout_error:
+      XBT_INFO("Mmh. Got timeouted while speaking to '%s'. Nevermind. Let's keep going!", mailbox_name);
+      break;
+    default:
+      xbt_die("Unexpected behavior");
+  }
+  return can_continue;
+}
+
+/* Master actor.
+ * argv[1]: number of tasks, argv[2]: computation size of each task,
+ * argv[3]: communication size of each task, argv[4]: number of workers.
+ * Returns 0 on normal termination, -1 when the master's own host failed. */
+static int master(int argc, char* argv[])
+{
+  xbt_assert(argc > 4, "The master expects 4 arguments: tasks, computation size, communication size, workers");
+  long number_of_tasks = xbt_str_parse_int(argv[1], "Invalid amount of tasks: %s");
+  double comp_size     = xbt_str_parse_double(argv[2], "Invalid computational size: %s");
+  double comm_size     = xbt_str_parse_double(argv[3], "Invalid communication size: %s");
+  long workers_count   = xbt_str_parse_int(argv[4], "Invalid amount of workers: %s");
+  /* The round-robin dispatch below computes i % workers_count */
+  xbt_assert(workers_count > 0, "The master needs at least one worker");
+
+  XBT_INFO("Got %ld workers and %ld tasks to process", workers_count, number_of_tasks);
+
+  for (long i = 0; i < number_of_tasks; i++) {
+    simgrid::s4u::MailboxPtr mailbox =
+        simgrid::s4u::Mailbox::by_name(std::string("worker-") + std::to_string(i % workers_count));
+    double* payload = new double(comp_size);
+    try {
+      mailbox->put(payload, comm_size, 10.0);
+      XBT_INFO("Send completed");
+    } catch (const xbt_ex& e) {
+      /* Free the undelivered payload first, so that the early return below cannot leak it */
+      delete payload;
+      if (!report_send_failure(e, mailbox->get_cname()))
+        return -1;
+    }
+  }
+
+  XBT_INFO("All tasks have been dispatched. Let's tell everybody the computation is over.");
+  for (long i = 0; i < workers_count; i++) {
+    /* - Eventually tell all the workers to stop by sending a "finalize" task */
+    simgrid::s4u::MailboxPtr mailbox = simgrid::s4u::Mailbox::by_name(std::string("worker-") + std::to_string(i));
+    double* payload = new double(-1.0);
+    try {
+      mailbox->put(payload, 0, 1.0);
+    } catch (const xbt_ex& e) {
+      delete payload;
+      if (!report_send_failure(e, mailbox->get_cname()))
+        return -1;
+    }
+  }
+
+  XBT_INFO("Goodbye now!");
+  return 0;
+}
+
+/* Worker actor.
+ * argv[1]: id of this worker, which listens on the "worker-<id>" mailbox.
+ * Returns 0 after receiving the stop order, -1 when the worker's own host failed. */
+static int worker(int argc, char* argv[])
+{
+  xbt_assert(argc > 1, "The worker expects its id as argument");
+  long id = xbt_str_parse_int(argv[1], "Invalid argument %s");
+  simgrid::s4u::MailboxPtr mailbox = simgrid::s4u::Mailbox::by_name(std::string("worker-") + std::to_string(id));
+
+  while (true) {
+    double comp_size = 0.0;
+    try {
+      double* payload = static_cast<double*>(mailbox->get());
+      xbt_assert(payload != nullptr, "mailbox->get() failed");
+      comp_size = *payload;
+      delete payload;
+    } catch (const xbt_ex& e) {
+      switch (e.category) {
+        case host_error:
+          XBT_INFO("Gloups. The cpu on which I'm running just turned off!. See you!");
+          return -1;
+        case network_error:
+          XBT_INFO("Mmh. Something went wrong. Nevermind. Let's keep going!");
+          /* Nothing was received: wait for the next message rather than reuse stale data */
+          continue;
+        default:
+          xbt_die("Unexpected behavior");
+      }
+    }
+    if (comp_size < 0) /* - Exit when -1.0 is received */
+      break;
+    /* - Otherwise, process the task */
+    try {
+      simgrid::s4u::this_actor::execute(comp_size);
+    } catch (const xbt_ex& e) {
+      if (e.category != host_error)
+        xbt_die("Unexpected behavior");
+      XBT_INFO("Gloups. The cpu on which I'm running just turned off!. See you!");
+      return -1;
+    }
+  }
+  /* Logged once, here only: the expected output has a single farewell line per worker */
+  XBT_INFO("I'm done. See you!");
+  return 0;
+}
+
+/* Usage: s4u-platform-failures platform_file deployment_file */
+int main(int argc, char* argv[])
+{
+  simgrid::s4u::Engine e(&argc, argv);
+  xbt_assert(argc > 2, "Usage: %s platform_file deployment_file", argv[0]);
+
+  e.load_platform(argv[1]);
+  e.register_function("master", master);
+  e.register_function("worker", worker);
+  e.load_deployment(argv[2]);
+
+  e.run();
+
+  XBT_INFO("Simulation time %g", simgrid::s4u::Engine::get_clock());
+  return 0;
+}
diff --git a/examples/s4u/platform-failures/s4u-platform-failures.tesh b/examples/s4u/platform-failures/s4u-platform-failures.tesh
new file mode 100644
index 0000000000..5738a23318
--- /dev/null
+++ b/examples/s4u/platform-failures/s4u-platform-failures.tesh
@@ -0,0 +1,42 @@
+#!/usr/bin/env tesh
+
+p Testing a simple master/worker example application handling failures TCP crosstraffic DISABLED
+
+! output sort 19
+$ $SG_TEST_EXENV ${bindir:=.}/s4u-platform-failures$EXEEXT --log=xbt_cfg.thres:critical --log=no_loc ${platfdir}/small_platform_with_failures.xml ${bindir}/../app-masterworker/s4u-app-masterworker_d.xml --cfg=path:${srcdir} --cfg=network/crosstraffic:0 "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n"
+> [  0.000000] (0:maestro@) Cannot launch process 'worker' on failed host 'Fafard'
+> [  0.000000] (1:master@Tremblay) Got 5 workers and 20 tasks to process
+> [  0.010309] (1:master@Tremblay) Send completed
+> [  1.000000] (0:maestro@) Restart processes on host Fafard
+> [  1.000000] (1:master@Tremblay) Mmh. Something went wrong with 'worker-1'. Nevermind. Let's keep going!
+> [  1.000000] (3:worker@Jupiter) Gloups. The cpu on which I'm running just turned off!. See you!
+> [  2.000000] (0:maestro@) Restart processes on host Jupiter
+> [ 11.000000] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going!
+> [ 12.030928] (1:master@Tremblay) Send completed
+> [ 13.061856] (1:master@Tremblay) Send completed
+> [ 13.072165] (1:master@Tremblay) Send completed
+> [ 14.103093] (1:master@Tremblay) Send completed
+> [ 24.103093] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going!
+> [ 24.103093] (1:master@Tremblay) Mmh. Something went wrong with 'worker-3'. Nevermind. Let's keep going!
+> [ 24.103093] (4:worker@Ginette) Mmh. Something went wrong. Nevermind. Let's keep going!
+> [ 25.134021] (1:master@Tremblay) Send completed
+> [ 25.144330] (1:master@Tremblay) Send completed
+> [ 26.175258] (1:master@Tremblay) Send completed
+> [ 36.175258] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going!
+> [ 37.206186] (1:master@Tremblay) Send completed
+> [ 37.206186] (1:master@Tremblay) Mmh. Something went wrong with 'worker-4'. Nevermind. Let's keep going!
+> [ 37.206186] (5:worker@Bourassa) Mmh. Something went wrong. Nevermind. Let's keep going!
+> [ 38.247423] (1:master@Tremblay) Send completed
+> [ 48.247423] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going!
+> [ 49.278351] (1:master@Tremblay) Send completed
+> [ 50.000000] (4:worker@Ginette) Gloups. The cpu on which I'm running just turned off!. See you!
+> [ 50.309278] (1:master@Tremblay) Send completed
+> [ 50.309278] (1:master@Tremblay) All tasks have been dispatched. Let's tell everybody the computation is over.
+> [ 50.309278] (2:worker@Tremblay) I'm done. See you!
+> [ 50.309278] (6:worker@Jupiter) I'm done. See you!
+> [ 51.309278] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-2'. Nevermind. Let's keep going!
+> [ 52.309278] (0:maestro@) Simulation time 52.3093
+> [ 52.309278] (1:master@Tremblay) Mmh. Got timeouted while speaking to 'worker-3'. Nevermind. Let's keep going!
+> [ 52.309278] (1:master@Tremblay) Goodbye now!
+> [ 52.309278] (5:worker@Bourassa) I'm done. See you!
+