From e3b4914d04bbe5d9e79ea8e8cf00d6ad9e7824d1 Mon Sep 17 00:00:00 2001 From: Frederic Suter Date: Sun, 5 Aug 2018 20:25:24 +0200 Subject: [PATCH] Warn processes killed because their host was turned off. Handle the consequence in old msg tests. This creates an annoying leak for: teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp --- src/simix/ActorImpl.cpp | 4 +++- teshsuite/msg/host_on_off/host_on_off.c | 4 ++++ .../msg/host_on_off_processes/host_on_off_processes.cpp | 8 +++++--- .../msg/host_on_off_processes/host_on_off_processes.tesh | 3 +++ teshsuite/msg/host_on_off_recv/host_on_off_recv.c | 5 ++++- 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/simix/ActorImpl.cpp b/src/simix/ActorImpl.cpp index c27e6d0386..7ff3367903 100644 --- a/src/simix/ActorImpl.cpp +++ b/src/simix/ActorImpl.cpp @@ -450,7 +450,9 @@ void SIMIX_process_kill(smx_actor_t process, smx_actor_t issuer) { /* destroy the blocking synchro if any */ if (process->waiting_synchro != nullptr) { - + if (process->host_->is_off()) { + SMX_EXCEPTION(process, host_error, 0, "Host failed"); + } simgrid::kernel::activity::ExecImplPtr exec = boost::dynamic_pointer_cast(process->waiting_synchro); simgrid::kernel::activity::CommImplPtr comm = diff --git a/teshsuite/msg/host_on_off/host_on_off.c b/teshsuite/msg/host_on_off/host_on_off.c index 8c34453a53..9922503d12 100644 --- a/teshsuite/msg/host_on_off/host_on_off.c +++ b/teshsuite/msg/host_on_off/host_on_off.c @@ -17,6 +17,10 @@ static int slave(int argc, char *argv[]) while (1) { res = MSG_task_receive(&(task), mailbox); + if (res == MSG_HOST_FAILURE) { + XBT_DEBUG("The host has been turned off, this was expected"); + return 1; + } xbt_assert(res == MSG_OK, "MSG_task_get failed"); if (!strcmp(MSG_task_get_name(task), "finalize")) { diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp b/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp index f84aaed537..8f0b5b2d36 100644 ---
a/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp @@ -27,10 +27,13 @@ static int process_daemon(int /*argc*/, char** /*argv*/) msg_task_t task = MSG_task_create("daemon", MSG_host_get_speed(MSG_host_self()), 0, NULL); MSG_process_set_data(self, task); XBT_INFO(" Execute daemon"); - MSG_task_execute(task); - MSG_process_set_data(self, NULL); + msg_error_t res = MSG_task_execute(task); MSG_task_destroy(task); tasks_done++; + if (res == MSG_HOST_FAILURE) { + XBT_INFO("Host as died as expected, do nothing else"); + return 0; + } } XBT_INFO(" daemon done. See you!"); return 0; @@ -247,7 +250,6 @@ int main(int argc, char* argv[]) MSG_create_environment(argv[1]); - MSG_process_set_data_cleanup(task_cleanup_handler); MSG_process_create("test_launcher", test_launcher, NULL, MSG_get_host_by_name("Tremblay")); res = MSG_main(); diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh index ba402ab5a0..f89a77e731 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh @@ -65,6 +65,8 @@ $ ${bindir}/host_on_off_processes ${platfdir}/small_platform.xml 5 --log=no_loc > [Tremblay:test_launcher:(1) 20.000000] [msg_test/INFO] Turn Jupiter off > [Tremblay:test_launcher:(1) 20.000000] [msg_test/INFO] Test 5 seems ok (number of Process: 2, it should be 2) > [Tremblay:test_launcher:(1) 20.000000] [msg_test/INFO] Test done. See you! 
+> [Jupiter:commRX:(2) 20.000000] [msg_test/INFO] Receive message: HOST_FAILURE +> [Jupiter:commRX:(2) 20.000000] [msg_test/INFO] RX Done > [Tremblay:commTX:(3) 40.000000] [msg_test/INFO] TX done > [40.000000] [msg_test/INFO] Simulation time 40 @@ -85,6 +87,7 @@ $ ${bindir}/host_on_off_processes ${platfdir}/small_platform.xml 6 --log=no_loc > [Jupiter:process_daemonJUPI:(3) 9.000011] [msg_test/INFO] Execute daemon > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Turn Jupiter off > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Shutdown vm0 +> [Jupiter:process_daemonJUPI:(3) 10.000000] [msg_test/INFO] Host as died as expected, do nothing else > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Destroy vm0 > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test 6 is also weird: when the node Jupiter is turned off once again, the VM and its daemon are not killed. However, the issue regarding the shutdown of hosted VMs can be seen a feature not a bug ;) > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] Test done. See you! diff --git a/teshsuite/msg/host_on_off_recv/host_on_off_recv.c b/teshsuite/msg/host_on_off_recv/host_on_off_recv.c index 97363f8ab2..9755c8d3f6 100644 --- a/teshsuite/msg/host_on_off_recv/host_on_off_recv.c +++ b/teshsuite/msg/host_on_off_recv/host_on_off_recv.c @@ -41,7 +41,10 @@ static int slave(int argc, char *argv[]) msg_task_t task = NULL; msg_error_t error = MSG_task_receive(&(task), mailbox); if (error) { - XBT_ERROR("Error while receiving message"); + if (error != MSG_HOST_FAILURE) + XBT_ERROR("Error while receiving message"); + else + XBT_DEBUG("The host has been turned off, this was expected"); return 1; } -- 2.20.1