From: Martin Quinson Date: Sun, 26 Aug 2018 20:50:16 +0000 (+0200) Subject: MSG_process_sleep should intercept HostFailureException and report it accordingly X-Git-Tag: v3_21~156 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/0af2746ea094442ef04e3f0fab84710cdd43b50b?hp=0d4065e84bc5b13ae87790aba6294c7a21bc0199 MSG_process_sleep should intercept HostFailureException and report it accordingly Don't ask me how it could have worked before, but there was a C++ try/catch in teshsuite/msg/host_on_off_processes. In MSG code!! --- diff --git a/src/msg/msg_gos.cpp b/src/msg/msg_gos.cpp index 65b78093c9..97781b9468 100644 --- a/src/msg/msg_gos.cpp +++ b/src/msg/msg_gos.cpp @@ -114,8 +114,9 @@ msg_error_t MSG_process_sleep(double nb_sec) try { simgrid::s4u::this_actor::sleep_for(nb_sec); - } - catch(xbt_ex& e) { + } catch (simgrid::HostFailureException& e) { + status = MSG_HOST_FAILURE; + } catch (xbt_ex& e) { if (e.category == cancel_error) { XBT_DEBUG("According to the JAVA API, a sleep call should only deal with HostFailureException, I'm lost."); // adsein: MSG_TASK_CANCELED is assigned when someone kills the process that made the sleep, this is not diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp b/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp index 122a39a046..669bd11a6a 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp @@ -43,7 +43,11 @@ static int process_sleep(int /*argc*/, char** /*argv*/) { for (;;) { XBT_INFO(" I'm alive but I should sleep"); - MSG_process_sleep(10); + int res = MSG_process_sleep(10); + if (res == MSG_HOST_FAILURE) { + XBT_INFO("Host as died as expected, do nothing else"); + return 0; + } } XBT_INFO(" I'm done. 
See you!"); return 0; @@ -56,9 +60,8 @@ static int commTX(int /*argc*/, char** /*argv*/) msg_task_t task = MSG_task_create("COMM", 0, 100000000, NULL); MSG_task_dsend(task, mailbox, task_cleanup_handler); // We should wait a bit (if not the process will end before the communication, hence an exception on the other side). - try { - MSG_process_sleep(30); - } catch (simgrid::HostFailureException& e) { + int res = MSG_process_sleep(30); + if (res == MSG_HOST_FAILURE) { XBT_INFO("The host has died ... as expected."); } XBT_INFO(" TX done"); diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh index f89a77e731..695428a090 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh @@ -8,7 +8,8 @@ $ ${bindir}/host_on_off_processes ${platfdir}/small_platform.xml 1 --log=no_loc > [Jupiter:process_daemon:(2) 2.000000] [msg_test/INFO] Execute daemon > [Tremblay:test_launcher:(1) 3.000000] [msg_test/INFO] Turn off Jupiter > [Jupiter:process_daemon:(2) 3.000000] [msg_test/INFO] Execute daemon -> [Tremblay:test_launcher:(1) 13.000000] [msg_test/INFO] Test 1 seems ok, cool !(#Processes: 1, it should be 1; #tasks: 3) +> [Jupiter:process_daemon:(2) 3.000000] [msg_test/INFO] Host as died as expected, do nothing else +> [Tremblay:test_launcher:(1) 13.000000] [msg_test/INFO] Test 1 seems ok, cool !(#Processes: 1, it should be 1; #tasks: 4) > [Tremblay:test_launcher:(1) 13.000000] [msg_test/INFO] Test done. See you! 
> [13.000000] [msg_test/INFO] Simulation time 13 @@ -39,6 +40,7 @@ $ ${bindir}/host_on_off_processes ${platfdir}/small_platform.xml 3 --log=no_loc > [Tremblay:test_launcher:(1) 100.000000] [msg_test/INFO] Turn off > [Jupiter:process_sleep:(2) 100.000000] [msg_test/INFO] I'm alive but I should sleep > [Tremblay:test_launcher:(1) 100.000000] [msg_test/INFO] sleep for 10 seconds +> [Jupiter:process_sleep:(2) 100.000000] [msg_test/INFO] Host as died as expected, do nothing else > [Tremblay:test_launcher:(1) 10100.000000] [msg_test/INFO] number of Process : 1 it should be 1 (i.e. the Test one)) > [Tremblay:test_launcher:(1) 10100.000000] [msg_test/INFO] Test done. See you! > [10100.000000] [msg_test/INFO] Simulation time 10100