From: Frederic Suter Date: Fri, 13 Jul 2018 09:44:49 +0000 (+0200) Subject: Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid X-Git-Tag: v3_21~471 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/1b4f0ecf22879a1152f7b5fbcbf7bcdb416d21da?hp=331665bd7365d35d9cb6eeb8e85e6536c4a001a0 Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid --- diff --git a/src/kernel/activity/SleepImpl.cpp b/src/kernel/activity/SleepImpl.cpp index 041440eeda..b02cda83d0 100644 --- a/src/kernel/activity/SleepImpl.cpp +++ b/src/kernel/activity/SleepImpl.cpp @@ -11,7 +11,9 @@ #include "src/simix/ActorImpl.hpp" #include "src/simix/popping_private.hpp" +#include "src/simix/smx_private.hpp" #include "src/surf/surf_interface.hpp" +#include "xbt/ex.hpp" XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_process); @@ -30,12 +32,18 @@ void simgrid::kernel::activity::SleepImpl::post() while (not simcalls_.empty()) { smx_simcall_t simcall = simcalls_.front(); simcalls_.pop_front(); - e_smx_state_t result; + if (host && host->is_off()) { + /* If the host running the synchro failed, notice it. This way, the asking + * actor can be killed if it runs on that host itself */ + result = SIMIX_SRC_HOST_FAILURE; + SMX_EXCEPTION(simcall->issuer, host_error, 0, "Host failed"); + } + switch (surf_sleep->get_state()) { case simgrid::kernel::resource::Action::State::FAILED: simcall->issuer->context_->iwannadie = 1; - result = SIMIX_SRC_HOST_FAILURE; + result = SIMIX_FAILED; break; case simgrid::kernel::resource::Action::State::FINISHED: diff --git a/src/simix/ActorImpl.cpp b/src/simix/ActorImpl.cpp index e28bbc2100..dfb2f05ab3 100644 --- a/src/simix/ActorImpl.cpp +++ b/src/simix/ActorImpl.cpp @@ -476,8 +476,9 @@ void SIMIX_process_kill(smx_actor_t process, smx_actor_t issuer) { if (i != process->waiting_synchro->simcalls_.end()) process->waiting_synchro->simcalls_.remove(&process->simcall); } else if (sleep != nullptr) { - SIMIX_process_sleep_destroy(process->waiting_synchro); - + if (sleep->surf_sleep) + sleep->surf_sleep->cancel(); + sleep->post(); } else if (raw != nullptr) { SIMIX_synchro_stop_waiting(process, &process->simcall); diff --git a/teshsuite/msg/CMakeLists.txt b/teshsuite/msg/CMakeLists.txt index 1cc651102d..0fe97502d0 100644 --- a/teshsuite/msg/CMakeLists.txt +++ b/teshsuite/msg/CMakeLists.txt @@ -2,7 +2,7 @@ foreach(x app-pingpong app-token-ring async-wait async-waitall async-waitany cloud-capping cloud-migration cloud-sharing cloud-two-tasks cloud-simple - get_sender host_on_off host_on_off_recv host_on_off_processes + get_sender host_on_off host_on_off_recv process-daemon process-kill process-join process-lifetime process-migration process-suspend process-yield energy-consumption energy-ptask energy-pstate platform-properties io-file io-raw-storage io-file-remote @@ -18,7 +18,7 @@ foreach(x app-pingpong app-token-ring endforeach() # CPP examples -foreach(x task_destroy_cancel task_listen_from task_progress) +foreach(x task_destroy_cancel task_listen_from task_progress host_on_off_processes) add_executable (${x} ${x}/${x}.cpp) target_link_libraries(${x} simgrid) set_target_properties(${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x}) diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.c b/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp similarity index 74% rename from teshsuite/msg/host_on_off_processes/host_on_off_processes.c rename to teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp index a13d6b4c8c..c2ec3201ad 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.c +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.cpp @@ -4,6 +4,7 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "simgrid/msg.h" +#include #include /* sscanf */ @@ -12,32 +13,32 @@ XBT_LOG_NEW_DEFAULT_CATEGORY(msg_test, "Messages specific for this msg example") xbt_dynar_t tests; int tasks_done = 0; -static void task_cleanup_handler(void *task) +static void task_cleanup_handler(void* task) { if (task) - MSG_task_destroy(task); + MSG_task_destroy(static_cast(task)); } -static int process_daemon(int argc, char *argv[]) +static int process_daemon(int argc, char* argv[]) { msg_process_t self = MSG_process_self(); XBT_INFO(" Start daemon on %s (%f)", MSG_host_get_name(MSG_host_self()), MSG_host_get_speed(MSG_host_self())); - for(;;){ + for (;;) { msg_task_t task = MSG_task_create("daemon", MSG_host_get_speed(MSG_host_self()), 0, NULL); MSG_process_set_data(self, task); XBT_INFO(" Execute daemon"); MSG_task_execute(task); MSG_process_set_data(self, NULL); MSG_task_destroy(task); - tasks_done ++; + tasks_done++; } XBT_INFO(" daemon done. See you!"); return 0; } -static int process_sleep(int argc, char *argv[]) +static int process_sleep(int argc, char* argv[]) { - for(;;){ + for (;;) { XBT_INFO(" I'm alive but I should sleep"); MSG_process_sleep(10); } @@ -45,30 +46,39 @@ static int process_sleep(int argc, char *argv[]) return 0; } -static int commTX(int argc, char *argv[]) +static int commTX(int argc, char* argv[]) { - const char * mailbox = "comm"; + const char* mailbox = "comm"; XBT_INFO(" Start TX"); msg_task_t task = MSG_task_create("COMM", 0, 100000000, NULL); MSG_task_dsend(task, mailbox, task_cleanup_handler); // We should wait a bit (if not the process will end before the communication, hence an exception on the other side). - MSG_process_sleep(30); + try { + MSG_process_sleep(30); + } catch (xbt_ex& e) { + if (e.category == host_error) { + XBT_INFO("The host has died ... as expected."); + } else { + XBT_ERROR("An unexpected exception has been raised."); + throw; + } + } XBT_INFO(" TX done"); return 0; } -static int commRX(int argc, char *argv[]) +static int commRX(int argc, char* argv[]) { - msg_task_t task = NULL; - const char * mailbox = "comm"; + msg_task_t task = NULL; + const char* mailbox = "comm"; XBT_INFO(" Start RX"); msg_error_t error = MSG_task_receive(&(task), mailbox); - if (error==MSG_OK) { + if (error == MSG_OK) { XBT_INFO(" Receive message: %s", task->name); MSG_task_destroy(task); - } else if (error==MSG_HOST_FAILURE) { + } else if (error == MSG_HOST_FAILURE) { XBT_INFO(" Receive message: HOST_FAILURE"); - } else if (error==MSG_TRANSFER_FAILURE) { + } else if (error == MSG_TRANSFER_FAILURE) { XBT_INFO(" Receive message: TRANSFERT_FAILURE"); } else { XBT_INFO(" Receive message: %u", error); @@ -77,36 +87,37 @@ static int commRX(int argc, char *argv[]) return 0; } -static int test_launcher(int argc, char *argv[]) +static int test_launcher(int argc, char* argv[]) { int test = 0; - char **argvF; + char** argvF; msg_host_t jupiter = MSG_host_by_name("Jupiter"); test = 1; // Create a process running a simple task on a host and turn the host off during the execution of the process. - if (xbt_dynar_search_or_negative(tests, &test)!=-1){ + if (xbt_dynar_search_or_negative(tests, &test) != -1) { XBT_INFO("Test 1:"); XBT_INFO(" Create a process on Jupiter"); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("process_daemon"); MSG_process_create_with_arguments("process_daemon", process_daemon, NULL, jupiter, 1, argvF); MSG_process_sleep(3); XBT_INFO(" Turn off Jupiter"); MSG_host_off(jupiter); MSG_process_sleep(10); - XBT_INFO("Test 1 seems ok, cool !(#Processes: %d, it should be 1; #tasks: %d)", MSG_process_get_number(), tasks_done); + XBT_INFO("Test 1 seems ok, cool !(#Processes: %d, it should be 1; #tasks: %d)", MSG_process_get_number(), + tasks_done); } test = 2; // Create a process that on a host that is turned off (this should not be possible) - if (xbt_dynar_search_or_negative(tests, &test)!=-1){ + if (xbt_dynar_search_or_negative(tests, &test) != -1) { XBT_INFO("Test 2:"); XBT_INFO(" Turn off Jupiter"); // adsein: Jupiter is already, hence nothing should happen // adsein: This can be one additional test, to check that you cannot shutdown twice a host MSG_host_off(jupiter); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("process_daemon"); MSG_process_create_with_arguments("process_daemon", process_daemon, NULL, jupiter, 1, argvF); MSG_process_sleep(10); @@ -119,15 +130,16 @@ static int test_launcher(int argc, char *argv[]) MSG_process_sleep(10); XBT_INFO("number of Process : %d it should be 1. The daemon that has been created for test2 has been correctly " "destroyed....ok at least it looks rigorous, cool ! You just have to disallow the possibility to create " - "a new process on a node when the node is off.)", MSG_process_get_number()); + "a new process on a node when the node is off.)", + MSG_process_get_number()); } - test = 3; + test = 3; // Create a process running sucessive sleeps on a host and turn the host off during the execution of the process. - if (xbt_dynar_search_or_negative(tests, &test)!=-1){ + if (xbt_dynar_search_or_negative(tests, &test) != -1) { XBT_INFO("Test 3:"); MSG_host_on(jupiter); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("process_sleep"); MSG_process_create_with_arguments("process_sleep", process_sleep, NULL, jupiter, 1, argvF); MSG_process_sleep(100); @@ -139,35 +151,36 @@ static int test_launcher(int argc, char *argv[]) } test = 4; - if (xbt_dynar_search_or_negative(tests, &test)!=-1){ + if (xbt_dynar_search_or_negative(tests, &test) != -1) { XBT_INFO("Test 4 (turn off src during a communication) : Create a Process/task to make a communication between " "Jupiter and Tremblay and turn off Jupiter during the communication"); MSG_host_on(jupiter); MSG_process_sleep(10); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("commRX"); MSG_process_create_with_arguments("commRX", commRX, NULL, MSG_host_by_name("Tremblay"), 1, argvF); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("commTX"); MSG_process_create_with_arguments("commTX", commTX, NULL, jupiter, 1, argvF); XBT_INFO(" number of processes: %d", MSG_process_get_number()); MSG_process_sleep(10); XBT_INFO(" Turn Jupiter off"); MSG_host_off(jupiter); - XBT_INFO("Test 4 seems ok (number of Process : %d, it should be 1 or 2 if RX has not been satisfied) cool, you " - "can now turn off a node that has a process paused by a sleep call", MSG_process_get_number()); + XBT_INFO("Test 4 is ok. (number of Process : %d, it should be 1 or 2 if RX has not been satisfied)." + " An exception is raised when we turn off a node that has a process sleeping", + MSG_process_get_number()); } test = 5; - if (xbt_dynar_search_or_negative(tests, &test)!=-1){ + if (xbt_dynar_search_or_negative(tests, &test) != -1) { XBT_INFO("Test 5 (turn off dest during a communication : Create a Process/task to make a communication between " "Tremblay and Jupiter and turn off Jupiter during the communication"); MSG_host_on(jupiter); MSG_process_sleep(10); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("commRX"); MSG_process_create_with_arguments("commRX", commRX, NULL, jupiter, 1, argvF); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("commTX"); MSG_process_create_with_arguments("commTX", commTX, NULL, MSG_host_by_name("Tremblay"), 1, argvF); XBT_INFO(" number of processes: %d", MSG_process_get_number()); @@ -177,20 +190,20 @@ static int test_launcher(int argc, char *argv[]) XBT_INFO("Test 5 seems ok (number of Process: %d, it should be 2)", MSG_process_get_number()); } - test =6; - if (xbt_dynar_search_or_negative(tests, &test)!=-1){ + test = 6; + if (xbt_dynar_search_or_negative(tests, &test) != -1) { XBT_INFO("Test 6: Turn on Jupiter, assign a VM on Jupiter, launch a process inside the VM, and turn off the node"); // Create VM0 msg_vm_t vm0 = MSG_vm_create_core(jupiter, "vm0"); MSG_vm_start(vm0); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("process_daemon"); msg_process_t daemon = MSG_process_create_with_arguments("process_daemon", process_daemon, NULL, (msg_host_t)vm0, 1, argvF); - argvF = xbt_new(char*, 2); + argvF = xbt_new(char*, 2); argvF[0] = xbt_strdup("process_daemonJUPI"); MSG_process_create_with_arguments("process_daemonJUPI", process_daemon, NULL, jupiter, 1, argvF); @@ -214,21 +227,21 @@ static int test_launcher(int argc, char *argv[]) return 0; } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { msg_error_t res; MSG_init(&argc, argv); - xbt_assert(argc == 3,"Usage: %s platform_file test_number\n\tExample: %s msg_platform.xml 1\n", argv[0], argv[0]); + xbt_assert(argc == 3, "Usage: %s platform_file test_number\n\tExample: %s msg_platform.xml 1\n", argv[0], argv[0]); unsigned int iter; - char *groups; + char* groups; xbt_dynar_t s_tests = xbt_str_split(argv[2], ","); - int tmp_test = 0; - tests = xbt_dynar_new(sizeof(int), NULL); - xbt_dynar_foreach(s_tests, iter, groups) { - sscanf(xbt_dynar_get_as(s_tests, iter, char *), "%d", &tmp_test); - xbt_dynar_set_as(tests, iter, int, tmp_test); + int tmp_test = 0; + tests = xbt_dynar_new(sizeof(int), NULL); + xbt_dynar_foreach (s_tests, iter, groups) { + sscanf(xbt_dynar_get_as(s_tests, iter, char*), "%d", &tmp_test); + xbt_dynar_set_as(tests, iter, int, tmp_test); } xbt_dynar_free(&s_tests); diff --git a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh index 3f98361023..3173660bdf 100644 --- a/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh +++ b/teshsuite/msg/host_on_off_processes/host_on_off_processes.tesh @@ -49,8 +49,10 @@ $ ${bindir}/host_on_off_processes ${platfdir}/small_platform.xml 4 --log=no_loc > [Jupiter:commTX:(3) 10.000000] [msg_test/INFO] Start TX > [Tremblay:test_launcher:(1) 10.000000] [msg_test/INFO] number of processes: 3 > [Tremblay:test_launcher:(1) 20.000000] [msg_test/INFO] Turn Jupiter off -> [Tremblay:test_launcher:(1) 20.000000] [msg_test/INFO] Test 4 seems ok (number of Process : 2, it should be 1 or 2 if RX has not been satisfied) cool, you can now turn off a node that has a process paused by a sleep call +> [Tremblay:test_launcher:(1) 20.000000] [msg_test/INFO] Test 4 is ok. (number of Process : 2, it should be 1 or 2 if RX has not been satisfied). An exception is raised when we turn off a node that has a process sleeping > [Tremblay:test_launcher:(1) 20.000000] [msg_test/INFO] Test done. See you! +> [Jupiter:commTX:(3) 20.000000] [msg_test/INFO] The host has died ... as expected. +> [Jupiter:commTX:(3) 20.000000] [msg_test/INFO] TX done > [Tremblay:commRX:(2) 20.000000] [msg_test/INFO] Receive message: TRANSFERT_FAILURE > [Tremblay:commRX:(2) 20.000000] [msg_test/INFO] RX Done > [20.000000] [msg_test/INFO] Simulation time 20 diff --git a/teshsuite/s4u/host_on_off_wait/host_on_off_wait.cpp b/teshsuite/s4u/host_on_off_wait/host_on_off_wait.cpp index 280536ee27..0ee7ad17b5 100644 --- a/teshsuite/s4u/host_on_off_wait/host_on_off_wait.cpp +++ b/teshsuite/s4u/host_on_off_wait/host_on_off_wait.cpp @@ -4,6 +4,7 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include "simgrid/s4u.hpp" +#include XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_test, "Messages specific for this s4u example"); @@ -21,9 +22,16 @@ static void master() static void worker() { XBT_INFO("Worker waiting"); - // TODO, This should really be MSG_HOST_FAILURE - simgrid::s4u::this_actor::sleep_for(5); - XBT_ERROR("Worker should be off already."); + try { + simgrid::s4u::this_actor::sleep_for(5); + } catch (xbt_ex& e) { + if (e.category == host_error) { + XBT_INFO("The host has died ... as expected."); + } else { + XBT_ERROR("An unexpected exception has been raised."); + throw; + } + } } int main(int argc, char* argv[]) diff --git a/teshsuite/s4u/host_on_off_wait/host_on_off_wait.tesh b/teshsuite/s4u/host_on_off_wait/host_on_off_wait.tesh index 4a13add852..d82c303853 100644 --- a/teshsuite/s4u/host_on_off_wait/host_on_off_wait.tesh +++ b/teshsuite/s4u/host_on_off_wait/host_on_off_wait.tesh @@ -3,4 +3,5 @@ $ ./host_on_off_wait ${platfdir}/small_platform.xml > [Jupiter:worker:(2) 0.000000] [s4u_test/INFO] Worker waiting > [Tremblay:master:(1) 1.000000] [s4u_test/INFO] Turning off the worker host > [Tremblay:master:(1) 1.000000] [s4u_test/INFO] Master has finished +> [Jupiter:worker:(2) 1.000000] [s4u_test/INFO] The host has died ... as expected. > [1.000000] [s4u_test/INFO] Simulation time 1