From: Martin Quinson Date: Wed, 2 Mar 2022 22:28:18 +0000 (+0100) Subject: Python version of the masterworkers monkey X-Git-Tag: v3.31~245 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/16678c8c9b1395b7b2a65ff845fbcdda775b3f44 Python version of the masterworkers monkey --- diff --git a/src/bindings/python/simgrid_python.cpp b/src/bindings/python/simgrid_python.cpp index 206f1d0c6f..dbffb1fcee 100644 --- a/src/bindings/python/simgrid_python.cpp +++ b/src/bindings/python/simgrid_python.cpp @@ -155,8 +155,9 @@ PYBIND11_MODULE(simgrid, m) .def_static("get_clock", []() // XBT_ATTRIB_DEPRECATED_v334 { - PyErr_WarnEx(PyExc_DeprecationWarning, - "get_clock() is deprecated and will be dropped after v3.33, use clock instead.", 1); + PyErr_WarnEx( + PyExc_DeprecationWarning, + "get_clock() is deprecated and will be dropped after v3.33, use `Engine.clock` instead.", 1); return Engine::get_clock(); }) .def_property_readonly_static( diff --git a/teshsuite/s4u/CMakeLists.txt b/teshsuite/s4u/CMakeLists.txt index 86987b7ba8..270d6f0b78 100644 --- a/teshsuite/s4u/CMakeLists.txt +++ b/teshsuite/s4u/CMakeLists.txt @@ -49,12 +49,38 @@ foreach(x basic-link-test basic-parsing-test host-on-off host-on-off-actors host monkey-masterworkers pid storage_client_server trace-integration seal-platform issue71) set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh) - ADD_TESH(tesh-s4u-${x} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/s4u/${x} --setenv srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x} --setenv rootdir=${CMAKE_HOME_DIRECTORY} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/teshsuite/s4u/${x} ${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x}/${x}.tesh) + ADD_TESH(tesh-s4u-${x} + --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/s4u/${x} + --setenv srcdir=${CMAKE_CURRENT_SOURCE_DIR}/${x} + --setenv rootdir=${CMAKE_HOME_DIRECTORY} + --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms + --cd 
${CMAKE_BINARY_DIR}/teshsuite/s4u/${x} + ${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x}/${x}.tesh) endforeach() +# Python tesh tests +foreach(x monkey-masterworkers) + if(enable_python) + ADD_TESH(tesh-python-${x} + --setenv srcdir=${CMAKE_CURRENT_SOURCE_DIR}/${x} + --setenv pythoncmd=${PYTHON_EXECUTABLE} + --setenv LD_LIBRARY_PATH=${TESH_LIBRARY_PATH} + --setenv PYTHONPATH=${CMAKE_BINARY_DIR}/lib + --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms + --cd ${CMAKE_BINARY_DIR}/teshsuite/s4u/${x} + ${CMAKE_HOME_DIRECTORY}/teshsuite/s4u/${x}/${x}.py.tesh) + + endif() +endforeach() + + # Monkey tests are launched directly, not with tesh foreach(x monkey-masterworkers) ADD_TEST(monkey-s4u-${x} "${PYTHON_EXECUTABLE}" ${CMAKE_HOME_DIRECTORY}/tools/simgrid-monkey ${CMAKE_BINARY_DIR}/teshsuite/s4u/${x}/${x}) + if(enable_python) + ADD_TEST(monkey-python-${x} "${PYTHON_EXECUTABLE}" ${CMAKE_HOME_DIRECTORY}/tools/simgrid-monkey "${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.py) + set_tests_properties(monkey-python-${x} PROPERTIES ENVIRONMENT "PYTHONPATH=${CMAKE_BINARY_DIR}/lib") + endif() endforeach() diff --git a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp index 8e14acd194..aef2566218 100644 --- a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp +++ b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.cpp @@ -96,8 +96,6 @@ int main(int argc, char* argv[]) { sg4::Engine e(&argc, argv); - XBT_INFO("host count: %d ", (int)cfg_host_count); - auto* rootzone = sg4::create_full_zone("root"); sg4::Host* main; // First host created, where the master will stay std::vector worker_hosts; diff --git a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.py b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.py new file mode 100644 index 0000000000..e8605f9bce --- /dev/null +++ b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.py @@ -0,0 +1,102 @@ +# Copyright 
(c) 2007-2022. The SimGrid Team. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the license (GNU LGPL) which comes with this package. + +""" + This is a version of the masterworkers that (hopefully) survives the chaos monkey. + It tests synchronous send/receive as well as synchronous computations. + + It is not written to be pleasant to read, but instead to resist the aggressions of the monkey: + - Workers keep going until after a global variable `todo` reaches 0. + - The master is a daemon that just sends tasks indefinitely + (simgrid simulations stop as soon as all non-daemon actors are done). + - The platform is created programmatically to remove path issues and control the problem size. + + See the simgrid-monkey script for more information. + + Inline configuration items: + - host-count: how many actors to start (including the master) + - task-count: initial value of the `todo` global + - deadline: time at which the simulation is considered to have failed (to detect infinite loops). + +""" + +# Configuration items: +host_count = 3 # Host count (master on one, workers on the others) +task_count = 1 # Amount of tasks that must be executed to succeed +deadline = 120 # When to fail the simulation (infinite loop detection) +# End of configuration + +import sys +from simgrid import Actor, Engine, Host, this_actor, Mailbox, NetZone, LinkInRoute, TimeoutException, NetworkFailureException + +todo = task_count # remaining amount of tasks to execute, a global variable + +def master(): + comp_size = int(1e6) + comm_size = int(1e6) + this_actor.info("Master booting") + Actor.self().daemonize() + this_actor.on_exit(lambda killed: this_actor.info("Master dying forcefully." if killed else "Master dying peacefully.")) + + while True: # This is a daemon + assert Engine.clock < deadline, f"Failed to run all tasks in less than {deadline} seconds. Is this an infinite loop?" 
+ + try: + this_actor.info("Try to send a message") + mailbox.put(comp_size, comm_size, 10.) + except TimeoutException: + this_actor.info("Timeouted while sending a task") + except NetworkFailureException: + this_actor.info("Got a NetworkFailureException. Wait a second before starting again.") + this_actor.sleep_for(1.) + + assert False, "The impossible just happened (yet again): daemons shall not finish." + +def worker(id): + global todo + this_actor.info(f"Worker {id} booting") + this_actor.on_exit(lambda killed: this_actor.info(f"Worker {id} dying {'forcefully' if killed else 'peacefully'}.")) + + while todo > 0: + assert Engine.clock < deadline, f"Failed to run all tasks in less than {deadline} seconds. Is this an infinite loop?" + + try: + this_actor.info(f"Waiting a message on mailbox") + compute_cost = mailbox.get() + + this_actor.info("Start execution...") + this_actor.execute(compute_cost) + todo = todo - 1 + this_actor.info(f"Execution complete. Still {todo} to go.") + + except NetworkFailureException: + this_actor.info("Got a NetworkFailureException. Wait a second before starting again.") + this_actor.sleep_for(1.) + except TimeoutException: + this_actor.info("Timeouted while getting a task.") + +if __name__ == '__main__': + global mailbox + e = Engine(sys.argv) + + assert host_count > 2, "You need at least 2 workers (i.e., 3 hosts) or the master will be auto-killed when the only worker gets killed." + assert todo > 0, "Please give some tasks to do to the workers." 
+ + mailbox = Mailbox.by_name("mailbox") + + rootzone = NetZone.create_full_zone("Zone1") + main = rootzone.create_host("lilibeth 0", 1e9) + Actor.create("master", main, master).set_auto_restart(True) + + for i in range(1, host_count): + link = rootzone.create_split_duplex_link(f"link {i}", "1MBps").set_latency("24us") + host = rootzone.create_host(f"lilibeth {i}", 1e9) + rootzone.add_route(main.netpoint, host.netpoint, None, None, [LinkInRoute(link, LinkInRoute.Direction.UP)], True) + Actor.create("worker", host, worker, i).set_auto_restart(True) + + e.netzone_root.seal() + e.run() + + this_actor.info("WE SURVIVED!") diff --git a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.py.tesh b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.py.tesh new file mode 100644 index 0000000000..eb92ddeb03 --- /dev/null +++ b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.py.tesh @@ -0,0 +1,30 @@ + +p Smoke test: do one arbitrary run of the monkey, just to make sure that *something* is happening. 
+ +$ ${pythoncmd:=python3} ${PYTHON_TOOL_OPTIONS:=} ${srcdir:=.}/monkey-masterworkers.py --cfg=plugin:cmonkey --cfg=cmonkey/time:1 --cfg=cmonkey/host:1 +> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'plugin' to 'cmonkey' +> [0.000000] [cmonkey/INFO] Initializing the chaos monkey +> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'cmonkey/time' to '1' +> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'cmonkey/host' to '1' +> [lilibeth 0:master:(1) 0.000000] [python/INFO] Master booting +> [lilibeth 1:worker:(2) 0.000000] [python/INFO] Worker 1 booting +> [lilibeth 2:worker:(3) 0.000000] [python/INFO] Worker 2 booting +> [lilibeth 1:worker:(2) 0.000000] [python/INFO] Waiting a message on mailbox +> [lilibeth 2:worker:(3) 0.000000] [python/INFO] Waiting a message on mailbox +> [lilibeth 0:master:(1) 0.000000] [python/INFO] Try to send a message +> [1.000000] [cmonkey/INFO] Kill host lilibeth 1 +> [lilibeth 0:master:(1) 1.000000] [python/INFO] Got a NetworkFailureException. Wait a second before starting again. +> [lilibeth 1:worker:(2) 1.000000] [python/INFO] Worker 1 dying forcefully. +> [lilibeth 0:master:(1) 2.000000] [python/INFO] Try to send a message +> [lilibeth 2:worker:(3) 3.031240] [python/INFO] Start execution... +> [lilibeth 0:master:(1) 3.031240] [python/INFO] Try to send a message +> [lilibeth 2:worker:(3) 3.032240] [python/INFO] Execution complete. Still 0 to go. +> [lilibeth 2:worker:(3) 3.032240] [python/INFO] Worker 2 dying peacefully. +> [lilibeth 0:master:(1) 3.032240] [python/INFO] Master dying forcefully. +> [31.000000] [cmonkey/INFO] Restart host lilibeth 1 +> [lilibeth 1:worker:(4) 31.000000] [python/INFO] Worker 1 booting +> [lilibeth 1:worker:(4) 31.000000] [python/INFO] Worker 1 dying peacefully. +> [lilibeth 1:worker:(4) 31.000000] [python/INFO] Worker 1 dying peacefully. +> [31.000000] [cmonkey/INFO] Chaos Monkey done! +> [31.000000] [python/INFO] WE SURVIVED! 
+ diff --git a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.tesh b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.tesh index 1d43a5b148..4d09ca267e 100644 --- a/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.tesh +++ b/teshsuite/s4u/monkey-masterworkers/monkey-masterworkers.tesh @@ -6,7 +6,6 @@ $ ${bindir:=.}/monkey-masterworkers --cfg=plugin:cmonkey --cfg=cmonkey/time:1 -- > [0.000000] [cmonkey/INFO] Initializing the chaos monkey > [0.000000] [xbt_cfg/INFO] Configuration change: Set 'cmonkey/time' to '1' > [0.000000] [xbt_cfg/INFO] Configuration change: Set 'cmonkey/host' to '1' -> [0.000000] [s4u_test/INFO] host count: 3 > [lilibeth 0:master:(1) 0.000000] [s4u_test/INFO] Master booting > [lilibeth 1:worker:(2) 0.000000] [s4u_test/INFO] Worker booting > [lilibeth 2:worker:(3) 0.000000] [s4u_test/INFO] Worker booting