Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
semaphore monkey
author Martin Quinson <martin.quinson@ens-rennes.fr>
Thu, 3 Mar 2022 22:45:34 +0000 (23:45 +0100)
committer Martin Quinson <martin.quinson@ens-rennes.fr>
Thu, 3 Mar 2022 22:59:38 +0000 (23:59 +0100)
C++ only for now, as we don't have semaphores in the current Python interface.

include/simgrid/s4u/Actor.hpp
teshsuite/s4u/CMakeLists.txt
teshsuite/s4u/monkey-semaphore/monkey-semaphore.cpp [new file with mode: 0644]
teshsuite/s4u/monkey-semaphore/monkey-semaphore.tesh [new file with mode: 0644]
tools/simgrid-monkey

diff --git a/include/simgrid/s4u/Actor.hpp b/include/simgrid/s4u/Actor.hpp
index e3a606e..4ed1da9 100644 (file)
@@ -339,7 +339,7 @@ public:
   bool is_suspended() const;
 
   /** If set to true, the actor will automatically restart when its host reboots */
-  Actor* set_auto_restart(bool autorestart);
+  Actor* set_auto_restart(bool autorestart = true);
 
   /** Add a function to the list of "on_exit" functions for the current actor. The on_exit functions are the functions
    * executed when your actor is killed. You should use them to free the data used by your actor.
diff --git a/teshsuite/s4u/CMakeLists.txt b/teshsuite/s4u/CMakeLists.txt
index f595f37..8f10e1a 100644 (file)
@@ -8,7 +8,7 @@ foreach(x actor actor-autorestart actor-suspend
         activity-lifecycle
         comm-get-sender comm-pt2pt wait-all-for wait-any-for
         cloud-interrupt-migration cloud-two-execs
-       monkey-masterworkers
+       monkey-masterworkers monkey-semaphore
         concurrent_rw
         dag-incomplete-simulation dependencies
         host-on-off host-on-off-actors host-on-off-recv host-multicore-speed-file io-set-bw
@@ -46,7 +46,7 @@ foreach(x actor actor-autorestart actor-suspend activity-lifecycle comm-get-send
 endforeach()
 
 foreach(x basic-link-test basic-parsing-test host-on-off host-on-off-actors host-on-off-recv host-multicore-speed-file is-router listen_async
-        monkey-masterworkers
+        monkey-masterworkers monkey-semaphore
         pid storage_client_server trace-integration seal-platform issue71)
   set(tesh_files    ${tesh_files}    ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh)
   ADD_TESH(tesh-s4u-${x}
@@ -78,15 +78,17 @@ endforeach()
 
 
 # Monkey tests are launched directly, not with tesh
-foreach(x  monkey-masterworkers)
+set(_monkey-semaphore_disable_python 1) # Semaphore not exposed to python
+foreach(x  monkey-masterworkers monkey-semaphore)
   ADD_TEST(monkey-s4u-${x} "${PYTHON_EXECUTABLE}" ${CMAKE_HOME_DIRECTORY}/tools/simgrid-monkey ${CMAKE_BINARY_DIR}/teshsuite/s4u/${x}/${x})
   if(enable_python)
-    ADD_TEST(monkey-python-${x} "${PYTHON_EXECUTABLE}" ${CMAKE_HOME_DIRECTORY}/tools/simgrid-monkey "${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.py)
-    set_tests_properties(monkey-python-${x} PROPERTIES ENVIRONMENT "PYTHONPATH=${CMAKE_BINARY_DIR}/lib")
+    if(NOT DEFINED _${x}_disable_python)
+      ADD_TEST(monkey-python-${x} "${PYTHON_EXECUTABLE}" ${CMAKE_HOME_DIRECTORY}/tools/simgrid-monkey "${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.py)
+      set_tests_properties(monkey-python-${x} PROPERTIES ENVIRONMENT "PYTHONPATH=${CMAKE_BINARY_DIR}/lib")
+    endif()
   endif()
 endforeach()
 
-
 # The output is not relevant
 ADD_TEST(tesh-s4u-comm-pt2pt    ${CMAKE_BINARY_DIR}/teshsuite/s4u/comm-pt2pt/comm-pt2pt    ${CMAKE_HOME_DIRECTORY}/examples/platforms/cluster_backbone.xml)
 
diff --git a/teshsuite/s4u/monkey-semaphore/monkey-semaphore.cpp b/teshsuite/s4u/monkey-semaphore/monkey-semaphore.cpp
new file mode 100644 (file)
index 0000000..b6d389e
--- /dev/null
@@ -0,0 +1,121 @@
+/* Copyright (c) 2006-2022. The SimGrid Team. All rights reserved.          */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+// This example implements a simple producer/consumer schema, passing a bunch of items from one to the other,
+// hopefully implemented in a way that resists resource failures.
+
+#include <simgrid/s4u.hpp>
+#include <xbt/config.hpp>
+
+namespace sg4 = simgrid::s4u;
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(sem_monkey, "Simple test of the semaphore");
+
+int buffer;                                              /* Where the data is exchanged */
+sg4::SemaphorePtr sem_empty = sg4::Semaphore::create(1); /* indicates whether the buffer is empty */
+sg4::SemaphorePtr sem_full  = sg4::Semaphore::create(0); /* indicates whether the buffer is full */
+
+static simgrid::config::Flag<int> cfg_item_count{"item-count", "Amount of items that must be exchanged to succeed", 2};
+static simgrid::config::Flag<double> cfg_deadline{"deadline", "When to fail the simulation (infinite loop detection)",
+                                                  120};
+
+int todo; // remaining amount of items to exchange
+
+static void producer()
+{
+  static bool inited = false;
+  static std::vector<sg4::Semaphore*> to_release;
+  XBT_INFO("Producer %s", inited ? "rebooting" : "booting");
+
+  if (not inited) {
+    sg4::this_actor::on_exit(
+        [](bool forcefully) { XBT_INFO("Producer dying %s.", forcefully ? "forcefully" : "peacefully"); });
+    inited = true;
+  }
+  while (not to_release.empty()) { // Clean up a previous run. Cannot be done in on_exit, as it entails a simcall
+    auto* sem = to_release.back();
+    sem->release();
+    XBT_INFO("Released a semaphore on reboot. It's now %d", sem->get_capacity());
+    to_release.pop_back();
+  }
+
+  while (todo > 0) {
+    xbt_assert(sg4::Engine::get_clock() < cfg_deadline,
+               "Failed to exchange all tasks in less than %d seconds. Is this an infinite loop?", (int)cfg_deadline);
+
+    sg4::this_actor::sleep_for(1); // Give a chance to the monkey to kill this actor at this point
+
+    while (sem_empty->acquire_timeout(10))
+      XBT_INFO("Timeouted");
+    to_release.push_back(sem_empty.get());
+    XBT_INFO("sem_empty acquired");
+
+    sg4::this_actor::sleep_for(1); // Give a chance to the monkey to kill this actor at this point
+
+    XBT_INFO("Pushing item %d", todo - 1);
+    buffer = todo - 1;
+    sem_full->release();
+    to_release.pop_back();
+    XBT_INFO("sem_empty removed from to_release");
+    todo--;
+  }
+}
+static void consumer()
+{
+  static std::vector<sg4::Semaphore*> to_release;
+
+  static bool inited = false;
+  XBT_INFO("Consumer %s", inited ? "rebooting" : "booting");
+  if (not inited) {
+    sg4::this_actor::on_exit(
+        [](bool forcefully) { XBT_INFO("Consumer dying %s.", forcefully ? "forcefully" : "peacefully"); });
+    inited = true;
+  }
+  while (not to_release.empty()) { // Clean up a previous run. Cannot be done in on_exit, as it entails a simcall
+    auto* sem = to_release.back();
+    sem->release();
+    XBT_INFO("Released a semaphore on reboot. It's now %d", sem->get_capacity());
+    to_release.pop_back();
+  }
+
+  int item;
+  do {
+    xbt_assert(sg4::Engine::get_clock() < cfg_deadline,
+               "Failed to exchange all tasks in less than %d seconds. Is this an infinite loop?", (int)cfg_deadline);
+
+    sg4::this_actor::sleep_for(0.75); // Give a chance to the monkey to kill this actor at this point
+
+    while (sem_full->acquire_timeout(10))
+      XBT_INFO("Timeouted");
+    to_release.push_back(sem_full.get());
+
+    sg4::this_actor::sleep_for(0.75); // Give a chance to the monkey to kill this actor at this point
+
+    item = buffer;
+    XBT_INFO("Receiving item %d", item);
+    sem_empty->release();
+    to_release.pop_back();
+  } while (item != 0);
+
+  XBT_INFO("Bye!");
+}
+
+int main(int argc, char** argv)
+{
+  sg4::Engine e(&argc, argv);
+
+  todo           = cfg_item_count;
+  auto* rootzone = sg4::create_full_zone("root");
+  auto* paul     = rootzone->create_host("Paul", 1e9);
+  auto* carol    = rootzone->create_host("Carol", 1e9);
+  sg4::LinkInRoute link(rootzone->create_link("link", "1MBps")->set_latency("24us")->seal());
+  rootzone->add_route(paul->get_netpoint(), carol->get_netpoint(), nullptr, nullptr, {link}, true);
+
+  sg4::Actor::create("producer", paul, producer)->set_auto_restart();
+  sg4::Actor::create("consumer", carol, consumer)->set_auto_restart();
+  e.run();
+
+  return 0;
+}
diff --git a/teshsuite/s4u/monkey-semaphore/monkey-semaphore.tesh b/teshsuite/s4u/monkey-semaphore/monkey-semaphore.tesh
new file mode 100644 (file)
index 0000000..5a36de0
--- /dev/null
@@ -0,0 +1,29 @@
+
+p Smoke test: do one arbitrary run of the monkey, just to make sure that *something* is happening.
+
+$ ${bindir:=.}/monkey-semaphore --cfg=plugin:cmonkey --cfg=cmonkey/time:1 --cfg=cmonkey/host:1
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'plugin' to 'cmonkey'
+> [0.000000] [cmonkey/INFO] Initializing the chaos monkey
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'cmonkey/time' to '1'
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'cmonkey/host' to '1'
+> [Paul:producer:(1) 0.000000] [sem_monkey/INFO] Producer booting
+> [Carol:consumer:(2) 0.000000] [sem_monkey/INFO] Consumer booting
+> [1.000000] [cmonkey/INFO] Kill host Paul
+> [Paul:producer:(1) 1.000000] [sem_monkey/INFO] Producer dying forcefully.
+> [Carol:consumer:(2) 10.750000] [sem_monkey/INFO] Timeouted
+> [Carol:consumer:(2) 20.750000] [sem_monkey/INFO] Timeouted
+> [Carol:consumer:(2) 30.750000] [sem_monkey/INFO] Timeouted
+> [31.000000] [cmonkey/INFO] Restart host Paul
+> [Paul:producer:(3) 31.000000] [sem_monkey/INFO] Producer rebooting
+> [Paul:producer:(3) 32.000000] [sem_monkey/INFO] sem_empty acquired
+> [Paul:producer:(3) 33.000000] [sem_monkey/INFO] Pushing item 1
+> [Paul:producer:(3) 33.000000] [sem_monkey/INFO] sem_empty removed from to_release
+> [Carol:consumer:(2) 33.750000] [sem_monkey/INFO] Receiving item 1
+> [Paul:producer:(3) 34.000000] [sem_monkey/INFO] sem_empty acquired
+> [Paul:producer:(3) 35.000000] [sem_monkey/INFO] Pushing item 0
+> [Paul:producer:(3) 35.000000] [sem_monkey/INFO] sem_empty removed from to_release
+> [Paul:producer:(3) 35.000000] [sem_monkey/INFO] Producer dying peacefully.
+> [Carol:consumer:(2) 35.750000] [sem_monkey/INFO] Receiving item 0
+> [Carol:consumer:(2) 35.750000] [sem_monkey/INFO] Bye!
+> [Carol:consumer:(2) 35.750000] [sem_monkey/INFO] Consumer dying peacefully.
+> [35.750000] [cmonkey/INFO] Chaos Monkey done!
diff --git a/tools/simgrid-monkey b/tools/simgrid-monkey
index d2f6273..c48a771 100755 (executable)
@@ -19,7 +19,8 @@
 #   * So the amount of simulations is: 1 + (host_c+link_c) * timestamps * 2
 # 
 # * Test program, written to resist these extreme conditions:
-#   * teshsuite/s4u/monkey-masterworkers: tests synchronous comms and execs
+#   * teshsuite/s4u/monkey-masterworkers: tests synchronous comms and execs (C++ and python)
+#   * teshsuite/s4u/monkey-semaphore: tests async semaphores (C++ only)
 
 import multiprocessing as mp
 import sys