Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Ensure that suspending an actor running later in the current scheduling round actuall...
author Martin Quinson <martin.quinson@ens-rennes.fr>
Sat, 9 May 2020 23:56:20 +0000 (01:56 +0200)
committer Martin Quinson <martin.quinson@ens-rennes.fr>
Sat, 9 May 2020 23:56:58 +0000 (01:56 +0200)
Fixes https://framagit.org/simgrid/simgrid/-/issues/50

ChangeLog
src/kernel/actor/ActorImpl.cpp
teshsuite/s4u/CMakeLists.txt
teshsuite/s4u/actor-suspend/actor-suspend.cpp [new file with mode: 0644]
teshsuite/s4u/actor-suspend/actor-suspend.tesh [new file with mode: 0644]

index e302c5e..118d391 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -42,6 +42,7 @@ Fixed bugs (FG#.. -> FramaGit bugs; FG!.. -> FG merge requests)
  - FG#41: Add sg_actor_create C interface
  - FG#43: xbt::random needs some care
  - FG#48: The Impossible Did Happen (yet again)
+ - FG#50: Suspending an actor executed at the current timestamp fails
  - FG!24: Documentation and fix for xbt/random
  - FG!35: Add a modeling hint for parallel links in doc
  - FG!36: [xbt/random] Read/Write the state of the RNG
index 59ec1c5..84c85e9 100644 (file)
@@ -297,6 +297,7 @@ void ActorImpl::yield()
     xbt_assert(exception_ == nullptr, "Gasp! This exception may be lost by subsequent calls.");
     suspended_ = false;
     suspend();
+    yield(); // Yield back to maestro without proceeding with my execution. I'll get resumed at some point
   }
 
   if (exception_ != nullptr) {
index 03487a5..a19f6d3 100644 (file)
@@ -1,4 +1,4 @@
-foreach(x actor actor-autorestart
+foreach(x actor actor-autorestart actor-suspend
         activity-lifecycle
         comm-get-sender comm-pt2pt wait-any-for
         cloud-interrupt-migration cloud-sharing cloud-two-execs
@@ -24,7 +24,7 @@ set_property(TARGET activity-lifecycle APPEND PROPERTY INCLUDE_DIRECTORIES "${IN
 
 ## Add the tests.
 ## Some need to be run with all factories, some don't need tesh to run
-foreach(x actor actor-autorestart
+foreach(x actor actor-autorestart actor-suspend
         activity-lifecycle comm-get-sender wait-any-for
         cloud-interrupt-migration cloud-two-execs concurrent_rw)
   set(tesh_files    ${tesh_files}    ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh)
diff --git a/teshsuite/s4u/actor-suspend/actor-suspend.cpp b/teshsuite/s4u/actor-suspend/actor-suspend.cpp
new file mode 100644 (file)
index 0000000..f87fadb
--- /dev/null
@@ -0,0 +1,67 @@
+/* Copyright (c) 2020. The SimGrid Team. All rights reserved.          */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+// This is the MWE of https://framagit.org/simgrid/simgrid/-/issues/50
+// The problem was occurring when suspending an actor that will be executed later in the same scheduling round
+
+#include <iostream>
+#include <simgrid/s4u.hpp>
+#include <stdio.h>
+#include <stdlib.h>
+#include <vector>
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(mwe, "Minimum Working Example");
+
+simgrid::s4u::ActorPtr receiver;
+
+class Receiver {
+  // Actor body: logs "Starting.", then waits on the "receiver" mailbox and aborts the run if get() ever returns.
+public:
+  void operator()()
+  {
+    XBT_INFO("Starting.");
+    simgrid::s4u::Mailbox* mailbox = simgrid::s4u::Mailbox::by_name("receiver");
+    void* data                     = (void*)2; // arbitrary non-null placeholder, overwritten by get() just below
+    data                           = mailbox->get(); // not expected to return: nothing in this test sends to this mailbox
+    xbt_die("get() has returned (even though it shouldn't!) with a %s message",
+            (data == nullptr ? "null" : "non-null"));
+  }
+};
+
+class Suspender {
+  // Actor body: calls suspend() then resume() on the global `receiver` actor, sleeps for 10 seconds, and logs each step.
+public:
+  void operator()()
+  {
+    // If we sleep a bit here (by uncommenting the line below), this MWE works because the suspender is then not
+    // trying to suspend someone executed later in the same scheduling round:
+    // simgrid::s4u::this_actor::sleep_for(0.01);
+
+    XBT_INFO("Suspend the receiver...");
+    receiver->suspend();
+    XBT_INFO("Resume the receiver...");
+    receiver->resume();
+
+    XBT_INFO("Sleeping 10 sec...");
+    simgrid::s4u::this_actor::sleep_for(10);
+    XBT_INFO("Done!");
+  }
+};
+
+int main(int argc, char** argv)
+{
+  // NOTE(review): the engine is allocated with new and never deleted in this function -- confirm this is intended.
+  simgrid::s4u::Engine* engine = new simgrid::s4u::Engine(&argc, argv);
+
+  engine->load_platform(argv[1]); // argv[1] is used unchecked; the tesh passes the platform file as first argument
+  simgrid::s4u::Host* host = simgrid::s4u::Host::by_name("Tremblay");
+
+  auto suspender = simgrid::s4u::Actor::create("Suspender", host, Suspender()); // created first; handle not used afterwards
+  receiver       = simgrid::s4u::Actor::create("Receiver", host, Receiver()); // stored in the global read by Suspender
+
+  engine->run();
+
+  return 0;
+}
diff --git a/teshsuite/s4u/actor-suspend/actor-suspend.tesh b/teshsuite/s4u/actor-suspend/actor-suspend.tesh
new file mode 100644 (file)
index 0000000..8ee1897
--- /dev/null
@@ -0,0 +1,10 @@
+$ ./actor-suspend ${platfdir}/small_platform.xml --log=no_loc
+> [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Suspend the receiver...
+> [Tremblay:Receiver:(2) 0.000000] [mwe/INFO] Starting.
+> [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Resume the receiver...
+> [Tremblay:Suspender:(1) 0.000000] [mwe/INFO] Sleeping 10 sec...
+> [Tremblay:Suspender:(1) 10.000000] [mwe/INFO] Done!
+> [10.000000] [simix_kernel/CRITICAL] Oops! Deadlock or code not perfectly clean.
+> [10.000000] [simix_kernel/INFO] 1 actors are still running, waiting for something.
+> [10.000000] [simix_kernel/INFO] Legend of the following listing: "Actor <pid> (<name>@<host>): <status>"
+> [10.000000] [simix_kernel/INFO] Actor 2 (Receiver@Tremblay): waiting for execution activity 0xdeadbeef (suspend) in state 3 to finish