Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix the code and doc of Comm/Exec suspend/resume in Python
[simgrid.git] / src / bindings / python / simgrid_python.cpp
index a3589e7..d098b91 100644 (file)
@@ -1,22 +1,13 @@
-/* Copyright (c) 2018-2022. The SimGrid Team. All rights reserved.          */
+/* Copyright (c) 2018-2023. The SimGrid Team. All rights reserved.          */
 
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
-#if defined(__GNUG__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-value"
-#endif
-
 #include <pybind11/pybind11.h> // Must come before our own stuff
 
 #include <pybind11/functional.h>
 #include <pybind11/stl.h>
 
-#if defined(__GNUG__)
-#pragma GCC diagnostic pop
-#endif
-
 #include "simgrid/kernel/ProfileBuilder.hpp"
 #include "simgrid/kernel/routing/NetPoint.hpp"
 #include <simgrid/Exception.hpp>
@@ -118,14 +109,12 @@ PYBIND11_MODULE(simgrid, m)
            py::call_guard<py::gil_scoped_release>())
       .def("exec_async", py::overload_cast<double>(&simgrid::s4u::this_actor::exec_async),
            py::call_guard<py::gil_scoped_release>())
-      .def("parallel_execute", &simgrid::s4u::this_actor::parallel_execute,
-           py::call_guard<py::gil_scoped_release>(),
+      .def("parallel_execute", &simgrid::s4u::this_actor::parallel_execute, py::call_guard<py::gil_scoped_release>(),
            "Run a parallel task (requires the 'ptask_L07' model)")
       .def("exec_init",
            py::overload_cast<const std::vector<simgrid::s4u::Host*>&, const std::vector<double>&,
-           const std::vector<double>&>  (&simgrid::s4u::this_actor::exec_init),
-           py::call_guard<py::gil_scoped_release>(),
-           "Initiate a parallel task (requires the 'ptask_L07' model)")
+                             const std::vector<double>&>(&simgrid::s4u::this_actor::exec_init),
+           py::call_guard<py::gil_scoped_release>(), "Initiate a parallel task (requires the 'ptask_L07' model)")
       .def("get_host", &simgrid::s4u::this_actor::get_host, "Retrieves host on which the current actor is located")
       .def("set_host", &simgrid::s4u::this_actor::set_host, py::call_guard<py::gil_scoped_release>(),
            "Moves the current actor to another host.", py::arg("dest"))
@@ -141,19 +130,19 @@ PYBIND11_MODULE(simgrid, m)
       .def("exit", &simgrid::s4u::this_actor::exit, py::call_guard<py::gil_scoped_release>(), "kill the current actor")
       .def(
           "on_exit",
-          [](py::object cb) {
-            py::function fun = py::reinterpret_borrow<py::function>(cb);
-            fun.inc_ref(); // FIXME: why is this needed for tests like actor-kill and actor-lifetime?
-            simgrid::s4u::this_actor::on_exit([fun](bool failed) {
-              py::gil_scoped_acquire py_context; // need a new context for callback
+          [](py::object fun) {
+            fun.inc_ref(); // keep alive after return
+            const py::gil_scoped_release gil_release;
+            simgrid::s4u::this_actor::on_exit([fun_p = fun.ptr()](bool failed) {
+              const py::gil_scoped_acquire py_context; // need a new context for callback
               try {
+                const auto fun = py::reinterpret_borrow<py::function>(fun_p);
                 fun(failed);
               } catch (const py::error_already_set& e) {
                 xbt_die("Error while executing the on_exit lambda: %s", e.what());
               }
             });
           },
-          py::call_guard<py::gil_scoped_release>(),
           "Define a lambda to be called when the actor ends. It takes a bool parameter indicating whether the actor "
           "was killed. If False, the actor finished peacefully.")
       .def("get_pid", &simgrid::s4u::this_actor::get_pid, "Retrieves PID of the current actor")
@@ -190,7 +179,7 @@ PYBIND11_MODULE(simgrid, m)
                           "get_all_hosts() is deprecated and  will be dropped after v3.33, use all_hosts instead.", 1);
              return self.attr("all_hosts");
            })
-      .def("host_by_name", &Engine::host_by_name_or_null, py::call_guard<py::gil_scoped_release>(),
+      .def("host_by_name", &Engine::host_by_name_or_null,
            "Retrieve a host by its name, or None if it does not exist in the platform.")
       .def_property_readonly("all_hosts", &Engine::get_all_hosts, "Returns the list of all hosts found in the platform")
       .def("get_all_links",
@@ -226,10 +215,8 @@ PYBIND11_MODULE(simgrid, m)
            "Change one of SimGrid's configurations")
       .def("load_platform", &Engine::load_platform, "Load a platform file describing the environment")
       .def("load_deployment", &Engine::load_deployment, "Load a deployment file and launch the actors that it contains")
-      .def("mailbox_by_name_or_create", &Engine::mailbox_by_name_or_create,
-           py::call_guard<py::gil_scoped_release>(),
-           py::arg("name"),
-           "Find a mailbox from its name or create one if it does not exist")
+      .def("mailbox_by_name_or_create", &Engine::mailbox_by_name_or_create, py::call_guard<py::gil_scoped_release>(),
+           py::arg("name"), "Find a mailbox from its name or create one if it does not exist")
       .def("run", &Engine::run, py::call_guard<py::gil_scoped_release>(), "Run the simulation until its end")
       .def("run_until", py::overload_cast<double>(&Engine::run_until, py::const_),
            py::call_guard<py::gil_scoped_release>(), "Run the simulation until the given date",
@@ -237,14 +224,17 @@ PYBIND11_MODULE(simgrid, m)
       .def(
           "register_actor",
           [](Engine* e, const std::string& name, py::object fun_or_class) {
-            e->register_actor(name, [fun_or_class](std::vector<std::string> args) {
-              py::gil_scoped_acquire py_context;
+            fun_or_class.inc_ref(); // keep alive after return
+            const py::gil_scoped_release gil_release;
+            e->register_actor(name, [fun_or_class_p = fun_or_class.ptr()](std::vector<std::string> args) {
+              const py::gil_scoped_acquire py_context;
               try {
                 /* Convert the std::vector into a py::tuple */
                 py::tuple params(args.size() - 1);
                 for (size_t i = 1; i < args.size(); i++)
                   params[i - 1] = py::cast(args[i]);
 
+                const auto fun_or_class = py::reinterpret_borrow<py::object>(fun_or_class_p);
                 py::object res = fun_or_class(*params);
                 /* If I was passed a class, I just built an instance, so I need to call it now */
                 if (py::isinstance<py::function>(res))
@@ -404,8 +394,7 @@ PYBIND11_MODULE(simgrid, m)
                  "get_pstate_speed() is deprecated and  will be dropped after v3.33, use pstate_speed instead.", 1);
              return self.attr("pstate_speed")(state);
            })
-      .def("pstate_speed", &Host::get_pstate_speed, py::call_guard<py::gil_scoped_release>(),
-           "Retrieve the maximal speed at the given pstate")
+      .def("pstate_speed", &Host::get_pstate_speed, "Retrieve the maximal speed at the given pstate")
       .def("get_netpoint",
            [](py::object self) // XBT_ATTRIB_DEPRECATED_v334
            {
@@ -456,17 +445,19 @@ PYBIND11_MODULE(simgrid, m)
       .def_static(
           "on_creation_cb",
           [](py::object cb) {
-            Host::on_creation_cb([cb](Host& h) {
-              py::function fun = py::reinterpret_borrow<py::function>(cb);
-              py::gil_scoped_acquire py_context; // need a new context for callback
+            cb.inc_ref(); // keep alive after return
+            const py::gil_scoped_release gil_release;
+            Host::on_creation_cb([cb_p = cb.ptr()](Host& h) {
+              const py::gil_scoped_acquire py_context; // need a new context for callback
               try {
+                const auto fun = py::reinterpret_borrow<py::function>(cb_p);
                 fun(&h);
               } catch (const py::error_already_set& e) {
                 xbt_die("Error while executing the on_creation lambda : %s", e.what());
               }
             });
           },
-          py::call_guard<py::gil_scoped_release>(), "");
+          "");
 
   py::enum_<simgrid::s4u::Host::SharingPolicy>(host, "SharingPolicy")
       .value("NONLINEAR", simgrid::s4u::Host::SharingPolicy::NONLINEAR)
@@ -633,43 +624,42 @@ PYBIND11_MODULE(simgrid, m)
       .def_static("by_name", &Mailbox::by_name, py::call_guard<py::gil_scoped_release>(), py::arg("name"),
                   "Retrieve a Mailbox from its name")
       .def_property_readonly("name", &Mailbox::get_name, "The name of that mailbox (read-only property).")
-      .def_property_readonly("ready", &Mailbox::ready, py::call_guard<py::gil_scoped_release>(),
+      .def_property_readonly("ready", &Mailbox::ready,
                              "Check if there is a communication ready to be consumed from a mailbox.")
       .def(
           "put",
           [](Mailbox* self, py::object data, uint64_t size, double timeout) {
-            data.inc_ref();
-            self->put(data.ptr(), size, timeout);
+            auto* data_ptr = data.inc_ref().ptr();
+            const py::gil_scoped_release gil_release;
+            self->put(data_ptr, size, timeout);
           },
-          py::call_guard<py::gil_scoped_release>(), "Blocking data transmission with a timeout")
+          "Blocking data transmission with a timeout")
       .def(
           "put",
           [](Mailbox* self, py::object data, uint64_t size) {
-            data.inc_ref();
-            self->put(data.ptr(), size);
+            auto* data_ptr = data.inc_ref().ptr();
+            const py::gil_scoped_release gil_release;
+            self->put(data_ptr, size);
           },
-          py::call_guard<py::gil_scoped_release>(), "Blocking data transmission")
+          "Blocking data transmission")
       .def(
           "put_async",
           [](Mailbox* self, py::object data, uint64_t size) {
-            data.inc_ref();
-            return self->put_async(data.ptr(), size);
+            auto* data_ptr = data.inc_ref().ptr();
+            const py::gil_scoped_release gil_release;
+            return self->put_async(data_ptr, size);
           },
-          py::call_guard<py::gil_scoped_release>(), "Non-blocking data transmission")
+          "Non-blocking data transmission")
       .def(
           "put_init",
           [](Mailbox* self, py::object data, uint64_t size) {
-            data.inc_ref();
-            return self->put_init(data.ptr(), size);
+            auto* data_ptr = data.inc_ref().ptr();
+            const py::gil_scoped_release gil_release;
+            return self->put_init(data_ptr, size);
           },
-          py::call_guard<py::gil_scoped_release>(), "Creates (but don’t start) a data transmission to that mailbox.")
+          "Creates (but don’t start) a data transmission to that mailbox.")
       .def(
-          "get",
-          [](Mailbox* self) {
-            py::object data = py::reinterpret_steal<py::object>(self->get<PyObject>());
-            // data.dec_ref(); // FIXME: why does it break python-actor-create?
-            return data;
-          },
+          "get", [](Mailbox* self) { return py::reinterpret_steal<py::object>(self->get<PyObject>()); },
           py::call_guard<py::gil_scoped_release>(), "Blocking data reception")
       .def(
           "get_async",
@@ -692,31 +682,24 @@ PYBIND11_MODULE(simgrid, m)
 
   /* Class Comm */
   py::class_<Comm, CommPtr>(m, "Comm", "Communication. See the C++ documentation for details.")
-      .def_property_readonly("dst_data_size", &Comm::get_dst_data_size,
-                             py::call_guard<py::gil_scoped_release>(),
+      .def_property_readonly("dst_data_size", &Comm::get_dst_data_size, py::call_guard<py::gil_scoped_release>(),
                              "Retrieve the size of the received data.")
-      .def_property_readonly("mailbox", &Comm::get_mailbox,
-                             py::call_guard<py::gil_scoped_release>(),
+      .def_property_readonly("mailbox", &Comm::get_mailbox, py::call_guard<py::gil_scoped_release>(),
                              "Retrieve the mailbox on which this comm acts.")
-      .def_property_readonly("sender", &Comm::get_sender,
-                             py::call_guard<py::gil_scoped_release>())
-      .def_property_readonly("state_str", &Comm::get_state_str,
-                             py::call_guard<py::gil_scoped_release>(),
+      .def_property_readonly("sender", &Comm::get_sender, py::call_guard<py::gil_scoped_release>())
+      .def_property_readonly("state_str", &Comm::get_state_str, py::call_guard<py::gil_scoped_release>(),
                              "Retrieve the Comm state as string")
-      .def_property_readonly("remaining",  &Comm::get_remaining,
-                             py::call_guard<py::gil_scoped_release>(),
+      .def_property_readonly("remaining", &Comm::get_remaining, py::call_guard<py::gil_scoped_release>(),
                              "Remaining amount of work that this Comm entails")
-      .def_property_readonly("start_time",  &Comm::get_start_time,
-                             py::call_guard<py::gil_scoped_release>(),
+      .def_property_readonly("start_time", &Comm::get_start_time, py::call_guard<py::gil_scoped_release>(),
                              "Time at which this Comm started")
-      .def_property_readonly("finish_time",  &Comm::get_finish_time,
-                             py::call_guard<py::gil_scoped_release>(),
+      .def_property_readonly("finish_time", &Comm::get_finish_time, py::call_guard<py::gil_scoped_release>(),
                              "Time at which this Comm finished")
-      .def("set_payload_size", &Comm::set_payload_size, py::call_guard<py::gil_scoped_release>(),
-           py::arg("bytes"),
+      .def_property_readonly("is_suspended", &Comm::is_suspended, py::call_guard<py::gil_scoped_release>(),
+                             "Whether this Comm is suspended")
+      .def("set_payload_size", &Comm::set_payload_size, py::call_guard<py::gil_scoped_release>(), py::arg("bytes"),
            "Specify the amount of bytes which exchange should be simulated.")
-      .def("set_rate", &Comm::set_rate, py::call_guard<py::gil_scoped_release>(),
-           py::arg("rate"),
+      .def("set_rate", &Comm::set_rate, py::call_guard<py::gil_scoped_release>(), py::arg("rate"),
            "Sets the maximal communication rate (in byte/sec). Must be done before start")
       .def("cancel", &Comm::cancel, py::call_guard<py::gil_scoped_release>(),
            py::return_value_policy::reference_internal, "Cancel the activity.")
@@ -731,46 +714,36 @@ PYBIND11_MODULE(simgrid, m)
            "Test whether the communication is terminated.")
       .def("wait", &Comm::wait, py::call_guard<py::gil_scoped_release>(),
            "Block until the completion of that communication.")
-      .def("wait_for", &Comm::wait_for, py::call_guard<py::gil_scoped_release>(),
-           py::arg("timeout"),
+      .def("wait_for", &Comm::wait_for, py::call_guard<py::gil_scoped_release>(), py::arg("timeout"),
            "Block until the completion of that communication, or raises TimeoutException after the specified timeout.")
-      .def("wait_until", &Comm::wait_until, py::call_guard<py::gil_scoped_release>(),
-           py::arg("time_limit"),
+      .def("wait_until", &Comm::wait_until, py::call_guard<py::gil_scoped_release>(), py::arg("time_limit"),
            "Block until the completion of that communication, or raises TimeoutException after the specified time.")
       .def("detach", py::overload_cast<>(&Comm::detach), py::return_value_policy::reference_internal,
            py::call_guard<py::gil_scoped_release>(),
            "Start the comm, and ignore its result. It can be completely forgotten after that.")
-      .def_static("sendto", &Comm::sendto, py::call_guard<py::gil_scoped_release>(),
-                  py::arg("from"), py::arg("to"), py::arg("simulated_size_in_bytes"),
-                  "Do a blocking communication between two arbitrary hosts.")
+      .def_static("sendto", &Comm::sendto, py::call_guard<py::gil_scoped_release>(), py::arg("from"), py::arg("to"),
+                  py::arg("simulated_size_in_bytes"), "Do a blocking communication between two arbitrary hosts.")
       .def_static("sendto_init", py::overload_cast<Host*, Host*>(&Comm::sendto_init),
-                  py::call_guard<py::gil_scoped_release>(),
-                  py::arg("from"), py::arg("to"),
+                  py::call_guard<py::gil_scoped_release>(), py::arg("from"), py::arg("to"),
                   "Creates a communication between the two given hosts, bypassing the mailbox mechanism.")
-      .def_static("sendto_async", &Comm::sendto_async, py::call_guard<py::gil_scoped_release>(),
-                  py::arg("from"), py::arg("to"), py::arg("simulated_size_in_bytes"),
+      .def_static("sendto_async", &Comm::sendto_async, py::call_guard<py::gil_scoped_release>(), py::arg("from"),
+                  py::arg("to"), py::arg("simulated_size_in_bytes"),
                   "Do a blocking communication between two arbitrary hosts.\n\nThis initializes a communication that "
                   "completely bypass the mailbox and actors mechanism. There is really no limit on the hosts involved. "
                   "In particular, the actor does not have to be on one of the involved hosts.")
-      .def_static("test_any", &Comm::test_any,
-                  py::call_guard<py::gil_scoped_release>(),
-                  py::arg("comms"),
+      .def_static("test_any", &Comm::test_any, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
                   "take a vector s4u::CommPtr and return the rank of the first finished one (or -1 if none is done)")
-      .def_static("wait_all", &Comm::wait_all, py::call_guard<py::gil_scoped_release>(),
-                  py::arg("comms"),
+      .def_static("wait_all", &Comm::wait_all, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
                   "Block until the completion of all communications in the list.")
-      .def_static("wait_all_for", &Comm::wait_all_for, py::call_guard<py::gil_scoped_release>(),
-                  py::arg("comms"), py::arg("timeout"),
+      .def_static("wait_all_for", &Comm::wait_all_for, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
+                  py::arg("timeout"),
                   "Block until the completion of all communications in the list, or raises TimeoutException after "
                   "the specified timeout.")
-      .def_static("wait_any", &Comm::wait_any,
-                  py::call_guard<py::gil_scoped_release>(),
-                  py::arg("comms"),
+      .def_static("wait_any", &Comm::wait_any, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
                   "Block until the completion of any communication in the list and return the index of the "
                   "terminated one.")
-      .def_static("wait_any_for", &Comm::wait_any_for,
-                  py::call_guard<py::gil_scoped_release>(),
-                  py::arg("comms"), py::arg("timeout"),
+      .def_static("wait_any_for", &Comm::wait_any_for, py::call_guard<py::gil_scoped_release>(), py::arg("comms"),
+                  py::arg("timeout"),
                   "Block until the completion of any communication in the list and return the index of the terminated "
                   "one, or -1 if a timeout occurred.");
 
@@ -797,15 +770,17 @@ PYBIND11_MODULE(simgrid, m)
       .def_property("host", &simgrid::s4u::Exec::get_host, &simgrid::s4u::Exec::set_host,
                     "Host on which this execution runs. Only the first host is returned for parallel executions. "
                     "Changing this value migrates the execution.")
+      .def_property_readonly("is_suspended", &simgrid::s4u::Exec::is_suspended,
+                             py::call_guard<py::gil_scoped_release>(), "Whether this Exec is suspended")
       .def("test", &simgrid::s4u::Exec::test, py::call_guard<py::gil_scoped_release>(),
            "Test whether the execution is terminated.")
       .def("cancel", &simgrid::s4u::Exec::cancel, py::call_guard<py::gil_scoped_release>(), "Cancel that execution.")
       .def("start", &simgrid::s4u::Exec::start, py::call_guard<py::gil_scoped_release>(), "Start that execution.")
       .def("suspend", &simgrid::s4u::Exec::suspend, py::call_guard<py::gil_scoped_release>(), "Suspend that execution.")
+      .def("resume", &simgrid::s4u::Exec::resume, py::call_guard<py::gil_scoped_release>(), "Resume that execution.")
       .def("wait", &simgrid::s4u::Exec::wait, py::call_guard<py::gil_scoped_release>(),
            "Block until the completion of that execution.")
-      .def("wait_for", &simgrid::s4u::Exec::wait_for, py::call_guard<py::gil_scoped_release>(),
-           py::arg("timeout"),
+      .def("wait_for", &simgrid::s4u::Exec::wait_for, py::call_guard<py::gil_scoped_release>(), py::arg("timeout"),
            "Block until the completion of that activity, or raises TimeoutException after the specified timeout.");
 
   /* Class Semaphore */
@@ -860,12 +835,15 @@ PYBIND11_MODULE(simgrid, m)
                                             "application. See the C++ documentation for details.")
       .def(
           "create",
-          [](py::str name, Host* h, py::object fun, py::args args) {
-            fun.inc_ref();  // FIXME: why is this needed for tests like exec-async, exec-dvfs and exec-remote?
-            args.inc_ref(); // FIXME: why is this needed for tests like actor-migrate?
-            return simgrid::s4u::Actor::create(name, h, [fun, args]() {
-              py::gil_scoped_acquire py_context;
+          [](const std::string& name, Host* h, py::object fun, py::args args) {
+            fun.inc_ref();  // keep alive after return
+            args.inc_ref(); // keep alive after return
+            const py::gil_scoped_release gil_release;
+            return simgrid::s4u::Actor::create(name, h, [fun_p = fun.ptr(), args_p = args.ptr()]() {
+              const py::gil_scoped_acquire py_context;
               try {
+                const auto fun  = py::reinterpret_borrow<py::object>(fun_p);
+                const auto args = py::reinterpret_borrow<py::args>(args_p);
                 fun(*args);
               } catch (const py::error_already_set& ex) {
                 if (ex.matches(pyForcefulKillEx)) {
@@ -876,7 +854,6 @@ PYBIND11_MODULE(simgrid, m)
               }
             });
           },
-          py::call_guard<py::gil_scoped_release>(),
           "Create an actor from a function or an object. See the :ref:`example <s4u_ex_actors_create>`.")
       .def_property(
           "host", &Actor::get_host, py::cpp_function(&Actor::set_host, py::call_guard<py::gil_scoped_release>()),
@@ -908,5 +885,6 @@ PYBIND11_MODULE(simgrid, m)
            "Suspend that actor, that is blocked until resume()ed by another actor.")
       .def("resume", &Actor::resume, py::call_guard<py::gil_scoped_release>(),
            "Resume that actor, that was previously suspend()ed.")
-      .def_static("kill_all", &Actor::kill_all, py::call_guard<py::gil_scoped_release>(), "Kill all actors but the caller.");
+      .def_static("kill_all", &Actor::kill_all, py::call_guard<py::gil_scoped_release>(),
+                  "Kill all actors but the caller.");
 }