From 0d9d64e3e4f978144c0a57e9eae1c358e742c011 Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Fri, 21 Jan 2022 01:57:07 +0100 Subject: [PATCH] Port the Algorithm tutorial to python --- MANIFEST.in | 4 +- docs/source/Tutorial_Algorithms.rst | 584 +++++++++++++----- docs/source/_static/css/custom.css | 17 +- docs/source/app_s4u.rst | 1 + .../{deployment4.xml => deployment5.xml} | 0 docs/source/tuto_s4u/img/intro.svg | 273 ++++---- docs/source/tuto_s4u/master-workers-lab1.py | 74 +++ docs/source/tuto_s4u/master-workers-lab2.py | 79 +++ .../app-masterworkers/app-masterworkers.py | 6 + src/bindings/python/simgrid_python.cpp | 3 + tools/cmake/DefinePackages.cmake | 4 +- 11 files changed, 740 insertions(+), 305 deletions(-) rename docs/source/tuto_s4u/{deployment4.xml => deployment5.xml} (100%) create mode 100644 docs/source/tuto_s4u/master-workers-lab1.py create mode 100644 docs/source/tuto_s4u/master-workers-lab2.py diff --git a/MANIFEST.in b/MANIFEST.in index e2fdf2b772..1f89959290 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1927,7 +1927,7 @@ include docs/source/tuto_network_calibration/send_dhist.json include docs/source/tuto_s4u/deployment1.xml include docs/source/tuto_s4u/deployment2.xml include docs/source/tuto_s4u/deployment3.xml -include docs/source/tuto_s4u/deployment4.xml +include docs/source/tuto_s4u/deployment5.xml include docs/source/tuto_s4u/draw_gantt.R include docs/source/tuto_s4u/img/Rscript-screenshot.png include docs/source/tuto_s4u/img/intro.svg @@ -1935,7 +1935,9 @@ include docs/source/tuto_s4u/img/question.svg include docs/source/tuto_s4u/img/result.png include docs/source/tuto_s4u/img/vite-screenshot.png include docs/source/tuto_s4u/master-workers-lab1.cpp +include docs/source/tuto_s4u/master-workers-lab1.py include docs/source/tuto_s4u/master-workers-lab2.cpp +include docs/source/tuto_s4u/master-workers-lab2.py include docs/source/tuto_s4u/master-workers-lab3.cpp include docs/source/tuto_s4u/master-workers-lab4.cpp include 
docs/source/tuto_smpi/3hosts.png diff --git a/docs/source/Tutorial_Algorithms.rst b/docs/source/Tutorial_Algorithms.rst index 1c28510845..72eb7f3ebf 100644 --- a/docs/source/Tutorial_Algorithms.rst +++ b/docs/source/Tutorial_Algorithms.rst @@ -77,9 +77,9 @@ is in charge of distributing some computational tasks to a set of .. image:: /tuto_s4u/img/intro.svg :align: center -We first present a round-robin version of this application, where the -master dispatches the tasks to the workers, one after the other, until -all tasks are dispatched. You will improve this scheme later in this tutorial. +The provided code dispatches these tasks in `round-robin scheduling `_, +i.e. in circular order: tasks are dispatched to each worker one after the other, until all tasks are dispatched. +You will improve this scheme later in this tutorial. The Actors .......... @@ -93,9 +93,7 @@ host on which a worker is waiting for something to compute. Then, the tasks are sent one after the other, each on a mailbox named after the worker's hosts. On the other side, a given worker (which -code is given below) waits for incoming tasks on its -mailbox. - +code is given below) waits for incoming tasks on its mailbox. In the end, once all tasks are dispatched, the master dispatches @@ -103,56 +101,103 @@ another task per worker, but this time with a negative amount of flops to compute. Indeed, this application decided by convention, that the workers should stop when encountering such a negative compute_size. -At the end of the day, the only SimGrid specific functions used in -this example are :cpp:func:`simgrid::s4u::Mailbox::by_name` and -:cpp:func:`simgrid::s4u::Mailbox::put`. Also, :c:macro:`XBT_INFO` is used -as a replacement to `printf()` or `std::cout` to ensure that the messages -are nicely logged along with the simulated time and actor name. + .. tabs:: + + .. 
group-tab:: C++ + + At the end of the day, the only SimGrid specific functions used in + this example are :cpp:func:`simgrid::s4u::Mailbox::by_name` (to retrieve or create a mailbox) and + :cpp:func:`simgrid::s4u::Mailbox::put` (to send something over a mailbox). Also, :c:macro:`XBT_INFO` is used + as a replacement to ``printf()`` or ``std::cout`` to ensure that the messages + are nicely logged along with the simulated time and actor name. + + .. literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers-fun.cpp + :language: c++ + :start-after: master-begin + :end-before: master-end + .. group-tab:: Python -.. literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers-fun.cpp - :language: c++ - :start-after: master-begin - :end-before: master-end + At the end of the day, the only SimGrid specific functions used in + this example are :py:func:`simgrid.Mailbox.by_name` (to retrieve or create a mailbox) and + :py:func:`simgrid.Mailbox.put` (to send something over a mailbox). Also, :py:func:`simgrid.this_actor.info` is used + as a replacement to ``print`` to ensure that the messages + are nicely logged along with the simulated time and actor name. -Here comes the code of the worker actors. This function expects no + .. literalinclude:: ../../examples/python/app-masterworkers/app-masterworkers.py + :language: python + :start-after: master-begin + :end-before: master-end + +Then comes the code of the worker actors. This function expects no parameter from its vector of strings. Its code is very simple: it expects messages on the mailbox that is named after its host. As long as it gets valid computation requests (whose compute_amount is positive), it computes this task and waits for the next one. -The worker retrieves its own host with -:cpp:func:`simgrid::s4u::this_actor::get_host`. The -:ref:`simgrid::s4u::this_actor ` -namespace contains many such helping functions. +.. tabs:: + + .. 
group-tab:: C++ + + The worker retrieves its own host with + :cpp:func:`simgrid::s4u::this_actor::get_host`. The + :ref:`simgrid::s4u::this_actor ` + namespace contains many such helping functions. + + .. literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers-fun.cpp + :language: c++ + :start-after: worker-begin + :end-before: worker-end -.. literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers-fun.cpp - :language: c++ - :start-after: worker-begin - :end-before: worker-end + .. group-tab:: Python + + The worker retrieves its own host with :py:func:`simgrid.this_actor.get_host`. The + :ref:`this_actor ` object contains many such helping functions. + + .. literalinclude:: ../../examples/python/app-masterworkers/app-masterworkers.py + :language: python + :start-after: worker-begin + :end-before: worker-end Starting the Simulation ....................... -And this is it. In only a few lines, we defined the algorithm of our -master/workers examples. +And this is it. In only a few lines, we defined the algorithm of our master/workers example. + +.. tabs:: + + .. group-tab:: C++ + + That being said, an algorithm alone is not enough to define a + simulation: SimGrid is a library, not a program. So you need to define + your own ``main()`` function as follows. This function is in charge of + creating a SimGrid simulation engine (on line 3), register the actor + functions to the engine (on lines 7 and 8), load the simulated platform + from its description file (on line 11), map actors onto that platform + (on line 12) and run the simulation until its completion on line 15. + + .. literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers-fun.cpp + :language: c++ + :start-after: main-begin + :end-before: main-end + :linenos: + + .. group-tab:: Python -That being said, an algorithm alone is not enough to define a -simulation: SimGrid is a library, not a program. 
So you need to define -your own ``main()`` function as follows. This function is in charge of -creating a SimGrid simulation engine (on line 3), register the actor -functions to the engine (on lines 7 and 8), load the simulated platform -from its description file (on line 11), map actors onto that platform -(on line 12) and run the simulation until its completion on line 15. + That being said, an algorithm alone is not enough to define a simulation: + you need a main block to set up the simulation and its components as follows. + This code creates a SimGrid simulation engine (on line 4), registers the actor + functions to the engine (on lines 7 and 8), loads the simulated platform + from its description file (on line 11), maps actors onto that platform + (on line 12) and runs the simulation until its completion on line 15. -.. literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers-fun.cpp - :language: c++ - :start-after: main-begin - :end-before: main-end - :linenos: + .. literalinclude:: ../../examples/python/app-masterworkers/app-masterworkers.py + :language: python + :start-after: main-begin + :end-before: main-end + :linenos: -As you can see, this also requires a platform file and a deployment -file. +Finally, this example requires a platform file and a deployment file. Platform File ............. @@ -197,18 +242,33 @@ example, we start 6 actors: one master and 5 workers. Execution Example ................. -This time, we have all parts: once the program is compiled, we can -execute it as follows. Note how the XBT_INFO() requests turned into -informative messages. +This time, we have all parts: once the program is compiled, we can execute it as follows. -.. "WARNING: non-whitespace stripped by dedent" is expected here as we remove the $ marker this way +.. tabs:: -.. 
literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers.tesh - :language: shell - :start-after: s4u-app-masterworkers-fun - :prepend: $$$ ./masterworkers platform.xml deploy.xml - :append: $$$ - :dedent: 2 + .. group-tab:: C++ + + Note how the :c:macro:`XBT_INFO` requests turned into informative messages. + + .. "WARNING: non-whitespace stripped by dedent" is expected here as we remove the $ marker this way + + .. literalinclude:: ../../examples/cpp/app-masterworkers/s4u-app-masterworkers.tesh + :language: shell + :start-after: s4u-app-masterworkers-fun + :prepend: $$$ ./masterworkers platform.xml deploy.xml + :append: $$$ + :dedent: 2 + + .. group-tab:: Python + + Note how the :py:func:`simgrid.this_actor.info` calls turned into informative messages. + + .. literalinclude:: ../../examples/python/app-masterworkers/app-masterworkers.tesh + :language: shell + :start-after: app-masterworkers_d.xml + :prepend: $$$ python ./app-masterworkers.py platform.xml deploy.xml + :append: $$$ + :dedent: 2 Each example included in the SimGrid distribution comes with a `tesh` file that presents how to start the example once compiled, along with @@ -344,16 +404,67 @@ recompile it when you first log in: Using your Computer Natively ............................ -To take the tutorial on your machine, you first need to :ref:`install -a recent version of SimGrid `, a C++ compiler, and also -``pajeng`` to visualize the traces. You may want to install `Vite -`_ to get a first glance at the traces. -The provided code template requires CMake to compile. On Debian and -Ubuntu for example, you can get them as follows: +.. tabs:: -.. code-block:: console + .. group-tab:: C++ + + To take the tutorial on your machine, you first need to :ref:`install + a recent version of SimGrid `, a C++ compiler, and also + ``pajeng`` to visualize the traces. You may want to install `Vite + `_ to get a first glance at the traces. + The provided code template requires CMake to compile. 
On Debian and + Ubuntu for example, you can get them as follows: + + .. code-block:: console + + $ sudo apt install simgrid pajeng cmake g++ vite + + An initial version of the source code is provided on framagit. This + template compiles with CMake. If SimGrid is correctly installed, you + should be able to clone the `repository + `_ and recompile + everything as follows: + + .. code-block:: console + + # (exporting SimGrid_PATH is only needed if SimGrid is installed in a non-standard path) + $ export SimGrid_PATH=/where/to/simgrid + + $ git clone https://framagit.org/simgrid/simgrid-template-s4u.git + $ cd simgrid-template-s4u/ + $ cmake . + $ make + + If you struggle with the compilation, then you should double-check + your :ref:`SimGrid installation `. On need, please refer to + the :ref:`Troubleshooting your Project Setup + ` section. + + .. group-tab:: Python + + To take the tutorial on your machine, you first need to :ref:`install + a recent version of SimGrid ` and ``pajeng`` to visualize the + traces. You may want to install `Vite `_ to get a first glance at the traces. + On Debian and Ubuntu for example, you can get them as follows: + + .. code-block:: console + + $ sudo apt install simgrid pajeng vite + + An initial version of the source code is provided on framagit. + If SimGrid is correctly installed, you should be able to clone the `repository + `_ and execute it as follows: + + .. code-block:: console + + $ git clone https://framagit.org/simgrid/simgrid-template-s4u.git + $ cd simgrid-template-s4u/ + $ python master-workers.py small_platform.xml master-workers_d.xml + + If you get some errors, then you should double-check + your :ref:`SimGrid installation `. On need, please refer to + the :ref:`Troubleshooting your Project Setup ` section. - $ sudo apt install simgrid pajeng cmake g++ vite For R analysis of the produced traces, you may want to install R and the `pajengr `_ package. @@ -367,43 +478,39 @@ and the `pajengr `_ package. 
# install the pajengr R package $ Rscript -e "library(devtools); install_github('schnorr/pajengr');" -An initial version of the source code is provided on framagit. This -template compiles with CMake. If SimGrid is correctly installed, you -should be able to clone the `repository -`_ and recompile -everything as follows: -.. code-block:: console +Discovering the Provided Code +............................. - # (exporting SimGrid_PATH is only needed if SimGrid is installed in a non-standard path) - $ export SimGrid_PATH=/where/to/simgrid +.. tabs:: - $ git clone https://framagit.org/simgrid/simgrid-template-s4u.git - $ cd simgrid-template-s4u/ - $ cmake . - $ make + .. group-tab:: C++ -If you struggle with the compilation, then you should double-check -your :ref:`SimGrid installation `. On need, please refer to -the :ref:`Troubleshooting your Project Setup -` section. + Please compile and execute the provided simulator as follows: -Discovering the Provided Code -............................. + .. code-block:: console -Please compile and execute the provided simulator as follows: + $ make master-workers + $ ./master-workers small_platform.xml master-workers_d.xml -.. code-block:: console + .. group-tab:: Python + + Please execute the provided simulator as follows: - $ make master-workers - $ ./master-workers small_platform.xml master-workers_d.xml + .. code-block:: console + + $ python master-workers.py small_platform.xml master-workers_d.xml For a more "fancy" output, you can use simgrid-colorizer. .. code-block:: console + # Run C++ code $ ./master-workers small_platform.xml master-workers_d.xml 2>&1 | simgrid-colorizer + # Run Python code + $ python master-workers.py small_platform.xml master-workers_d.xml 2>&1 | simgrid-colorizer + If you installed SimGrid to a non-standard path, you may have to specify the full path to simgrid-colorizer on the above line, such as ``/opt/simgrid/bin/simgrid-colorizer``. 
If you did not install it at all, @@ -416,7 +523,12 @@ is a better way to visualize SimGrid traces (see below). .. code-block:: console + # Run C++ code $ ./master-workers small_platform.xml master-workers_d.xml --cfg=tracing:yes --cfg=tracing/actor:yes + # Run Python code + $ python master-workers.py small_platform.xml master-workers_d.xml --cfg=tracing:yes --cfg=tracing/actor:yes + + # Visualize the produced trace $ vite simgrid.trace .. image:: /tuto_s4u/img/vite-screenshot.png @@ -434,7 +546,12 @@ and use it as follows: .. code-block:: console + # Run C++ code $ ./master-workers small_platform.xml master-workers_d.xml --cfg=tracing:yes --cfg=tracing/actor:yes + # Run Python code + $ python master-workers.py small_platform.xml master-workers_d.xml --cfg=tracing:yes --cfg=tracing/actor:yes + + # Visualize the produced trace $ Rscript draw_gantt.R simgrid.trace It produces a ``Rplots.pdf`` with the following content: @@ -443,9 +560,16 @@ It produces a ``Rplots.pdf`` with the following content: :align: center -Lab 1: Simpler Deployments +Lab 1: Simpler deployments -------------------------- +.. rst-class:: learning-goals + + **Learning goals:** + + * Get your hands on the code and change the communication pattern + * Discover the Mailbox mechanism + In the provided example, adding more workers quickly becomes a pain: You need to start them (at the bottom of the file) and inform the master of its availability with an extra parameter. This is mandatory @@ -468,25 +592,41 @@ information is only written once. It thus follows the `DRY .. literalinclude:: tuto_s4u/deployment1.xml :language: xml +.. tabs:: + + .. group-tab:: C++ + + Copy your ``master-workers.cpp`` into ``master-workers-lab1.cpp`` and + add a new executable into ``CMakeLists.txt``. Then modify your worker + function so that it gets its mailbox name not from the name of its + host, but from the string passed as ``args[1]``. 
The master will send + messages to all workers based on their number, for example as follows: + + .. code-block:: cpp -Copy your ``master-workers.cpp`` into ``master-workers-lab1.cpp`` and -add a new executable into ``CMakeLists.txt``. Then modify your worker -function so that it gets its mailbox name not from the name of its -host, but from the string passed as ``args[1]``. The master will send -messages to all workers based on their number, for example as follows: + for (int i = 0; i < tasks_count; i++) { + std::string worker_rank = std::to_string(i % workers_count); + std::string mailbox_name = std::string("worker-") + worker_rank; + simgrid::s4u::Mailbox* mailbox = simgrid::s4u::Mailbox::by_name(mailbox_name); -.. code-block:: cpp + mailbox->put(...); - for (int i = 0; i < tasks_count; i++) { - std::string worker_rank = std::to_string(i % workers_count); - std::string mailbox_name = std::string("worker-") + worker_rank; - simgrid::s4u::Mailbox* mailbox = simgrid::s4u::Mailbox::by_name(mailbox_name); + ... + } - mailbox->put(...); + .. group-tab:: Python - ... - } + Copy your ``master-workers.py`` into ``master-workers-lab1.py`` then + modify your worker + function so that it gets its mailbox name not from the name of its + host, but from the string passed as ``args[0]``. The master will send + messages to all workers based on their number, for example as follows: + .. code-block:: python + + for i in range(tasks_count): + mailbox = Mailbox.by_name(str(i % worker_count)) + mailbox.put(...) Wrap up ....... @@ -500,13 +640,20 @@ timing. ``put()`` and ``get()`` are matched regardless of their initiators' location and then the real communication occurs between the involved parties. -Please refer to the full `Mailboxes' documentation -`_ for more details. +Please refer to the full `Mailboxes' documentation `_ +for more details. Lab 2: Using the Whole Platform ------------------------------- +.. 
rst-class:: learning-goals + + **Learning goals:** + + * Interact with the platform (get the list of all hosts) + * Create actors directly from your program instead of the deployment file + It is now easier to add a new worker, but you still have to do it manually. It would be much easier if the master could start the workers on its own, one per available host in the platform. The new @@ -519,29 +666,52 @@ deployment file should be as simple as: Creating the workers from the master .................................... -For that, the master needs to retrieve the list of hosts declared in -the platform with :cpp:func:`simgrid::s4u::Engine::get_all_hosts`. -Then, the master should start the worker actors with -:cpp:func:`simgrid::s4u::Actor::create`. +.. tabs:: + + .. group-tab:: C++ + + For that, the master needs to retrieve the list of hosts declared in + the platform with :cpp:func:`simgrid::s4u::Engine::get_all_hosts`. + Then, the master should start the worker actors with + :cpp:func:`simgrid::s4u::Actor::create`. + + ``Actor::create(name, host, func, params...)`` is a very flexible + function. Its third parameter is the function that the actor should + execute. This function can take any kind of parameter, provided that + you pass similar parameters to ``Actor::create()``. For example, you + could have something like this: -``Actor::create(name, host, func, params...)`` is a very flexible -function. Its third parameter is the function that the actor should -execute. This function can take any kind of parameter, provided that -you pass similar parameters to ``Actor::create()``. For example, you -could have something like this: + .. code-block:: cpp -.. code-block:: cpp + void my_actor(int param1, double param2, std::string param3) { + ... + } + int main(int argc, char argv**) { + ... + simgrid::s4u::ActorPtr actor; + actor = simgrid::s4u::Actor::create("name", simgrid::s4u::Host::by_name("the_host"), + &my_actor, 42, 3.14, "thevalue"); + ... 
+ } - void my_actor(int param1, double param2, std::string param3) { - ... - } - int main(int argc, char argv**) { - ... - simgrid::s4u::ActorPtr actor; - actor = simgrid::s4u::Actor::create("name", simgrid::s4u::Host::by_name("the_host"), - &my_actor, 42, 3.14, "thevalue"); - ... - } + .. group-tab:: Python + + For that, the master needs to retrieve the list of hosts declared in + the platform with :py:func:`simgrid.Engine.get_all_hosts`. + Then, the master should start the worker actors with + :py:func:`simgrid.Actor.create`. + + ``Actor.create(name, host, func, params...)`` is a very flexible + function. Its third parameter is the function that the actor should + execute. This function can take any kind of parameter, provided that + you pass similar parameters to ``Actor.create()``. For example, you + could have something like this: + + .. code-block:: python + + def my_actor(param1, param2, param3): + ... + actor = simgrid.Actor.create("name", the_host, my_actor, 42, 3.14, "thevalue") Master-Workers Communication @@ -559,19 +729,10 @@ more than one master. One possibility for that is to use the actor ID (aid) of each worker as a mailbox name. The master can retrieve the aid of the newly -created actor with ``actor->get_pid()`` while the actor itself can -retrieve its own aid with ``simgrid::s4u::this_actor::get_pid()``. +created actor with ``get_pid()`` while the actor itself can +retrieve its own aid with ``this_actor::get_pid()``. The retrieved value is an ``aid_t``, which is an alias for ``long``. -Instead of having one mailbox per worker, you could also reorganize -completely your application to have only one mailbox per master. All -the workers of a given master would pull their work from the same -mailbox, which should be passed as a parameter to the workers. -This requires fewer mailboxes but prevents the master from taking -any scheduling decision. It depends on how you want to organize -your application and what you want to study with your simulator. 
In -this tutorial, that's probably not a good idea. - Wrap up ....... @@ -593,6 +754,13 @@ separation of concerns between things of different nature. Lab 3: Fixed Experiment Duration -------------------------------- +.. rst-class:: learning-goals + + **Learning goals:** + + * Forcefully kill actors, and stop the simulation at a given point of time + * Control the logging verbosity + In the current version, the number of tasks is defined through the worker arguments. Hence, tasks are created at the very beginning of the simulation. Instead, have the master dispatching tasks for a @@ -602,10 +770,12 @@ instead of beforehand. Of course, usual time functions like ``gettimeofday`` will give you the time on your real machine, which is pretty useless in the simulation. Instead, retrieve the time in the simulated world with -:cpp:func:`simgrid::s4u::Engine::get_clock`. +:cpp:func:`simgrid::s4u::Engine::get_clock` (C++) or +:py:func:`simgrid.Engine.get_clock` (Python). You can still stop your workers with a specific task as previously, -or you may kill them forcefully with :cpp:func:`simgrid::s4u::Actor::kill`. +or you may kill them forcefully with :cpp:func:`simgrid::s4u::Actor::kill` (C++) or +:py:func:`simgrid.Actor.kill` (Python). Anyway, the new deployment `deployment3.xml` file should thus look like this: @@ -617,9 +787,10 @@ Controlling the message verbosity ................................. Not all messages are equally informative, so you probably want to -change some of the ``XBT_INFO`` into ``XBT_DEBUG`` so that they are -hidden by default. For example, you may want to use ``XBT_INFO`` once -every 100 tasks and ``XBT_DEBUG`` when sending all the other tasks. Or +change some of the *info* messages (C: :c:macro:`XBT_INFO`; Python: :py:func:`this_actor.info`) +into *debug* messages (C: :c:macro:`XBT_DEBUG`; Python: :py:func:`this_actor.debug`) so that they are +hidden by default. 
For example, you may want to use an *info* message once +every 100 tasks and *debug* when sending all the other tasks. Or you could show only the total number of tasks processed by default. You can still see the debug messages as follows: @@ -627,13 +798,85 @@ default. You can still see the debug messages as follows: $ ./master-workers-lab3 small_platform.xml deployment3.xml --log=s4u_app_masterworker.thres:debug +Lab 4: What-if analysis +----------------------- + +.. rst-class:: learning-goals + + **Learning goals:** + + * Change the platform characteristics during the simulation. + * Explore other communication patterns. + +.. todo:: + + Some of the required functions are not implemented in Python yet. You can detect that if the method name is not a link to the documentation. + +Computational speed +................... + +Attach a profile to your hosts, so that their computational speed automatically varies over time, modeling an external load on these machines. +This can be done with :cpp:func:`simgrid::s4u::Host::set_speed_profile` (C++) or :py:func:`simgrid.Host.set_speed_profile` (python). + +Make it so that one of the hosts gets really, really slow, and observe how your whole application performance decreases. +This is because one slow host slows down the whole process. Instead of a round-robin dispatch push, +you should completely reorganize your application in a First-Come First-Served manner (FCFS). +Actors should pull a task whenever they are ready, so that fast actors can overtake slow ones in the queue. + +There are two ways to implement that: either the workers request a task from the master by sending their name to a specific mailbox, +or the master directly pushes the tasks to a centralized mailbox from which the workers pull their work. The first approach is closer +to what would happen with communications based on BSD sockets while the second is closer to message queues. 
You could also decide to +model your socket application in the second manner if you want to neglect these details and keep your simulator simple. It's your decision. + +Changing the communication schema can be a bit hairy, but once it works, you will see that such a simple FCFS schema allows one to greatly +increase the number of tasks handled over time here. Things may be different with another platform file. + +Communication speed +................... + +Let's now modify the communication speed between hosts. -Lab 4: Competing Applications +Retrieve a link from its name with :cpp:func:`simgrid::s4u::Link::by_name()` (C++) or :py:func:`simgrid.Link.by_name()` (python). + +Retrieve all links in the platform with :cpp:func:`simgrid::s4u::Engine::get_all_links()` (C++) or :py:func:`simgrid.Engine.get_all_links()` (python). + +Retrieve the list of links from one host to another with :cpp:func:`simgrid::s4u::Host::route_to` (C++) or :py:func:`simgrid.Host.route_to` (python). + +Modify the bandwidth of a given link with :cpp:func:`simgrid::s4u::Link::set_bandwidth` (C++) or :py:func:`simgrid.Link.set_bandwidth` (python). +You can even have the bandwidth automatically vary over time with :cpp:func:`simgrid::s4u::Link::set_bandwidth_profile` (C++) or :py:func:`simgrid.Link.set_bandwidth_profile` (python). + +Once implemented, you will notice that slow communications may still result in situations +where only one worker works at a given point of time. To overcome that, your master needs +to send data to several workers in parallel, using +:cpp:func:`simgrid::s4u::Mailbox::put_async` (C++) or :py:func:`simgrid.Mailbox.put_async` (Python) +to start several communications in parallel, and +:cpp:func:`simgrid::s4u::Comm::wait_any` (C++) or :py:func:`simgrid.Comm.wait_any` (Python) +to react to the completion of one of these communications. 
Actually, since this code is somewhat tricky +to write, it's provided as :ref:`an example ` in the distribution (search for +``wait_any`` in that page). + +Dealing with failures +..................... + +Turn a given link off with :cpp:func:`simgrid::s4u::Link::turn_off` (C++) or :py:func:`simgrid.Link.turn_off` (python). +You can even implement churn where a link automatically turns off and on again over time with :cpp:func:`simgrid::s4u::Link::set_state_profile` (C++) or :py:func:`simgrid.Link.set_state_profile` (python). + +If a link fails while you try to use it, ``wait()`` will raise a ``NetworkFailureException`` that you need to catch. +Again, there is a nice example demoing this feature, :ref:`under platform-failures `. + +Lab 5: Competing Applications ----------------------------- -It is now time to start several applications at once, with the following ``deployment4.xml`` file. +.. rst-class:: learning-goals + + **Learning goals:** + + * Advanced visualization through tracing categories + -.. literalinclude:: tuto_s4u/deployment4.xml +It is now time to start several applications at once, with the following ``deployment5.xml`` file. + +.. literalinclude:: tuto_s4u/deployment5.xml :language: xml Things happen when you do so, but it remains utterly difficult to @@ -644,21 +887,41 @@ will categorize the tasks. Instead of starting the execution in one function call only with ``this_actor::execute(cost)``, you need to -create the execution activity, set its tracing category, and then start -it and wait for its completion, as follows: +create the execution activity, set its tracing category, start it +and wait for its completion, as follows: + +.. tabs:: + + .. group-tab:: C++ -.. code-block:: cpp + .. 
code-block:: cpp - simgrid::s4u::ExecPtr exec = simgrid::s4u::this_actor::exec_init(compute_cost); - exec->set_tracing_category(category); - // exec->start() is optional here as wait() starts the activity on need - exec->wait(); + simgrid::s4u::ExecPtr exec = simgrid::s4u::this_actor::exec_init(compute_cost); + exec->set_tracing_category(category); + // exec->start() is optional here as wait() starts the activity on need + exec->wait(); -You can shorten this code as follows: + You can shorten this code as follows: -.. code-block:: cpp + .. code-block:: cpp + + simgrid::s4u::this_actor::exec_init(compute_cost)->set_tracing_category(category)->wait(); + + .. group-tab:: Python + + .. code-block:: python + + exec = simgrid.this_actor.exec_init(compute_cost) + exec.set_tracing_category(category) + # exec.start() is optional here as wait() starts the activity on need + exec.wait() + + You can shorten this code as follows: + + .. code-block:: python + + simgrid.this_actor.exec_init(compute_cost).set_tracing_category(category).wait() - simgrid::s4u::this_actor::exec_init(compute_cost)->set_tracing_category(category)->wait(); Visualizing the result ....................... @@ -672,28 +935,8 @@ as explained on `this page Include here the minimal setting to view something in R. - -Lab 5: Better Scheduling ------------------------- - -You don't need a very advanced visualization solution to notice that -round-robin is completely suboptimal: most of the workers keep waiting -for more work. We will move to a First-Come First-Served mechanism -instead. - -For that, your workers should explicitly request work with a -message sent to a channel that is specific to their master. The name -of that private channel can be the one used to categorize the -executions, as it is already specific to each master. - -The master should serve in a round-robin manner the requests it -receives until the time is up. 
Changing the communication schema can -be a bit hairy, but once it works, you will see that such as simple -FCFS schema allows one to double the number of tasks handled over time -here. Things may be different with another platform file. - Further Improvements -.................... +-------------------- From this, many things can easily be added. For example, you could: @@ -707,8 +950,7 @@ From this, many things can easily be added. For example, you could: What is the largest number of tasks requiring 50e6 flops and 1e5 bytes that you manage to distribute and process in one hour on ``g5k.xml`` ? -- Optimize not only for the number of tasks handled but also for the - total energy dissipated. +- Optimize not only for the number of tasks handled but also for the total energy dissipated. - And so on. If you come up with a nice extension, please share it with us so that we can extend this tutorial. @@ -722,6 +964,10 @@ tutorials, or you could head up to the :ref:`example section ` to Things to improve in the future: - - Propose equivalent exercises and skeleton in java (and Python once we have a python binding). + - Propose equivalent exercises and skeleton in Java once we fix the Java binding. + +.. |br| raw:: html + +
.. LocalWords: SimGrid diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css index e9b8837c07..d438d656f9 100644 --- a/docs/source/_static/css/custom.css +++ b/docs/source/_static/css/custom.css @@ -4,4 +4,19 @@ object#TOC { div[itemprop="articleBody"] { word-wrap: break-word; -} \ No newline at end of file +} +ul.learning-goals { + line-height: 80%; + margin-left: 2em; + padding: 0.2em; + margin-bottom: 1em; +} +ul.learning-goals li { + list-style: disc; +} +ul.learning-goals li p { + margin-bottom: 0; +} +p.learning-goals { + margin-bottom: 0; +} diff --git a/docs/source/app_s4u.rst b/docs/source/app_s4u.rst index e2c61ede7c..8cdf561e5d 100644 --- a/docs/source/app_s4u.rst +++ b/docs/source/app_s4u.rst @@ -1512,6 +1512,7 @@ Retrieving links .. group-tab:: Python + .. automethod:: simgrid.Link.by_name .. autoattribute:: simgrid.Link.name .. group-tab:: C diff --git a/docs/source/tuto_s4u/deployment4.xml b/docs/source/tuto_s4u/deployment5.xml similarity index 100% rename from docs/source/tuto_s4u/deployment4.xml rename to docs/source/tuto_s4u/deployment5.xml diff --git a/docs/source/tuto_s4u/img/intro.svg b/docs/source/tuto_s4u/img/intro.svg index 331071e575..9f5989f61f 100644 --- a/docs/source/tuto_s4u/img/intro.svg +++ b/docs/source/tuto_s4u/img/intro.svg @@ -2,20 +2,20 @@ + inkscape:version="1.1.1 (3bf5ae0d25, 2021-09-20)" + sodipodi:docname="intro.svg" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/"> @@ -44,7 +44,7 @@ @@ -60,7 +60,7 @@ inkscape:connector-curvature="0" id="path1496-6" d="M 0,0 5,-5 -12.5,0 5,5 Z" - style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1" + 
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1" transform="matrix(-0.8,0,0,-0.8,-10,0)" /> @@ -117,20 +117,21 @@ inkscape:pageopacity="0.0" inkscape:pageshadow="2" inkscape:zoom="2.8" - inkscape:cx="27.095926" - inkscape:cy="74.973851" + inkscape:cx="159.82143" + inkscape:cy="72.142857" inkscape:document-units="mm" inkscape:current-layer="layer1" showgrid="false" inkscape:window-width="1920" - inkscape:window-height="1019" + inkscape:window-height="1023" inkscape:window-x="0" - inkscape:window-y="32" + inkscape:window-y="33" inkscape:window-maximized="1" fit-margin-top="0" fit-margin-left="0" fit-margin-right="0" - fit-margin-bottom="0" /> + fit-margin-bottom="0" + inkscape:pagecheckerboard="0" /> @@ -139,7 +140,6 @@ image/svg+xml - @@ -147,10 +147,10 @@ inkscape:label="Calque 1" inkscape:groupmode="layer" id="layer1" - transform="translate(-29.950586,-101.20699)"> + transform="translate(-29.920019,-101.20699)"> Master + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;stroke-width:0.16372px">Master Worker + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;stroke-width:0.264583px">Worker Worker + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;stroke-width:0.264583px">Worker Worker + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;stroke-width:0.264583px">Worker Worker + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;stroke-width:0.264583px">Worker Worker + 
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;stroke-width:0.264583px">Worker The master dispatchesThe master dispatchesthe tasks to the workers T + diff --git a/docs/source/tuto_s4u/master-workers-lab1.py b/docs/source/tuto_s4u/master-workers-lab1.py new file mode 100644 index 0000000000..98b71d3210 --- /dev/null +++ b/docs/source/tuto_s4u/master-workers-lab1.py @@ -0,0 +1,74 @@ +# Copyright (c) 2010-2022. The SimGrid Team. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it +# under the terms of the license (GNU LGPL) which comes with this package. + +# ################################################################################## +# Take this tutorial online: https://simgrid.org/doc/latest/Tutorial_Algorithms.html +# ################################################################################## + +from simgrid import Actor, Engine, Host, Mailbox, this_actor +import sys + +# master-begin +def master(*args): + if len(args) == 2: + raise AssertionError( + f"Actor master requires 4 parameters, but only {len(args)}") + worker_count = int(args[0]) + tasks_count = int(args[1]) + compute_cost = int(args[2]) + communicate_cost = int(args[3]) + this_actor.info(f"Got {worker_count} workers and {tasks_count} tasks to process") + + for i in range(tasks_count): # For each task to be executed: + # - Select a worker in a round-robin way + mailbox = Mailbox.by_name(str(i % worker_count)) + + # - Send the computation amount to the worker + if (tasks_count < 10000 or (tasks_count < 100000 and i % 10000 == 0) or i % 100000 == 0): + this_actor.info(f"Sending task {i} of {tasks_count} to mailbox '{mailbox.name}'") + mailbox.put(compute_cost, communicate_cost) + + this_actor.info("All tasks have been dispatched. 
Request all workers to stop.") + for i in range (worker_count): + # The workers stop when receiving a negative compute_cost + mailbox = Mailbox.by_name(str(i)) + mailbox.put(-1, 0) +# master-end + +# worker-begin +def worker(*args): + assert len(args) == 1, "The worker expects one argument" + + mailbox = Mailbox.by_name(args[0]) + done = False + while not done: + compute_cost = mailbox.get() + if compute_cost > 0: # If compute_cost is valid, execute a computation of that cost + this_actor.execute(compute_cost) + else: # Stop when receiving an invalid compute_cost + done = True + + this_actor.info("Exiting now.") +# worker-end + +# main-begin +if __name__ == '__main__': + assert len(sys.argv) > 2, f"Usage: python app-masterworkers.py platform_file deployment_file" + + e = Engine(sys.argv) + + # Register the classes representing the actors + e.register_actor("master", master) + e.register_actor("worker", worker) + + # Load the platform description and then deploy the application + e.load_platform(sys.argv[1]) + e.load_deployment(sys.argv[2]) + + # Run the simulation + e.run() + + this_actor.info("Simulation is over") +# main-end \ No newline at end of file diff --git a/docs/source/tuto_s4u/master-workers-lab2.py b/docs/source/tuto_s4u/master-workers-lab2.py new file mode 100644 index 0000000000..6ed833ba55 --- /dev/null +++ b/docs/source/tuto_s4u/master-workers-lab2.py @@ -0,0 +1,79 @@ +# Copyright (c) 2010-2022. The SimGrid Team. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it +# under the terms of the license (GNU LGPL) which comes with this package. 
+ +# ################################################################################## +# Take this tutorial online: https://simgrid.org/doc/latest/Tutorial_Algorithms.html +# ################################################################################## + +from simgrid import Actor, Engine, Host, Mailbox, this_actor +import sys + +# master-begin +def master(*args): + if len(args) == 2: + raise AssertionError( + f"Actor master requires 4 parameters, but only {len(args)}") + tasks_count = int(args[0]) + compute_cost = int(args[1]) + communicate_cost = int(args[2]) + + this_actor.info(f"Got {tasks_count} tasks to process") + + hosts = Engine.instance().get_all_hosts() + + for h in hosts: + Actor.create(f'Worker-{h.name}', h, worker) + + for i in range(tasks_count): # For each task to be executed: + # - Select a worker in a round-robin way + mailbox = Mailbox.by_name(f'Worker-{hosts[i%len(hosts)].name}') + + # - Send the computation amount to the worker + if (tasks_count < 10000 or (tasks_count < 100000 and i % 10000 == 0) or i % 100000 == 0): + this_actor.info(f"Sending task {i} of {tasks_count} to mailbox '{mailbox.name}'") + mailbox.put(compute_cost, communicate_cost) + + this_actor.info("All tasks have been dispatched. 
Request all workers to stop.") + for h in hosts: + # The workers stop when receiving a negative compute_cost + mailbox = Mailbox.by_name(f'Worker-{h.name}') + mailbox.put(-1, 0) +# master-end + +# worker-begin +def worker(*args): + assert len(args) == 0, "The worker expects no argument" + + mailbox = Mailbox.by_name(f'Worker-{this_actor.get_host().name}') + done = False + while not done: + compute_cost = mailbox.get() + if compute_cost > 0: # If compute_cost is valid, execute a computation of that cost + this_actor.execute(compute_cost) + else: # Stop when receiving an invalid compute_cost + done = True + + this_actor.info("Exiting now.") +# worker-end + +# main-begin +if __name__ == '__main__': + assert len(sys.argv) > 2, f"Usage: python app-masterworkers.py platform_file deployment_file" + + e = Engine(sys.argv) + + # Register the classes representing the actors + e.register_actor("master", master) + e.register_actor("worker", worker) + + # Load the platform description and then deploy the application + e.load_platform(sys.argv[1]) + e.load_deployment(sys.argv[2]) + + # Run the simulation + e.run() + + this_actor.info("Simulation is over") +# main-end \ No newline at end of file diff --git a/examples/python/app-masterworkers/app-masterworkers.py b/examples/python/app-masterworkers/app-masterworkers.py index 7f93acad36..200d8186fa 100644 --- a/examples/python/app-masterworkers/app-masterworkers.py +++ b/examples/python/app-masterworkers/app-masterworkers.py @@ -10,6 +10,7 @@ from simgrid import Actor, Engine, Host, Mailbox, this_actor import sys +# master-begin def master(*args): if len(args) < 2: raise AssertionError( @@ -36,7 +37,9 @@ def master(*args): # The workers stop when receiving a negative compute_cost mailbox = workers[i] mailbox.put(-1, 0) +# master-end +# worker-begin def worker(*args): assert len(args) == 0, "The worker expects to not get any argument" @@ -50,7 +53,9 @@ def worker(*args): done = True this_actor.info("Exiting now.") +# worker-end +# 
main-begin if __name__ == '__main__': assert len(sys.argv) > 2, f"Usage: python app-masterworkers.py platform_file deployment_file" @@ -68,3 +73,4 @@ if __name__ == '__main__': e.run() this_actor.info("Simulation is over") +# main-end \ No newline at end of file diff --git a/src/bindings/python/simgrid_python.cpp b/src/bindings/python/simgrid_python.cpp index 3053be097f..2aa3e27fef 100644 --- a/src/bindings/python/simgrid_python.cpp +++ b/src/bindings/python/simgrid_python.cpp @@ -136,6 +136,8 @@ PYBIND11_MODULE(simgrid, m) })) .def_static("get_clock", &Engine::get_clock, "The simulation time, ie the amount of simulated seconds since the simulation start.") + .def_static( + "instance", []() { return Engine::get_instance(); }, "Retrieve the simulation engine") .def("get_all_hosts", &Engine::get_all_hosts, "Returns the list of all hosts found in the platform") .def("load_platform", &Engine::load_platform, "Load a platform file describing the environment") .def("load_deployment", &Engine::load_deployment, "Load a deployment file and launch the actors that it contains") @@ -330,6 +332,7 @@ PYBIND11_MODULE(simgrid, m) py::call_guard(), "Set concurrency limit for this link") .def("set_host_wifi_rate", &simgrid::s4u::Link::set_host_wifi_rate, py::call_guard(), "Set level of communication speed of given host on this Wi-Fi link") + .def("by_name", &simgrid::s4u::Link::by_name, "Retrieves a Link from its name, or dies") .def("seal", &simgrid::s4u::Link::seal, py::call_guard(), "Seal this link") .def_property_readonly( "name", diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index bdfa923fe3..b48150fcc9 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -898,7 +898,7 @@ set(DOC_SOURCES docs/source/tuto_s4u/deployment1.xml docs/source/tuto_s4u/deployment2.xml docs/source/tuto_s4u/deployment3.xml - docs/source/tuto_s4u/deployment4.xml + docs/source/tuto_s4u/deployment5.xml docs/source/tuto_s4u/draw_gantt.R 
docs/source/tuto_s4u/img/intro.svg docs/source/tuto_s4u/img/question.svg @@ -906,7 +906,9 @@ set(DOC_SOURCES docs/source/tuto_s4u/img/Rscript-screenshot.png docs/source/tuto_s4u/img/vite-screenshot.png docs/source/tuto_s4u/master-workers-lab1.cpp + docs/source/tuto_s4u/master-workers-lab1.py docs/source/tuto_s4u/master-workers-lab2.cpp + docs/source/tuto_s4u/master-workers-lab2.py docs/source/tuto_s4u/master-workers-lab3.cpp docs/source/tuto_s4u/master-workers-lab4.cpp -- 2.20.1