From: Martin Quinson Date: Sat, 11 Aug 2018 20:49:38 +0000 (+0200) Subject: Merge branches 'auto_restart' and 'auto_restart' of framagit.org:simgrid/simgrid X-Git-Tag: v3_21~261^2~1 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/937f2eb5b429c3c03fc989a25fbc26fabd0cf529?hp=4ccbacb51eb49323847a906c3e79ea838d76e2a7 Merge branches 'auto_restart' and 'auto_restart' of framagit.org:simgrid/simgrid Also, don't run doxygen manually on framagit/gitlab-ci --- diff --git a/.gitignore b/.gitignore index 4d94ed5308..5f2d918713 100644 --- a/.gitignore +++ b/.gitignore @@ -199,9 +199,11 @@ examples/simdag/scheduling/sd_scheduling examples/simdag/test/sd_test examples/simdag/throttling/sd_throttling examples/simdag/typed_tasks/sd_typed_tasks +examples/smpi/ampi/smpi_ampi examples/smpi/energy/f77/sef examples/smpi/energy/f90/sef90 examples/smpi/energy/smpi_energy +examples/smpi/load_balancer_replay/load_balancer_replay examples/smpi/mc/smpi_bugged1 examples/smpi/mc/smpi_bugged1_liveness examples/smpi/mc/smpi_bugged2 @@ -309,6 +311,7 @@ teshsuite/smpi/coll-gather/coll-gather teshsuite/smpi/coll-reduce/coll-reduce teshsuite/smpi/coll-reduce-scatter/coll-reduce-scatter teshsuite/smpi/coll-scatter/coll-scatter +teshsuite/smpi/fort_args/fort_args teshsuite/smpi/isp/umpire/abort teshsuite/smpi/isp/umpire/abort1 teshsuite/smpi/isp/umpire/abort2 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0085c43ae7..3ef1530238 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,13 +3,8 @@ image: debian:testing-slim .build: &build script: - apt-get update - - apt-get install -y python3-sphinx doxygen python3-breathe python3-sphinx-rtd-theme pip3 - - apt-get install -y cmake doxygen libboost-all-dev libboost-dev - - pip3 install --no-deps exhale - - cmake -Denable_documentation=ON . 
- - make documentation - - mkdir docs/doxyoutput - - mv doc/xml docs/doxyoutput + - apt-get install -y python3-pip doxygen libboost-all-dev libboost-dev fig2dev + - pip3 install --requirement docs/requirements.txt - cd docs - sphinx-build -M html source/ build/ - mv build/html ../public diff --git a/.mailmap b/.mailmap index 61b9df5361..3e2afe02b9 100644 --- a/.mailmap +++ b/.mailmap @@ -35,6 +35,7 @@ Augustin Degomme Augustin Degomme Augustin Degomme Augustin Degomme +Augustin Degomme Augustin Degomme Augustin Degomme Augustin Degomme diff --git a/ChangeLog b/ChangeLog index f556726820..39946e8de7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,18 @@ SimGrid (3.21) NOT RELEASED (Release Target: September 23. 2018, 1:54 UTC) +S4U new features: + - s4u::Io: IOs go asynchronous as activities. This comes with new methods in the + s4u::Storage class: + - io_init(sg_size_t, s4u::Io::OpType) to create a READ or WRITE asynchronous + IO operations that can be started, waited for, or canceled as a regular + activity. 
+ - read_async(sg_size_t) and write_async(sg_size_t) which are wrappers on + io_init() + start() + Tracing: - Rename 'power' and 'power_used' variables into 'speed' and 'speed_used' - New host variable: 'core_count' - + XBT: - Remove xbt_os_thread_specific features - Remove portability wrapper to condition variables diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index ca97d87a7d..b5f7607ac5 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -686,43 +686,11 @@ INPUT = @CMAKE_HOME_DIRECTORY@/doc/doxygen/index.doc \ @CMAKE_HOME_DIRECTORY@/doc/doxygen/module-trace.doc \ @CMAKE_BINARY_DIR@/doc/doxygen/logcategories.doc \ @CMAKE_HOME_DIRECTORY@/include/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/jedule/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/kernel/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/kernel/resource/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/kernel/routing/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/plugins/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/s4u/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/simix/ \ - @CMAKE_HOME_DIRECTORY@/include/simgrid/smpi/ \ - @CMAKE_HOME_DIRECTORY@/include/xbt \ - @CMAKE_HOME_DIRECTORY@/src/include/simgrid/ \ - @CMAKE_HOME_DIRECTORY@/src/include/surf \ - @CMAKE_HOME_DIRECTORY@/src/include/xbt \ - @CMAKE_HOME_DIRECTORY@/src/instr/ \ - @CMAKE_HOME_DIRECTORY@/src/instr/jedule/ \ - @CMAKE_HOME_DIRECTORY@/src/kernel/ \ - @CMAKE_HOME_DIRECTORY@/src/kernel/activity/ \ - @CMAKE_HOME_DIRECTORY@/src/kernel/context/ \ - @CMAKE_HOME_DIRECTORY@/src/kernel/lmm/ \ - @CMAKE_HOME_DIRECTORY@/src/kernel/resource/ \ - @CMAKE_HOME_DIRECTORY@/src/kernel/routing/ \ - @CMAKE_HOME_DIRECTORY@/src/msg/ \ - @CMAKE_HOME_DIRECTORY@/src/plugins/ \ - @CMAKE_HOME_DIRECTORY@/src/plugins/file_system/ \ - @CMAKE_HOME_DIRECTORY@/src/plugins/vm/ \ - @CMAKE_HOME_DIRECTORY@/src/s4u/ \ - @CMAKE_HOME_DIRECTORY@/src/simdag/ \ - @CMAKE_HOME_DIRECTORY@/src/simgrid/ \ - @CMAKE_HOME_DIRECTORY@/src/simix/ \ - 
@CMAKE_HOME_DIRECTORY@/src/smpi/ \ - @CMAKE_HOME_DIRECTORY@/src/surf/ \ - @CMAKE_HOME_DIRECTORY@/src/xbt/ \ - @CMAKE_BINARY_DIR@/include \ - @CMAKE_BINARY_DIR@/src \ + @CMAKE_HOME_DIRECTORY@/src/plugins/ \ @CMAKE_HOME_DIRECTORY@/examples/msg/README.doc \ @CMAKE_HOME_DIRECTORY@/examples/s4u/README.doc + # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built @@ -753,7 +721,7 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. -EXCLUDE = +EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded @@ -767,7 +735,18 @@ EXCLUDE_SYMLINKS = YES # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = */include/simgrid/forward.h +EXCLUDE_PATTERNS += */include/smpi/* + +EXCLUDE_PATTERNS += @CMAKE_HOME_DIRECTORY@/src/surf/xml/simgrid_dtd.* +EXCLUDE_PATTERNS += @CMAKE_HOME_DIRECTORY@/src/simdag/dax_dtd.* +EXCLUDE_PATTERNS += @CMAKE_HOME_DIRECTORY@/src/xbt/automaton/parserPromela.* +EXCLUDE_PATTERNS += @CMAKE_HOME_DIRECTORY@/src/bindings/java/*.cpp @CMAKE_HOME_DIRECTORY@/src/bindings/java/*.h +EXCLUDE_PATTERNS += @CMAKE_HOME_DIRECTORY@/src/simix/popping_accessors.hpp \ + @CMAKE_HOME_DIRECTORY@/src/simix/popping_bodies.cpp \ + @CMAKE_HOME_DIRECTORY@/src/simix/popping_enum.h \ + @CMAKE_HOME_DIRECTORY@/src/simix/popping_generated.cpp + # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the @@ -775,7 +754,10 @@ EXCLUDE_PATTERNS = # wildcard * is used, a substring. 
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = s_xbt_dict_cursor s_xbt_dictelm xbt_dynar_s +EXCLUDE_SYMBOLS += xbt_edge xbt_graph xbt_node +EXCLUDE_SYMBOLS += e_xbt_parmap_mode_t +EXCLUDE_SYMBOLS += MPI_* # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see @@ -1470,11 +1452,14 @@ INCLUDE_FILE_PATTERNS = # instead of the = operator. PREDEFINED = __cplusplus \ + DOXYGEN \ XBT_PUBLIC= \ XBT_EXPORT_NO_IMPORT= \ XBT_IMPORT_NO_EXPORT= \ XBT_PUBLIC_DATA=extern \ + XBT_PUBLIC= \ XBT_INLINE= \ + XBT_ALWAYS_INLINE= \ XBT_PRIVATE= \ XBT_ATTRIB_NORETURN= \ XBT_ATTRIB_UNUSED= \ @@ -1489,7 +1474,8 @@ PREDEFINED = __cplusplus \ # overrules the definition found in the source code. EXPAND_AS_DEFINED = COLL_APPLY COLL_GATHERS COLL_ALLGATHERS COLL_ALLGATHERVS COLL_ALLREDUCES COLL_ALLTOALLS \ - COLL_ALLTOALLVS COLL_BCASTS COLL_REDUCES COLL_REDUCE_SCATTERS COLL_SCATTERS COLL_BARRIERS + COLL_ALLTOALLVS COLL_BCASTS COLL_REDUCES COLL_REDUCE_SCATTERS COLL_SCATTERS COLL_BARRIERS \ + MPI_CALL # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros @@ -1549,7 +1535,7 @@ PERL_PATH = /usr/bin/perl # this option also works with HAVE_DOT disabled, but it is recommended to # install and use dot, since it yields more powerful graphs. -CLASS_DIAGRAMS = YES +CLASS_DIAGRAMS = NO # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see @@ -1668,7 +1654,7 @@ CALL_GRAPH = NO # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. 
-CALLER_GRAPH = YES +CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. @@ -1688,7 +1674,7 @@ DIRECTORY_GRAPH = YES # HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible in IE 9+ (other browsers do not have this requirement). -DOT_IMAGE_FORMAT = png +DOT_IMAGE_FORMAT = svg # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. diff --git a/doc/doxygen/examples.doc b/doc/doxygen/examples.doc deleted file mode 100644 index 78ca077bec..0000000000 --- a/doc/doxygen/examples.doc +++ /dev/null @@ -1,113 +0,0 @@ -/*! @page examples SimGrid Examples - -@tableofcontents - -SimGrid comes with many examples provided in the examples/ directory. -Those examples are described in section @ref msg_examples. Those -examples are commented and should be easy to understand. for a first -step into SimGrid we also provide some more detailed examples in the -sections below. - -@htmlonly -You should also check our online tutorial section that contains a generic tutorial about using SimGrid. -@endhtmlonly - -@section using_msg Using MSG - -@htmlonly -You should also check our online tutorial section that contains a dedicated tutorial. -@endhtmlonly - -Here are some examples on how to use MSG, the most used API. - -MSG comes with an extensive set of examples. It is sometimes difficult -to find the one you need. This list aims at helping you finding the -example from which you can learn what you want to. - -@subsection MSG_ex_basics Basic examples and features - -@subsubsection MSG_ex_master_worker Basic Master/Workers - -Simulation of a master-worker application using a realistic platform and an external description of the deployment. 
- -@paragraph MSG_ex_mw_TOC Table of contents: - - - @ref MSG_ext_mw_preliminary - - @ref MSG_ext_mw_master - - @ref MSG_ext_mw_worker - - @ref MSG_ext_mw_core - - @ref MSG_ext_mw_platform - - @ref MSG_ext_mw_application - -
- -@dontinclude msg/app-masterworker/app-masterworker.c - -@paragraph MSG_ext_mw_preliminary Preliminary declarations - -@skip include -@until example"); -@skipline Master expects - -@paragraph MSG_ext_mw_master Master code - -This function has to be assigned to a #msg_process_t that will behave as the master. It should not be called directly -but either given as a parameter to #MSG_process_create() or registered as a public function through -#MSG_function_register() and then automatically assigned to a process through #MSG_launch_application(). - -C style arguments (argc/argv) are interpreted as: - - the number of tasks to distribute - - the computational size of each task - - the communication size of each task - - the number of workers managed by the master. - -Tasks are evenly sent in a round-robin style. - -@until return 0; -@until } -@skipline Worker expects - -@paragraph MSG_ext_mw_worker Worker code - -This function has to be assigned to a #msg_process_t that has to behave as a worker. Just like the master function -(described in @ref MSG_ext_mw_master), it should not be called directly. - -C style arguments (argc/argv) are interpreted as: - - a unique id used to build the mailbox name of the worker - -This function keeps waiting for tasks and executes them as it receives them. When a special task named 'finalize' is -received from the master, the process ends its execution. - -@until return 0; -@until } - -@paragraph MSG_ext_mw_core Main function - -This function is the core of the simulation and is divided only into 3 parts: - -# Simulation settings : #MSG_create_environment() creates a realistic - environment - -# Application deployment : create the processes on the right locations with - #MSG_launch_application() - -# The simulation is run with #MSG_main() - -Its arguments are: - - platform_file: the name of a file containing an valid platform description. 
- - deployment_file: the name of a file containing a valid application description -@line main -@until OK; -@until } - -@paragraph MSG_ext_mw_platform Example of a platform file - -The following platform description can be found in @c examples/msg/platforms/small_platform.xml -@include platforms/small_platform.xml - -@paragraph MSG_ext_mw_application Example of a deployment file - -The following application description can be found in @c examples/msg/app-masterworker/app-masterworker_d.xml: - -@include msg/app-masterworker/app-masterworker_d.xml - -*/ - - diff --git a/docs/requirements.txt b/docs/requirements.txt index cd6467ed82..98294c5608 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,5 @@ breathe +exhale +sphinx +sphinx_rtd_theme + diff --git a/docs/source/conf.py b/docs/source/conf.py index e5bcf6c9b0..8fd2797e59 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,6 +39,7 @@ release = u'3.21' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ + 'sphinx.ext.todo', # 'sphinx.ext.coverage', 'sphinx.ext.mathjax', # 'sphinx.ext.ifconfig', @@ -46,7 +47,9 @@ extensions = [ 'exhale', ] -breathe_projects = { 'simgrid': '../../doc/xml' } +todo_include_todos = True + +breathe_projects = { 'simgrid': '../build/doxy/xml' } breathe_default_project = "simgrid" # Setup the exhale extension @@ -61,9 +64,32 @@ exhale_args = { # TIP: if using the sphinx-bootstrap-theme, you need # "treeViewIsBootstrap": True, "exhaleExecutesDoxygen": True, - "exhaleDoxygenStdin": "INPUT = ../../include" + "exhaleDoxygenStdin": """ + INPUT = ../../include/simgrid/s4u + GENERATE_XML = YES + PREDEFINED += \ + __cplusplus \ + DOXYGEN \ + XBT_PUBLIC= \ + XBT_EXPORT_NO_IMPORT= \ + XBT_IMPORT_NO_EXPORT= \ + XBT_PUBLIC_DATA=extern \ + XBT_PUBLIC= \ + XBT_INLINE= \ + XBT_ALWAYS_INLINE= \ + XBT_PRIVATE= \ + XBT_ATTRIB_NORETURN= \ + XBT_ATTRIB_UNUSED= \ + XBT_ATTRIB_DEPRECATED_v322(m)= \ + XBT_ATTRIB_DEPRECATED_v323(m)= \ + XBT_ATTRIB_DEPRECATED_v324(m)= + """ } +# For cross-ref generation +primary_domain = 'cpp' + + # Add any paths that contain templates here, relative to this directory. 
# templates_path = ['_templates'] diff --git a/docs/source/images/tuto-masterworkers-intro.svg b/docs/source/images/tuto-masterworkers-intro.svg new file mode 100644 index 0000000000..331071e575 --- /dev/null +++ b/docs/source/images/tuto-masterworkers-intro.svg @@ -0,0 +1,1221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + Master + + + + T + + + + T + + + + Worker + + + + Worker + + + + Worker + + + + Worker + + + + Worker + + + + + + + + + The master dispatchesthe tasks to the workers + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + diff --git a/docs/source/images/tuto-masterworkers-question.svg b/docs/source/images/tuto-masterworkers-question.svg new file mode 100644 index 0000000000..c3188764f9 --- /dev/null +++ b/docs/source/images/tuto-masterworkers-question.svg @@ -0,0 +1,1240 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + Master + + + + T + + + + T + + + + Worker + + + + Worker + + + + Worker + + + + Worker + + + + Worker + + + + + + + + + How should the masterdistribute the tasks? + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + + T + + + ? + + diff --git a/doc/sc3-description.png b/docs/source/images/tuto-masterworkers-result.png similarity index 100% rename from doc/sc3-description.png rename to docs/source/images/tuto-masterworkers-result.png diff --git a/docs/source/index.rst b/docs/source/index.rst index 9b50a14aeb..0be8937cbc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -24,7 +24,6 @@ Welcome to SimGrid's documentation! 
:caption: API Reference: API - S4U Indices and tables ================== diff --git a/docs/source/install_yours.rst b/docs/source/install_yours.rst index 920bbbff71..2258c4c68d 100644 --- a/docs/source/install_yours.rst +++ b/docs/source/install_yours.rst @@ -138,7 +138,9 @@ modify the examples directly but instead create your own project in eclipse. This will make it easier to upgrade to another version of SimGrid. -Troubleshooting your project setup +.. _install_yours_troubleshooting: + +Troubleshooting your Project Setup ---------------------------------- Library not found diff --git a/docs/source/usecase_algorithms.rst b/docs/source/usecase_algorithms.rst new file mode 100644 index 0000000000..6a15168afd --- /dev/null +++ b/docs/source/usecase_algorithms.rst @@ -0,0 +1,429 @@ +.. _usecase_simalgo: + +Simulating Algorithms +===================== + +SimGrid was conceived as a tool to study distributed algorithms. Its +modern S4U interface makes it easy to assess Cloud, P2P, HPC, IoT and +similar settings. + +A typical SimGrid simulation is composed of several **Actors** +|api_s4u_Actor|_ , that execute user-provided functions. The actors +have to explicitly use the S4U interface to express their computation, +communication, disk usage and other **Activities** |api_s4u_Activity|_ +, so that they get reflected within the simulator. These activities +take place on **Resources** (CPUs, links, disks). SimGrid predicts the +time taken by each activity and orchestrates accordingly the actors +waiting for the completion of these activities. + +.. |api_s4u_Actor| image:: /images/extlink.png + :align: middle + :width: 12 +.. _api_s4u_Actor: api/classsimgrid_1_1s4u_1_1Actor.html#class-documentation + +.. |api_s4u_Activity| image:: /images/extlink.png + :align: middle + :width: 12 +.. 
_api_s4u_Activity: api/classsimgrid_1_1s4u_1_1Activity.html#class-documentation + + +Each actor executes a user-provided function on a simulated **Host** +|api_s4u_Host|_ with which it can interact. Communications are not +directly sent to actors, but posted onto **Mailboxes** +|api_s4u_Mailbox|_ that serve as rendez-vous points between +communicating processes. + +.. |api_s4u_Host| image:: /images/extlink.png + :align: middle + :width: 12 +.. _api_s4u_Host: api/classsimgrid_1_1s4u_1_1Host.html#class-documentation + +.. |api_s4u_Mailbox| image:: /images/extlink.png + :align: middle + :width: 12 +.. _api_s4u_Mailbox: api/classsimgrid_1_1s4u_1_1Mailbox.html#class-documentation + + +Discover the Master/Workers +--------------------------- + +This section introduces a first example of SimGrid simulation. This +simple application is composed of two kinds of actors: the **master** +is in charge of distributing some computational tasks to a set of +**workers** that execute them. + +.. image:: /images/tuto-masterworkers-intro.svg + :align: center + +We first present a round-robin version of this application, where the +master dispatches the tasks to the workers, one after the other, until +all tasks are dispatched. Later in this tutorial, you will be given +the opportunity to improve this scheme. + +The Actors +.......... + +Let's start with the code of the master. It is represented by the +*master* function below. This simple function takes 4 parameters, +given as a vector of strings: + + - the number of workers managed by the master. + - the number of tasks to dispatch + - the computational size (in flops to compute) of each task + - the communication size (in bytes to exchange) of each task + +Then, the tasks are sent one after the other, each on a mailbox named +"worker-XXX" where XXX is the number of an existing worker. On the +other side, a given worker (whose code is given below) waits for +incoming tasks on its own mailbox.
Notice how this mailbox mechanism +allows the actors to find each other without having all information: +the master doesn't have to know the actors nor even where they are, it +simply pushes the messages on a mailbox whose name is predetermined. + +At the end, once all tasks are dispatched, the master dispatches +another task per worker, but this time with a negative amount of flops +to compute. Indeed, this application decided by convention, that the +workers should stop when encountering such a negative compute_size. + +At the end of the day, the only SimGrid specific functions used in +this example are :cpp:func:`simgrid::s4u::Mailbox::by_name` and +:cpp:func:`simgrid::s4u::Mailbox::put`. Also, :c:macro:`XBT_INFO` is used +as a replacement to printf() or to cout to ensure that the messages +are nicely logged along with the simulated time and actor name. + + +.. literalinclude:: ../../examples/s4u/app-masterworkers/s4u-app-masterworkers-fun.cpp + :language: c++ + :start-after: master-begin + :end-before: master-end + +Here comes the code of the worker actors. This function expects only one +parameter from its vector of strings: its identifier so that it knows +on which mailbox its incoming tasks will arrive. Its code is very +simple: as long as it gets valid computation requests (whose +compute_amount is positive), it computes this task and waits for the +next one. + +.. literalinclude:: ../../examples/s4u/app-masterworkers/s4u-app-masterworkers-fun.cpp + :language: c++ + :start-after: worker-begin + :end-before: worker-end + +Starting the Simulation +....................... + +And this is it. In only a few lines, we defined the algorithm of our +master/workers example. Well, this is true, but an algorithm alone is +not enough to define a simulation. + +First, SimGrid is a library, not a program. So you need to define your +own `main()` function, as follows.
This function is in charge of +creating a SimGrid simulation engine (on line 3), registering the actor +functions to the engine (on lines 7 and 8), loading the virtual platform +from its description file (on line 11), mapping actors onto that platform +(on line 12) and running the simulation until its completion on line 15. + +.. literalinclude:: ../../examples/s4u/app-masterworkers/s4u-app-masterworkers-fun.cpp + :language: c++ + :start-after: main-begin + :end-before: main-end + :linenos: + +After that, the missing pieces are the platform and deployment +files. + +Platform File +............. + +Platform files define the virtual platform on which the provided +application will take place. It contains one or several **Network +Zones** |api_s4u_NetZone|_ that contain both **Host-** |api_s4u_Host|_ +and **Link-** |api_s4u_Link|_ Resources, as well as routing +information. + +Such files can get rather long and boring, so the example below is +only an excerpt of the full ``examples/platforms/small_platform.xml`` +file. For example, most routing information is missing, and only the +route between the hosts Tremblay and Fafard is given. This path +traverses 6 links (4, 3, 2, 0, 1 and 8). The full file, along with +other examples, can be found in the archive under +``examples/platforms``. + +.. |api_s4u_NetZone| image:: /images/extlink.png + :align: middle + :width: 12 +.. _api_s4u_NetZone: api/classsimgrid_1_1s4u_1_1NetZone.html#class-documentation + +.. |api_s4u_Link| image:: /images/extlink.png + :align: middle + :width: 12 +.. _api_s4u_Link: api/classsimgrid_1_1s4u_1_1Link.html#class-documentation + +.. literalinclude:: ../../examples/platforms/small_platform.xml + :language: xml + :lines: 1-10,12-20,56-63,192- + :caption: (excerpts of the small_platform.xml file) + +Deployment File +............... + +Deployment files specify the execution scenario: they list the actors +that should be started, along with their parameters.
In the following +example, we start 6 actors: one master and 5 workers. + +.. literalinclude:: ../../examples/s4u/app-masterworkers/s4u-app-masterworkers_d.xml + :language: xml + +Execution Example +................. + +This time, we have all parts: once the program is compiled, we can +execute it as follows. Note how the XBT_INFO() requests turned into +informative messages. + +.. literalinclude:: ../../examples/s4u/app-masterworkers/s4u-app-masterworkers.tesh + :language: shell + :start-after: s4u-app-masterworkers-fun + :prepend: $$$ ./masterworkers platform.xml deploy.xml + :append: $$$ + :dedent: 2 + + +Improve it Yourself +------------------- + +In this section, you will modify the example presented earlier to +explore the quality of the proposed algorithm. For now, it works and +the simulation prints things, but the truth is that we have no idea of +whether this is a good algorithm to dispatch tasks to the workers. +This very simple setting raises many interesting questions: + +.. image:: /images/tuto-masterworkers-question.svg + :align: center + +- Which algorithm should the master use? Or should the workers decide + by themselves? + + Round Robin is not an efficient algorithm when all tasks are not + processed at the same speed. It would probably be more efficient + if the workers were asking for tasks when ready. + +- Should tasks be grouped in batches or sent separately? + + The workers will starve if they don't get the tasks fast + enough. One possibility to reduce latency would be to send tasks + in pools instead of one by one. But if the pools are too big, the + load balancing will likely get uneven, in particular when + distributing the last tasks. + +- How does the quality of such an algorithm depend on the platform + characteristics and on the task characteristics? + + Whenever the input communication time is very small compared to + processing time and workers are homogeneous, it is likely that the + round-robin algorithm performs very well.
Would it still hold true + when transfer time is not negligible? What if some tasks are + performed faster on some specific nodes? + +- The network topology interconnecting the master and the workers + may be quite complicated. How does such a topology impact the + previous result? + + When data transfers are the bottleneck, it is likely that a good + modeling of the platform becomes essential. The SimGrid platform + models are particularly handy to account for complex platform + topologies. + +- What is the best applicative topology? + + Is a flat master worker deployment sufficient? Should we go for a + hierarchical algorithm, with some forwarders taking large pools of + tasks from the master, each of them distributing their tasks to a + sub-pool of workers? Or should we introduce super-peers, + duplicating the master's role in a peer-to-peer manner? Do the + algorithms require a perfect knowledge of the network? + +- How sensitive is such an algorithm to external workload variation? + + What if bandwidth, latency and computing speed can vary with no + warning? Shouldn't you study whether your algorithm is sensitive + to such load variations? + +- Although an algorithm may be more efficient than another, how does + it interfere with unrelated applications executing on the same + facilities? + +**SimGrid was invented to answer such questions.** Do not believe the +fools saying that all you need to study such settings is a simple +discrete event simulator. Do you really want to reinvent the wheel, +debug your own tool, optimize it and validate its models against real +settings for ages, or do you prefer to sit on the shoulders of a +giant? With SimGrid, you can focus on your algorithm. The whole +simulation mechanism is already working. + +Here is the visualization of a SimGrid simulation of two master worker +applications (one in light gray and the other in dark gray) running +concurrently and showing resource usage over a long period of time.
It +was obtained with the Triva software. + +.. image:: /images/tuto-masterworkers-result.png + :align: center + +Prerequisite +............ + +Before you proceed, you need to :ref:`install SimGrid `, a +C++ compiler and also ``pajeng`` to visualize the traces. The provided +code template requires cmake to compile. On Debian and Ubuntu for +example, you can get them as follows: + +.. code-block:: shell + + sudo apt install simgrid pajeng cmake g++ + +An initial version of the source code is provided on framagit. This +template compiles with cmake. If SimGrid is correctly installed, you +should be able to clone the `repository +<https://framagit.org/simgrid/simgrid-template-s4u>`_ and recompile +everything as follows: + +.. code-block:: shell + + git clone git@framagit.org:simgrid/simgrid-template-s4u.git + cd simgrid-template-s4u/ + cmake . + make + +If you struggle with the compilation, then you should double check +your :ref:`SimGrid installation `. If needed, please refer to +the :ref:`Troubleshooting your Project Setup +<install_yours_troubleshooting>` section. + +Discovering the Provided Code +............................. + +Please compile and execute the provided simulator as follows: + + +.. code-block:: shell + + make master-workers + ./master-workers small_platform.xml master-workers_d.xml + +For a more "fancy" output, you can use simgrid-colorizer. + +.. code-block:: shell + + ./master-workers small_platform.xml master-workers_d.xml 2>&1 | simgrid-colorizer + +If you installed SimGrid to a non-standard path, you may have to +specify the full path to simgrid-colorizer on the above line, such as +``/opt/simgrid/bin/simgrid-colorizer``. If you did not install it at all, +you can find it in /bin/colorize. + +.. todo:: + + Explain how to generate a Gantt-Chart with S4U and pajeng. + +Exercise 1: Simplifying the deployment file +...........................................
+ +In the provided example, the deployment file is tightly connected to +the platform file ``small_platform.xml`` and adding more workers +quickly becomes a pain: You need to start them (at the bottom of the +file), and to inform the master that they are available by increasing +the right parameter. + +Instead, modify the simulator ``master-workers.c`` into +``master-workers-exo1.c`` so that the master launches a worker process +on `all` the other machines at startup. The new deployment file should +be as simple as: + +.. code-block:: xml + + + + + + + + + + + +Creating the workers from the master +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For that, the master needs to retrieve the list of hosts declared in +the platform with :cpp:func:`simgrid::s4u::Engine::get_all_hosts()`. +Then, the master should start the worker processes with +:cpp:func:`simgrid::s4u::Actor::create`. + +``Actor::create(name, host, func, params...)`` is a very flexible +function. Its third parameter is the function that the actor should +execute. This function can take any kind of parameter, provided that +you pass similar parameters to ``Actor::create()``. For example, you +could have something like this: + +.. code-block:: cpp + + void my_actor(int param1, double param2, std::string param3) { + ... + } + int main(int argc, char** argv) { + ... + simgrid::s4u::ActorPtr actor; + actor = simgrid::s4u::Actor::create("name", simgrid::s4u::Host::by_name("the_host"), + &my_actor, 42, 3.14, "thevalue"); + ... + } + + +Master-Workers Communication +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, the workers got from their parameter the name of the +mailbox they should use. We can still do so: the master should build +such a parameter before using it in the ``Actor::create()`` call. The +master could even pass directly the mailbox as a parameter to the +workers.
+ +Since we want later to study concurrent applications, it is advised to +use a mailbox name that is unique over the simulation even if there is +more than one master. + +One possibility for that is to use the actor ID (aid) of each worker +as a mailbox name. The master can retrieve the aid of the newly +created actor with ``actor->get_pid()`` while the actor itself can +retrieve its own aid with ``simgrid::s4u::this_actor::get_pid()``. +The retrieved value is an ``aid_t``, which is an alias for ``long``. + +Instead of having one mailbox per worker, you could also reorganize +completely your application to have only one mailbox per master. All +the workers of a given master would pull their work from the same +mailbox, which should be passed as parameter to the workers. This +reduces the amount of mailboxes, but prevents the master from taking +any scheduling decision. It really depends on how you want to organize +your application and what you want to study with your simulator. + +Wrap up +^^^^^^^ + +In this exercise, we reduced the amount of configuration that our +simulator requests. This is both a good idea, and a dangerous +trend. This simplification is an application of the good old DRY/SPOT +programming principle (Don't Repeat Yourself / Single Point Of Truth +-- `more on wikipedia +`_), and you +really want your programming artefacts to follow these software +engineering principles. + +But at the same time, you should be careful in separating your +scientific contribution (the master/workers algorithm) and the +artefacts used to test it (platform, deployment and workload). This is +why SimGrid forces you to express your platform and deployment files +in XML instead of using a programming interface: it forces a clear +separation of concerns between things of very different nature. + + +.. 
LocalWords: SimGrid diff --git a/examples/msg/README.doc b/examples/msg/README.doc index 203c2495d2..53ea4d7f2a 100644 --- a/examples/msg/README.doc +++ b/examples/msg/README.doc @@ -1,5 +1,5 @@ -This file follows the Doxygen syntax to be included in the -documentation, but it should remain readable directly. +// This file follows the Doxygen syntax to be included in the +// documentation, but it should remain readable directly. /** @defgroup msg_examples MSG examples @@ -114,12 +114,12 @@ TODO: show the XML files instead if it's what is interesting. On a "XML example */ -As a human, you can stop reading at this point. The rest is garbage: - -Every example must be listed in the following, but it's not possible -to move this content upper as each @example directive seems to eat -everything until the next */ marker (and the content is placed at the -top of the example file). +// As a human, you can stop reading at this point. The rest is garbage: +// +// Every example must be listed in the following, but it's not possible +// to move this content upper as each @example directive seems to eat +// everything until the next */ marker (and the content is placed at the +// top of the example file). 
/** diff --git a/examples/s4u/CMakeLists.txt b/examples/s4u/CMakeLists.txt index 5c18f404c5..dae1364bf5 100644 --- a/examples/s4u/CMakeLists.txt +++ b/examples/s4u/CMakeLists.txt @@ -8,7 +8,7 @@ foreach (example actor-create actor-daemon actor-join actor-kill energy-exec energy-boot energy-link energy-vm engine-filtering exec-async exec-basic exec-dvfs exec-monitor exec-ptask exec-remote - io-file-system io-file-remote io-storage-raw + io-async io-file-system io-file-remote io-storage-raw mutex platform-failures platform-properties plugin-hostload replay-comm replay-storage @@ -28,10 +28,9 @@ foreach(variant fun class) target_link_libraries(s4u-app-masterworkers-${variant} simgrid) set_target_properties(s4u-app-masterworkers-${variant} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/app-masterworkers) - set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/masterworkers-fun/s4u-masterworkers-${variant}.cpp) + set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/app-masterworkers/s4u-app-masterworkers-${variant}.cpp) endforeach() -set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/masterworkers-fun/s4u-masterworkers.tesh) - +set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/app-masterworkers/s4u-app-masterworkers.tesh) # CHORD EXAMPLE add_executable (s4u-dht-chord dht-chord/s4u-dht-chord.cpp dht-chord/s4u-dht-chord-node.cpp) @@ -102,8 +101,13 @@ foreach(example actor-create actor-daemon actor-join actor-kill energy-exec energy-boot energy-link energy-vm engine-filtering exec-async exec-basic exec-dvfs exec-monitor exec-ptask exec-remote +<<<<<<< HEAD + platform-properties plugin-hostload mutex + io-async io-file-system io-file-remote io-storage-raw +======= platform-failures platform-properties plugin-hostload mutex io-file-system io-file-remote io-storage-raw +>>>>>>> 4ccbacb51eb49323847a906c3e79ea838d76e2a7 replay-comm replay-storage routing-get-clusters ) diff --git a/examples/s4u/README.doc b/examples/s4u/README.doc 
index 9b302e5cf5..3d564cadff 100644 --- a/examples/s4u/README.doc +++ b/examples/s4u/README.doc @@ -1,12 +1,12 @@ -S4U (Simgrid for you) is the next interface of SimGrid, expected to be released with SimGrid 4.0. - -Even if it is not completely rock stable yet, it may well already fit -your needs. You are welcome to try it and report any interface -glitches that you see. Be however warned that the interface may change -until the final release. You will have to adapt your code on the way. - -This file follows the Doxygen syntax to be included in the -documentation, but it should remain readable directly. +// S4U (Simgrid for you) is the next interface of SimGrid, expected to be released with SimGrid 4.0. +// +// Even if it is not completely rock stable yet, it may well already fit +// your needs. You are welcome to try it and report any interface +// glitches that you see. Be however warned that the interface may change +// until the final release. You will have to adapt your code on the way. +// +// This file follows the Doxygen syntax to be included in the +// documentation, but it should remain readable directly. /** @defgroup s4u_examples S4U examples @@ -286,9 +286,14 @@ than the previous examples. Shows how to implement a classical communication pattern, where a token is exchanged along a ring to reach every participant. - - Master Workers: @ref examples/s4u/app-masterworker/s4u-app-masterworker.cpp @n + - Master Workers: @ref examples/s4u/app-masterworkers/s4u-app-masterworkers-class.cpp + @ref examples/s4u/app-masterworkers/s4u-app-masterworkers-fun.cpp @n Another good old example, where one Master process has a bunch of task to dispatch to a set of several Worker - processes. + processes. 
This example comes in two equivalent variants, one + where the actors are specified as simple functions (which is easier to + understand for newcomers) and one where the actors are specified + as classes (which is more powerful for the users wanting to build + their own projects upon the example). @subsection s4u_ex_app_data Data diffusion @@ -324,7 +329,8 @@ than the previous examples. @example examples/s4u/async-waitany/s4u-async-waitany.cpp @example examples/s4u/app-bittorrent/s4u-bittorrent.cpp @example examples/s4u/app-chainsend/s4u-app-chainsend.cpp -@example examples/s4u/app-masterworker/s4u-app-masterworker.cpp +@example examples/s4u/app-masterworkers/s4u-app-masterworkers-class.cpp +@example examples/s4u/app-masterworkers/s4u-app-masterworkers-fun.cpp @example examples/s4u/app-pingpong/s4u-app-pingpong.cpp @example examples/s4u/app-token-ring/s4u-app-token-ring.cpp @example examples/s4u/dht-chord/s4u-dht-chord.cpp diff --git a/examples/s4u/app-masterworkers/s4u-app-masterworkers.tesh b/examples/s4u/app-masterworkers/s4u-app-masterworkers.tesh index a0f32d6956..206dd72563 100644 --- a/examples/s4u/app-masterworkers/s4u-app-masterworkers.tesh +++ b/examples/s4u/app-masterworkers/s4u-app-masterworkers.tesh @@ -63,4 +63,3 @@ $ $SG_TEST_EXENV ${bindir:=.}/s4u-app-masterworkers-fun$EXEEXT ${platfdir}/small > [ 4.965689] (worker@Ginette) Exiting now. > [ 5.133855] (maestro@) Simulation is over > [ 5.133855] (worker@Bourassa) Exiting now. 
- diff --git a/examples/s4u/app-masterworkers/s4u-app-masterworkers_d.xml b/examples/s4u/app-masterworkers/s4u-app-masterworkers_d.xml index 3d93a78b7e..29fddcaef6 100644 --- a/examples/s4u/app-masterworkers/s4u-app-masterworkers_d.xml +++ b/examples/s4u/app-masterworkers/s4u-app-masterworkers_d.xml @@ -9,19 +9,19 @@ - + - + - + - + - + diff --git a/examples/s4u/io-async/s4u-io-async.cpp b/examples/s4u/io-async/s4u-io-async.cpp new file mode 100644 index 0000000000..7b40346f80 --- /dev/null +++ b/examples/s4u/io-async/s4u-io-async.cpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2007-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "simgrid/s4u.hpp" + +XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_test, "Messages specific for this s4u example"); + +static void test(sg_size_t size) +{ + simgrid::s4u::Storage* storage = simgrid::s4u::Storage::by_name("Disk1"); + XBT_INFO("Hello! read %llu bytes from Storage %s", size, storage->get_cname()); + + simgrid::s4u::IoPtr activity = storage->io_init(size, simgrid::s4u::Io::OpType::READ); + activity->start(); + activity->wait(); + + XBT_INFO("Goodbye now!"); +} + +static void test_cancel(sg_size_t size) +{ + simgrid::s4u::Storage* storage = simgrid::s4u::Storage::by_name("Disk2"); + XBT_INFO("Hello! 
write %llu bytes from Storage %s", size, storage->get_cname()); + + simgrid::s4u::IoPtr activity = storage->write_async(size); + simgrid::s4u::this_actor::sleep_for(0.5); + XBT_INFO("I changed my mind, cancel!"); + activity->cancel(); + + XBT_INFO("Goodbye now!"); +} + +int main(int argc, char* argv[]) +{ + simgrid::s4u::Engine e(&argc, argv); + e.load_platform(argv[1]); + simgrid::s4u::Actor::create("test", simgrid::s4u::Host::by_name("bob"), test, 2e7); + simgrid::s4u::Actor::create("test_cancel", simgrid::s4u::Host::by_name("alice"), test_cancel, 5e7); + + e.run(); + + XBT_INFO("Simulation time %g", e.get_clock()); + + return 0; +} diff --git a/examples/s4u/io-async/s4u-io-async.tesh b/examples/s4u/io-async/s4u-io-async.tesh new file mode 100644 index 0000000000..3a32addb26 --- /dev/null +++ b/examples/s4u/io-async/s4u-io-async.tesh @@ -0,0 +1,9 @@ +#!/usr/bin/env tesh + +$ $SG_TEST_EXENV ${bindir:=.}/s4u-io-async$EXEEXT ${platfdir}/storage/storage.xml "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n" +> [ 0.000000] (1:test@bob) Hello! read 20000000 bytes from Storage Disk1 +> [ 0.000000] (2:test_cancel@alice) Hello! write 50000000 bytes from Storage Disk2 +> [ 0.200000] (1:test@bob) Goodbye now! +> [ 0.500000] (2:test_cancel@alice) I changed my mind, cancel! +> [ 0.500000] (2:test_cancel@alice) Goodbye now! 
+> [ 0.500000] (0:maestro@) Simulation time 0.5 diff --git a/examples/s4u/platform-properties/s4u-platform-properties.cpp b/examples/s4u/platform-properties/s4u-platform-properties.cpp index d35bf9f5b1..085464e95a 100644 --- a/examples/s4u/platform-properties/s4u-platform-properties.cpp +++ b/examples/s4u/platform-properties/s4u-platform-properties.cpp @@ -109,7 +109,7 @@ int main(int argc, char* argv[]) e.register_function("carole", carole); e.register_function("david", david); - size_t totalHosts = sg_host_count(); + size_t totalHosts = e.get_host_count(); XBT_INFO("There are %zu hosts in the environment", totalHosts); std::vector hosts = e.get_all_hosts(); diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_sr.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_sr.tesh deleted file mode 100644 index 29fc5b275e..0000000000 --- a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_sr.tesh +++ /dev/null @@ -1,25 +0,0 @@ -p Workload with two mixed jobs (not at the same time, but on the same resources) -! timeout 120 -! 
output sort 19 -$ ./replay_multiple_manual ${srcdir:=.}/../../platforms/small_platform_with_routers.xml ${srcdir:=.}/workload_mixed2_same_resources --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 0 0 -> [ 0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100' -> [ 0.000000] (maestro@) Job read: app='job0', file='mixed.txt', size=2, start=0, alloc='0,1' -> [ 0.000000] (maestro@) Job read: app='job1', file='mixed.txt', size=2, start=1000, alloc='0,1' -> [ 0.000000] (workload_executor@Bourassa) Launching the job executor of job 0 (app 'job0') -> [ 0.000000] (job_job0@Bourassa) Executing job 0 (smpi_app 'job0') -> [ 0.000000] (workload_executor@Bourassa) Sleeping 1000 seconds (waiting for job 1000, app 'job1') -> [ 0.000000] (0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'job0') -> [ 0.000000] (0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'job0') -> [ 737.001374] (0_0@Bourassa) Simulation time 737.001374 -> [ 737.001374] (0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0') -> [ 737.001374] (0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0') -> [ 737.001374] (job_job0@Bourassa) Finished job 0 (smpi_app 'job0') -> [1000.000000] (workload_executor@Bourassa) Launching the job executor of job 1 (app 'job1') -> [1000.000000] (job_job1@Bourassa) Executing job 1 (smpi_app 'job1') -> [1000.000000] (1_0@Bourassa) Replaying rank 0 of job 1 (smpi_app 'job1') -> [1000.000000] (1_1@Fafard) Replaying rank 1 of job 1 (smpi_app 'job1') -> [1737.001374] (1_0@Bourassa) Simulation time 737.001374 -> [1737.001374] (1_0@Bourassa) Finished replaying rank 0 of job 1 (smpi_app 'job1') -> [1737.001374] (1_1@Fafard) Finished replaying rank 1 of job 1 (smpi_app 'job1') -> [1737.001374] (job_job1@Bourassa) Finished job 1 (smpi_app 'job1') -> [1737.001374] (maestro@) Simulation finished! 
Final time: 1737 diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_sr_noise.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_sr_noise.tesh deleted file mode 100644 index 689369168d..0000000000 --- a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_mixed2_sr_noise.tesh +++ /dev/null @@ -1,25 +0,0 @@ -p Workload with two mixed jobs (not at the same time, but on the same resources) -! timeout 120 -! output sort 19 -$ ./replay_multiple_manual ${srcdir:=.}/../../platforms/small_platform_with_routers.xml ${srcdir:=.}/workload_mixed2_same_resources --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 7 13 -> [ 0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100' -> [ 0.000000] (maestro@) Job read: app='job0', file='mixed.txt', size=2, start=0, alloc='0,1' -> [ 0.000000] (maestro@) Job read: app='job1', file='mixed.txt', size=2, start=1000, alloc='0,1' -> [ 0.000000] (workload_executor@Bourassa) Launching the job executor of job 0 (app 'job0') -> [ 0.000000] (job_job0@Bourassa) Executing job 0 (smpi_app 'job0') -> [ 0.000000] (workload_executor@Bourassa) Sleeping 1000 seconds (waiting for job 1000, app 'job1') -> [ 0.000000] (0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'job0') -> [ 0.000000] (0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'job0') -> [ 737.001374] (0_0@Bourassa) Simulation time 737.001374 -> [ 737.001374] (0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0') -> [ 737.001374] (0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0') -> [ 737.001374] (job_job0@Bourassa) Finished job 0 (smpi_app 'job0') -> [1000.000000] (workload_executor@Bourassa) Launching the job executor of job 1 (app 'job1') -> [1000.000000] (job_job1@Bourassa) Executing job 1 (smpi_app 'job1') -> [1000.000000] (1_0@Bourassa) Replaying rank 0 of job 1 (smpi_app 'job1') -> [1000.000000] (1_1@Fafard) Replaying rank 1 of job 1 
(smpi_app 'job1') -> [1737.001374] (1_0@Bourassa) Simulation time 737.001374 -> [1737.001374] (1_0@Bourassa) Finished replaying rank 0 of job 1 (smpi_app 'job1') -> [1737.001374] (1_1@Fafard) Finished replaying rank 1 of job 1 (smpi_app 'job1') -> [1737.001374] (job_job1@Bourassa) Finished job 1 (smpi_app 'job1') -> [1737.001374] (maestro@) Simulation finished! Final time: 1737 diff --git a/examples/smpi/replay_multiple_manual_deploy/workload_mixed2_same_resources b/examples/smpi/replay_multiple_manual_deploy/workload_mixed2_same_resources deleted file mode 100644 index f36085a1b2..0000000000 --- a/examples/smpi/replay_multiple_manual_deploy/workload_mixed2_same_resources +++ /dev/null @@ -1,3 +0,0 @@ -Two jobs not at the same time but using the same resources -job0 mixed.txt 2 0 0,1 -job1 mixed.txt 2 1000 0,1 diff --git a/include/simgrid/s4u/Activity.hpp b/include/simgrid/s4u/Activity.hpp index 615f7bb3ba..6cd6276393 100644 --- a/include/simgrid/s4u/Activity.hpp +++ b/include/simgrid/s4u/Activity.hpp @@ -27,6 +27,7 @@ namespace s4u { * - Synchronization activities may possibly be connected to no action. */ class XBT_PUBLIC Activity { +#ifndef DOXYGEN friend Comm; friend XBT_PUBLIC void intrusive_ptr_release(Comm * c); friend XBT_PUBLIC void intrusive_ptr_add_ref(Comm * c); @@ -36,6 +37,7 @@ class XBT_PUBLIC Activity { friend Io; friend XBT_PUBLIC void intrusive_ptr_release(Io* i); friend XBT_PUBLIC void intrusive_ptr_add_ref(Io* i); +#endif protected: Activity() = default; diff --git a/include/simgrid/s4u/Actor.hpp b/include/simgrid/s4u/Actor.hpp index 09e3df6803..cf5f0f10ef 100644 --- a/include/simgrid/s4u/Actor.hpp +++ b/include/simgrid/s4u/Actor.hpp @@ -18,7 +18,7 @@ namespace simgrid { namespace s4u { -/** @ingroup s4u_api +/** * * An actor is an independent stream of execution in your distributed application. 
* @@ -121,10 +121,12 @@ namespace s4u { /** @brief Simulation Agent */ class XBT_PUBLIC Actor : public simgrid::xbt::Extendable { +#ifndef DOXYGEN friend Exec; friend Mailbox; friend simgrid::kernel::actor::ActorImpl; friend simgrid::kernel::activity::MailboxImpl; +#endif kernel::actor::ActorImpl* pimpl_ = nullptr; /** Wrap a (possibly non-copyable) single-use task into a `std::function` */ @@ -169,12 +171,16 @@ public: /** Signal indicating that the given actor is about to disappear */ static simgrid::xbt::signal on_destruction; - /** Create an actor using a function + /** Create an actor from a std::function * * If the actor is restarted, the actor has a fresh copy of the function. */ static ActorPtr create(std::string name, s4u::Host* host, std::function code); + /** Create an actor from a std::function + * + * If the actor is restarted, the actor has a fresh copy of the function. + */ static ActorPtr create(std::string name, s4u::Host* host, std::function*)> code, std::vector* args) { @@ -245,6 +251,15 @@ public: /** Retrieves the time at which that actor will be killed (or -1 if not set) */ double get_kill_time(); + /** @brief Moves the actor to another host + * + * If the actor is currently blocked on an execution activity, the activity is also + * migrated to the new host. If it's blocked on another kind of activity, an error is + * raised as the mandated code is not written yet. Please report that bug if you need it. + * + * Asynchronous activities started by the actor are not migrated automatically, so you have + * to take care of this yourself (only you knows which ones should be migrated). + */ void migrate(Host * new_host); /** Ask the actor to die. @@ -262,7 +277,7 @@ public: /** Retrieves the actor that have the given PID (or nullptr if not existing) */ static ActorPtr by_pid(aid_t pid); - /** @brief Wait for the actor to finish. + /** Wait for the actor to finish. 
* * This blocks the calling actor until the actor on which we call join() is terminated */ @@ -282,6 +297,7 @@ public: const char* get_property(std::string key); void set_property(std::string key, std::string value); +#ifndef DOXYGEN /** @deprecated See Actor::create() */ XBT_ATTRIB_DEPRECATED_v323("Please use Actor::create()") static ActorPtr createActor( const char* name, s4u::Host* host, std::function code) @@ -373,6 +389,7 @@ public: { set_property(key, value); } +#endif }; /** @ingroup s4u_api @@ -439,6 +456,7 @@ XBT_PUBLIC bool is_suspended(); /** @brief kill the actor. */ XBT_PUBLIC void exit(); +#ifndef DOXYGEN /** @deprecated Please use std::function for first parameter */ XBT_ATTRIB_DEPRECATED_v323("Please use std::function for first parameter.") XBT_PUBLIC void on_exit(int_f_pvoid_pvoid_t fun, void* data); @@ -466,6 +484,7 @@ XBT_ATTRIB_DEPRECATED_v323("Please use this_actor::is_suspended()") XBT_PUBLIC b XBT_ATTRIB_DEPRECATED_v323("Please use this_actor::on_exit()") XBT_PUBLIC void onExit(int_f_pvoid_pvoid_t fun, void* data); /** @deprecated See this_actor::exit() */ XBT_ATTRIB_DEPRECATED_v324("Please use this_actor::exit()") XBT_PUBLIC void kill(); +#endif } /** @} */ diff --git a/include/simgrid/s4u/ConditionVariable.hpp b/include/simgrid/s4u/ConditionVariable.hpp index 06037981ee..e2f5cce6c1 100644 --- a/include/simgrid/s4u/ConditionVariable.hpp +++ b/include/simgrid/s4u/ConditionVariable.hpp @@ -23,8 +23,10 @@ namespace s4u { */ class XBT_PUBLIC ConditionVariable { private: +#ifndef DOXYGEN friend kernel::activity::ConditionVariableImpl; smx_cond_t cond_; +#endif explicit ConditionVariable(smx_cond_t cond) : cond_(cond) {} public: ConditionVariable(ConditionVariable const&) = delete; diff --git a/include/simgrid/s4u/Engine.hpp b/include/simgrid/s4u/Engine.hpp index fe486f403a..6bb6e896ca 100644 --- a/include/simgrid/s4u/Engine.hpp +++ b/include/simgrid/s4u/Engine.hpp @@ -84,23 +84,26 @@ public: void load_deployment(std::string deploy); protected: - 
friend s4u::Host; - friend s4u::Link; - friend s4u::Storage; +#ifndef DOXYGEN + friend Host; + friend Link; + friend Storage; friend kernel::routing::NetPoint; friend kernel::routing::NetZoneImpl; friend kernel::resource::LinkImpl; - void host_register(std::string name, simgrid::s4u::Host* host); + void host_register(std::string name, Host* host); void host_unregister(std::string name); - void link_register(std::string name, simgrid::s4u::Link* link); + void link_register(std::string name, Link* link); void link_unregister(std::string name); - void storage_register(std::string name, simgrid::s4u::Storage* storage); + void storage_register(std::string name, Storage* storage); void storage_unregister(std::string name); void netpoint_register(simgrid::kernel::routing::NetPoint* card); void netpoint_unregister(simgrid::kernel::routing::NetPoint* card); +#endif /*DOXYGEN*/ public: size_t get_host_count(); + /** @brief Returns the list of all hosts found in the platform */ std::vector get_all_hosts(); std::vector get_filtered_hosts(std::function filter); simgrid::s4u::Host* host_by_name(std::string name); diff --git a/include/simgrid/s4u/Exec.hpp b/include/simgrid/s4u/Exec.hpp index b47ac41e9e..26af5d02e4 100644 --- a/include/simgrid/s4u/Exec.hpp +++ b/include/simgrid/s4u/Exec.hpp @@ -14,6 +14,11 @@ namespace simgrid { namespace s4u { +/** Computation #Activity, representing the asynchronous disk access. + * + * They are generated from simgrid::s4u::this_actor::exec_init() or simgrid::s4u::Host::execute(). + */ + class XBT_PUBLIC Exec : public Activity { Exec() : Activity() {} public: diff --git a/include/simgrid/s4u/Host.hpp b/include/simgrid/s4u/Host.hpp index 4098162236..26568e8b1d 100644 --- a/include/simgrid/s4u/Host.hpp +++ b/include/simgrid/s4u/Host.hpp @@ -35,8 +35,10 @@ namespace s4u { * and actors can retrieve the host on which they run using simgrid::s4u::Host::current(). 
*/ class XBT_PUBLIC Host : public simgrid::xbt::Extendable { +#ifndef DOXYGEN friend simgrid::vm::VMModel; // Use the pimpl_cpu to compute the VM sharing friend simgrid::vm::VirtualMachineImpl; // creates the the pimpl_cpu +#endif public: explicit Host(std::string name); @@ -95,6 +97,8 @@ public: const char* get_property(std::string key) const; void set_property(std::string key, std::string value); std::unordered_map* get_properties(); + +#ifndef DOXYGEN /** @deprecated See Host::get_properties() */ XBT_ATTRIB_DEPRECATED_v323("Please use Host::get_properties()") std::map* getProperties() { @@ -104,6 +108,7 @@ public: res->insert(kv); return res; } +#endif double get_speed() const; double get_available_speed() const; @@ -115,6 +120,7 @@ public: void set_pstate(int pstate_index); int get_pstate() const; +#ifndef DOXYGEN /** @deprecated See Host::get_speed() */ XBT_ATTRIB_DEPRECATED_v323("Please use Host::get_speed() instead.") double getSpeed() { return get_speed(); } /** @deprecated See Host::get_pstate_speed() */ @@ -122,6 +128,7 @@ public: { return get_pstate_speed(pstate_index); } +#endif std::vector get_attached_storages() const; XBT_ATTRIB_DEPRECATED_v323("Please use Host::get_attached_storages() instead.") void getAttachedStorages( @@ -151,6 +158,7 @@ public: void execute(double flops, double priority); // Deprecated functions +#ifndef DOXYGEN /** @deprecated See Host::get_name() */ XBT_ATTRIB_DEPRECATED_v323("Please use Host::get_name()") simgrid::xbt::string const& getName() const { @@ -203,6 +211,7 @@ public: { return get_pstate_count(); } +#endif /* !DOXYGEN */ private: simgrid::xbt::string name_ {"noname"}; diff --git a/include/simgrid/s4u/Io.hpp b/include/simgrid/s4u/Io.hpp index 17fd1f2605..72b5ea2296 100644 --- a/include/simgrid/s4u/Io.hpp +++ b/include/simgrid/s4u/Io.hpp @@ -15,6 +15,11 @@ namespace simgrid { namespace s4u { +/** I/O Activity, representing the asynchronous disk access. 
+ * + * They are generated from simgrid::s4u::Storage::read() and simgrid::s4u::Storage::write(). + */ + class XBT_PUBLIC Io : public Activity { public: enum class OpType { READ, WRITE }; diff --git a/include/simgrid/s4u/Link.hpp b/include/simgrid/s4u/Link.hpp index bd4a84bac9..c6dd56d3d1 100644 --- a/include/simgrid/s4u/Link.hpp +++ b/include/simgrid/s4u/Link.hpp @@ -22,7 +22,9 @@ namespace simgrid { namespace s4u { /** @brief A Link represents the network facilities between [hosts](@ref simgrid::s4u::Host) */ class XBT_PUBLIC Link : public simgrid::xbt::Extendable { +#ifndef DOXYGEN friend simgrid::kernel::resource::LinkImpl; +#endif // Links are created from the NetZone, and destroyed by their private implementation when the simulation ends explicit Link(kernel::resource::LinkImpl* pimpl) : pimpl_(pimpl) {} diff --git a/include/simgrid/s4u/Mailbox.hpp b/include/simgrid/s4u/Mailbox.hpp index 8b4267a86f..9e41dc1f63 100644 --- a/include/simgrid/s4u/Mailbox.hpp +++ b/include/simgrid/s4u/Mailbox.hpp @@ -102,8 +102,10 @@ namespace s4u { * @section s4u_mb_api The API */ class XBT_PUBLIC Mailbox { +#ifndef DOXYGEN friend Comm; friend simgrid::kernel::activity::MailboxImpl; +#endif simgrid::kernel::activity::MailboxImpl* pimpl_; diff --git a/include/simgrid/s4u/Mutex.hpp b/include/simgrid/s4u/Mutex.hpp index b4df2e8b42..ae785f4841 100644 --- a/include/simgrid/s4u/Mutex.hpp +++ b/include/simgrid/s4u/Mutex.hpp @@ -29,8 +29,10 @@ class ConditionVariable; * */ class XBT_PUBLIC Mutex { +#ifndef DOXYGEN friend ConditionVariable; friend simgrid::kernel::activity::MutexImpl; +#endif simgrid::kernel::activity::MutexImpl* pimpl_; explicit Mutex(simgrid::kernel::activity::MutexImpl* mutex) : pimpl_(mutex) {} diff --git a/include/simgrid/s4u/NetZone.hpp b/include/simgrid/s4u/NetZone.hpp index 8156699c38..ab145b9de4 100644 --- a/include/simgrid/s4u/NetZone.hpp +++ b/include/simgrid/s4u/NetZone.hpp @@ -25,7 +25,9 @@ namespace s4u { */ class XBT_PUBLIC NetZone { protected: +#ifndef 
DOXYGEN friend simgrid::kernel::routing::NetZoneImpl; +#endif explicit NetZone(kernel::routing::NetZoneImpl* impl); ~NetZone(); diff --git a/include/simgrid/s4u/Storage.hpp b/include/simgrid/s4u/Storage.hpp index 956a664e6d..85005c1259 100644 --- a/include/simgrid/s4u/Storage.hpp +++ b/include/simgrid/s4u/Storage.hpp @@ -17,18 +17,21 @@ #include namespace simgrid { -namespace xbt { -extern template class XBT_PUBLIC Extendable; -} namespace s4u { +#ifndef DOXYGEN /** @deprecated Engine::get_all_storages() */ XBT_ATTRIB_DEPRECATED_v322("Please use Engine::get_all_storages()") XBT_PUBLIC void getStorageList(std::map* whereTo); +#endif + +/** Storage represent the disk resources, usually associated to a given host */ class XBT_PUBLIC Storage : public simgrid::xbt::Extendable { +#ifndef DOXYGEN friend s4u::Engine; friend s4u::Io; friend simgrid::surf::StorageImpl; +#endif /* DOXYGEN */ public: explicit Storage(std::string name, surf::StorageImpl * pimpl); @@ -65,7 +68,10 @@ public: IoPtr io_init(sg_size_t size, s4u::Io::OpType type); + IoPtr read_async(sg_size_t size); sg_size_t read(sg_size_t size); + + IoPtr write_async(sg_size_t size); sg_size_t write(sg_size_t size); surf::StorageImpl* get_impl() { return pimpl_; } diff --git a/src/instr/instr_smpi.hpp b/src/instr/instr_smpi.hpp index 4a7130a3c2..3e94c72b31 100644 --- a/src/instr/instr_smpi.hpp +++ b/src/instr/instr_smpi.hpp @@ -29,8 +29,6 @@ XBT_PRIVATE void TRACE_smpi_recv(int src, int dst, int tag); XBT_PRIVATE void TRACE_smpi_init(int rank); XBT_PRIVATE void TRACE_smpi_finalize(int rank); /* SMPI + LB (load balancer) */ -XBT_PRIVATE void TRACE_smpi_send_process_data_in(int rank); -XBT_PRIVATE void TRACE_smpi_send_process_data_out(int rank); XBT_PRIVATE void TRACE_smpi_process_change_host(int rank, sg_host_t new_host); class smpi_trace_call_location_t { diff --git a/src/mc/sosp/PageStore.cpp b/src/mc/sosp/PageStore.cpp index f0191c4889..e9a7fc16aa 100644 --- a/src/mc/sosp/PageStore.cpp +++ 
b/src/mc/sosp/PageStore.cpp @@ -1,5 +1,4 @@ -/* Copyright (c) 2015-2018. The SimGrid Team. - * All rights reserved. */ +/* Copyright (c) 2015-2018. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ @@ -49,7 +48,7 @@ static XBT_ALWAYS_INLINE PageStore::hash_type mc_hash_page(const void* data) // ***** snapshot_page_manager -PageStore::PageStore(size_t size) : memory_(nullptr), capacity_(size), top_index_(0) +PageStore::PageStore(std::size_t size) : memory_(nullptr), capacity_(size), top_index_(0) { // Using mmap in order to be able to expand the region by relocating it somewhere else in the virtual memory space: void* memory = diff --git a/src/mc/sosp/mc_snapshot.cpp b/src/mc/sosp/mc_snapshot.cpp index a17e3fa974..68e80aa6dc 100644 --- a/src/mc/sosp/mc_snapshot.cpp +++ b/src/mc/sosp/mc_snapshot.cpp @@ -23,8 +23,7 @@ * * @param addr Pointer * @param snapshot Snapshot - * @param Snapshot region in the snapshot this pointer belongs to - * (or nullptr if it does not belong to any snapshot region) + * @param process_index rank requesting the region * */ mc_mem_region_t mc_get_snapshot_region(const void* addr, const simgrid::mc::Snapshot* snapshot, int process_index) { @@ -102,11 +101,11 @@ const void* MC_region_read_fragmented(mc_mem_region_t region, void* target, cons /** Compare memory between snapshots (with known regions) * * @param addr1 Address in the first snapshot - * @param snapshot2 Region of the address in the first snapshot + * @param region1 Region of the address in the first snapshot * @param addr2 Address in the second snapshot - * @param snapshot2 Region of the address in the second snapshot - * @return same as memcmp - * */ + * @param region2 Region of the address in the second snapshot + * @return same semantic as memcmp + */ int MC_snapshot_region_memcmp(const void* addr1, mc_mem_region_t region1, const void* 
addr2, mc_mem_region_t region2, size_t size) { diff --git a/src/plugins/host_dvfs.cpp b/src/plugins/host_dvfs.cpp index 0a0e2da83b..4cb1f04cf1 100644 --- a/src/plugins/host_dvfs.cpp +++ b/src/plugins/host_dvfs.cpp @@ -5,7 +5,10 @@ #include "simgrid/plugins/dvfs.h" #include "simgrid/plugins/load.h" +#include "simgrid/s4u/Engine.hpp" +#include "src/kernel/activity/ExecImpl.hpp" #include "src/plugins/vm/VirtualMachineImpl.hpp" +#include "src/smpi/plugins/ampi/ampi.hpp" #include #include @@ -20,13 +23,23 @@ static simgrid::config::Flag cfg_governor("plugin/dvfs/governor", "Which Governor should be used that adapts the CPU frequency?", "performance", std::map({ + {"adagio", "TODO: Doc"}, {"conservative", "TODO: Doc"}, {"ondemand", "TODO: Doc"}, {"performance", "TODO: Doc"}, {"powersave", "TODO: Doc"}, }), - [](std::string val){if (val != "performance") sg_host_dvfs_plugin_init();}); + [](std::string val) { if (val != "performance") sg_host_dvfs_plugin_init(); }); + +static simgrid::config::Flag cfg_min_pstate("plugin/dvfs/min-pstate", {"plugin/dvfs/min_pstate"}, + "Which pstate is the minimum (and hence fastest) pstate for this governor?", 0, + [](int index) {}); + +static const int max_pstate_not_limited = -1; +static simgrid::config::Flag cfg_max_pstate("plugin/dvfs/max-pstate", {"plugin/dvfs/max_pstate"}, + "Which pstate is the maximum (and hence slowest) pstate for this governor?", max_pstate_not_limited, + [](int index) {}); /** @addtogroup SURF_plugin_load @@ -61,30 +74,50 @@ namespace dvfs { */ class Governor { -protected: +private: simgrid::s4u::Host* const host_; double sampling_rate_; + int min_pstate; //< Never use a pstate less than this one + int max_pstate; //< Never use a pstate larger than this one public: - - explicit Governor(simgrid::s4u::Host* ptr) : host_(ptr) { init(); } + explicit Governor(simgrid::s4u::Host* ptr) + : host_(ptr) + , min_pstate(cfg_min_pstate) + , max_pstate(cfg_max_pstate == max_pstate_not_limited ? 
host_->get_pstate_count() - 1 : cfg_max_pstate) + { + init(); + } virtual ~Governor() = default; - virtual std::string get_name() = 0; + virtual std::string get_name() const = 0; simgrid::s4u::Host* get_host() const { return host_; } + int get_min_pstate() const { return min_pstate; } + int get_max_pstate() const { return max_pstate; } void init() { const char* local_sampling_rate_config = host_->get_property(cfg_sampling_rate.get_name()); - double global_sampling_rate_config = cfg_sampling_rate; if (local_sampling_rate_config != nullptr) { sampling_rate_ = std::stod(local_sampling_rate_config); } else { - sampling_rate_ = global_sampling_rate_config; + sampling_rate_ = cfg_sampling_rate; + } + const char* local_min_pstate_config = host_->get_property(cfg_min_pstate.get_name()); + if (local_min_pstate_config != nullptr) { + min_pstate = std::stoi(local_min_pstate_config); + } + + const char* local_max_pstate_config = host_->get_property(cfg_max_pstate.get_name()); + if (local_max_pstate_config != nullptr) { + max_pstate = std::stod(local_max_pstate_config); } + xbt_assert(max_pstate <= host_->get_pstate_count() - 1, "Value for max_pstate too large!"); + xbt_assert(min_pstate <= max_pstate, "min_pstate is larger than max_pstate!"); + xbt_assert(0 <= min_pstate, "min_pstate is negative!"); } virtual void update() = 0; - double get_sampling_rate() { return sampling_rate_; } + double get_sampling_rate() const { return sampling_rate_; } }; /** @@ -100,9 +133,9 @@ public: class Performance : public Governor { public: explicit Performance(simgrid::s4u::Host* ptr) : Governor(ptr) {} - std::string get_name() override { return "Performance"; } + std::string get_name() const override { return "Performance"; } - void update() override { get_host()->set_pstate(0); } + void update() override { get_host()->set_pstate(get_min_pstate()); } }; /** @@ -118,9 +151,9 @@ public: class Powersave : public Governor { public: explicit Powersave(simgrid::s4u::Host* ptr) : Governor(ptr) {} - 
std::string get_name() override { return "Powersave"; } + std::string get_name() const override { return "Powersave"; } - void update() override { get_host()->set_pstate(get_host()->get_pstate_count() - 1); } + void update() override { get_host()->set_pstate(get_max_pstate()); } }; /** @@ -141,7 +174,7 @@ class OnDemand : public Governor { public: explicit OnDemand(simgrid::s4u::Host* ptr) : Governor(ptr) {} - std::string get_name() override { return "OnDemand"; } + std::string get_name() const override { return "OnDemand"; } void update() override { @@ -149,8 +182,8 @@ public: sg_host_load_reset(get_host()); // Only consider the period between two calls to this method! if (load > freq_up_threshold_) { - get_host()->set_pstate(0); /* Run at max. performance! */ - XBT_INFO("Load: %f > threshold: %f --> changed to pstate %i", load, freq_up_threshold_, 0); + get_host()->set_pstate(get_min_pstate()); /* Run at max. performance! */ + XBT_INFO("Load: %f > threshold: %f --> changed to pstate %i", load, freq_up_threshold_, get_min_pstate()); } else { /* The actual implementation uses a formula here: (See Kernel file cpufreq_ondemand.c:158) * @@ -159,10 +192,11 @@ public: * So they assume that frequency increases by 100 MHz. 
We will just use * lowest_pstate - load*pstatesCount() */ - int max_pstate = get_host()->get_pstate_count() - 1; // Load is now < freq_up_threshold; exclude pstate 0 (the fastest) // because pstate 0 can only be selected if load > freq_up_threshold_ - int new_pstate = max_pstate - load * (max_pstate + 1); + int new_pstate = get_max_pstate() - load * (get_max_pstate() + 1); + if (new_pstate < get_min_pstate()) + new_pstate = get_min_pstate(); get_host()->set_pstate(new_pstate); XBT_DEBUG("Load: %f < threshold: %f --> changed to pstate %i", load, freq_up_threshold_, new_pstate); @@ -188,7 +222,7 @@ class Conservative : public Governor { public: explicit Conservative(simgrid::s4u::Host* ptr) : Governor(ptr) {} - virtual std::string get_name() override { return "Conservative"; } + virtual std::string get_name() const override { return "Conservative"; } virtual void update() override { @@ -197,7 +231,7 @@ public: sg_host_load_reset(get_host()); // Only consider the period between two calls to this method! if (load > freq_up_threshold_) { - if (pstate != 0) { + if (pstate != get_min_pstate()) { get_host()->set_pstate(pstate - 1); XBT_INFO("Load: %f > threshold: %f -> increasing performance to pstate %d", load, freq_up_threshold_, pstate - 1); @@ -206,8 +240,7 @@ public: freq_up_threshold_, pstate); } } else if (load < freq_down_threshold_) { - int max_pstate = get_host()->get_pstate_count() - 1; - if (pstate != max_pstate) { // Are we in the slowest pstate already? + if (pstate != get_max_pstate()) { // Are we in the slowest pstate already? 
get_host()->set_pstate(pstate + 1); XBT_INFO("Load: %f < threshold: %f -> slowing down to pstate %d", load, freq_down_threshold_, pstate + 1); } else { @@ -218,6 +251,101 @@ public: } }; +class Adagio : public Governor { +private: + int best_pstate = 0; + double start_time = 0; + double comp_counter = 0; + double comp_timer = 0; + + std::vector> rates; + + unsigned int task_id = 0; + bool iteration_running = false; /*< Are we currently between iteration_in and iteration_out calls? */ + +public: + explicit Adagio(simgrid::s4u::Host* ptr) + : Governor(ptr), rates(100, std::vector(ptr->get_pstate_count(), 0.0)) + { + simgrid::smpi::plugin::ampi::on_iteration_in.connect([this](simgrid::s4u::ActorPtr actor) { + // Every instance of this class subscribes to this event, so one per host + // This means that for any actor, all 'hosts' are normally notified of these + // changes, even those who don't currently run the actor 'proc_id'. + // -> Let's check if this signal call is for us! + if (get_host() == actor->get_host()) { + iteration_running = true; + } + }); + simgrid::smpi::plugin::ampi::on_iteration_out.connect([this](simgrid::s4u::ActorPtr actor) { + if (get_host() == actor->get_host()) { + iteration_running = false; + task_id = 0; + } + }); + simgrid::kernel::activity::ExecImpl::on_creation.connect([this](simgrid::kernel::activity::ExecImplPtr activity) { + if (activity->host_ == get_host()) + pre_task(); + }); + simgrid::kernel::activity::ExecImpl::on_completion.connect([this](simgrid::kernel::activity::ExecImplPtr activity) { + // For more than one host (not yet supported), we can access the host via + // simcalls_.front()->issuer->iface()->get_host() + if (activity->host_ == get_host() && iteration_running) { + comp_timer += activity->surf_action_->get_finish_time() - activity->surf_action_->get_start_time(); + } + }); + simgrid::s4u::Link::on_communicate.connect( + [this](kernel::resource::NetworkAction* action, s4u::Host* src, s4u::Host* dst) { + if ((get_host() 
== src || get_host() == dst) && iteration_running) { + post_task(); + } + }); + } + + virtual std::string get_name() const override { return "Adagio"; } + + void pre_task() + { + sg_host_load_reset(get_host()); + comp_counter = sg_host_get_computed_flops(get_host()); // Should be 0 because of the reset + comp_timer = 0; + start_time = simgrid::s4u::Engine::get_clock(); + if (rates.size() <= task_id) + rates.resize(task_id + 5, std::vector(get_host()->get_pstate_count(), 0.0)); + if (rates[task_id][best_pstate] == 0) + best_pstate = 0; + get_host()->set_pstate(best_pstate); // Load our schedule + XBT_DEBUG("Set pstate to %i", best_pstate); + } + + void post_task() + { + double computed_flops = sg_host_get_computed_flops(get_host()) - comp_counter; + double target_time = (simgrid::s4u::Engine::get_clock() - start_time); + target_time = + target_time * + static_cast(99.0 / 100.0); // FIXME We account for t_copy arbitrarily with 1% -- this needs to be fixed + + bool is_initialized = rates[task_id][best_pstate] != 0; + rates[task_id][best_pstate] = computed_flops / comp_timer; + if (not is_initialized) { + for (int i = 1; i < get_host()->get_pstate_count(); i++) { + rates[task_id][i] = rates[task_id][0] * (get_host()->get_pstate_speed(i) / get_host()->get_speed()); + } + is_initialized = true; + } + + for (int pstate = get_host()->get_pstate_count() - 1; pstate >= 0; pstate--) { + if (computed_flops / rates[task_id][pstate] <= target_time) { + // We just found the pstate we want to use! 
+ best_pstate = pstate; + break; + } + } + task_id++; + } + + virtual void update() override {} +}; } // namespace dvfs } // namespace plugin } // namespace simgrid @@ -256,6 +384,9 @@ static void on_host_added(simgrid::s4u::Host& host) } else if (dvfs_governor == "ondemand") { return std::unique_ptr( new simgrid::plugin::dvfs::OnDemand(daemon_proc->get_host())); + } else if (dvfs_governor == "adagio") { + return std::unique_ptr( + new simgrid::plugin::dvfs::Adagio(daemon_proc->get_host())); } else if (dvfs_governor == "performance") { return std::unique_ptr( new simgrid::plugin::dvfs::Performance(daemon_proc->get_host())); diff --git a/src/s4u/s4u_Actor.cpp b/src/s4u/s4u_Actor.cpp index a5b89f6eb3..b933c632c4 100644 --- a/src/s4u/s4u_Actor.cpp +++ b/src/s4u/s4u_Actor.cpp @@ -97,15 +97,6 @@ void Actor::on_exit(std::function fun, void* data) simgrid::simix::simcall([this, fun, data] { SIMIX_process_on_exit(pimpl_, fun, data); }); } -/** @brief Moves the actor to another host - * - * If the actor is currently blocked on an execution activity, the activity is also - * migrated to the new host. If it's blocked on another kind of activity, an error is - * raised as the mandated code is not written yet. Please report that bug if you need it. - * - * Asynchronous activities started by the actor are not migrated automatically, so you have - * to take care of this yourself (only you knows which ones should be migrated). 
- */ void Actor::migrate(Host* new_host) { s4u::Actor::on_migration_start(this); diff --git a/src/s4u/s4u_Engine.cpp b/src/s4u/s4u_Engine.cpp index afdc16fc64..3f8d5677e8 100644 --- a/src/s4u/s4u_Engine.cpp +++ b/src/s4u/s4u_Engine.cpp @@ -106,7 +106,6 @@ void Engine::getHostList(std::vector* list) list->push_back(kv.second); } -/** @brief Returns the list of all hosts found in the platform */ std::vector Engine::get_all_hosts() { std::vector res; diff --git a/src/s4u/s4u_Storage.cpp b/src/s4u/s4u_Storage.cpp index aba158e313..b1e3e714ae 100644 --- a/src/s4u/s4u_Storage.cpp +++ b/src/s4u/s4u_Storage.cpp @@ -63,6 +63,14 @@ IoPtr Storage::io_init(sg_size_t size, Io::OpType type) return res; } +IoPtr Storage::read_async(sg_size_t size) +{ + + IoPtr res = io_init(size, Io::OpType::READ); + res->start(); + return res; +} + sg_size_t Storage::read(sg_size_t size) { IoPtr i = io_init(size, Io::OpType::READ); @@ -70,6 +78,14 @@ sg_size_t Storage::read(sg_size_t size) return i->get_performed_ioops(); } +IoPtr Storage::write_async(sg_size_t size) +{ + + IoPtr res = io_init(size, Io::OpType::WRITE); + res->start(); + return res; +} + sg_size_t Storage::write(sg_size_t size) { IoPtr i = io_init(size, Io::OpType::WRITE); diff --git a/src/simix/libsmx.cpp b/src/simix/libsmx.cpp index 68badd050e..d3409086df 100644 --- a/src/simix/libsmx.cpp +++ b/src/simix/libsmx.cpp @@ -34,9 +34,10 @@ XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix); * to create the SIMIX synchro. It can raise a host_error exception if the host crashed. 
* * @param name Name of the execution synchro to create + * @param category Tracing category * @param flops_amount amount Computation amount (in flops) * @param priority computation priority - * @param bound + * @param bound Maximal speed for this execution (in flops) or -1 if no limit * @param host host where the synchro will be executed * @return A new SIMIX execution synchronization */ diff --git a/src/smpi/internals/instr_smpi.cpp b/src/smpi/internals/instr_smpi.cpp index 792376a21e..30514595dd 100644 --- a/src/smpi/internals/instr_smpi.cpp +++ b/src/smpi/internals/instr_smpi.cpp @@ -277,22 +277,6 @@ void TRACE_smpi_recv(int src, int dst, int tag) } /**************** Functions to trace the migration of tasks. *****************/ -void TRACE_smpi_send_process_data_in(int rank) -{ - if (not TRACE_smpi_is_enabled()) return; - - smpi_container(rank)->get_state("MIGRATE_STATE")->add_entity_value("migration", instr_find_color("migration")); - smpi_container(rank)->get_state("MIGRATE_STATE")->push_event("migration"); -} - -void TRACE_smpi_send_process_data_out(int rank) -{ - if (not TRACE_smpi_is_enabled()) return; - - /* Clean the process state. 
*/ - smpi_container(rank)->get_state("MIGRATE_STATE")->pop_event(); -} - void TRACE_smpi_process_change_host(int rank, sg_host_t new_host) { if (not TRACE_smpi_is_enabled()) return; @@ -316,4 +300,3 @@ void TRACE_smpi_process_change_host(int rank, sg_host_t new_host) cont = smpi_container(rank); // This points to the newly created container simgrid::instr::Container::get_root()->get_link("MIGRATE_LINK")->end_event(cont, "M", key); } - diff --git a/src/smpi/plugins/load_balancer/LoadBalancer.cpp b/src/smpi/plugins/load_balancer/LoadBalancer.cpp index e0fc633569..bf993dc4a3 100644 --- a/src/smpi/plugins/load_balancer/LoadBalancer.cpp +++ b/src/smpi/plugins/load_balancer/LoadBalancer.cpp @@ -19,14 +19,14 @@ namespace plugin { namespace loadbalancer { struct XBT_PRIVATE compare_hosts { - bool operator()(const simgrid::s4u::Host* a, const simgrid::s4u::Host* b) const; + bool operator()(simgrid::s4u::Host* const a, simgrid::s4u::Host* const b) const; }; typedef boost::heap::fibonacci_heap>::handle_type heap_handle; /** - * Structure that imitates a std::pair, but it allows us - * to use meaningful names instead of .first and .second + * Structure that imitates a std::pair, but it allows us + * to use meaningful names instead of .first and .second */ struct XBT_PRIVATE pair_handle_load { @@ -34,10 +34,10 @@ struct XBT_PRIVATE pair_handle_load double load; }; -static std::map additional_load; +static std::map additional_load; -bool compare_hosts::operator()(const simgrid::s4u::Host* a, const simgrid::s4u::Host* b) const { - return /*sg_host_get_avg_load(a) +*/ additional_load[a].load > /*sg_host_get_avg_load(b) +*/ additional_load[b].load; +bool compare_hosts::operator()(simgrid::s4u::Host* const a, simgrid::s4u::Host* const b) const { + return additional_load[a].load > additional_load[b].load; } @@ -136,9 +136,9 @@ void LoadBalancer::run() } } -simgrid::s4u::Host* LoadBalancer::get_mapping() +simgrid::s4u::Host* LoadBalancer::get_mapping(simgrid::s4u::ActorPtr actor) { - 
return new_mapping.get_host(simgrid::s4u::Actor::self()); + return new_mapping.get_host(actor); } void LoadBalancer::record_actor_computation(simgrid::s4u::ActorPtr actor, double load) diff --git a/src/smpi/plugins/load_balancer/load_balancer.hpp b/src/smpi/plugins/load_balancer/load_balancer.hpp index ff94b5743a..44a2d08b35 100644 --- a/src/smpi/plugins/load_balancer/load_balancer.hpp +++ b/src/smpi/plugins/load_balancer/load_balancer.hpp @@ -1,3 +1,7 @@ +/* Copyright (c) 2006-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ #ifndef HAVE_SG_PLUGIN_LB #define HAVE_SG_PLUGIN_LB @@ -66,7 +70,7 @@ public: /** * FIXME These are functions used for testing and should be re-written or removed */ - simgrid::s4u::Host* get_mapping(); + simgrid::s4u::Host* get_mapping(simgrid::s4u::ActorPtr); void record_actor_computation(simgrid::s4u::ActorPtr actor, double load); private: }; diff --git a/src/smpi/plugins/sampi_loadbalancer.cpp b/src/smpi/plugins/sampi_loadbalancer.cpp index 99e1bcc929..b201830d3c 100644 --- a/src/smpi/plugins/sampi_loadbalancer.cpp +++ b/src/smpi/plugins/sampi_loadbalancer.cpp @@ -78,7 +78,7 @@ public: smpilb_bar.wait(); was_executed = false; // Must stay behind this barrier so that all processes have passed the if clause - migrate_to_host = lb.get_mapping(); + migrate_to_host = lb.get_mapping(simgrid::s4u::Actor::self()); if (cur_host != migrate_to_host) { // Origin and dest are not the same -> migrate sg_host_t migration_hosts[2] = {cur_host, migrate_to_host}; // Changing this to double[2] ... will cause trouble with parallel_execute, because that fct is trying to call free(). 
diff --git a/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.c b/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.c index 32a7656bbb..67a54a5adb 100644 --- a/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.c +++ b/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.c @@ -11,7 +11,8 @@ XBT_LOG_NEW_DEFAULT_CATEGORY(dsend,"the dsend test"); -int main(int argc, char *argv[]) { +int main() +{ int rank; int32_t data=11; diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index c9974ce8cf..bd9de442b3 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -877,7 +877,6 @@ set(DOC_SOURCES doc/doxygen/application.doc doc/doxygen/community.doc doc/doxygen/deployment.doc - doc/doxygen/examples.doc doc/doxygen/footer.html doc/doxygen/getting_started.doc doc/doxygen/header.html @@ -954,7 +953,6 @@ set(DOC_TOOLS # these files get copied automatically to the html documentation set(DOC_IMG - ${CMAKE_HOME_DIRECTORY}/doc/sc3-description.png ${CMAKE_HOME_DIRECTORY}/doc/webcruft/AS_hierarchy.png ${CMAKE_HOME_DIRECTORY}/doc/webcruft/eclipseScreenShot.png ${CMAKE_HOME_DIRECTORY}/doc/webcruft/Paje_MSG_screenshot.jpg diff --git a/tools/cmake/Documentation.cmake b/tools/cmake/Documentation.cmake index 9efb34430b..c25c77769d 100644 --- a/tools/cmake/Documentation.cmake +++ b/tools/cmake/Documentation.cmake @@ -24,6 +24,8 @@ if(enable_documentation) COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/doc/example_lists COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/doc/html COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/doc/html + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/doc/xml + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/docs/source/api WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/doc )