From: Martin Quinson Date: Fri, 3 Aug 2018 21:38:32 +0000 (+0200) Subject: Merge branch 'master' of framagit.org:simgrid/simgrid X-Git-Tag: v3_21~302 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/50ddfe2f5b5035e0ed9556b334d6977ee81ff83a?hp=4dc49c9d77bc2dcba754a985c4c03e02c4b737fd Merge branch 'master' of framagit.org:simgrid/simgrid --- diff --git a/docs/source/install_yours.rst b/docs/source/install_yours.rst index d5e556dcb8..920bbbff71 100644 --- a/docs/source/install_yours.rst +++ b/docs/source/install_yours.rst @@ -9,6 +9,18 @@ Instead, you should create your own working directory somewhere on your disk (say `/home/joe/MyFirstScheduler/`), and write your code in there. +Cloning a Template Project for S4U +---------------------------------- + +If you plan to use the modern S4U interface of SimGrid, the easiest is +to clone the `Template Project +`_ directly. It +contains the necessary configuration to use cmake and S4U together. + +Once you forked the project on FramaGit, do not forget to remove the +fork relationship, as you won't need it unless you plan to contribute +to the template itself. + Building your project with CMake -------------------------------- @@ -17,7 +29,8 @@ your project. It builds two simulators from a given set of source files. .. code-block:: cmake - project(MyFirstScheduler) + cmake_minimum_required(VERSION 2.8.8) + project(MyFirstSimulator) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") @@ -34,7 +47,8 @@ your project. It builds two simulators from a given set of source files. target_link_libraries(other_xp ${SimGrid_LIBRARY}) -For that, you need FindSimGrid.cmake, +For that, you need `FindSimGrid.cmake +`_, that is located at the root of the SimGrid tree. You can either copy this file into the `cmake/Modules` directory of your project, or use the version installed on the disk. Both solutions present advantages diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 60af9b9359..06fe3291a6 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -1,5 +1,7 @@ .. Copyright 2005-2018 +.. _install: + Installing SimGrid ================== diff --git a/docs/source/main_concepts.rst b/docs/source/main_concepts.rst index 2c894c560b..051dddd81d 100644 --- a/docs/source/main_concepts.rst +++ b/docs/source/main_concepts.rst @@ -218,43 +218,43 @@ SimGrid Success Stories SimGrid was cited in over 1,500 scientific papers (according to Google Scholar). Among them -over 200 -publications (written by about 300 individuals) use SimGrid as a -scientific instrument to conduct their experimental evaluation. These +`over 200 publications `_ +(written by about 300 individuals) use SimGrid as a scientific +instrument to conduct their experimental evaluation. These numbers do not count the articles contributing to SimGrid. This instrument was used in many research communities, such as -High-Performance Computing, -Cloud Computing, -Workflow Scheduling, -Big Data and -MapReduce, -Data Grid, -Volunteer Computing, -Peer-to-Peer Computing, -Network Architecture, -Fog Computing, or -Batch Scheduling -(more info). +`High-Performance Computing `_, +`Cloud Computing `_, +`Workflow Scheduling `_, +`Big Data `_ and +`MapReduce `_, +`Data Grid `_, +`Volunteer Computing `_, +`Peer-to-Peer Computing `_, +`Network Architecture `_, +`Fog Computing `_, or +`Batch Scheduling `_ +`(more info) `_. If your platform description is accurate enough (see -here or -there), +`here `_ or +`there `_), SimGrid can provide high-quality performance predictions. For example, we determined the speedup achieved by the Tibidabo Arm-based cluster before its construction -(paper). In this case, +(`paper `_). In this case, some differences between the prediction and the real timings were due to misconfiguration or other problems with the real platforms. To some extent, SimGrid could even be used to debug the real platform :) SimGrid is also used to debug, improve and tune several large applications. -BigDFT (a massively parallel code +`BigDFT `_ (a massively parallel code computing the electronic structure of chemical elements developped by -the CEA), StarPU (a +the CEA), `StarPU `_ (a Unified Runtime System for Heterogeneous Multicore Architectures developped by Inria Bordeaux) and -TomP2P (a high performance +`TomP2P `_ (a high performance key-value pair storage library developped at University of Zurich). Some of these applications enjoy large user communities themselves. @@ -262,6 +262,6 @@ Where to proceed next? ---------------------- Now that you know about the basic concepts of SimGrid, you can give it -a try. If it's not done yet, first @ref install "install it". Then, +a try. If it's not done yet, first :ref:`install it `. Then, proceed to the section on @ref application "describing the application" that you want to study. diff --git a/examples/smpi/CMakeLists.txt b/examples/smpi/CMakeLists.txt index 5d7a96b492..03580f0e00 100644 --- a/examples/smpi/CMakeLists.txt +++ b/examples/smpi/CMakeLists.txt @@ -5,8 +5,7 @@ if(enable_smpi) file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mc/") - foreach(x replay - trace trace_simple trace_call_location energy) + foreach(x replay ampi trace trace_simple trace_call_location energy) add_executable (smpi_${x} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}) target_link_libraries(smpi_${x} simgrid) set_target_properties(smpi_${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x}) @@ -24,7 +23,7 @@ if(enable_smpi) endforeach() endif() -foreach(x replay) +foreach(x ampi replay) set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.cpp) endforeach() foreach(x trace trace_simple trace_call_location energy) @@ -40,6 +39,7 @@ set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/energy/energy.tes ${CMAKE_CURRENT_SOURCE_DIR}/trace/trace.tesh ${CMAKE_CURRENT_SOURCE_DIR}/trace_simple/trace_simple.tesh ${CMAKE_CURRENT_SOURCE_DIR}/trace_call_location/trace_call_location.tesh + ${CMAKE_CURRENT_SOURCE_DIR}/ampi/ampi.tesh ${CMAKE_CURRENT_SOURCE_DIR}/replay/replay.tesh PARENT_SCOPE) set(bin_files ${bin_files} ${CMAKE_CURRENT_SOURCE_DIR}/hostfile ${CMAKE_CURRENT_SOURCE_DIR}/energy/hostfile @@ -75,4 +75,6 @@ if(enable_smpi) ADD_TESH(smpi-tracing-call-location --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi/trace_call_location ${CMAKE_HOME_DIRECTORY}/examples/smpi/trace_call_location/trace_call_location.tesh) ADD_TESH(smpi-replay --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/replay.tesh) ADD_TESH_FACTORIES(smpi-energy "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/examples/smpi/energy --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi/energy --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/bin --cd ${CMAKE_BINARY_DIR}/examples/smpi/energy ${CMAKE_HOME_DIRECTORY}/examples/smpi/energy/energy.tesh) + + ADD_TESH(smpi-ampi --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/ampi/ampi.tesh) endif() diff --git a/examples/smpi/ampi/ampi.cpp b/examples/smpi/ampi/ampi.cpp new file mode 100644 index 0000000000..e9722af1d4 --- /dev/null +++ b/examples/smpi/ampi/ampi.cpp @@ -0,0 +1,41 @@ +/* Copyright (c) 2009-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include +#include "smpi/smpi.h" +#include "smpi/sampi.h" + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_plugin_load_balancer_example, smpi, "Simple tracing test for SAMPI functions"); + +int main(int argc, char* argv[]) +{ + MPI_Init(&argc, &argv); + void* pointer = malloc(100 * sizeof(int)); + free(pointer); + pointer = malloc(100 * sizeof(int)); + int rank; + int err = MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* Get id of this process */ + if (err != MPI_SUCCESS) { + fprintf(stderr, "MPI_Comm_rank failed: %d", err); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + exit(EXIT_FAILURE); + } + AMPI_Iteration_in(MPI_COMM_WORLD); + simgrid::s4u::this_actor::sleep_for(rank); + AMPI_Iteration_out(MPI_COMM_WORLD); + + AMPI_Iteration_in(MPI_COMM_WORLD); + simgrid::s4u::this_actor::sleep_for(rank); + AMPI_Iteration_out(MPI_COMM_WORLD); + if (rank == 0) + free(pointer); + AMPI_Migrate(MPI_COMM_WORLD); + if (rank != 0) + free(pointer); + + MPI_Finalize(); + return 0; +} + diff --git a/examples/smpi/ampi/ampi.tesh b/examples/smpi/ampi/ampi.tesh new file mode 100644 index 0000000000..be6bb157fa --- /dev/null +++ b/examples/smpi/ampi/ampi.tesh @@ -0,0 +1,34 @@ +# use the tested library, not the installed one +# (since we want to pass it to the child, it has to be redefined before each command) +# Go for the first test + +p Test if the load balancing code gets traced correctly +! timeout 60 + +$ ../../smpi_script/bin/smpirun -trace-ti --cfg=tracing/filename:${bindir:=.}/smpi_trace.trace --cfg=tracing/smpi/format/ti-one-file:yes -no-privatize -ext smpi_replay --log=replay.thresh:critical --log=smpi_replay.thresh:verbose --log=no_loc --cfg=smpi/simulate-computation:no -np 3 -platform ${srcdir:=.}/../platforms/small_platform.xml -hostfile ${srcdir:=.}/hostfile ./ampi/smpi_ampi ${srcdir:=.}/replay/actions_bcast.txt --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning + +$ bash -c "cat ${bindir:=.}/smpi_trace.trace_files/*" +> 0 init +> 0 iteration_in +> 0 iteration_out +> 0 iteration_in +> 0 iteration_out +> 0 migrate 0 +> 0 finalize +> 1 init +> 1 iteration_in +> 2 init +> 2 iteration_in +> 1 iteration_out +> 1 iteration_in +> 2 iteration_out +> 2 iteration_in +> 1 iteration_out +> 1 migrate 400 +> 1 finalize +> 2 iteration_out +> 2 migrate 400 +> 2 finalize + +$ rm -rf ${bindir:=.}/smpi_trace.trace +$ rm -rf ${bindir:=.}/smpi_trace.trace_files diff --git a/examples/smpi/load_balancer_replay/CMakeLists.txt b/examples/smpi/load_balancer_replay/CMakeLists.txt new file mode 100644 index 0000000000..73aae8733e --- /dev/null +++ b/examples/smpi/load_balancer_replay/CMakeLists.txt @@ -0,0 +1,13 @@ +if(enable_smpi) + set(CMAKE_C_COMPILER "${CMAKE_BINARY_DIR}/smpi_script/bin/smpicc") + set(CMAKE_CXX_COMPILER "${CMAKE_BINARY_DIR}/smpi_script/bin/smpicxx") + include_directories(BEFORE "${CMAKE_HOME_DIRECTORY}/include/smpi") + + add_executable (load_balancer_replay load_balancer_replay.cpp) + target_link_libraries(load_balancer_replay simgrid) + # ADD_TESH(sampi-load-balancer-replay --setenv srcdir=${CMAKE_CURRENT_SOURCE_DIR} --setenv bindir=${CMAKE_CURRENT_BINARY_DIR} --cd ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/load_balancer_replay.tesh) +endif() + +# Uncomment this to install tesh files +#set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/load_balancer_replay.tesh PARENT_SCOPE) +set(examples_src ${examples_src} ${CMAKE_CURRENT_SOURCE_DIR}/load_balancer_replay.cpp PARENT_SCOPE) diff --git a/examples/smpi/load_balancer_replay/load_balancer_replay.cpp b/examples/smpi/load_balancer_replay/load_balancer_replay.cpp new file mode 100644 index 0000000000..7c0bfb4a62 --- /dev/null +++ b/examples/smpi/load_balancer_replay/load_balancer_replay.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2009-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "smpi/smpi.h" +#include "smpi/sampi.h" +#include +#include +#include + +XBT_LOG_NEW_DEFAULT_CATEGORY(sampi_load_balancer_test, "Messages specific for this sampi example"); + + +int main(int argc, char* argv[]) +{ + sg_host_load_plugin_init(); + smpi_replay_init(&argc, &argv); + sg_load_balancer_plugin_init(); // Must be called after smpi_replay_init as this will overwrite some replay actions + + smpi_replay_main(&argc, &argv); + return 0; +} diff --git a/include/simgrid/smpi/replay.hpp b/include/simgrid/smpi/replay.hpp index 034e065edf..02caf33d6b 100644 --- a/include/simgrid/smpi/replay.hpp +++ b/include/simgrid/smpi/replay.hpp @@ -171,7 +171,7 @@ public: template class ReplayAction { protected: const std::string name; - const int my_proc_id; + const aid_t my_proc_id; T args; public: diff --git a/include/smpi/sampi.h b/include/smpi/sampi.h new file mode 100644 index 0000000000..c3087a86f6 --- /dev/null +++ b/include/smpi/sampi.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2007-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#ifndef SAMPI_H_ +#define SAMPI_H_ + +#include +#include + +#define AMPI_CALL(type, name, args) \ + type A##name args __attribute__((weak)); \ + type AP##name args; + +#ifndef HAVE_SMPI +#define malloc(nbytes) _sampi_malloc(nbytes) +#define free(ptr) _sampi_free(ptr) +#endif + +SG_BEGIN_DECL() + +XBT_PUBLIC void* _sampi_malloc(size_t size); +XBT_PUBLIC void _sampi_free(void* ptr); + +AMPI_CALL(XBT_PUBLIC int, MPI_Iteration_in, (MPI_Comm comm)) +AMPI_CALL(XBT_PUBLIC int, MPI_Iteration_out, (MPI_Comm comm)) +AMPI_CALL(XBT_PUBLIC void, MPI_Migrate, (MPI_Comm comm)) + +SG_END_DECL() + +#endif diff --git a/src/instr/instr_private.hpp b/src/instr/instr_private.hpp index 31b08e7986..bf594db9ce 100644 --- a/src/instr/instr_private.hpp +++ b/src/instr/instr_private.hpp @@ -219,6 +219,22 @@ public: std::string display_size() override { return ""; } }; + +class AmpiMigrateTIData : public TIData { + size_t memory_consumption; +public: + explicit AmpiMigrateTIData(size_t memory_conso) : TIData("migrate"), memory_consumption(memory_conso) { }; + + std::string print() override + { + std::stringstream stream; + stream << getName() << " " << memory_consumption; + + return stream.str(); + } + + std::string display_size() override { return ""; } +}; } } diff --git a/src/plugins/host_dvfs.cpp b/src/plugins/host_dvfs.cpp index 9e09300f93..0a0e2da83b 100644 --- a/src/plugins/host_dvfs.cpp +++ b/src/plugins/host_dvfs.cpp @@ -40,20 +40,37 @@ namespace simgrid { namespace plugin { namespace dvfs { + +/** + * Add this to your host tag: + * - \ + * + * Valid values as of now are: performance, powersave, ondemand, conservative + * It doesn't matter if you use uppercase or lowercase. + * + * For the sampling rate, use this: + * + * - \ + * + * This will run the update() method of the specified governor every 2 seconds + * on that host. + * + * These properties can also be used within the \ tag to configure + * these values globally. Using them within the \ will overwrite this + * global configuration + */ class Governor { -private: +protected: simgrid::s4u::Host* const host_; double sampling_rate_; -protected: - simgrid::s4u::Host* get_host() const { return host_; } - public: explicit Governor(simgrid::s4u::Host* ptr) : host_(ptr) { init(); } virtual ~Governor() = default; virtual std::string get_name() = 0; + simgrid::s4u::Host* get_host() const { return host_; } void init() { @@ -201,24 +218,6 @@ public: } }; -/** - * Add this to your host tag: - * - \ - * - * Valid values as of now are: performance, powersave, ondemand, conservative - * It doesn't matter if you use uppercase or lowercase. - * - * For the sampling rate, use this: - * - * - \ - * - * This will run the update() method of the specified governor every 2 seconds - * on that host. - * - * These properties can also be used within the \ tag to configure - * these values globally. Using them within the \ will overwrite this - * global configuration - */ } // namespace dvfs } // namespace plugin } // namespace simgrid diff --git a/src/plugins/host_energy.cpp b/src/plugins/host_energy.cpp index 4140befa88..be43b29592 100644 --- a/src/plugins/host_energy.cpp +++ b/src/plugins/host_energy.cpp @@ -5,6 +5,7 @@ #include "simgrid/plugins/energy.h" #include "simgrid/s4u/Engine.hpp" +#include "src/kernel/activity/ExecImpl.hpp" #include "src/include/surf/surf.hpp" #include "src/plugins/vm/VirtualMachineImpl.hpp" #include "src/surf/cpu_interface.hpp" @@ -478,6 +479,21 @@ void sg_host_energy_plugin_init() simgrid::s4u::Host::on_destruction.connect(&on_host_destruction); simgrid::s4u::on_simulation_end.connect(&on_simulation_end); simgrid::surf::CpuAction::on_state_change.connect(&on_action_state_change); + // We may only have one actor on a node. If that actor executes something like + // compute -> recv -> compute + // the recv operation will not trigger a "CpuAction::on_state_change". This means + // that the next trigger would be the 2nd compute, hence ignoring the idle time + // during the recv call. By updating at the beginning of a compute, we can + // fix that. (If the cpu is not idle, this is not required.) + simgrid::kernel::activity::ExecImpl::on_creation.connect([](simgrid::kernel::activity::ExecImplPtr activity){ + if (activity->host_ != nullptr) { // We only run on one host + simgrid::s4u::Host* host = activity->host_; + if (dynamic_cast(activity->host_)) + host = dynamic_cast(activity->host_)->get_pm(); + + host->extension()->update(); + } + }); } /** @ingroup plugin_energy diff --git a/src/smpi/internals/instr_smpi.cpp b/src/smpi/internals/instr_smpi.cpp index 9bacf19aa7..792376a21e 100644 --- a/src/smpi/internals/instr_smpi.cpp +++ b/src/smpi/internals/instr_smpi.cpp @@ -56,7 +56,6 @@ static std::map smpi_colors = {{"recv", "1 0 0"}, {"put", "0.3 1 0"}, {"get", "0 1 0.3"}, {"accumulate", "1 0.3 0"}, - {"migration", "0.2 0.5 0.2"}, {"rput", "0.3 1 0"}, {"rget", "0 1 0.3"}, {"raccumulate", "1 0.3 0"}, @@ -296,7 +295,7 @@ void TRACE_smpi_send_process_data_out(int rank) void TRACE_smpi_process_change_host(int rank, sg_host_t new_host) { - if (!TRACE_smpi_is_enabled()) return; + if (not TRACE_smpi_is_enabled()) return; /** The key is (most likely) used to match the events in the trace */ static long long int counter = 0; @@ -317,3 +316,4 @@ void TRACE_smpi_process_change_host(int rank, sg_host_t new_host) cont = smpi_container(rank); // This points to the newly created container simgrid::instr::Container::get_root()->get_link("MIGRATE_LINK")->end_event(cont, "M", key); } + diff --git a/src/smpi/plugins/ampi/ampi.cpp b/src/smpi/plugins/ampi/ampi.cpp new file mode 100644 index 0000000000..6f400b8bab --- /dev/null +++ b/src/smpi/plugins/ampi/ampi.cpp @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(plugin_pampi, smpi, "Logging specific to the AMPI functions"); + +static std::vector memory_size(500, 0); // FIXME cheinrich This needs to be dynamic +static std::map alloc_table; // Keep track of all allocations +extern "C" XBT_PUBLIC void* _sampi_malloc(size_t); +extern "C" XBT_PUBLIC void _sampi_free(void* ptr); +extern "C" void* _sampi_malloc(size_t size) +{ + void* result = malloc (size); // We need the space here to prevent recursive substitution + alloc_table.insert({result, size}); + if (not simgrid::s4u::this_actor::is_maestro()) { + memory_size[simgrid::s4u::this_actor::get_pid()] += size; + } + return result; +} + +extern "C" void _sampi_free(void* ptr) +{ + size_t alloc_size = alloc_table.at(ptr); + int my_proc_id = simgrid::s4u::this_actor::get_pid(); + memory_size[my_proc_id] -= alloc_size; + free(ptr); +} + +#include +namespace simgrid { +namespace smpi { +namespace plugin { +namespace ampi { + simgrid::xbt::signal on_iteration_in; + simgrid::xbt::signal on_iteration_out; +} +} +} +} + +/* FIXME The following contains several times "rank() + 1". This works for one + * instance, but we need to find a way to deal with this for several instances and + * for daemons: If we just replace this with the process id, we will get id's that + * don't start at 0 if we start daemons as well. + */ +int APMPI_Iteration_in(MPI_Comm comm) +{ + smpi_bench_end(); + TRACE_Iteration_in(comm->rank() + 1, new simgrid::instr::NoOpTIData("iteration_in")); // implemented on instr_smpi.c + smpi_bench_begin(); + return 1; +} + +int APMPI_Iteration_out(MPI_Comm comm) +{ + smpi_bench_end(); + TRACE_Iteration_out(comm->rank() + 1, new simgrid::instr::NoOpTIData("iteration_out")); + smpi_bench_begin(); + return 1; +} + +void APMPI_Migrate(MPI_Comm comm) +{ + smpi_bench_end(); + int my_proc_id = simgrid::s4u::this_actor::get_pid(); + TRACE_migration_call(comm->rank() + 1, new simgrid::instr::AmpiMigrateTIData(memory_size[my_proc_id])); + smpi_bench_begin(); +} + +int AMPI_Iteration_in(MPI_Comm comm) +{ + return APMPI_Iteration_in(comm); +} + +int AMPI_Iteration_out(MPI_Comm comm) +{ + return APMPI_Iteration_out(comm); +} + +void AMPI_Migrate(MPI_Comm comm) +{ + APMPI_Migrate(comm); +} diff --git a/src/smpi/plugins/ampi/ampi.hpp b/src/smpi/plugins/ampi/ampi.hpp new file mode 100644 index 0000000000..0685cee29f --- /dev/null +++ b/src/smpi/plugins/ampi/ampi.hpp @@ -0,0 +1,18 @@ +/* Copyright (c) 2010-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include + +namespace simgrid { +namespace smpi { +namespace plugin { +namespace ampi { + extern simgrid::xbt::signal on_iteration_out; + extern simgrid::xbt::signal on_iteration_in; +} +} +} +} + diff --git a/src/smpi/plugins/ampi/instr_ampi.cpp b/src/smpi/plugins/ampi/instr_ampi.cpp new file mode 100644 index 0000000000..cd6d53b6df --- /dev/null +++ b/src/smpi/plugins/ampi/instr_ampi.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2010-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "src/smpi/plugins/ampi/instr_ampi.hpp" +#include "smpi/smpi.h" +#include "src/instr/instr_private.hpp" +#include +#include + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(instr_sampi, instr, "Tracing (S)AMPI"); + +static std::map ampi_colors = {{"migrate", "0.2 0.5 0.2"}, + {"iteration", "0.5 0.5 0.5"} +}; + +void TRACE_Iteration_in(int rank, simgrid::instr::TIData* extra) +{ + if (not TRACE_smpi_is_enabled()) { + delete extra; + return; + } + smpi_container(rank)->get_state("MPI_STATE")->add_entity_value("iteration", ampi_colors["iteration"]); + smpi_container(rank)->get_state("MPI_STATE")->push_event("iteration", extra); +} + +void TRACE_Iteration_out(int rank, simgrid::instr::TIData* extra) +{ + if (not TRACE_smpi_is_enabled()) return; + + smpi_container(rank)->get_state("MPI_STATE")->pop_event(extra); +} + +void TRACE_migration_call(int rank, simgrid::instr::TIData* extra) +{ + if (not TRACE_smpi_is_enabled()) return; + + const std::string operation = "migrate"; + if(smpi_process()->replaying()) {//When replaying, we register an event. + smpi_container(rank)->get_state("MIGRATE_STATE")->add_entity_value(operation); + + simgrid::instr::EventType* type = + static_cast(smpi_container(rank)->type_->by_name(operation)); + new simgrid::instr::NewEvent(smpi_process()->simulated_elapsed(), smpi_container(rank), type, + type->get_entity_value(operation)); + } else { + // FIXME From rktesser: Ugly workaround! + // TI tracing uses states as events, and does not support printing events. + // So, we need a different code than for replay in order to be able to + // generate ti_traces for the migration calls. + if (!TRACE_smpi_is_enabled()) { + delete extra; + return; + } + smpi_container(rank)->get_state("MIGRATE_STATE")->add_entity_value(operation, ampi_colors[operation.c_str()]); + smpi_container(rank)->get_state("MIGRATE_STATE")->push_event(operation, extra); + smpi_container(rank)->get_state("MIGRATE_STATE")->pop_event(); + } +} diff --git a/src/smpi/plugins/ampi/instr_ampi.hpp b/src/smpi/plugins/ampi/instr_ampi.hpp new file mode 100644 index 0000000000..3fc2f742e3 --- /dev/null +++ b/src/smpi/plugins/ampi/instr_ampi.hpp @@ -0,0 +1,16 @@ +/* Copyright (c) 2010-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#ifndef INSTR_AMPI_HPP_ +#define INSTR_AMPI_HPP_ + +#include "smpi/smpi.h" +#include "src/instr/instr_private.hpp" + +XBT_PRIVATE void TRACE_Iteration_in(int rank, simgrid::instr::TIData* extra); +XBT_PRIVATE void TRACE_Iteration_out(int rank, simgrid::instr::TIData* extra); +XBT_PRIVATE void TRACE_migration_call(int rank, simgrid::instr::TIData* extra); + +#endif diff --git a/src/smpi/plugins/load_balancer/LoadBalancer.cpp b/src/smpi/plugins/load_balancer/LoadBalancer.cpp new file mode 100644 index 0000000000..e0fc633569 --- /dev/null +++ b/src/smpi/plugins/load_balancer/LoadBalancer.cpp @@ -0,0 +1,150 @@ +/* Copyright (c) 2006-2018. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include +#include +#include +#include + +#include +#include +#include + +XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(plugin_load_balancer); + +namespace simgrid { +namespace plugin { +namespace loadbalancer { + +struct XBT_PRIVATE compare_hosts { + bool operator()(const simgrid::s4u::Host* a, const simgrid::s4u::Host* b) const; +}; + +typedef boost::heap::fibonacci_heap>::handle_type heap_handle; + +/** + * Structure that imitates a std::pair, but it allows us + * to use meaningful names instead of .first and .second + */ +struct XBT_PRIVATE pair_handle_load +{ + heap_handle update_handle; + double load; +}; + +static std::map additional_load; + +bool compare_hosts::operator()(const simgrid::s4u::Host* a, const simgrid::s4u::Host* b) const { + return /*sg_host_get_avg_load(a) +*/ additional_load[a].load > /*sg_host_get_avg_load(b) +*/ additional_load[b].load; +} + + +LoadBalancer::LoadBalancer() +{ +} + +LoadBalancer::~LoadBalancer() +{ +} + +void LoadBalancer::run() +{ + simgrid::s4u::Engine* engine = simgrid::s4u::Engine::get_instance(); + std::vector available_hosts = engine->get_filtered_hosts([](simgrid::s4u::Host* host) { + return not host->is_off(); + }); + xbt_assert(available_hosts.size() > 0, "No hosts available; are they all switched off?"); + + // TODO: Account for daemon background load (-> use especially the availability file) + + std::vector all_actors = + engine->get_filtered_actors([](simgrid::s4u::ActorPtr actor) { return not actor->is_daemon(); }); + + for (auto& actor : all_actors) { + new_mapping.assign(actor, actor->get_host()); + } + // Sort the actors, from highest to lowest load; we then just iterate over these actors + std::sort(all_actors.begin(), all_actors.end(), [this](simgrid::s4u::ActorPtr a, simgrid::s4u::ActorPtr b) { + return actor_computation[a->get_pid()] > actor_computation[b->get_pid()]; + }); + + // Sort the hosts. Use a heap datastructure, because we have to reorder + // after a host got another actor assigned (or moved from). + // We can't use std::priorityQueue here because we modify *two* elements: The top element, which + // we can access and which has the lowest load, gets a new actor assigned. + // However, the host loosing that actor must be updated as well. + // std::priorityQueue is immutable and hence doesn't work for us. + // + // This heap contains the least loaded host at the top + boost::heap::fibonacci_heap> usable_hosts; + for (auto& host : available_hosts) { + std::vector actors = host->get_all_actors(); + heap_handle update_handle = usable_hosts.push(host); // Required to update elements in the heap + additional_load[host] = {update_handle, 0}; // Save the handle for later + for (auto& actor : actors) { + additional_load[host].load += actor_computation[actor->get_pid()]; + XBT_DEBUG("Actor %li -> %f", actor->get_pid(), actor_computation[actor->get_pid()]); + } + XBT_DEBUG("Host %s initialized to %f", host->get_cname(), additional_load[host].load); + } + + // Implementation of the Greedy algorithm + for (auto& actor : all_actors) { + simgrid::s4u::Host* target_host = usable_hosts.top(); // This is the host with the lowest load + + simgrid::s4u::Host* cur_mapped_host = new_mapping.get_host(actor); + if (target_host != cur_mapped_host + && additional_load[target_host].load + actor_computation[actor->get_pid()] < additional_load[cur_mapped_host].load + && new_mapping.count_actors(cur_mapped_host) > 1) { + usable_hosts.pop(); + XBT_DEBUG("Assigning %li from %s to %s -- actor_load: %f -- host_load: %f", actor->get_pid(), actor->get_host()->get_cname(), target_host->get_cname(), actor_computation[actor->get_pid()], additional_load[target_host].load); + additional_load[cur_mapped_host].load = std::max(0.0, additional_load[cur_mapped_host].load - actor_computation[actor->get_pid()]); // No negative loads, please! + usable_hosts.update(additional_load[cur_mapped_host].update_handle, cur_mapped_host); + additional_load[target_host].load += actor_computation[actor->get_pid()]; + + new_mapping.assign(actor, target_host); + + XBT_DEBUG("Assigning actor %li to host %s", actor->get_pid(), target_host->get_cname()); + + XBT_DEBUG("host_load: %f after the assignment", additional_load[target_host].load); + additional_load[target_host].update_handle = usable_hosts.push(target_host); // Save update handle for later + } + } + + while (!usable_hosts.empty()) { + simgrid::s4u::Host* host = usable_hosts.top(); + usable_hosts.pop(); + + sg_host_load_reset(host); // Reset host load for next iterations + + if (XBT_LOG_ISENABLED(plugin_load_balancer, e_xbt_log_priority_t::xbt_log_priority_debug)) { + /* Debug messages that allow us to verify the load for each host */ + XBT_DEBUG("Host: %s, load total: %f", host->get_cname(), additional_load[host].load); + double load_verif = 0.0; + new_mapping.for_each_actor(host, + [this, &load_verif](simgrid::s4u::ActorPtr actor) { + load_verif += actor_computation[actor->get_pid()]; + XBT_DEBUG(" %li (load: %f)", actor->get_pid(), actor_computation[actor->get_pid()]); + }); + XBT_DEBUG("Host load verification: %f", load_verif); + } + } + for (auto& elem : actor_computation) { // Reset actor load + elem.second = 0; + } +} + +simgrid::s4u::Host* LoadBalancer::get_mapping() +{ + return new_mapping.get_host(simgrid::s4u::Actor::self()); +} + +void LoadBalancer::record_actor_computation(simgrid::s4u::ActorPtr actor, double load) +{ + actor_computation[actor->get_pid()] += load; +} +} +} +} diff --git a/src/smpi/plugins/load_balancer/load_balancer.hpp b/src/smpi/plugins/load_balancer/load_balancer.hpp new file mode 100644 index 0000000000..ff94b5743a --- /dev/null +++ b/src/smpi/plugins/load_balancer/load_balancer.hpp @@ -0,0 +1,77 @@ + +#ifndef HAVE_SG_PLUGIN_LB +#define HAVE_SG_PLUGIN_LB + +#include + +namespace simgrid { +namespace plugin { +namespace loadbalancer { + +class XBT_PRIVATE Mapping { +public: + Mapping() = default; + ~Mapping() = default; + /** Each host can have an arbitrary number of actors -> multimap **/ + typedef std::unordered_multimap host_to_actors_map_t; + host_to_actors_map_t host_to_actors; + + /** Each actor gets assigned to exactly one host -> map **/ + std::map actor_to_host; + + void assign(simgrid::s4u::ActorPtr actor, simgrid::s4u::Host* host) + { + /* Remove "actor" from its old host -> get all elements that have the current host as key **/ + auto range = host_to_actors.equal_range(/* current host */actor_to_host[actor]); + for (auto it = range.first; it != range.second; it++) { + if (it->second == actor) { + host_to_actors.erase(it); // unassign this actor + break; + } + } + + actor_to_host[actor] = host; + host_to_actors.insert({host, actor}); + } + + simgrid::s4u::Host* get_host(simgrid::s4u::ActorPtr actor) { return actor_to_host[actor]; } + + unsigned int count_actors(simgrid::s4u::Host* host) + { + return host_to_actors.count(host); // TODO This is linear in the size of the map. Maybe replace by constant lookup through another map? + } + + void for_each_actor(simgrid::s4u::Host* host, std::function callback) + { + auto range = host_to_actors.equal_range(host); + std::for_each( + range.first, + range.second, + [&callback](host_to_actors_map_t::value_type& x) { callback(x.second); } + ); + } +}; + +class XBT_PRIVATE LoadBalancer +{ + Mapping new_mapping; + std::map actor_computation; + +public: + LoadBalancer(); + ~LoadBalancer(); + void run(); + void assign(simgrid::s4u::ActorPtr actor, simgrid::s4u::Host* host); + + /** + * FIXME These are functions used for testing and should be re-written or removed + */ + simgrid::s4u::Host* get_mapping(); + void record_actor_computation(simgrid::s4u::ActorPtr actor, double load); +private: +}; + +} +} +} +#endif diff --git a/src/smpi/plugins/sampi_loadbalancer.cpp b/src/smpi/plugins/sampi_loadbalancer.cpp index c9b9a457e0..99e1bcc929 100644 --- a/src/smpi/plugins/sampi_loadbalancer.cpp +++ b/src/smpi/plugins/sampi_loadbalancer.cpp @@ -4,17 +4,124 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include -#include +#include #include +#include +#include +#include +#include +#include #include -XBT_LOG_NEW_DEFAULT_SUBCATEGORY(plugin_load_balancer, surf, "Logging specific to the SMPI load balancing plugin"); +#include "src/kernel/activity/ExecImpl.hpp" +#include "src/simix/ActorImpl.hpp" +#include "src/smpi/plugins/load_balancer/load_balancer.hpp" // This is not yet ready to be public + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(plugin_load_balancer, smpi, "Logging specific to the SMPI load balancing plugin"); + +static simgrid::config::Flag cfg_migration_frequency("smpi/plugin/lb/migration-frequency", {"smpi/plugin/lb/migration_frequency"}, + "After how many calls to the migration function should the migration be actually executed?", 10, + [](double val){if (val != 10) sg_load_balancer_plugin_init();}); namespace simgrid { namespace smpi { namespace plugin { +static simgrid::plugin::loadbalancer::LoadBalancer lb; + +class MigrateParser : public simgrid::smpi::replay::ActionArgParser { +public: + double memory_consumption; + void parse(simgrid::xbt::ReplayAction& action, std::string name) + { + // The only parameter is the amount of memory used by the current process. + CHECK_ACTION_PARAMS(action, 1, 0); + memory_consumption = std::stod(action[2]); + } +}; + +/* This function simulates what happens when the original application calls + * (A)MPI_Migrate. It executes the load balancing heuristics, makes the necessary + * migrations and updates the task mapping in the load balancer. + */ +class MigrateAction : public simgrid::smpi::replay::ReplayAction { +public: + explicit MigrateAction() : ReplayAction("Migrate") {} + void kernel(simgrid::xbt::ReplayAction& action) + { + static std::map migration_call_counter; + static simgrid::s4u::Barrier smpilb_bar(smpi_process_count()); + simgrid::s4u::Host* cur_host = simgrid::s4u::this_actor::get_host(); + simgrid::s4u::Host* migrate_to_host; + + TRACE_migration_call(my_proc_id, nullptr); + + // We only migrate every "cfg_migration_frequency"-times, not at every call + migration_call_counter[simgrid::s4u::Actor::self()]++; + if ((migration_call_counter[simgrid::s4u::Actor::self()] % simgrid::config::get_value(cfg_migration_frequency.get_name())) != 0) { + return; + } + + // TODO cheinrich: Why do we need this barrier? + smpilb_bar.wait(); + + static bool was_executed = false; + if (not was_executed) { + was_executed = true; + XBT_DEBUG("Process %li runs the load balancer", my_proc_id); + smpi_bench_begin(); + lb.run(); + smpi_bench_end(); + } + + // This barrier is required to ensure that the mapping has been computed and is available + smpilb_bar.wait(); + was_executed = false; // Must stay behind this barrier so that all processes have passed the if clause + + migrate_to_host = lb.get_mapping(); + if (cur_host != migrate_to_host) { // Origin and dest are not the same -> migrate + sg_host_t migration_hosts[2] = {cur_host, migrate_to_host}; + // Changing this to double[2] ... will cause trouble with parallel_execute, because that fct is trying to call free(). + double* comp_amount = new double[2]{0, 0}; + double* comm_amount = new double[4]{0, /*must not be 0*/std::max(args.memory_consumption, 1.0), 0, 0}; + xbt_os_timer_t timer = smpi_process()->timer(); + xbt_os_threadtimer_start(timer); + simgrid::s4u::this_actor::parallel_execute(2, migration_hosts, comp_amount, comm_amount, -1.0); + xbt_os_threadtimer_stop(timer); + smpi_execute(xbt_os_timer_elapsed(timer)); + + // Update the process and host mapping in SimGrid. + TRACE_smpi_process_change_host(my_proc_id, migrate_to_host); + simgrid::s4u::this_actor::migrate(migrate_to_host); + } + + smpilb_bar.wait(); + + smpi_bench_begin(); + } +}; + +/****************************************************************************** + * Code to include the duration of iterations in the trace. * + ******************************************************************************/ + +// FIXME Move declaration +XBT_PRIVATE void action_iteration_in(simgrid::xbt::ReplayAction& action); +void action_iteration_in(simgrid::xbt::ReplayAction& action) +{ + CHECK_ACTION_PARAMS(action, 0, 0) + TRACE_Iteration_in(simgrid::s4u::this_actor::get_pid(), nullptr); + simgrid::smpi::plugin::ampi::on_iteration_in(MPI_COMM_WORLD->group()->actor(std::stol(action[0]))); +} + +XBT_PRIVATE void action_iteration_out(simgrid::xbt::ReplayAction& action); +void action_iteration_out(simgrid::xbt::ReplayAction& action) +{ + CHECK_ACTION_PARAMS(action, 0, 0) + TRACE_Iteration_out(simgrid::s4u::this_actor::get_pid(), nullptr); + simgrid::smpi::plugin::ampi::on_iteration_out(MPI_COMM_WORLD->group()->actor(std::stol(action[0]))); +} } } } @@ -26,4 +133,16 @@ namespace plugin { */ void sg_load_balancer_plugin_init() { + static bool done = false; + if (!done) { + done = true; + simgrid::kernel::activity::ExecImpl::on_completion.connect([](simgrid::kernel::activity::ExecImplPtr activity){ + simgrid::smpi::plugin::lb.record_actor_computation(activity->simcalls_.front()->issuer->iface(), activity->surf_action_->get_cost()); + }); + + xbt_replay_action_register( + "migrate", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::plugin::MigrateAction().execute(action); }); + xbt_replay_action_register("iteration_in", simgrid::smpi::plugin::action_iteration_in); + xbt_replay_action_register("iteration_out", simgrid::smpi::plugin::action_iteration_out); + } } diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index f05744b917..c9974ce8cf 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -238,22 +238,28 @@ set(SMPI_SRC src/smpi/mpi/smpi_status.cpp src/smpi/mpi/smpi_topo.cpp src/smpi/mpi/smpi_win.cpp + src/smpi/include/smpi_actor.hpp src/smpi/include/smpi_coll.hpp src/smpi/include/smpi_comm.hpp + src/smpi/include/smpi_datatype_derived.hpp + src/smpi/include/smpi_datatype.hpp src/smpi/include/smpi_f2c.hpp src/smpi/include/smpi_group.hpp src/smpi/include/smpi_host.hpp - src/smpi/include/smpi_datatype.hpp src/smpi/include/smpi_info.hpp src/smpi/include/smpi_keyvals.hpp - src/smpi/include/smpi_datatype_derived.hpp src/smpi/include/smpi_op.hpp - src/smpi/include/smpi_actor.hpp src/smpi/include/smpi_request.hpp src/smpi/include/smpi_status.hpp - src/smpi/include/smpi_win.hpp src/smpi/include/smpi_topo.hpp + src/smpi/include/smpi_win.hpp src/smpi/plugins/sampi_loadbalancer.cpp + src/smpi/plugins/ampi/ampi.cpp + src/smpi/plugins/ampi/ampi.hpp + src/smpi/plugins/ampi/instr_ampi.cpp + src/smpi/plugins/ampi/instr_ampi.hpp + src/smpi/plugins/load_balancer/LoadBalancer.cpp + src/smpi/plugins/load_balancer/load_balancer.hpp src/surf/network_smpi.cpp src/surf/network_ib.cpp ) @@ -660,7 +666,6 @@ set(MC_SRC set(MC_SIMGRID_MC_SRC src/mc/checker/simgrid_mc.cpp) set(headers_to_install - include/simgrid_config.h include/simgrid/actor.h include/simgrid/barrier.h @@ -723,6 +728,7 @@ set(headers_to_install include/simgrid/kernel/routing/VivaldiZone.hpp include/smpi/mpi.h + include/smpi/sampi.h include/smpi/smpi.h include/smpi/smpi_main.h include/smpi/smpi_helpers.h @@ -1004,6 +1010,7 @@ set(CMAKEFILES_TXT examples/smpi/smpi_msg_masterslave/CMakeLists.txt examples/smpi/replay_multiple/CMakeLists.txt examples/smpi/replay_multiple_manual_deploy/CMakeLists.txt + examples/smpi/load_balancer_replay/CMakeLists.txt examples/smpi/energy/f77/CMakeLists.txt examples/smpi/energy/f90/CMakeLists.txt diff --git a/tools/internal/check_dist_archive.exclude b/tools/internal/check_dist_archive.exclude index 015b38ac89..c1f1629dc2 100644 --- a/tools/internal/check_dist_archive.exclude +++ b/tools/internal/check_dist_archive.exclude @@ -19,12 +19,17 @@ + \.codacy\.yml + \.cproject + \.editorconfig ++ \.gitlab-ci\.yml + \.project ++ \.readthedocs.yml + \.travis\.yml + sonar-project\.properties + contrib/.* ++ docs/requirements\.txt ++ docs/source/.* + + tools/appveyor-irc-notify\.py + tools/git-hooks/.* + tools/internal/.*