There's no example, and it seems that it hasn't been used for nearly 2 years.
include include/simgrid/plugins/file_system.h
include include/simgrid/plugins/live_migration.h
include include/simgrid/plugins/load.h
-include include/simgrid/plugins/load_balancer.h
include include/simgrid/s4u.hpp
include include/simgrid/s4u/Activity.hpp
include include/simgrid/s4u/Actor.hpp
include src/smpi/plugins/ampi/ampi.hpp
include src/smpi/plugins/ampi/instr_ampi.cpp
include src/smpi/plugins/ampi/instr_ampi.hpp
-include src/smpi/plugins/load_balancer/LoadBalancer.cpp
-include src/smpi/plugins/load_balancer/load_balancer.hpp
-include src/smpi/plugins/sampi_loadbalancer.cpp
include src/smpi/smpi_main.c
include src/smpi/smpi_replay_main.cpp
include src/smpi/smpicc.in
+++ /dev/null
-/* Copyright (c) 2009-2020. The SimGrid Team. All rights reserved. */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-#ifndef SIMGRID_PLUGINS_LOAD_BALANCER_H_
-#define SIMGRID_PLUGINS_LOAD_BALANCER_H_
-
-#include <simgrid/forward.h>
-#include <xbt/base.h>
-
-SG_BEGIN_DECL
-
-XBT_PUBLIC void sg_load_balancer_plugin_init();
-
-SG_END_DECL
-
-#endif
if (not TRACE_smpi_is_grouped())
mpi->by_name_or_create<StateType>("MPI_STATE");
root->type_->by_name_or_create("MPI_LINK", mpi, mpi);
- // TODO See if we can move this to the LoadBalancer plugin
root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
mpi->by_name_or_create<StateType>("MIGRATE_STATE");
}
if (TRACE_smpi_is_enabled() && TRACE_smpi_is_grouped()) {
auto* mpi = container->type_->by_name_or_create<ContainerType>("MPI");
mpi->by_name_or_create<StateType>("MPI_STATE");
- // TODO See if we can move this to the LoadBalancer plugin
root->type_->by_name_or_create("MIGRATE_LINK", mpi, mpi);
mpi->by_name_or_create<StateType>("MIGRATE_STATE");
}
XBT_PRIVATE void TRACE_smpi_recv(int src, int dst, int tag);
XBT_PRIVATE void TRACE_smpi_init(int rank, const std::string& calling_func);
-/* SMPI + LB (load balancer) */
-XBT_PRIVATE void TRACE_smpi_process_change_host(int rank, const_sg_host_t new_host);
-
class smpi_trace_call_location_t {
public:
std::string filename;
XBT_DEBUG("Recv tracing from %d to %d, tag %d, with key %s", src, dst, tag, key.c_str());
simgrid::instr::Container::get_root()->get_link("MPI_LINK")->end_event(smpi_container(dst), "PTP", key);
}
-
-/**************** Functions to trace the migration of tasks. *****************/
-void TRACE_smpi_process_change_host(int rank, const_sg_host_t new_host)
-{
- if (not TRACE_smpi_is_enabled()) return;
-
- /** The key is (most likely) used to match the events in the trace */
- static long long int counter = 0;
- std::string key = std::to_string(counter);
- counter++;
-
- // start link (= tell the trace that this rank moves from A to B)
- auto* cont = smpi_container(rank);
- simgrid::instr::Container::get_root()->get_link("MIGRATE_LINK")->start_event(cont, "M", key);
-
- // Destroy container of this rank on this host
- cont->remove_from_parent();
-
- // Setup container on new host
- TRACE_smpi_setup_container(rank, new_host);
-
- // end link
- cont = smpi_container(rank); // This points to the newly created container
- simgrid::instr::Container::get_root()->get_link("MIGRATE_LINK")->end_event(cont, "M", key);
-}
/* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
-#include <simgrid/plugins/load_balancer.h>
#include <simgrid/s4u/Actor.hpp>
#include <src/instr/instr_smpi.hpp>
#include <src/smpi/include/smpi_actor.hpp>
+++ /dev/null
-/* Copyright (c) 2006-2020. The SimGrid Team. All rights reserved. */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-
-#include <algorithm>
-#include <map>
-#include <unordered_map>
-#include <queue>
-
-#include <boost/heap/fibonacci_heap.hpp>
-#include <simgrid/plugins/load.h>
-#include <src/smpi/plugins/load_balancer/load_balancer.hpp>
-
-XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(plugin_load_balancer);
-
-namespace simgrid {
-namespace plugin {
-namespace loadbalancer {
-
-class XBT_PRIVATE compare_hosts {
-public:
- bool operator()(s4u::Host* const a, s4u::Host* const b) const;
-};
-
-using heap_handle = boost::heap::fibonacci_heap<s4u::Host*, boost::heap::compare<compare_hosts>>::handle_type;
-
-/** Structure that imitates a std::pair, but it allows us to use meaningful names instead of .first and .second */
-struct XBT_PRIVATE pair_handle_load
-{
- heap_handle update_handle;
- double load;
-};
-
-static std::map<s4u::Host* const, pair_handle_load> additional_load;
-
-bool compare_hosts::operator()(s4u::Host* const a, s4u::Host* const b) const
-{
- return additional_load[a].load > additional_load[b].load;
-}
-
-void LoadBalancer::run()
-{
- const s4u::Engine* engine = s4u::Engine::get_instance();
- std::vector<s4u::Host*> available_hosts =
- engine->get_filtered_hosts([](const s4u::Host* host) { return host->is_on(); });
- xbt_assert(available_hosts.size() > 0, "No hosts available; are they all switched off?");
-
- // TODO: Account for daemon background load (-> use especially the availability file)
-
- std::vector<s4u::ActorPtr> all_actors =
- engine->get_filtered_actors([](s4u::ActorPtr actor) { return not actor->is_daemon(); });
-
- for (auto const& actor : all_actors) {
- new_mapping.assign(actor, actor->get_host());
- }
- // Sort the actors, from highest to lowest load; we then just iterate over these actors
- std::sort(all_actors.begin(), all_actors.end(), [this](s4u::ActorPtr a, s4u::ActorPtr b) {
- return actor_computation[a->get_pid()] > actor_computation[b->get_pid()];
- });
-
- // Sort the hosts. Use a heap datastructure, because we have to reorder
- // after a host got another actor assigned (or moved from).
- // We can't use std::priorityQueue here because we modify *two* elements: The top element, which
- // we can access and which has the lowest load, gets a new actor assigned.
- // However, the host losing that actor must be updated as well.
- // std::priorityQueue is immutable and hence doesn't work for us.
- //
- // This heap contains the least loaded host at the top
- boost::heap::fibonacci_heap<s4u::Host*, boost::heap::compare<compare_hosts>> usable_hosts;
- for (auto& host : available_hosts) {
- std::vector<s4u::ActorPtr> actors = host->get_all_actors();
- heap_handle update_handle = usable_hosts.push(host); // Required to update elements in the heap
- additional_load[host] = {update_handle, 0}; // Save the handle for later
- const double total_flops_computed = sg_host_get_computed_flops(host);
- for (auto const& actor : actors) {
- additional_load[host].load += actor_computation[actor->get_pid()] / total_flops_computed; // Normalize load - this allows comparison
- // even between hosts with different frequencies
- XBT_DEBUG("Actor %li -> %f", actor->get_pid(), actor_computation[actor->get_pid()]);
- }
- usable_hosts.increase(update_handle);
- XBT_DEBUG("Host %s initialized to %f", host->get_cname(), additional_load[host].load);
- }
-
- // Implementation of the Greedy algorithm
- for (auto const& actor : all_actors) {
- s4u::Host* target_host = usable_hosts.top(); // This is the host with the lowest load
-
- s4u::Host* cur_mapped_host = new_mapping.get_host(actor);
- if (target_host != cur_mapped_host
- && additional_load[target_host].load + actor_computation[actor->get_pid()] < additional_load[cur_mapped_host].load
- && new_mapping.count_actors(cur_mapped_host) > 1) {
- usable_hosts.pop();
- XBT_DEBUG("Assigning %li from %s to %s -- actor_load: %f -- host_load: %f", actor->get_pid(), actor->get_host()->get_cname(), target_host->get_cname(), actor_computation[actor->get_pid()], additional_load[target_host].load);
- additional_load[cur_mapped_host].load = std::max<double>(0.0, additional_load[cur_mapped_host].load - actor_computation[actor->get_pid()]); // No negative loads, please!
- usable_hosts.update(additional_load[cur_mapped_host].update_handle, cur_mapped_host);
- additional_load[target_host].load += actor_computation[actor->get_pid()];
-
- new_mapping.assign(actor, target_host);
-
- XBT_DEBUG("Assigning actor %li to host %s", actor->get_pid(), target_host->get_cname());
-
- XBT_DEBUG("host_load: %f after the assignment", additional_load[target_host].load);
- additional_load[target_host].update_handle = usable_hosts.push(target_host); // Save update handle for later
- }
- }
-
- while (!usable_hosts.empty()) {
- s4u::Host* host = usable_hosts.top();
- usable_hosts.pop();
-
- sg_host_load_reset(host); // Reset host load for next iterations
-
- if (XBT_LOG_ISENABLED(plugin_load_balancer, e_xbt_log_priority_t::xbt_log_priority_debug)) {
- /* Debug messages that allow us to verify the load for each host */
- XBT_DEBUG("Host: %s, load total: %f", host->get_cname(), additional_load[host].load);
- double load_verif = 0.0;
- new_mapping.for_each_actor(host, [this, &load_verif](s4u::ActorPtr actor) {
- load_verif += actor_computation[actor->get_pid()];
- XBT_DEBUG(" %li (load: %f)", actor->get_pid(), actor_computation[actor->get_pid()]);
- });
- XBT_DEBUG("Host load verification: %f", load_verif);
- }
- }
- for (auto& elem : actor_computation) { // Reset actor load
- elem.second = 0;
- }
-}
-
-s4u::Host* LoadBalancer::get_mapping(s4u::ActorPtr actor)
-{
- return new_mapping.get_host(actor);
-}
-
-void LoadBalancer::record_actor_computation(s4u::Actor const& actor, double load)
-{
- actor_computation[actor.get_pid()] += load;
-}
-} // namespace loadbalancer
-} // namespace plugin
-} // namespace simgrid
+++ /dev/null
-/* Copyright (c) 2006-2020. The SimGrid Team. All rights reserved. */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-
-#ifndef HAVE_SG_PLUGIN_LB
-#define HAVE_SG_PLUGIN_LB
-
-#include <simgrid/s4u.hpp>
-
-namespace simgrid {
-namespace plugin {
-namespace loadbalancer {
-
-class XBT_PRIVATE Mapping {
-public:
- /** Each host can have an arbitrary number of actors -> multimap **/
- using host_to_actors_map_t = std::unordered_multimap<s4u::Host*, s4u::ActorPtr>;
- host_to_actors_map_t host_to_actors;
-
- /** Each actor gets assigned to exactly one host -> map **/
- std::map<s4u::ActorPtr, s4u::Host*> actor_to_host;
-
- void assign(s4u::ActorPtr actor, s4u::Host* host)
- {
- /* Remove "actor" from its old host -> get all elements that have the current host as key **/
- auto range = host_to_actors.equal_range(/* current host */actor_to_host[actor]);
- for (auto it = range.first; it != range.second; it++) {
- if (it->second == actor) {
- host_to_actors.erase(it); // unassign this actor
- break;
- }
- }
-
- actor_to_host[actor] = host;
- host_to_actors.insert({host, actor});
- }
-
- s4u::Host* get_host(s4u::ActorPtr actor) const { return actor_to_host.at(actor); }
-
- unsigned int count_actors(s4u::Host* host) const
- {
- return host_to_actors.count(host); // TODO This is linear in the size of the map. Maybe replace by constant lookup through another map?
- }
-
- void for_each_actor(s4u::Host* host, const std::function<void(s4u::ActorPtr)>& callback)
- {
- auto range = host_to_actors.equal_range(host);
- std::for_each(range.first, range.second,
- [&callback](host_to_actors_map_t::value_type const& x) { callback(x.second); });
- }
-};
-
-class XBT_PRIVATE LoadBalancer
-{
- Mapping new_mapping;
- std::map</*proc id*/int, double> actor_computation;
-
-public:
- void run();
- void assign(s4u::ActorPtr actor, s4u::Host* host);
-
- /** FIXME These are functions used for testing and should be re-written or removed */
- s4u::Host* get_mapping(s4u::ActorPtr);
- void record_actor_computation(s4u::Actor const& actor, double load);
-};
-
-} // namespace loadbalancer
-} // namespace plugin
-} // namespace simgrid
-#endif
+++ /dev/null
-/* Copyright (c) 2018-2020. The SimGrid Team. All rights reserved. */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-
-#include <simgrid/plugins/load_balancer.h>
-#include <simgrid/s4u.hpp>
-#include <simgrid/smpi/replay.hpp>
-#include <smpi/smpi.h>
-#include <src/smpi/include/smpi_comm.hpp>
-#include <src/smpi/include/smpi_actor.hpp>
-#include <src/smpi/plugins/ampi/instr_ampi.hpp>
-#include <src/smpi/plugins/ampi/ampi.hpp>
-#include <xbt/replay.hpp>
-
-#include "src/kernel/activity/ExecImpl.hpp"
-#include "src/kernel/actor/ActorImpl.hpp"
-#include "src/smpi/plugins/load_balancer/load_balancer.hpp" // This is not yet ready to be public
-
-XBT_LOG_NEW_DEFAULT_SUBCATEGORY(plugin_load_balancer, smpi, "Logging specific to the SMPI load balancing plugin");
-
-static simgrid::config::Flag<int> cfg_migration_frequency("smpi/plugin/lb/migration-frequency", {"smpi/plugin/lb/migration_frequency"},
- "After how many calls to the migration function should the migration be actually executed?", 10,
- [](double val){if (val != 10) sg_load_balancer_plugin_init();});
-
-namespace simgrid {
-namespace smpi {
-namespace plugin {
-
-static simgrid::plugin::loadbalancer::LoadBalancer lb;
-
-class MigrateParser : public replay::ActionArgParser {
-public:
- double memory_consumption;
- void parse(xbt::ReplayAction& action, const std::string&) override
- {
- // The only parameter is the amount of memory used by the current process.
- CHECK_ACTION_PARAMS(action, 1, 0);
- memory_consumption = std::stod(action[2]);
- }
-};
-
-/* This function simulates what happens when the original application calls (A)MPI_Migrate. It executes the load
- * balancing heuristics, makes the necessary migrations and updates the task mapping in the load balancer.
- */
-class MigrateAction : public replay::ReplayAction<smpi::plugin::MigrateParser> {
-public:
- explicit MigrateAction() : ReplayAction("Migrate") {}
- void kernel(xbt::ReplayAction&) override
- {
- static std::map<s4u::ActorPtr, int> migration_call_counter;
- static s4u::Barrier smpilb_bar(smpi_get_universe_size());
- s4u::Host* cur_host = s4u::this_actor::get_host();
- s4u::Host* migrate_to_host;
-
- TRACE_migration_call(get_pid(), nullptr);
-
- // We only migrate every "cfg_migration_frequency"-times, not at every call
- migration_call_counter[s4u::Actor::self()]++;
- if ((migration_call_counter[s4u::Actor::self()] % config::get_value<int>(cfg_migration_frequency.get_name())) !=
- 0) {
- return;
- }
-
- // TODO cheinrich: Why do we need this barrier?
- smpilb_bar.wait();
-
- static bool was_executed = false;
- if (not was_executed) {
- was_executed = true;
- XBT_DEBUG("Process %li runs the load balancer", get_pid());
- smpi_bench_begin();
- lb.run();
- smpi_bench_end();
- }
-
- // This barrier is required to ensure that the mapping has been computed and is available
- smpilb_bar.wait();
- was_executed = false; // Must stay behind this barrier so that all processes have passed the if clause
-
- migrate_to_host = lb.get_mapping(simgrid::s4u::Actor::self());
- if (cur_host != migrate_to_host) { // Origin and dest are not the same -> migrate
- std::vector<s4u::Host*> migration_hosts = {cur_host, migrate_to_host};
- std::vector<double> comp_amount = {0, 0};
- std::vector<double> comm_amount = {0, /*must not be 0*/ std::max(get_args().memory_consumption, 1.0), 0, 0};
-
- xbt_os_timer_t timer = smpi_process()->timer();
- xbt_os_threadtimer_start(timer);
- s4u::this_actor::parallel_execute(migration_hosts, comp_amount, comm_amount);
- xbt_os_threadtimer_stop(timer);
- smpi_execute(xbt_os_timer_elapsed(timer));
-
- // Update the process and host mapping in SimGrid.
- XBT_DEBUG("Migrating process %li from %s to %s", get_pid(), cur_host->get_cname(), migrate_to_host->get_cname());
- TRACE_smpi_process_change_host(get_pid(), migrate_to_host);
- s4u::this_actor::set_host(migrate_to_host);
- }
-
- smpilb_bar.wait();
-
- smpi_bench_begin();
- }
-};
-
-/******************************************************************************
- * Code to include the duration of iterations in the trace. *
- ******************************************************************************/
-
-// FIXME Move declaration
-XBT_PRIVATE void action_iteration_in(xbt::ReplayAction& action);
-void action_iteration_in(xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 0, 0)
- TRACE_Iteration_in(s4u::this_actor::get_pid(), nullptr);
- smpi::plugin::ampi::on_iteration_in(*MPI_COMM_WORLD->group()->actor(std::stol(action[0])));
-}
-
-XBT_PRIVATE void action_iteration_out(xbt::ReplayAction& action);
-void action_iteration_out(xbt::ReplayAction& action)
-{
- CHECK_ACTION_PARAMS(action, 0, 0)
- TRACE_Iteration_out(s4u::this_actor::get_pid(), nullptr);
- ampi::on_iteration_out(*MPI_COMM_WORLD->group()->actor(std::stol(action[0])));
-}
-} // namespace plugin
-} // namespace smpi
-} // namespace simgrid
-
-/** @ingroup plugin_loadbalancer
- * @brief Initializes the load balancer plugin
- * @details The load balancer plugin supports several AMPI load balancers that move ranks
- * around, based on their host's load.
- */
-void sg_load_balancer_plugin_init()
-{
- static bool done = false;
- if (!done) {
- done = true;
- simgrid::s4u::Exec::on_completion.connect([](simgrid::s4u::Actor const& actor, simgrid::s4u::Exec const& exec) {
- simgrid::smpi::plugin::lb.record_actor_computation(actor, exec.get_cost());
- });
-
- xbt_replay_action_register(
- "migrate", [](simgrid::xbt::ReplayAction& action) { simgrid::smpi::plugin::MigrateAction().execute(action); });
- xbt_replay_action_register("iteration_in", simgrid::smpi::plugin::action_iteration_in);
- xbt_replay_action_register("iteration_out", simgrid::smpi::plugin::action_iteration_out);
- }
-}
src/smpi/include/smpi_status.hpp
src/smpi/include/smpi_topo.hpp
src/smpi/include/smpi_win.hpp
- src/smpi/plugins/sampi_loadbalancer.cpp
src/smpi/plugins/ampi/ampi.cpp
src/smpi/plugins/ampi/ampi.hpp
src/smpi/plugins/ampi/instr_ampi.cpp
src/smpi/plugins/ampi/instr_ampi.hpp
- src/smpi/plugins/load_balancer/LoadBalancer.cpp
- src/smpi/plugins/load_balancer/load_balancer.hpp
src/surf/network_smpi.cpp
src/surf/network_ib.cpp
src/smpi/bindings/smpi_mpi.cpp
include/simgrid/plugins/file_system.h
include/simgrid/plugins/live_migration.h
include/simgrid/plugins/load.h
- include/simgrid/plugins/load_balancer.h
include/simgrid/smpi/replay.hpp
include/simgrid/instr.h
include/simgrid/mailbox.h