From: Christian Heinrich Date: Tue, 17 Jul 2018 11:39:27 +0000 (+0200) Subject: [SMPI/LB] Implement GreedyLB with a Fibonacci Heap X-Git-Tag: v3_21~328 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/9b7e1582ff236038ce10b41a545060d11fa66655 [SMPI/LB] Implement GreedyLB with a Fibonacci Heap --- diff --git a/src/smpi/plugins/load_balancer/LoadBalancer.cpp b/src/smpi/plugins/load_balancer/LoadBalancer.cpp index 18c28de9c7..055d7e2cd6 100644 --- a/src/smpi/plugins/load_balancer/LoadBalancer.cpp +++ b/src/smpi/plugins/load_balancer/LoadBalancer.cpp @@ -4,24 +4,43 @@ * under the terms of the license (GNU LGPL) which comes with this package. */ #include +#include +#include #include + +#include #include -#include #include -XBT_LOG_NEW_DEFAULT_SUBCATEGORY(plugin_load_balancer_impl, smpi, "Logging specific to the SMPI load balancing plugin"); +XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(plugin_load_balancer); namespace simgrid { namespace plugin { namespace loadbalancer { -static std::map additional_load; +struct XBT_PRIVATE compare_hosts { + bool operator()(const simgrid::s4u::Host* a, const simgrid::s4u::Host* b) const; +}; + +typedef boost::heap::fibonacci_heap>::handle_type heap_handle; -static bool compare_by_avg_load(simgrid::s4u::Host* a, simgrid::s4u::Host* b) +/** + * Structure that imitates a std::pair, but it allows us + * to use meaningful names instead of .first and .second + */ +struct XBT_PRIVATE pair_handle_load { - return sg_host_get_avg_load(a) + additional_load[a] > sg_host_get_avg_load(b) + additional_load[b]; + heap_handle update_handle; + double load; +}; + +static std::map additional_load; + +bool compare_hosts::operator()(const simgrid::s4u::Host* a, const simgrid::s4u::Host* b) const { + return /*sg_host_get_avg_load(a) +*/ additional_load[a].load > /*sg_host_get_avg_load(b) +*/ additional_load[b].load; } + LoadBalancer::LoadBalancer() { } @@ -37,58 +56,89 @@ void LoadBalancer::run() return not host->is_off(); }); xbt_assert(available_hosts.size() > 0, "No hosts available; are they all switched off?"); - for (auto& host : available_hosts) { - additional_load[host] = 0; - } // TODO: Account for daemon background load (-> use especially the availability file) std::vector all_actors = engine->get_filtered_actors([](simgrid::s4u::ActorPtr actor) { return not actor->is_daemon(); }); + for (auto& actor : all_actors) { - new_mapping[actor] = actor->get_host(); + new_mapping.assign(actor, actor->get_host()); } // Sort the actors, from highest to lowest load; we then just iterate over these actors std::sort(all_actors.begin(), all_actors.end(), [this](simgrid::s4u::ActorPtr a, simgrid::s4u::ActorPtr b) { - return actor_computation[a->get_pid()] < actor_computation[b->get_pid()]; + return actor_computation[a->get_pid()] > actor_computation[b->get_pid()]; }); - for (auto& actor : all_actors) { - XBT_INFO("Order: %li", actor->get_pid()); + + // Sort the hosts. Use a heap datastructure, because we have to reorder + // after a host got another actor assigned (or moved from). + // We can't use std::priorityQueue here because we modify *two* elements: The top element, which + // we can access and which has the lowest load, gets a new actor assigned. + // However, the host loosing that actor must be updated as well. + // std::priorityQueue is immutable and hence doesn't work for us. + // + // This heap contains the least loaded host at the top + boost::heap::fibonacci_heap> usable_hosts; + for (auto& host : available_hosts) { + std::vector actors = host->get_all_actors(); + heap_handle update_handle = usable_hosts.push(host); // Required to update elements in the heap + additional_load[host] = {update_handle, 0}; // Save the handle for later + for (auto& actor : actors) { + additional_load[host].load += actor_computation[actor->get_pid()]; + XBT_DEBUG("Actor %li -> %f", actor->get_pid(), actor_computation[actor->get_pid()]); + } + XBT_DEBUG("Host %s initialized to %f", host->get_cname(), additional_load[host].load); } - // Sort the hosts. Use a heap datastructure, because we re-insert a host - // that got an actor assigned in a possibly different position - // This is equivalent to a std::greater implementation that uses the load to compare the two objects. - // --> The least loaded host will be in the first position - std::priority_queue, - /*comparison function*/ std::function> - usable_hosts(compare_by_avg_load, std::move(available_hosts)); + // Implementation of the Greedy algorithm for (auto& actor : all_actors) { simgrid::s4u::Host* target_host = usable_hosts.top(); // This is the host with the lowest load - if (target_host != actor->get_host() && actor->get_host()->get_actor_count() > 1) { + simgrid::s4u::Host* cur_mapped_host = new_mapping.get_host(actor); + if (target_host != cur_mapped_host + && additional_load[target_host].load + actor_computation[actor->get_pid()] < additional_load[cur_mapped_host].load + && new_mapping.count_actors(cur_mapped_host) > 1) { usable_hosts.pop(); + XBT_DEBUG("Assigning %li from %s to %s -- actor_load: %f -- host_load: %f", actor->get_pid(), actor->get_host()->get_cname(), target_host->get_cname(), actor_computation[actor->get_pid()], additional_load[target_host].load); + additional_load[cur_mapped_host].load = std::max(0.0, additional_load[cur_mapped_host].load - actor_computation[actor->get_pid()]); // No negative loads, please! + usable_hosts.update(additional_load[cur_mapped_host].update_handle, cur_mapped_host); + additional_load[target_host].load += actor_computation[actor->get_pid()]; + + new_mapping.assign(actor, target_host); - assign(actor, target_host); // This updates also the load - usable_hosts.push(target_host); + XBT_DEBUG("Assigning actor %li to host %s", actor->get_pid(), target_host->get_cname()); + + XBT_DEBUG("host_load: %f after the assignment", additional_load[target_host].load); + additional_load[target_host].update_handle = usable_hosts.push(target_host); // Save update handle for later } } -} -void LoadBalancer::assign(simgrid::s4u::ActorPtr actor, simgrid::s4u::Host* host) -{ - // If an actor gets re-assigned twice, we don't want to subtract - // the load from the same host twice so we have to use the old value of new_mapping here - additional_load[new_mapping[actor]] -= 1; // actor->load(); - additional_load[host] += 1; // actor->load(); - new_mapping[actor] = host; - - XBT_INFO("Assigning actor %li to host %s", actor->get_pid(), host->get_cname()); + while (!usable_hosts.empty()) { + simgrid::s4u::Host* host = usable_hosts.top(); + usable_hosts.pop(); + + sg_host_load_reset(host); // Reset host load for next iterations + + if (XBT_LOG_ISENABLED(plugin_load_balancer, e_xbt_log_priority_t::xbt_log_priority_debug)) { + /* Debug messages that allow us to verify the load for each host */ + XBT_DEBUG("Host: %s, load total: %f", host->get_cname(), additional_load[host].load); + double load_verif = 0.0; + new_mapping.for_each_actor(host, + [this, &load_verif](simgrid::s4u::ActorPtr actor) { + load_verif += actor_computation[actor->get_pid()]; + XBT_DEBUG(" %li (load: %f)", actor->get_pid(), actor_computation[actor->get_pid()]); + }); + XBT_DEBUG("Host load verification: %f", load_verif); + } + } + for (auto& elem : actor_computation) { // Reset actor load + elem.second = 0; + } } simgrid::s4u::Host* LoadBalancer::get_mapping() { - return new_mapping[simgrid::s4u::Actor::self()]; + return new_mapping.get_host(simgrid::s4u::Actor::self()); } void LoadBalancer::record_actor_computation(simgrid::s4u::ActorPtr actor, double load)