X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/ca2e418072d73461d9c4f1e39e77c9f7380eb3fd..635743ad3b947c1b55642cb708fbf1626fe0b36f:/src/surf/plugins/host_energy.cpp diff --git a/src/surf/plugins/host_energy.cpp b/src/surf/plugins/host_energy.cpp index 08ab4df26f..2b88fb032c 100644 --- a/src/surf/plugins/host_energy.cpp +++ b/src/surf/plugins/host_energy.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2010, 2012-2016. The SimGrid Team. All rights reserved. */ +/* Copyright (c) 2010-2017. The SimGrid Team. All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ @@ -7,50 +7,111 @@ #include "simgrid/simix.hpp" #include "src/plugins/vm/VirtualMachineImpl.hpp" #include "src/surf/cpu_interface.hpp" -#include + +#include "simgrid/s4u/Engine.hpp" + +#include +#include +#include #include +#include -/** @addtogroup SURF_plugin_energy +/** @addtogroup plugin_energy This is the energy plugin, enabling to account not only for computation time, but also for the dissipated energy in the simulated platform. +To activate this plugin, first call sg_host_energy_plugin_init() before your #MSG_init(), +and then use MSG_host_get_consumed_energy() to retrieve the consumption of a given host. + +When the host is on, this energy consumption naturally depends on both the +current CPU load and the host energy profile. According to our measurements, +the consumption is somehow linear in the amount of cores at full speed, +with an abnormality when all the cores are idle. The full details are in +our scientific paper on that topic. -The energy consumption of a CPU depends directly of its current load. Specify that consumption in your platform file as -follows: +As a result, our energy model takes 4 parameters: -\verbatim - + - \b Idle: instantaneous consumption (in Watt) when your host is up and running, but without anything to do. + - \b OneCore: instantaneous consumption (in Watt) when only one core is active, at 100%. + - \b AllCores: instantaneous consumption (in Watt) when all cores of the host are at 100%. + - \b Off: instantaneous consumption (in Watt) when the host is turned off. + +Here is an example of XML declaration: + +\code{.xml} + -\endverbatim +\endcode + +This example gives the following parameters: \b Off is 10 Watts; \b Idle is 100 Watts; \b OneCore is 120 Watts and \b +AllCores is 200 Watts. +This is enough to compute the consumption as a function of the amount of loaded cores: + + + + + + + + +
#Cores loadedConsumptionExplanation
0 100 WattsIdle value
1 120 WattsOneCore value
2 147 Wattslinear extrapolation between OneCore and AllCores
3 173 Wattslinear extrapolation between OneCore and AllCores
4 200 WattsAllCores value
+ +### What if a given core is only at load 50%? + +This is impossible in SimGrid because we recompute everything each time +that the CPU starts or stops doing something. So if a core is at load 50% over +a period, it means that it is at load 100% half of the time and at load 0% the +rest of the time, and our model holds. + +### What if the host has only one core? + +In this case, the parameters \b OneCore and \b AllCores are obviously the same. +Actually, SimGrid expect an energetic profile formated as 'Idle:Running' for mono-cores hosts. +If you insist on passing 3 parameters in this case, then you must have the same value for \b OneCore and \b AllCores. + +\code{.xml} + + + + +\endcode -The first property means that when your host is up and running, but without anything to do, it will dissipate 100 Watts. -If only one care is active, it will dissipate 120 Watts. If it's fully loaded, it will dissipate 200 Watts. If its load is at 50%, then it will dissipate 153.33 Watts. -The second property means that when your host is turned off, it will dissipate only 10 Watts (please note that these -values are arbitrary). +### How does DVFS interact with the host energy model? -If your CPU is using pstates, then you can provide one consumption interval per pstate. +If your host has several DVFS levels (several pstates), then you should +give the energetic profile of each pstate level: -\verbatim - +\code{.xml} + -\endverbatim +\endcode -That host has 3 levels of performance with the following performance: 100 Mflop/s, 50 Mflop/s or 20 Mflop/s. -It starts at pstate 0 (ie, at 100 Mflop/s). In this case, you have to specify one interval per pstate in the -watt_per_state property. -In this example, the idle consumption is 95 Watts, 93 Watts and 90 Watts in each pstate while the CPU burn consumption -are at 200 Watts, 170 Watts, and 150 Watts respectively. If only one core is active, this machine consumes 120 / 115 / 110 watts. +This encodes the following values + + + + + +
pstatePerformanceIdleOneCoreAllCores
0100 Mflop/s95 Watts120 Watts200 Watts
150 Mflop/s93 Watts115 Watts170 Watts
220 Mflop/s90 Watts110 Watts150 Watts
To change the pstate of a given CPU, use the following functions: #MSG_host_get_nb_pstates(), simgrid#s4u#Host#setPstate(), #MSG_host_get_power_peak_at(). -To simulate the energy-related elements, first call the simgrid#energy#sg_energy_plugin_init() before your #MSG_init(), -and then use the following function to retrieve the consumption of a given host: MSG_host_get_consumed_energy(). +### How accurate are these models? + +This model cannot be more accurate than your instantiation: +with the default values, your result will not be accurate at all. You can still get +accurate energy prediction, provided that you carefully instantiate the model. +The first step is to ensure that your timing prediction match perfectly. But this +is only the first step of the path, and you really want to read +this paper to see all what you need +to do before you can get accurate energy predictions. + */ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(surf_energy, surf, "Logging specific to the SURF energy plugin"); @@ -85,6 +146,13 @@ private: simgrid::s4u::Host* host = nullptr; std::vector power_range_watts_list; /*< List of (min_power,max_power) pairs corresponding to each cpu pstate */ + + /* We need to keep track of what pstate has been used, as we will sometimes + * be notified only *after* a pstate has been used (but we need to update the energy consumption + * with the old pstate!) + */ + int pstate = 0; + public: double watts_off = 0.0; /*< Consumption when the machine is turned off (shutdown) */ double total_energy = 0.0; /*< Total energy consumed by the host */ @@ -98,44 +166,59 @@ void HostEnergy::update() { double start_time = this->last_updated; double finish_time = surf_get_clock(); - double cpu_load; - if (host->pimpl_cpu->getPstateSpeedCurrent() <= 0) - // Some users declare a pstate of speed 0 flops (e.g., to model boot time). - // We consider that the machine is then fully loaded. That's arbitrary but it avoids a NaN - cpu_load = 1; - else - cpu_load = lmm_constraint_get_usage(host->pimpl_cpu->constraint()) / host->pimpl_cpu->getPstateSpeedCurrent(); - - /** Divide by the number of cores here **/ - cpu_load /= host->pimpl_cpu->coreCount(); - - if (cpu_load > 1) // A machine with a load > 1 consumes as much as a fully loaded machine, not more - cpu_load = 1; - - /* The problem with this model is that the load is always 0 or 1, never something less. - * Another possibility could be to model the total energy as - * - * X/(X+Y)*W_idle + Y/(X+Y)*W_burn - * - * where X is the amount of idling cores, and Y the amount of computing cores. - */ + double current_speed = host->speed(); + + if (start_time < finish_time) { + double cpu_load; + // We may have start == finish if the past consumption was updated since the simcall was started + // for example if 2 actors requested to update the same host's consumption in a given scheduling round. + // + // Even in this case, we need to save the pstate for the next call (after this big if), + // which may have changed since that recent update. + + if (current_speed <= 0) + // Some users declare a pstate of speed 0 flops (e.g., to model boot time). + // We consider that the machine is then fully loaded. That's arbitrary but it avoids a NaN + cpu_load = 1; + else + cpu_load = lmm_constraint_get_usage(host->pimpl_cpu->constraint()) / current_speed; + + /** Divide by the number of cores here **/ + cpu_load /= host->pimpl_cpu->coreCount(); + + if (cpu_load > 1) // A machine with a load > 1 consumes as much as a fully loaded machine, not more + cpu_load = 1; + + /* The problem with this model is that the load is always 0 or 1, never something less. + * Another possibility could be to model the total energy as + * + * X/(X+Y)*W_idle + Y/(X+Y)*W_burn + * + * where X is the amount of idling cores, and Y the amount of computing cores. + */ + + double previous_energy = this->total_energy; + + double instantaneous_consumption; + if (this->pstate == -1) // The host was off at the beginning of this time interval + instantaneous_consumption = this->watts_off; + else + instantaneous_consumption = this->getCurrentWattsValue(cpu_load); - double previous_energy = this->total_energy; + double energy_this_step = instantaneous_consumption * (finish_time - start_time); - double instantaneous_consumption; - if (host->isOff()) - instantaneous_consumption = this->watts_off; - else - instantaneous_consumption = this->getCurrentWattsValue(cpu_load); + // TODO Trace: Trace energy_this_step from start_time to finish_time in host->name() - double energy_this_step = instantaneous_consumption * (finish_time - start_time); + this->total_energy = previous_energy + energy_this_step; + this->last_updated = finish_time; - this->total_energy = previous_energy + energy_this_step; - this->last_updated = finish_time; + XBT_DEBUG("[update_energy of %s] period=[%.2f-%.2f]; current power peak=%.0E flop/s; consumption change: %.2f J -> " + "%.2f J", + host->cname(), start_time, finish_time, host->pimpl_cpu->speed_.peak, previous_energy, energy_this_step); + } - XBT_DEBUG( - "[update_energy of %s] period=[%.2f-%.2f]; current power peak=%.0E flop/s; consumption change: %.2f J -> %.2f J", - host->cname(), start_time, finish_time, host->pimpl_cpu->speed_.peak, previous_energy, energy_this_step); + /* Save data for the upcoming time interval: whether it's on/off and the pstate if it's on */ + this->pstate = host->isOn() ? host->pstate() : -1; } HostEnergy::HostEnergy(simgrid::s4u::Host* ptr) : host(ptr), last_updated(surf_get_clock()) @@ -155,24 +238,24 @@ HostEnergy::~HostEnergy() = default; double HostEnergy::getWattMinAt(int pstate) { - xbt_assert(!power_range_watts_list.empty(), "No power range properties specified for host %s", host->cname()); + xbt_assert(not power_range_watts_list.empty(), "No power range properties specified for host %s", host->cname()); return power_range_watts_list[pstate].min; } double HostEnergy::getWattMaxAt(int pstate) { - xbt_assert(!power_range_watts_list.empty(), "No power range properties specified for host %s", host->cname()); + xbt_assert(not power_range_watts_list.empty(), "No power range properties specified for host %s", host->cname()); return power_range_watts_list[pstate].max; } /** @brief Computes the power consumed by the host according to the current pstate and processor load */ double HostEnergy::getCurrentWattsValue(double cpu_load) { - xbt_assert(!power_range_watts_list.empty(), "No power range properties specified for host %s", host->cname()); + xbt_assert(not power_range_watts_list.empty(), "No power range properties specified for host %s", host->cname()); /* min_power corresponds to the power consumed when only one core is active */ /* max_power is the power consumed at 100% cpu load */ - auto range = power_range_watts_list.at(host->pstate()); + auto range = power_range_watts_list.at(this->pstate); double current_power = 0; double min_power = 0; double max_power = 0; @@ -225,32 +308,52 @@ void HostEnergy::initWattsRangeList() if (all_power_values_str == nullptr) return; - xbt_dynar_t all_power_values = xbt_str_split(all_power_values_str, ","); - int pstate_nb = xbt_dynar_length(all_power_values); + std::vector all_power_values; + boost::split(all_power_values, all_power_values_str, boost::is_any_of(",")); + XBT_DEBUG("%s: profile: %s, cores: %d", host->cname(), all_power_values_str, host->coreCount()); - for (int i = 0; i < pstate_nb; i++) { + int i = 0; + for (auto current_power_values_str : all_power_values) { /* retrieve the power values associated with the current pstate */ - xbt_dynar_t current_power_values = xbt_str_split(xbt_dynar_get_as(all_power_values, i, char*), ":"); - xbt_assert(xbt_dynar_length(current_power_values) == 3, - "Power properties incorrectly defined - could not retrieve idle, min and max power values for host %s", - host->cname()); + std::vector current_power_values; + boost::split(current_power_values, current_power_values_str, boost::is_any_of(":")); + if (host->coreCount() == 1) { + xbt_assert(current_power_values.size() == 2 || current_power_values.size() == 3, + "Power properties incorrectly defined for host %s." + "It should be 'Idle:FullSpeed' power values because you have one core only.", + host->cname()); + if (current_power_values.size() == 2) { + // In this case, 1core == AllCores + current_power_values.push_back(current_power_values.at(1)); + } else { // size == 3 + xbt_assert((current_power_values.at(1)) == (current_power_values.at(2)), + "Power properties incorrectly defined for host %s.\n" + "The energy profile of mono-cores should be formated as 'Idle:FullSpeed' only.\n" + "If you go for a 'Idle:OneCore:AllCores' power profile on mono-cores, then OneCore and AllCores " + "must be equal.", + host->cname()); + } + } else { + xbt_assert(current_power_values.size() == 3, + "Power properties incorrectly defined for host %s." + "It should be 'Idle:OneCore:AllCores' power values because you have more than one core.", + host->cname()); + } /* min_power corresponds to the idle power (cpu load = 0) */ /* max_power is the power consumed at 100% cpu load */ char* msg_idle = bprintf("Invalid idle value for pstate %d on host %s: %%s", i, host->cname()); - char* msg_min = bprintf("Invalid min value for pstate %d on host %s: %%s", i, host->cname()); - char* msg_max = bprintf("Invalid max value for pstate %d on host %s: %%s", i, host->cname()); - PowerRange range(xbt_str_parse_double(xbt_dynar_get_as(current_power_values, 0, char*), msg_idle), - xbt_str_parse_double(xbt_dynar_get_as(current_power_values, 1, char*), msg_min), - xbt_str_parse_double(xbt_dynar_get_as(current_power_values, 2, char*), msg_max)); + char* msg_min = bprintf("Invalid OneCore value for pstate %d on host %s: %%s", i, host->cname()); + char* msg_max = bprintf("Invalid AllCores value for pstate %d on host %s: %%s", i, host->cname()); + PowerRange range(xbt_str_parse_double((current_power_values.at(0)).c_str(), msg_idle), + xbt_str_parse_double((current_power_values.at(1)).c_str(), msg_min), + xbt_str_parse_double((current_power_values.at(2)).c_str(), msg_max)); power_range_watts_list.push_back(range); xbt_free(msg_idle); xbt_free(msg_min); xbt_free(msg_max); - - xbt_dynar_free(¤t_power_values); + i++; } - xbt_dynar_free(&all_power_values); } } } @@ -262,6 +365,9 @@ static void onCreation(simgrid::s4u::Host& host) { if (dynamic_cast(&host)) // Ignore virtual machines return; + + //TODO Trace: set to zero the energy variable associated to host->name() + host.extension_set(new HostEnergy(&host)); } @@ -269,24 +375,24 @@ static void onActionStateChange(simgrid::surf::CpuAction* action, simgrid::surf: { for (simgrid::surf::Cpu* cpu : action->cpus()) { simgrid::s4u::Host* host = cpu->getHost(); - if (host == nullptr) - continue; + if (host != nullptr) { - // If it's a VM, take the corresponding PM - simgrid::s4u::VirtualMachine* vm = dynamic_cast(host); - if (vm) // If it's a VM, take the corresponding PM - host = vm->pimpl_vm_->getPm(); + // If it's a VM, take the corresponding PM + simgrid::s4u::VirtualMachine* vm = dynamic_cast(host); + if (vm) // If it's a VM, take the corresponding PM + host = vm->pimpl_vm_->getPm(); - // Get the host_energy extension for the relevant host - HostEnergy* host_energy = host->extension(); + // Get the host_energy extension for the relevant host + HostEnergy* host_energy = host->extension(); - if (host_energy->last_updated < surf_get_clock()) - host_energy->update(); + if (host_energy->last_updated < surf_get_clock()) + host_energy->update(); + } } } -/* This callback is fired either when the host change its state (on/off) or its speed - * (because the user changed the pstate, or because of external trace events) */ +/* This callback is fired either when the host changes its state (on/off) ("onStateChange") or its speed + * (because the user changed the pstate, or because of external trace events) ("onSpeedChange") */ static void onHostChange(simgrid::s4u::Host& host) { if (dynamic_cast(&host)) // Ignore virtual machines @@ -294,8 +400,7 @@ static void onHostChange(simgrid::s4u::Host& host) HostEnergy* host_energy = host.extension(); - if (host_energy->last_updated < surf_get_clock()) - host_energy->update(); + host_energy->update(); } static void onHostDestruction(simgrid::s4u::Host& host) @@ -305,31 +410,34 @@ static void onHostDestruction(simgrid::s4u::Host& host) HostEnergy* host_energy = host.extension(); host_energy->update(); - XBT_INFO("Total energy of host %s: %f Joules", host.cname(), host_energy->getConsumedEnergy()); + XBT_INFO("Energy consumption of host %s: %f Joules", host.cname(), host_energy->getConsumedEnergy()); } static void onSimulationEnd() { sg_host_t* host_list = sg_host_list(); int host_count = sg_host_count(); - double total_energy = 0.0; // Total energy consumption (whole plattform) + double total_energy = 0.0; // Total energy consumption (whole platform) double used_hosts_energy = 0.0; // Energy consumed by hosts that computed something for (int i = 0; i < host_count; i++) { - bool host_was_used = (host_list[i]->extension()->last_updated != 0); - double energy = 0.0; - energy = host_list[i]->extension()->getConsumedEnergy(); - total_energy += energy; - if (host_was_used) - used_hosts_energy += energy; + if (dynamic_cast(host_list[i]) == nullptr) { // Ignore virtual machines + + bool host_was_used = (host_list[i]->extension()->last_updated != 0); + double energy = host_list[i]->extension()->getConsumedEnergy(); + total_energy += energy; + if (host_was_used) + used_hosts_energy += energy; + } } - XBT_INFO("Summed energy consumption: %f Joules; used hosts consumed: %f Joules; unused (idle) hosts consumed: %f", + XBT_INFO("Total energy consumption: %f Joules (used hosts: %f Joules; unused/idle hosts: %f)", total_energy, used_hosts_energy, total_energy - used_hosts_energy); + xbt_free(host_list); } /* **************************** Public interface *************************** */ SG_BEGIN_DECL() -/** \ingroup SURF_plugin_energy +/** \ingroup plugin_energy * \brief Enable host energy plugin * \details Enable energy plugin to get joules consumption of each cpu. Call this function before #MSG_init(). */ @@ -348,9 +456,29 @@ void sg_host_energy_plugin_init() simgrid::surf::CpuAction::onStateChange.connect(&onActionStateChange); } -/** @brief Returns the total energy consumed by the host so far (in Joules) +/** @ingroup plugin_energy + * @brief updates the consumption of all hosts + * + * After this call, sg_host_get_consumed_energy() will not interrupt your process + * (until after the next clock update). + */ +void sg_host_energy_update_all() +{ + simgrid::simix::kernelImmediate([]() { + std::vector list; + simgrid::s4u::Engine::instance()->hostList(&list); + for (auto host : list) + if (dynamic_cast(host) == nullptr) // Ignore virtual machines + host->extension()->update(); + }); +} + +/** @ingroup plugin_energy + * @brief Returns the total energy consumed by the host so far (in Joules) * - * See also @ref SURF_plugin_energy. + * Please note that since the consumption is lazily updated, it may require a simcall to update it. + * The result is that the actor requesting this value will be interrupted, + * the value will be updated in kernel mode before returning the control to the requesting actor. */ double sg_host_get_consumed_energy(sg_host_t host) { @@ -359,14 +487,18 @@ double sg_host_get_consumed_energy(sg_host_t host) return host->extension()->getConsumedEnergy(); } -/** @brief Get the amount of watt dissipated at the given pstate when the host is idling */ +/** @ingroup plugin_energy + * @brief Get the amount of watt dissipated at the given pstate when the host is idling + */ double sg_host_get_wattmin_at(sg_host_t host, int pstate) { xbt_assert(HostEnergy::EXTENSION_ID.valid(), "The Energy plugin is not active. Please call sg_energy_plugin_init() during initialization."); return host->extension()->getWattMinAt(pstate); } -/** @brief Returns the amount of watt dissipated at the given pstate when the host burns CPU at 100% */ +/** @ingroup plugin_energy + * @brief Returns the amount of watt dissipated at the given pstate when the host burns CPU at 100% + */ double sg_host_get_wattmax_at(sg_host_t host, int pstate) { xbt_assert(HostEnergy::EXTENSION_ID.valid(), @@ -374,4 +506,15 @@ double sg_host_get_wattmax_at(sg_host_t host, int pstate) return host->extension()->getWattMaxAt(pstate); } +/** @ingroup plugin_energy + * @brief Returns the current consumption of the host + */ +double sg_host_get_current_consumption(sg_host_t host) +{ + xbt_assert(HostEnergy::EXTENSION_ID.valid(), + "The Energy plugin is not active. Please call sg_energy_plugin_init() during initialization."); + double cpu_load = lmm_constraint_get_usage(host->pimpl_cpu->constraint()) / host->speed(); + return host->extension()->getCurrentWattsValue(cpu_load); +} + SG_END_DECL()