X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/ff3923e71ef52ba57fcc6fd8d26ecfa4c22e3638..167d54120fca91495ebe52ca6dec8a830a7ab023:/src/smpi/internals/smpi_utils.cpp diff --git a/src/smpi/internals/smpi_utils.cpp b/src/smpi/internals/smpi_utils.cpp index baf73731f5..6660c506fb 100644 --- a/src/smpi/internals/smpi_utils.cpp +++ b/src/smpi/internals/smpi_utils.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-2021. The SimGrid Team. +/* Copyright (c) 2016-2022. The SimGrid Team. * All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it @@ -6,25 +6,23 @@ #include "smpi_utils.hpp" -#include "src/surf/xml/platf_private.hpp" +#include "private.hpp" +#include "smpi_config.hpp" +#include "src/surf/xml/platf.hpp" +#include "xbt/file.hpp" #include "xbt/log.h" +#include "xbt/ex.h" #include "xbt/parse_units.hpp" #include "xbt/sysdep.h" -#include "xbt/file.hpp" -#include -#include "smpi_config.hpp" -#include "src/simix/smx_private.hpp" #include -#include "private.hpp" +#include XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_utils, smpi, "Logging specific to SMPI (utils)"); extern std::string surf_parsed_filename; extern int surf_parse_lineno; -namespace simgrid { -namespace smpi { -namespace utils { +namespace simgrid::smpi::utils { double total_benched_time=0; unsigned long total_malloc_size=0; @@ -49,6 +47,50 @@ current_buffer_metadata_t current_buffer2; std::unordered_map allocs; +std::unordered_map> collective_calls; + +void FactorSet::parse(const std::string& values) +{ + factors_ = parse_factor(values); + initialized_ = true; +} + +FactorSet::FactorSet(const std::string& name, double default_value, + std::function const& lambda) + : name_(name), default_value_(default_value), lambda_(lambda) +{ +} + +double FactorSet::operator()() +{ + return lambda_(factors_.front(), 0); +} + +double FactorSet::operator()(double size) +{ + if (factors_.empty()) + return default_value_; + + for (long unsigned i = 0; i < factors_.size(); i++) { + auto const& fact = factors_[i]; + + if (size <= fact.factor) { // Too large already, use the previous value + + if (i == 0) { // Before the first boundary: use the default value + XBT_DEBUG("%s: %f <= %zu return default %f", name_.c_str(), size, fact.factor, default_value_); + return default_value_; + } + double val = lambda_(factors_[i - 1], size); + XBT_DEBUG("%s: %f <= %zu return %f", name_.c_str(), size, fact.factor, val); + return val; + } + } + double val = lambda_(factors_.back(), size); + + XBT_DEBUG("%s: %f > %zu return %f", name_.c_str(), size, factors_.back().factor, val); + return val; +} + std::vector parse_factor(const std::string& smpi_coef_string) { std::vector smpi_factor; @@ -66,14 +108,14 @@ std::vector parse_factor(const std::string& smpi_coef_string) * E --> F * G --> H */ - for (Tokenizer::iterator token_iter = tokens.begin(); token_iter != tokens.end(); ++token_iter) { - XBT_DEBUG("token : %s", token_iter->c_str()); + for (auto token_iter = tokens.begin(); token_iter != tokens.end(); ++token_iter) { + XBT_DEBUG("token: %s", token_iter->c_str()); Tokenizer factor_values(*token_iter, factor_separator); s_smpi_factor_t fact; xbt_assert(factor_values.begin() != factor_values.end(), "Malformed radical for smpi factor: '%s'", smpi_coef_string.c_str()); unsigned int iteration = 0; - for (Tokenizer::iterator factor_iter = factor_values.begin(); factor_iter != factor_values.end(); ++factor_iter) { + for (auto factor_iter = factor_values.begin(); factor_iter != factor_values.end(); ++factor_iter) { iteration++; if (factor_iter == factor_values.begin()) { /* first element */ @@ -85,8 +127,7 @@ std::vector parse_factor(const std::string& smpi_coef_string) } } else { try { - fact.values.push_back( - xbt_parse_get_time(surf_parsed_filename, surf_parse_lineno, (*factor_iter).c_str(), "smpi factor", "")); + fact.values.push_back(xbt_parse_get_time(surf_parsed_filename, surf_parse_lineno, *factor_iter, "")); } catch (const std::invalid_argument&) { throw std::invalid_argument(std::string("Invalid factor value ") + std::to_string(iteration) + " in chunk " + std::to_string(smpi_factor.size() + 1) + ": " + *factor_iter); @@ -95,13 +136,13 @@ std::vector parse_factor(const std::string& smpi_coef_string) } smpi_factor.push_back(fact); - XBT_DEBUG("smpi_factor:\t%zu : %zu values, first: %f", fact.factor, smpi_factor.size(), fact.values[0]); + XBT_DEBUG("smpi_factor:\t%zu: %zu values, first: %f", fact.factor, smpi_factor.size(), fact.values[0]); } std::sort(smpi_factor.begin(), smpi_factor.end(), [](const s_smpi_factor_t &pa, const s_smpi_factor_t &pb) { return (pa.factor < pb.factor); }); for (auto const& fact : smpi_factor) { - XBT_DEBUG("smpi_factor:\t%zu : %zu values, first: %f", fact.factor, smpi_factor.size() ,fact.values[0]); + XBT_DEBUG("smpi_factor:\t%zu: %zu values, first: %f", fact.factor, smpi_factor.size(), fact.values[0]); } smpi_factor.shrink_to_fit(); @@ -112,7 +153,7 @@ void add_benched_time(double time){ total_benched_time += time; } -void account_malloc_size(size_t size, const std::string& file, int line, void* ptr) +void account_malloc_size(size_t size, std::string_view file, int line, const void* ptr) { if (smpi_cfg_display_alloc()) { alloc_metadata_t metadata; @@ -120,7 +161,7 @@ void account_malloc_size(size_t size, const std::string& file, int line, void* p metadata.line = line; metadata.numcall = 1; metadata.file = file; - allocs.insert(std::make_pair(ptr, metadata)); + allocs.try_emplace(ptr, metadata); total_malloc_size += size; if(size > max_malloc.size){ @@ -144,115 +185,119 @@ void account_shared_size(size_t size){ void print_time_analysis(double global_time){ if (simgrid::config::get_value("smpi/display-timing")) { XBT_INFO("Simulated time: %g seconds. \n\n" - "The simulation took %g seconds (after parsing and platform setup)\n" - "%g seconds were actual computation of the application", - SIMIX_get_clock(), global_time , total_benched_time); + "The simulation took %g seconds (after parsing and platform setup)\n" + "%g seconds were actual computation of the application", + simgrid_get_clock(), global_time, total_benched_time); if (total_benched_time/global_time>=0.75) XBT_INFO("More than 75%% of the time was spent inside the application code.\n" "You may want to use sampling functions or trace replay to reduce this."); } } -static void print_leaked_handles(){ +static void print_leaked_handles() +{ // Put the leaked non-default handles in a vector to sort them by id std::vector> handles; if (simgrid::smpi::F2C::lookup() != nullptr) std::copy_if(simgrid::smpi::F2C::lookup()->begin(), simgrid::smpi::F2C::lookup()->end(), std::back_inserter(handles), [](auto const& entry) { return entry.first >= simgrid::smpi::F2C::get_num_default_handles(); }); - if (not handles.empty()) { - auto max = static_cast(simgrid::config::get_value("smpi/list-leaks")); - std::string message = "Probable memory leaks in your code: SMPI detected %zu unfreed MPI handles :"; - if(max==0) - message +="\nHINT : Display types and addresses (n max) with --cfg=smpi/list-leaks:n.\n"\ - "Running smpirun with -wrapper \"valgrind --leak-check=full\" can provide more information"; - XBT_INFO(message.c_str(), handles.size()); - if (max > 0) { // we cannot trust F2C::lookup()->size() > F2C::get_num_default_handles() because some default - // handles are already freed at this point - bool display_advice = false; - std::map> count; - for (const auto& elem : handles) { - std::string key = elem.second->name(); - if ((not xbt_log_no_loc) && (not elem.second->call_location().empty())) - key+=" at "+ elem.second->call_location(); - else - display_advice=true; - auto result = count.insert(std::pair(key, 1)); - if (result.second == false) - result.first->second++; - } - if (display_advice) - XBT_WARN("To get more information (location of allocations), compile your code with -trace-call-location flag of smpicc/f90"); - unsigned int i = 0; - for (const auto& p : count) { - if(p.second == 1) - XBT_INFO("leaked handle of type %s", p.first.c_str()); - else - XBT_INFO("%d leaked handles of type %s", p.second, p.first.c_str()); - i++; - if(i == max) - break; - } - if (max < count.size()) - XBT_INFO("(%lu more handle leaks hidden as you wanted to see only %lu of them)", count.size()-max, max); - } + if (handles.empty()) + return; + + auto max = static_cast(simgrid::config::get_value("smpi/list-leaks")); + std::string message = "Probable memory leaks in your code: SMPI detected %zu unfreed MPI handles:"; + if (max == 0) + message += "\nHINT: Display types and addresses (n max) with --cfg=smpi/list-leaks:n.\n" + "Running smpirun with -wrapper \"valgrind --leak-check=full\" can provide more information"; + XBT_INFO(message.c_str(), handles.size()); + if (max == 0) + return; + + // we cannot trust F2C::lookup()->size() > F2C::get_num_default_handles() because some default handles are already + // freed at this point + bool display_advice = false; + std::map> count; + for (const auto& [_, elem] : handles) { + std::string key = elem->name(); + if ((not xbt_log_no_loc) && (not elem->call_location().empty())) + key += " at " + elem->call_location(); + else + display_advice = true; + auto& result = count.try_emplace(key, 0).first->second; + result++; + } + if (display_advice) + XBT_WARN("To get more information (location of allocations), compile your code with -trace-call-location flag of " + "smpicc/f90"); + unsigned int i = 0; + for (const auto& [key, value] : count) { + if (value == 1) + XBT_INFO("leaked handle of type %s", key.c_str()); + else + XBT_INFO("%d leaked handles of type %s", value, key.c_str()); + i++; + if (i == max) + break; } + if (max < count.size()) + XBT_INFO("(%lu more handle leaks hidden as you wanted to see only %lu of them)", count.size() - max, max); } -static void print_leaked_buffers(){ - if (not allocs.empty()) { - auto max = static_cast(simgrid::config::get_value("smpi/list-leaks")); - std::string message = "Probable memory leaks in your code: SMPI detected %zu unfreed buffers :"; - if(max==0) - message +="display types and addresses (n max) with --cfg=smpi/list-leaks:n.\nRunning smpirun with -wrapper \"valgrind --leak-check=full\" can provide more information"; - XBT_INFO(message.c_str(), allocs.size()); - - if (max > 0) { - //gather by allocation origin (only one group reported in case of no-loc or if trace-call-location is not used) - struct buff_leak{ - int count; - size_t total_size; - size_t min_size; - size_t max_size; - }; - std::map leaks_aggreg; - for (auto & elem : allocs){ - std::string key = "leaked allocations"; - if (not xbt_log_no_loc) - key=elem.second.file+":"+std::to_string(elem.second.line)+" : "+key; - auto result = leaks_aggreg.insert(std::pair(key, {1, elem.second.size, elem.second.size, elem.second.size})); - if (result.second == false){ - result.first->second.count ++; - result.first->second.total_size += elem.second.size; - if(elem.second.size > result.first->second.max_size) - result.first->second.max_size = elem.second.size; - else if (elem.second.size < result.first->second.min_size) - result.first->second.min_size = elem.second.size; - } - } - //now we can order by total size. - std::vector> leaks; - std::copy(leaks_aggreg.begin(), - leaks_aggreg.end(), - std::back_inserter>>(leaks)); - std::sort(leaks.begin(), leaks.end(), [](auto const& a, auto const& b) { return a.second.total_size > b.second.total_size; }); - - unsigned int i =0; - for (const auto& p : leaks) { - if(p.second.min_size == p.second.max_size) - XBT_INFO("%s of total size %zu, called %d times, each with size %zu", - p.first.c_str(),p.second.total_size,p.second.count,p.second.min_size); - else - XBT_INFO("%s of total size %zu, called %d times, with minimum size %zu and maximum size %zu", - p.first.c_str(),p.second.total_size,p.second.count,p.second.min_size,p.second.max_size); - i++; - if(i == max) - break; - } - if (max < leaks_aggreg.size()) - XBT_INFO("(more buffer leaks hidden as you wanted to see only %lu of them)", max); - } +static void print_leaked_buffers() +{ + if (allocs.empty()) + return; + + auto max = static_cast(simgrid::config::get_value("smpi/list-leaks")); + std::string message = "Probable memory leaks in your code: SMPI detected %zu unfreed buffers:"; + if (max == 0) + message += "display types and addresses (n max) with --cfg=smpi/list-leaks:n.\nRunning smpirun with -wrapper " + "\"valgrind --leak-check=full\" can provide more information"; + XBT_INFO(message.c_str(), allocs.size()); + + if (max == 0) + return; + + // gather by allocation origin (only one group reported in case of no-loc or if trace-call-location is not used) + struct buff_leak { + int count; + size_t total_size; + size_t min_size; + size_t max_size; + }; + std::map> leaks_aggreg; + for (const auto& [_, elem] : allocs) { + std::string key = "leaked allocations"; + if (not xbt_log_no_loc) + key = elem.file + ":" + std::to_string(elem.line) + ": " + key; + auto& result = leaks_aggreg.try_emplace(key, buff_leak{0, 0, elem.size, elem.size}).first->second; + result.count++; + result.total_size += elem.size; + if (elem.size > result.max_size) + result.max_size = elem.size; + else if (elem.size < result.min_size) + result.min_size = elem.size; + } + // now we can order by total size. + std::vector> leaks(leaks_aggreg.begin(), leaks_aggreg.end()); + std::sort(leaks.begin(), leaks.end(), + [](auto const& a, auto const& b) { return a.second.total_size > b.second.total_size; }); + + unsigned int i = 0; + for (const auto& [key, value] : leaks) { + if (value.min_size == value.max_size) + XBT_INFO("%s of total size %zu, called %d times, each with size %zu", key.c_str(), value.total_size, value.count, + value.min_size); + else + XBT_INFO("%s of total size %zu, called %d times, with minimum size %zu and maximum size %zu", key.c_str(), + value.total_size, value.count, value.min_size, value.max_size); + i++; + if (i == max) + break; } + if (max < leaks_aggreg.size()) + XBT_INFO("(more buffer leaks hidden as you wanted to see only %lu of them)", max); } void print_memory_analysis() @@ -269,8 +314,9 @@ void print_memory_analysis() total_malloc_size, max_malloc.size, simgrid::xbt::Path(max_malloc.file).get_base_name().c_str(), max_malloc.line, max_malloc.numcall ); else - XBT_INFO("Allocations analysis asked, but 0 bytes were allocated through malloc/calloc calls intercepted by SMPI.\n" - "Either code is using other ways of allocating memory, or it was built with SMPI_NO_OVERRIDE_MALLOC"); + XBT_INFO( + "Allocations analysis asked, but 0 bytes were allocated through malloc/calloc calls intercepted by SMPI.\n" + "The code may not use malloc() to allocate memory, or it was built with SMPI_NO_OVERRIDE_MALLOC"); if(total_shared_size != 0) XBT_INFO("%lu bytes were automatically shared between processes, in %u calls\n", total_shared_size, total_shared_calls); } @@ -314,11 +360,14 @@ void set_current_buffer(int i, const char* name, const void* buf){ } } -void print_buffer_info(){ - if(not current_buffer1.name.empty()) - XBT_INFO("Buffer %s was allocated from %s line %d, with size %zu", current_buffer1.name.c_str(), current_buffer1.alloc.file.c_str(), current_buffer1.alloc.line, current_buffer1.alloc.size); - if(not current_buffer2.name.empty()) - XBT_INFO("Buffer %s was allocated from %s line %d, with size %zu", current_buffer2.name.c_str(), current_buffer2.alloc.file.c_str(), current_buffer2.alloc.line, current_buffer2.alloc.size); +void print_buffer_info() +{ + if (not current_buffer1.name.empty()) + XBT_INFO("Buffer %s was allocated from %s line %d, with size %zu", current_buffer1.name.c_str(), + current_buffer1.alloc.file.c_str(), current_buffer1.alloc.line, current_buffer1.alloc.size); + if (not current_buffer2.name.empty()) + XBT_INFO("Buffer %s was allocated from %s line %d, with size %zu", current_buffer2.name.c_str(), + current_buffer2.alloc.file.c_str(), current_buffer2.alloc.line, current_buffer2.alloc.size); } size_t get_buffer_size(const void* buf){ @@ -336,6 +385,26 @@ void account_free(const void* ptr){ } } +int check_collectives_ordering(MPI_Comm comm, const std::string& call) +{ + unsigned int count = comm->get_collectives_count(); + comm->increment_collectives_count(); + if (auto vec = collective_calls.find(comm->id()); vec == collective_calls.end()) { + collective_calls.try_emplace(comm->id(), std::vector{call}); + } else { + // are we the first ? add the call + if (vec->second.size() == count) { + vec->second.emplace_back(call); + } else if (vec->second.size() > count) { + if (vec->second[count] != call) { + XBT_WARN("Collective operation mismatch. For process %ld, expected %s, got %s", + simgrid::s4u::this_actor::get_pid(), vec->second[count].c_str(), call.c_str()); + return MPI_ERR_OTHER; + } + } else { + THROW_IMPOSSIBLE; + } + } + return MPI_SUCCESS; } -} -} // namespace simgrid +} // namespace simgrid::smpi::utils