With smpi/display-analysis enabled, SMPI can display the total memory allocated by the malloc/calloc calls it intercepted.
At the end of the simulation, it outputs the largest single allocation, its allocation site (file:line), and the number of times it was performed, and hints at using sharing options on it.
To avoid too many globals and singletons, I used the namespace smpi::utils to group all of these analysis tools. Please tell me if this is awful C++ :p
std::vector<double> values;
};
-XBT_PUBLIC std::vector<s_smpi_factor_t> parse_factor(const std::string& smpi_coef_string);
+
+namespace simgrid {
+namespace smpi {
+namespace utils {
+
+ // Statistics accumulated during the simulation through the accessors below,
+ // and reported at the end by print_time_analysis()/print_memory_analysis().
+ extern double total_benched_time; // seconds spent in benched application code
+ extern size_t total_malloc_size;  // bytes allocated through intercepted malloc/calloc
+ extern size_t total_shared_size;  // bytes automatically shared between processes
+
+ XBT_PUBLIC std::vector<s_smpi_factor_t> parse_factor(const std::string& smpi_coef_string);
+ XBT_PUBLIC void add_benched_time(double time);
+ XBT_PUBLIC void account_malloc_size(size_t size, const char* file, int line);
+ XBT_PUBLIC void account_shared_size(size_t size);
+ XBT_PUBLIC void print_time_analysis(double time);
+ XBT_PUBLIC void print_memory_analysis();
+}
+}
+}
#endif
#include "simgrid/modelchecker.h"
#include "simgrid/s4u/Exec.hpp"
#include "smpi_comm.hpp"
+#include "smpi_utils.hpp"
#include "src/internal_config.h"
#include "src/mc/mc_replay.hpp"
#include "xbt/config.hpp"
"Minimum time to inject inside a call to MPI_Wtime(), gettimeofday() and clock_gettime()",
1e-8 /* Documented to be 10 ns */);
-double smpi_total_benched_time = 0;
-
// Private execute_flops used by smpi_execute and smpi_execute_benched
void private_execute_flops(double flops) {
xbt_assert(flops >= 0, "You're trying to execute a negative amount of flops (%f)!", flops);
}
#endif
- smpi_total_benched_time += xbt_os_timer_elapsed(timer);
+ simgrid::smpi::utils::add_benched_time(xbt_os_timer_elapsed(timer));
}
/* Private sleep function used by smpi_sleep(), smpi_usleep() and friends */
// return if already called
if(_smpi_options_initialized)
return;
+ simgrid::config::declare_flag<bool>("smpi/display-analysis", "Whether we should display a memory allocations analysis after simulation.", false);
simgrid::config::declare_flag<bool>("smpi/display-timing", "Whether we should display the timing after simulation.", false);
simgrid::config::declare_flag<int>("smpi/list-leaks", "Whether we should display the n first MPI handle leaks (addresses and type only) after simulation", 0);
simgrid::config::declare_flag<bool>("smpi/keep-temps", "Whether we should keep the generated temporary files.", false);
std::unordered_map<std::string, double> location2speedup;
static int smpi_exit_status = 0;
-extern double smpi_total_benched_time;
xbt_os_timer_t global_timer;
static std::vector<std::string> privatize_libs_paths;
SIMIX_run();
xbt_os_walltimer_stop(global_timer);
- if (simgrid::config::get_value<bool>("smpi/display-timing")) {
- double global_time = xbt_os_timer_elapsed(global_timer);
- XBT_INFO("Simulated time: %g seconds. \n\n"
- "The simulation took %g seconds (after parsing and platform setup)\n"
- "%g seconds were actual computation of the application",
- SIMIX_get_clock(), global_time , smpi_total_benched_time);
-
- if (smpi_total_benched_time/global_time>=0.75)
- XBT_INFO("More than 75%% of the time was spent inside the application code.\n"
- "You may want to use sampling functions or trace replay to reduce this.");
- }
+ simgrid::smpi::utils::print_time_analysis(xbt_os_timer_elapsed(global_timer));
}
SMPI_finalize();
if (smpi_cfg_privatization() == SmpiPrivStrategies::MMAP)
smpi_destroy_global_memory_segments();
- if (simgrid::smpi::F2C::lookup() != nullptr &&
- simgrid::smpi::F2C::lookup()->size() > simgrid::smpi::F2C::get_num_default_handles()) {
- XBT_INFO("Probable memory leaks in your code: SMPI detected %zu unfreed MPI handles : "
- "display types and addresses (n max) with --cfg=smpi/list-leaks:n.\n"
- "Running smpirun with -wrapper \"valgrind --leak-check=full\" can provide more information",
- simgrid::smpi::F2C::lookup()->size() - simgrid::smpi::F2C::get_num_default_handles());
- int n = simgrid::config::get_value<int>("smpi/list-leaks");
- for (auto const& p : *simgrid::smpi::F2C::lookup()) {
- static int printed = 0;
- if (printed >= n)
- break;
- if (p.first >= simgrid::smpi::F2C::get_num_default_handles()) {
- XBT_WARN("Leak %p of type %s", p.second, boost::core::demangle(typeid(*(p.second)).name()).c_str());
- printed++;
- }
- }
- }
+ simgrid::smpi::utils::print_memory_analysis();
}
void smpi_mpi_init() {
const char* orecv_string = host->get_property("smpi/or");
if (orecv_string != nullptr) {
- orecv_parsed_values = parse_factor(orecv_string);
+ orecv_parsed_values = simgrid::smpi::utils::parse_factor(orecv_string);
} else {
- orecv_parsed_values = parse_factor(config::get_value<std::string>("smpi/or"));
+ orecv_parsed_values = simgrid::smpi::utils::parse_factor(config::get_value<std::string>("smpi/or"));
}
const char* osend_string = host->get_property("smpi/os");
if (osend_string != nullptr) {
- osend_parsed_values = parse_factor(osend_string);
+ osend_parsed_values = simgrid::smpi::utils::parse_factor(osend_string);
} else {
- osend_parsed_values = parse_factor(config::get_value<std::string>("smpi/os"));
+ osend_parsed_values = simgrid::smpi::utils::parse_factor(config::get_value<std::string>("smpi/os"));
}
const char* oisend_string = host->get_property("smpi/ois");
if (oisend_string != nullptr) {
- oisend_parsed_values = parse_factor(oisend_string);
+ oisend_parsed_values = simgrid::smpi::utils::parse_factor(oisend_string);
} else {
- oisend_parsed_values = parse_factor(config::get_value<std::string>("smpi/ois"));
+ oisend_parsed_values = simgrid::smpi::utils::parse_factor(config::get_value<std::string>("smpi/ois"));
}
}
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
-
+#include "smpi_utils.hpp"
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
}
-
void smpi_shared_destroy()
{
allocs.clear();
void* smpi_shared_malloc_intercept(size_t size, const char* file, int line)
{
+  // Below the auto-sharing threshold (or when the threshold is 0, i.e. the
+  // feature is disabled): do a regular allocation, but record its size and
+  // call site for the end-of-simulation memory analysis. Otherwise, account
+  // it as shared and delegate to smpi_shared_malloc().
-  if( smpi_cfg_auto_shared_malloc_thresh() == 0 || size < smpi_cfg_auto_shared_malloc_thresh())
+  if( smpi_cfg_auto_shared_malloc_thresh() == 0 || size < smpi_cfg_auto_shared_malloc_thresh()){
+    simgrid::smpi::utils::account_malloc_size(size, file, line);
return ::operator new(size);
-  else
+  } else {
+    simgrid::smpi::utils::account_shared_size(size);
return smpi_shared_malloc(size, file, line);
+  }
}
void* smpi_shared_calloc_intercept(size_t num_elm, size_t elem_size, const char* file, int line)
{
+  // NOTE(review): elem_size*num_elm may overflow size_t, unlike a real
+  // calloc() which checks the product — TODO confirm whether inputs are
+  // trusted enough to skip the check here.
+  // Same dispatch as smpi_shared_malloc_intercept(): small allocations are
+  // accounted and served by ::operator new, large ones are shared.
if( smpi_cfg_auto_shared_malloc_thresh() == 0 || elem_size*num_elm < smpi_cfg_auto_shared_malloc_thresh()){
+    simgrid::smpi::utils::account_malloc_size(elem_size*num_elm, file, line);
+    // Zero-fill to preserve calloc() semantics.
void* ptr = ::operator new(elem_size*num_elm);
memset(ptr, 0, elem_size*num_elm);
return ptr;
-  } else
+  } else {
+    simgrid::smpi::utils::account_shared_size(elem_size*num_elm);
return smpi_shared_malloc(elem_size*num_elm, file, line);
+  }
}
void* smpi_shared_malloc(size_t size, const char* file, int line)
#include "xbt/log.h"
#include "xbt/parse_units.hpp"
#include "xbt/sysdep.h"
+#include "xbt/file.hpp"
#include <boost/tokenizer.hpp>
+#include "smpi_config.hpp"
+#include "smpi_f2c.hpp"
+#include "src/simix/smx_private.hpp"
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_utils, smpi, "Logging specific to SMPI (utils)");
extern std::string surf_parsed_filename;
extern int surf_parse_lineno;
+namespace simgrid {
+namespace smpi {
+namespace utils {
+
+// Definitions of the statistics accumulated during the simulation. They are
+// size_t to match the `extern size_t` declarations in smpi_utils.hpp (an
+// `unsigned long` definition would be ill-formed where size_t differs, e.g.
+// 64-bit Windows) and to match the %zu format specifiers used when printing.
+double total_benched_time=0;
+size_t total_malloc_size=0;
+size_t total_shared_size=0;
+unsigned int total_shared_calls=0;
+// Largest single allocation seen so far: its size, call site, and how many
+// times that exact allocation (same size, file and line) was performed.
+// The struct tag differs from the variable name to avoid shadowing, and the
+// variable has internal linkage since it is only used in this file.
+struct MaxMallocInfo {
+  size_t size;
+  int numcall;
+  int line;
+  std::string file;
+};
+static MaxMallocInfo max_malloc = {0, 0, 0, std::string()};
+
std::vector<s_smpi_factor_t> parse_factor(const std::string& smpi_coef_string)
{
std::vector<s_smpi_factor_t> smpi_factor;
return smpi_factor;
}
+
+// Accumulate wall-clock time spent computing in the application
+// (reported by print_time_analysis()).
+void add_benched_time(double time){
+  total_benched_time += time;
+}
+
+// Record an intercepted malloc/calloc of `size` bytes issued at file:line.
+// Tracks the largest single allocation and counts repetitions of that exact
+// call site, so that print_memory_analysis() can hint at sharing it.
+void account_malloc_size(size_t size, const char* file, int line){
+  total_malloc_size += size;
+  if (size > max_malloc.size) {
+    // New record: remember its size and call site, and restart the counter.
+    max_malloc.size    = size;
+    max_malloc.line    = line;
+    max_malloc.numcall = 1;
+    max_malloc.file    = file;
+  } else if (size == max_malloc.size && max_malloc.line == line && max_malloc.file == file) {
+    // Same allocation repeated from the same call site.
+    max_malloc.numcall++;
+  }
+}
+
+// Record an allocation that was automatically shared between processes.
+void account_shared_size(size_t size){
+  total_shared_size += size;
+  total_shared_calls++;
+}
+
+// Display the simulated vs. real time summary when smpi/display-timing is
+// set, and warn when at least 75% of the wall-clock time was spent executing
+// application code (hinting at sampling/trace-replay).
+// NOTE(review): assumes global_time > 0; a zero elapsed time would make the
+// ratio infinite — TODO confirm this cannot happen in practice.
+void print_time_analysis(double global_time){
+  if (simgrid::config::get_value<bool>("smpi/display-timing")) {
+    XBT_INFO("Simulated time: %g seconds. \n\n"
+        "The simulation took %g seconds (after parsing and platform setup)\n"
+        "%g seconds were actual computation of the application",
+        SIMIX_get_clock(), global_time , total_benched_time);
+    if (total_benched_time/global_time>=0.75)
+    XBT_INFO("More than 75%% of the time was spent inside the application code.\n"
+    "You may want to use sampling functions or trace replay to reduce this.");
+  }
+}
+
+// Report (1) probable MPI handle leaks detected through the F2C lookup table
+// and (2), when smpi/display-analysis is set, a summary of the memory
+// allocated through the intercepted malloc/calloc calls.
+void print_memory_analysis(){
+  if (simgrid::smpi::F2C::lookup() != nullptr &&
+      simgrid::smpi::F2C::lookup()->size() > simgrid::smpi::F2C::get_num_default_handles()) {
+    XBT_INFO("Probable memory leaks in your code: SMPI detected %zu unfreed MPI handles : "
+             "display types and addresses (n max) with --cfg=smpi/list-leaks:n.\n"
+             "Running smpirun with -wrapper \"valgrind --leak-check=full\" can provide more information",
+             simgrid::smpi::F2C::lookup()->size() - simgrid::smpi::F2C::get_num_default_handles());
+    int n = simgrid::config::get_value<int>("smpi/list-leaks");
+    // Plain local counter: a `static` inside the loop (as initially written)
+    // would survive across calls, which is not wanted for this report.
+    int printed = 0;
+    for (auto const& p : *simgrid::smpi::F2C::lookup()) {
+      if (printed >= n)
+        break;
+      // Skip the predefined handles; only user-created ones can leak.
+      if (p.first >= simgrid::smpi::F2C::get_num_default_handles()) {
+        XBT_WARN("Leak %p of type %s", p.second, boost::core::demangle(typeid(*(p.second)).name()).c_str());
+        printed++;
+      }
+    }
+  }
+  if (simgrid::config::get_value<bool>("smpi/display-analysis")) {
+    XBT_INFO("Memory Usage: Simulated application allocated %zu bytes during its lifetime through malloc/calloc calls.\n"
+             "Largest allocation at once from a single process was %zu bytes, at %s:%d. It was called %d times during the whole simulation.\n"
+             "If this is too much, consider sharing allocations for computation buffers.\n"
+             "This can be done automatically by setting --cfg=smpi/auto-shared-malloc-thresh to the minimum wanted size (this can alter execution if data content is necessary)\n",
+             total_malloc_size, max_malloc.size, simgrid::xbt::Path(max_malloc.file).get_base_name().c_str(), max_malloc.line, max_malloc.numcall
+    );
+    if(total_shared_size != 0)
+      XBT_INFO("%zu bytes were automatically shared between processes, in %u calls\n", total_shared_size, total_shared_calls);
+  }
+}
+
+}
+}
+}
\ No newline at end of file
double NetworkSmpiModel::get_bandwidth_factor(double size)
{
if (smpi_bw_factor.empty())
- smpi_bw_factor = parse_factor(config::get_value<std::string>("smpi/bw-factor"));
+ smpi_bw_factor = simgrid::smpi::utils::parse_factor(config::get_value<std::string>("smpi/bw-factor"));
double current = 1.0;
for (auto const& fact : smpi_bw_factor) {
double NetworkSmpiModel::get_latency_factor(double size)
{
if (smpi_lat_factor.empty())
- smpi_lat_factor = parse_factor(config::get_value<std::string>("smpi/lat-factor"));
+ smpi_lat_factor = simgrid::smpi::utils::parse_factor(config::get_value<std::string>("smpi/lat-factor"));
double current = 1.0;
for (auto const& fact : smpi_lat_factor) {