From 2247d8d7ee677ba1a970301b456537022c53f503 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Sun, 14 Mar 2021 23:22:20 +0100 Subject: [PATCH] Add a small allocations counter in SMPI. With smpi/display-analysis, it can display the total memory allocated by malloc/calloc calls intercepted by SMPI. It outputs at the end of the simulation the largest one, its address, and the number of times it was called, and hints to use sharing options on it. To avoid too many globals, and singletons, I used the namespace smpi::utils to regroup all of these analysis tools. Please tell if this is awful C++ :p --- src/smpi/include/smpi_utils.hpp | 20 ++++++- src/smpi/internals/smpi_bench.cpp | 5 +- src/smpi/internals/smpi_config.cpp | 1 + src/smpi/internals/smpi_global.cpp | 31 +---------- src/smpi/internals/smpi_host.cpp | 12 ++--- src/smpi/internals/smpi_shared.cpp | 15 ++++-- src/smpi/internals/smpi_utils.cpp | 86 ++++++++++++++++++++++++++++++ src/surf/network_smpi.cpp | 4 +- 8 files changed, 128 insertions(+), 46 deletions(-) diff --git a/src/smpi/include/smpi_utils.hpp b/src/smpi/include/smpi_utils.hpp index 84134c45bf..624fe72c04 100644 --- a/src/smpi/include/smpi_utils.hpp +++ b/src/smpi/include/smpi_utils.hpp @@ -16,6 +16,24 @@ struct s_smpi_factor_t { std::vector values; }; -XBT_PUBLIC std::vector parse_factor(const std::string& smpi_coef_string); + +namespace simgrid { +namespace smpi { +namespace utils { + + extern double total_benched_time; + extern size_t total_malloc_size; + extern size_t total_shared_size; + extern size_t max_malloc_size; + + XBT_PUBLIC std::vector parse_factor(const std::string& smpi_coef_string); + XBT_PUBLIC void add_benched_time(double time); + XBT_PUBLIC void account_malloc_size(size_t size, const char* file, int line); + XBT_PUBLIC void account_shared_size(size_t size); + XBT_PUBLIC void print_time_analysis(double time); + XBT_PUBLIC void print_memory_analysis(); +} +} +} #endif diff --git a/src/smpi/internals/smpi_bench.cpp 
b/src/smpi/internals/smpi_bench.cpp index 8eac4e7d43..0e4e8524e5 100644 --- a/src/smpi/internals/smpi_bench.cpp +++ b/src/smpi/internals/smpi_bench.cpp @@ -9,6 +9,7 @@ #include "simgrid/modelchecker.h" #include "simgrid/s4u/Exec.hpp" #include "smpi_comm.hpp" +#include "smpi_utils.hpp" #include "src/internal_config.h" #include "src/mc/mc_replay.hpp" #include "xbt/config.hpp" @@ -34,8 +35,6 @@ static simgrid::config::Flag "Minimum time to inject inside a call to MPI_Wtime(), gettimeofday() and clock_gettime()", 1e-8 /* Documented to be 10 ns */); -double smpi_total_benched_time = 0; - // Private execute_flops used by smpi_execute and smpi_execute_benched void private_execute_flops(double flops) { xbt_assert(flops >= 0, "You're trying to execute a negative amount of flops (%f)!", flops); @@ -162,7 +161,7 @@ void smpi_bench_end() } #endif - smpi_total_benched_time += xbt_os_timer_elapsed(timer); + simgrid::smpi::utils::add_benched_time(xbt_os_timer_elapsed(timer)); } /* Private sleep function used by smpi_sleep(), smpi_usleep() and friends */ diff --git a/src/smpi/internals/smpi_config.cpp b/src/smpi/internals/smpi_config.cpp index ed6c33885b..a5cb4cb9b7 100644 --- a/src/smpi/internals/smpi_config.cpp +++ b/src/smpi/internals/smpi_config.cpp @@ -181,6 +181,7 @@ void smpi_init_options(){ // return if already called if(_smpi_options_initialized) return; + simgrid::config::declare_flag("smpi/display-analysis", "Whether we should display a memory allocations analysis after simulation.", false); simgrid::config::declare_flag("smpi/display-timing", "Whether we should display the timing after simulation.", false); simgrid::config::declare_flag("smpi/list-leaks", "Whether we should display the n first MPI handle leaks (addresses and type only) after simulation", 0); simgrid::config::declare_flag("smpi/keep-temps", "Whether we should keep the generated temporary files.", false); diff --git a/src/smpi/internals/smpi_global.cpp b/src/smpi/internals/smpi_global.cpp index 
2b5e91267c..aa77ce4c8e 100644 --- a/src/smpi/internals/smpi_global.cpp +++ b/src/smpi/internals/smpi_global.cpp @@ -71,7 +71,6 @@ std::map std::unordered_map location2speedup; static int smpi_exit_status = 0; -extern double smpi_total_benched_time; xbt_os_timer_t global_timer; static std::vector privatize_libs_paths; @@ -570,17 +569,7 @@ int smpi_main(const char* executable, int argc, char* argv[]) SIMIX_run(); xbt_os_walltimer_stop(global_timer); - if (simgrid::config::get_value("smpi/display-timing")) { - double global_time = xbt_os_timer_elapsed(global_timer); - XBT_INFO("Simulated time: %g seconds. \n\n" - "The simulation took %g seconds (after parsing and platform setup)\n" - "%g seconds were actual computation of the application", - SIMIX_get_clock(), global_time , smpi_total_benched_time); - - if (smpi_total_benched_time/global_time>=0.75) - XBT_INFO("More than 75%% of the time was spent inside the application code.\n" - "You may want to use sampling functions or trace replay to reduce this."); - } + simgrid::smpi::utils::print_time_analysis(xbt_os_timer_elapsed(global_timer)); } SMPI_finalize(); @@ -625,23 +614,7 @@ void SMPI_finalize() if (smpi_cfg_privatization() == SmpiPrivStrategies::MMAP) smpi_destroy_global_memory_segments(); - if (simgrid::smpi::F2C::lookup() != nullptr && - simgrid::smpi::F2C::lookup()->size() > simgrid::smpi::F2C::get_num_default_handles()) { - XBT_INFO("Probable memory leaks in your code: SMPI detected %zu unfreed MPI handles : " - "display types and addresses (n max) with --cfg=smpi/list-leaks:n.\n" - "Running smpirun with -wrapper \"valgrind --leak-check=full\" can provide more information", - simgrid::smpi::F2C::lookup()->size() - simgrid::smpi::F2C::get_num_default_handles()); - int n = simgrid::config::get_value("smpi/list-leaks"); - for (auto const& p : *simgrid::smpi::F2C::lookup()) { - static int printed = 0; - if (printed >= n) - break; - if (p.first >= simgrid::smpi::F2C::get_num_default_handles()) { - XBT_WARN("Leak %p 
of type %s", p.second, boost::core::demangle(typeid(*(p.second)).name()).c_str()); - printed++; - } - } - } + simgrid::smpi::utils::print_memory_analysis(); } void smpi_mpi_init() { diff --git a/src/smpi/internals/smpi_host.cpp b/src/smpi/internals/smpi_host.cpp index df71b90817..03035c7d05 100644 --- a/src/smpi/internals/smpi_host.cpp +++ b/src/smpi/internals/smpi_host.cpp @@ -94,23 +94,23 @@ Host::Host(s4u::Host* ptr) : host(ptr) const char* orecv_string = host->get_property("smpi/or"); if (orecv_string != nullptr) { - orecv_parsed_values = parse_factor(orecv_string); + orecv_parsed_values = simgrid::smpi::utils::parse_factor(orecv_string); } else { - orecv_parsed_values = parse_factor(config::get_value("smpi/or")); + orecv_parsed_values = simgrid::smpi::utils::parse_factor(config::get_value("smpi/or")); } const char* osend_string = host->get_property("smpi/os"); if (osend_string != nullptr) { - osend_parsed_values = parse_factor(osend_string); + osend_parsed_values = simgrid::smpi::utils::parse_factor(osend_string); } else { - osend_parsed_values = parse_factor(config::get_value("smpi/os")); + osend_parsed_values = simgrid::smpi::utils::parse_factor(config::get_value("smpi/os")); } const char* oisend_string = host->get_property("smpi/ois"); if (oisend_string != nullptr) { - oisend_parsed_values = parse_factor(oisend_string); + oisend_parsed_values = simgrid::smpi::utils::parse_factor(oisend_string); } else { - oisend_parsed_values = parse_factor(config::get_value("smpi/ois")); + oisend_parsed_values = simgrid::smpi::utils::parse_factor(config::get_value("smpi/ois")); } } diff --git a/src/smpi/internals/smpi_shared.cpp b/src/smpi/internals/smpi_shared.cpp index 83224d3d39..34e70d267f 100644 --- a/src/smpi/internals/smpi_shared.cpp +++ b/src/smpi/internals/smpi_shared.cpp @@ -49,7 +49,7 @@ #include #include #include - +#include "smpi_utils.hpp" #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif @@ -100,7 +100,6 @@ unsigned long 
smpi_shared_malloc_blocksize = 1UL << 20; #endif } - void smpi_shared_destroy() { allocs.clear(); @@ -305,20 +304,26 @@ void* smpi_shared_malloc_partial(size_t size, const size_t* shared_block_offsets void* smpi_shared_malloc_intercept(size_t size, const char* file, int line) { - if( smpi_cfg_auto_shared_malloc_thresh() == 0 || size < smpi_cfg_auto_shared_malloc_thresh()) + if( smpi_cfg_auto_shared_malloc_thresh() == 0 || size < smpi_cfg_auto_shared_malloc_thresh()){ + simgrid::smpi::utils::account_malloc_size(size, file, line); return ::operator new(size); - else + } else { + simgrid::smpi::utils::account_shared_size(size); return smpi_shared_malloc(size, file, line); + } } void* smpi_shared_calloc_intercept(size_t num_elm, size_t elem_size, const char* file, int line) { if( smpi_cfg_auto_shared_malloc_thresh() == 0 || elem_size*num_elm < smpi_cfg_auto_shared_malloc_thresh()){ + simgrid::smpi::utils::account_malloc_size(elem_size*num_elm, file, line); void* ptr = ::operator new(elem_size*num_elm); memset(ptr, 0, elem_size*num_elm); return ptr; - } else + } else { + simgrid::smpi::utils::account_shared_size(elem_size*num_elm); return smpi_shared_malloc(elem_size*num_elm, file, line); + } } void* smpi_shared_malloc(size_t size, const char* file, int line) diff --git a/src/smpi/internals/smpi_utils.cpp b/src/smpi/internals/smpi_utils.cpp index 3973f8b2c2..700345a1a1 100644 --- a/src/smpi/internals/smpi_utils.cpp +++ b/src/smpi/internals/smpi_utils.cpp @@ -10,13 +10,32 @@ #include "xbt/log.h" #include "xbt/parse_units.hpp" #include "xbt/sysdep.h" +#include "xbt/file.hpp" #include +#include "smpi_config.hpp" +#include "smpi_f2c.hpp" +#include "src/simix/smx_private.hpp" XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_utils, smpi, "Logging specific to SMPI (utils)"); extern std::string surf_parsed_filename; extern int surf_parse_lineno; +namespace simgrid { +namespace smpi { +namespace utils { + +double total_benched_time=0; +unsigned long total_malloc_size=0; +unsigned long 
total_shared_size=0; +unsigned int total_shared_calls=0; +struct max_malloc{ + size_t size; + int numcall; + int line; + std::string file; +} max_malloc = {0, 0, 0, std::string()}; + std::vector parse_factor(const std::string& smpi_coef_string) { std::vector smpi_factor; @@ -76,3 +95,70 @@ std::vector parse_factor(const std::string& smpi_coef_string) return smpi_factor; } + +void add_benched_time(double time){ + total_benched_time += time; +} + +void account_malloc_size(size_t size, const char* file, int line){ + total_malloc_size += size; + if(size > max_malloc.size){ + max_malloc.size = size; + max_malloc.line = line; + max_malloc.numcall = 1; + max_malloc.file = std::string(file); + }else if(size == max_malloc.size && max_malloc.line == line && not max_malloc.file.compare(file)){ + max_malloc.numcall++; + } +} + +void account_shared_size(size_t size){ + total_shared_size += size; + total_shared_calls++; +} + +void print_time_analysis(double global_time){ + if (simgrid::config::get_value("smpi/display-timing")) { + XBT_INFO("Simulated time: %g seconds. 
\n\n" + "The simulation took %g seconds (after parsing and platform setup)\n" + "%g seconds were actual computation of the application", + SIMIX_get_clock(), global_time , total_benched_time); + if (total_benched_time/global_time>=0.75) + XBT_INFO("More than 75%% of the time was spent inside the application code.\n" + "You may want to use sampling functions or trace replay to reduce this."); + } +} + +void print_memory_analysis(){ + if (simgrid::smpi::F2C::lookup() != nullptr && + simgrid::smpi::F2C::lookup()->size() > simgrid::smpi::F2C::get_num_default_handles()) { + XBT_INFO("Probable memory leaks in your code: SMPI detected %zu unfreed MPI handles : " + "display types and addresses (n max) with --cfg=smpi/list-leaks:n.\n" + "Running smpirun with -wrapper \"valgrind --leak-check=full\" can provide more information", + simgrid::smpi::F2C::lookup()->size() - simgrid::smpi::F2C::get_num_default_handles()); + int n = simgrid::config::get_value("smpi/list-leaks"); + for (auto const& p : *simgrid::smpi::F2C::lookup()) { + static int printed = 0; + if (printed >= n) + break; + if (p.first >= simgrid::smpi::F2C::get_num_default_handles()) { + XBT_WARN("Leak %p of type %s", p.second, boost::core::demangle(typeid(*(p.second)).name()).c_str()); + printed++; + } + } + } + if (simgrid::config::get_value("smpi/display-analysis")) { + XBT_INFO("Memory Usage: Simulated application allocated %zu bytes during its lifetime through malloc/calloc calls.\n" + "Largest allocation at once from a single process was %zu bytes, at %s:%d. 
It was called %d times during the whole simulation.\n" + "If this is too much, consider sharing allocations for computation buffers.\n" + "This can be done automatically by setting --cfg=smpi/auto-shared-malloc-thresh to the minimum size wanted size (this can alter execution if data content is necessary)\n", + total_malloc_size, max_malloc.size, simgrid::xbt::Path(max_malloc.file).get_base_name().c_str(), max_malloc.line, max_malloc.numcall + ); + if(total_shared_size != 0) + XBT_INFO("%zu bytes were automatically shared between processes, in %u calls\n", total_shared_size, total_shared_calls); + } +} + +} +} +} \ No newline at end of file diff --git a/src/surf/network_smpi.cpp b/src/surf/network_smpi.cpp index 0c32b4f94a..96193d3b51 100644 --- a/src/surf/network_smpi.cpp +++ b/src/surf/network_smpi.cpp @@ -52,7 +52,7 @@ NetworkSmpiModel::NetworkSmpiModel() : NetworkCm02Model() {} double NetworkSmpiModel::get_bandwidth_factor(double size) { if (smpi_bw_factor.empty()) - smpi_bw_factor = parse_factor(config::get_value("smpi/bw-factor")); + smpi_bw_factor = simgrid::smpi::utils::parse_factor(config::get_value("smpi/bw-factor")); double current = 1.0; for (auto const& fact : smpi_bw_factor) { @@ -70,7 +70,7 @@ double NetworkSmpiModel::get_bandwidth_factor(double size) double NetworkSmpiModel::get_latency_factor(double size) { if (smpi_lat_factor.empty()) - smpi_lat_factor = parse_factor(config::get_value("smpi/lat-factor")); + smpi_lat_factor = simgrid::smpi::utils::parse_factor(config::get_value("smpi/lat-factor")); double current = 1.0; for (auto const& fact : smpi_lat_factor) { -- 2.20.1