AND — Algorithmique Numérique Distribuée

Public GIT Repository
Update copyright headers.
[simgrid.git] / src / smpi / internals / smpi_global.cpp
index a161b19..ebf13d8 100644 (file)
@@ -6,6 +6,7 @@
 #include "SmpiHost.hpp"
 #include "mc/mc.h"
 #include "private.hpp"
+#include "simgrid/s4u/Engine.hpp"
 #include "simgrid/s4u/Host.hpp"
 #include "simgrid/s4u/Mailbox.hpp"
 #include "simgrid/s4u/forward.hpp"
@@ -57,6 +58,7 @@ std::unordered_map<std::string, double> location2speedup;
 
 static std::map</*process_id*/ ActorPtr, simgrid::smpi::Process*> process_data;
 int process_count = 0;
+static int smpi_exit_status = 0;
 int smpi_universe_size = 0;
 extern double smpi_total_benched_time;
 xbt_os_timer_t global_timer;
@@ -76,7 +78,7 @@ MPI_Errhandler *MPI_ERRORS_ARE_FATAL = nullptr;
 MPI_Errhandler *MPI_ERRHANDLER_NULL = nullptr;
 // No instance gets manually created; check also the smpirun.in script as
 // this default name is used there as well (when the <actor> tag is generated).
-static const char* smpi_default_instance_name = "smpirun";
+static const std::string smpi_default_instance_name("smpirun");
 static simgrid::config::Flag<double> smpi_wtime_sleep(
   "smpi/wtime", "Minimum time to inject inside a call to MPI_Wtime", 0.0);
 static simgrid::config::Flag<double> smpi_init_sleep(
@@ -84,11 +86,6 @@ static simgrid::config::Flag<double> smpi_init_sleep(
 
 void (*smpi_comm_copy_data_callback) (smx_activity_t, void*, size_t) = &smpi_comm_copy_buffer_callback;
 
-void smpi_add_process(ActorPtr actor)
-{
-  process_data.insert({actor, new simgrid::smpi::Process(actor, nullptr)});
-}
-
 int smpi_process_count()
 {
   return process_count;
@@ -117,7 +114,7 @@ void smpi_process_init(int *argc, char ***argv){
 }
 
 int smpi_process_index(){
-  return simgrid::s4u::Actor::self()->getPid();
+  return simgrid::s4u::this_actor::getPid();
 }
 
 void * smpi_process_get_user_data(){
@@ -192,19 +189,19 @@ void smpi_comm_copy_buffer_callback(smx_activity_t synchro, void *buff, size_t b
   auto private_blocks = merge_private_blocks(src_private_blocks, dst_private_blocks);
   check_blocks(private_blocks, buff_size);
   void* tmpbuff=buff;
-  if ((smpi_privatize_global_variables == SMPI_PRIVATIZE_MMAP) && (static_cast<char*>(buff) >= smpi_data_exe_start) &&
+  if ((smpi_privatize_global_variables == SmpiPrivStrategies::Mmap) &&
+      (static_cast<char*>(buff) >= smpi_data_exe_start) &&
       (static_cast<char*>(buff) < smpi_data_exe_start + smpi_data_exe_size)) {
     XBT_DEBUG("Privatization : We are copying from a zone inside global memory... Saving data to temp buffer !");
-
-    smpi_switch_data_segment(Actor::self()->getPid());
+    smpi_switch_data_segment(comm->src_proc->iface());
     tmpbuff = static_cast<void*>(xbt_malloc(buff_size));
     memcpy_private(tmpbuff, buff, private_blocks);
   }
 
-  if ((smpi_privatize_global_variables == SMPI_PRIVATIZE_MMAP) && ((char*)comm->dst_buff >= smpi_data_exe_start) &&
+  if ((smpi_privatize_global_variables == SmpiPrivStrategies::Mmap) && ((char*)comm->dst_buff >= smpi_data_exe_start) &&
       ((char*)comm->dst_buff < smpi_data_exe_start + smpi_data_exe_size)) {
     XBT_DEBUG("Privatization : We are copying to a zone inside global memory - Switch data segment");
-    smpi_switch_data_segment(Actor::self()->getPid());
+    smpi_switch_data_segment(comm->dst_proc->iface());
   }
   XBT_DEBUG("Copying %zu bytes from %p to %p", buff_size, tmpbuff,comm->dst_buff);
   memcpy_private(comm->dst_buff, tmpbuff, private_blocks);
@@ -349,18 +346,6 @@ void smpi_global_destroy()
   smpi_bench_destroy();
   smpi_shared_destroy();
   smpi_deployment_cleanup_instances();
-  for (auto& pair : process_data) {
-    auto& process = pair.second;
-    if (process->comm_self() != MPI_COMM_NULL) {
-      simgrid::smpi::Comm::destroy(process->comm_self());
-    }
-    if (process->comm_intra() != MPI_COMM_NULL) {
-      simgrid::smpi::Comm::destroy(process->comm_intra());
-    }
-    xbt_os_timer_free(process->timer());
-    xbt_mutex_destroy(process->mailboxes_mutex());
-  }
-  process_data.clear();
 
   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback != nullptr)
     simgrid::smpi::Colls::smpi_coll_cleanup_callback();
@@ -371,7 +356,7 @@ void smpi_global_destroy()
     xbt_os_timer_free(global_timer);
   }
 
-  if(smpi_privatize_global_variables == SMPI_PRIVATIZE_MMAP)
+  if (smpi_privatize_global_variables == SmpiPrivStrategies::Mmap)
     smpi_destroy_global_memory_segments();
   smpi_free_static();
 }
@@ -384,23 +369,28 @@ static void smpi_init_options(){
   simgrid::smpi::Colls::smpi_coll_cleanup_callback = nullptr;
   smpi_cpu_threshold                               = xbt_cfg_get_double("smpi/cpu-threshold");
   smpi_host_speed                                  = xbt_cfg_get_double("smpi/host-speed");
+  xbt_assert(smpi_host_speed >= 0, "You're trying to set the host_speed to a negative value (%f)", smpi_host_speed);
   std::string smpi_privatize_option                = xbt_cfg_get_string("smpi/privatization");
   if (smpi_privatize_option == "no" || smpi_privatize_option == "0")
-    smpi_privatize_global_variables = SMPI_PRIVATIZE_NONE;
+    smpi_privatize_global_variables = SmpiPrivStrategies::None;
   else if (smpi_privatize_option == "yes" || smpi_privatize_option == "1")
-    smpi_privatize_global_variables = SMPI_PRIVATIZE_DEFAULT;
+    smpi_privatize_global_variables = SmpiPrivStrategies::Default;
   else if (smpi_privatize_option == "mmap")
-    smpi_privatize_global_variables = SMPI_PRIVATIZE_MMAP;
+    smpi_privatize_global_variables = SmpiPrivStrategies::Mmap;
   else if (smpi_privatize_option == "dlopen")
-    smpi_privatize_global_variables = SMPI_PRIVATIZE_DLOPEN;
+    smpi_privatize_global_variables = SmpiPrivStrategies::Dlopen;
   else
     xbt_die("Invalid value for smpi/privatization: '%s'", smpi_privatize_option.c_str());
 
+  if (not SMPI_switch_data_segment) {
+    XBT_DEBUG("Running without smpi_main(); disable smpi/privatization.");
+    smpi_privatize_global_variables = SmpiPrivStrategies::None;
+  }
 #if defined(__FreeBSD__)
-    if (smpi_privatize_global_variables == SMPI_PRIVATIZE_MMAP) {
-      XBT_INFO("Mixing mmap privatization is broken on FreeBSD, switching to dlopen privatization instead.");
-      smpi_privatize_global_variables = SMPI_PRIVATIZE_DLOPEN;
-    }
+  if (smpi_privatize_global_variables == SmpiPrivStrategies::Mmap) {
+    XBT_INFO("mmap privatization is broken on FreeBSD, switching to dlopen privatization instead.");
+    smpi_privatize_global_variables = SmpiPrivStrategies::Dlopen;
+  }
 #endif
 
     if (smpi_cpu_threshold < 0)
@@ -435,7 +425,8 @@ static int smpi_run_entry_point(smpi_entry_point_type entry_point, std::vector<s
   int res = entry_point(argc, argv.get());
   if (res != 0){
     XBT_WARN("SMPI process did not return 0. Return value : %d", res);
-    smpi_process()->set_return_value(res);
+    if (smpi_exit_status == 0)
+      smpi_exit_status = res;
   }
   return 0;
 }
@@ -489,7 +480,7 @@ int smpi_main(const char* executable, int argc, char *argv[])
   SIMIX_comm_set_copy_data_callback(smpi_comm_copy_buffer_callback);
 
   smpi_init_options();
-  if (smpi_privatize_global_variables == SMPI_PRIVATIZE_DLOPEN) {
+  if (smpi_privatize_global_variables == SmpiPrivStrategies::Dlopen) {
 
     std::string executable_copy = executable;
 
@@ -508,17 +499,18 @@ int smpi_main(const char* executable, int argc, char *argv[])
           + "_" + std::to_string(rank++) + ".so";
 
         int fdin = open(executable_copy.c_str(), O_RDONLY);
-        xbt_assert(fdin >= 0, "Cannot read from %s", executable_copy.c_str());
+        xbt_assert(fdin >= 0, "Cannot read from %s. Please make sure that the file exists and is executable.",
+                   executable_copy.c_str());
         int fdout = open(target_executable.c_str(), O_CREAT | O_RDWR, S_IRWXU);
         xbt_assert(fdout >= 0, "Cannot write into %s", target_executable.c_str());
 
+        XBT_DEBUG("Copy %ld bytes into %s", static_cast<long>(fdin_size), target_executable.c_str());
 #if HAVE_SENDFILE
         ssize_t sent_size = sendfile(fdout, fdin, NULL, fdin_size);
         xbt_assert(sent_size == fdin_size,
                    "Error while copying %s: only %zd bytes copied instead of %ld (errno: %d -- %s)",
                    target_executable.c_str(), sent_size, fdin_size, errno, strerror(errno));
 #else
-        XBT_VERB("Copy %d bytes into %s", static_cast<int>(fdin_size), target_executable.c_str());
         const int bufsize = 1024 * 1024 * 4;
         char buf[bufsize];
         while (int got = read(fdin, buf, bufsize)) {
@@ -555,12 +547,10 @@ int smpi_main(const char* executable, int argc, char *argv[])
         smpi_run_entry_point(entry_point, args);
       });
     };
-
   }
   else {
-
     // Load the dynamic library and resolve the entry point:
-    void* handle = dlopen(executable, RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND);
+    void* handle = dlopen(executable, RTLD_LAZY | RTLD_LOCAL);
     if (handle == nullptr)
       xbt_die("dlopen failed for %s: %s (errno: %d -- %s)", executable, dlerror(), errno, strerror(errno));
     smpi_entry_point_type entry_point = smpi_resolve_function(handle);
@@ -574,13 +564,12 @@ int smpi_main(const char* executable, int argc, char *argv[])
         smpi_run_entry_point(entry_point, args);
       });
     };
-
   }
 
   SMPI_init();
   SIMIX_launch_application(argv[2]);
-  SMPI_app_instance_register(smpi_default_instance_name, nullptr,
-                               SIMIX_process_count()); // This call has a side effect on process_count...
+  SMPI_app_instance_register(smpi_default_instance_name.c_str(), nullptr,
+                             process_data.size()); // This call has a side effect on process_count...
   MPI_COMM_WORLD = *smpi_deployment_comm_world(smpi_default_instance_name);
   smpi_universe_size = process_count;
 
@@ -608,32 +597,34 @@ int smpi_main(const char* executable, int argc, char *argv[])
       "You may want to use sampling functions or trace replay to reduce this.");
     }
   }
-  int ret   = 0;
-  for (auto& pair : process_data) {
-    auto& smpi_process = pair.second;
-    if (smpi_process->return_value() != 0) {
-      ret = smpi_process->return_value(); // return first non 0 value
-      break;
-    }
-  }
   smpi_global_destroy();
 
   TRACE_end();
 
-  return ret;
+  return smpi_exit_status;
 }
 
 // Called either directly from the user code, or from the code called by smpirun
 void SMPI_init(){
-  simgrid::s4u::Actor::onCreation.connect([](simgrid::s4u::ActorPtr actor) {
-    smpi_add_process(actor);
+  simgrid::s4u::Actor::on_creation.connect([](simgrid::s4u::ActorPtr actor) {
+    if (not actor->is_daemon()) {
+      process_data.insert({actor, new simgrid::smpi::Process(actor, nullptr)});
+    }
+  });
+  simgrid::s4u::Actor::on_destruction.connect([](simgrid::s4u::ActorPtr actor) {
+    auto it = process_data.find(actor);
+    if (it != process_data.end()) {
+      delete it->second;
+      process_data.erase(it);
+    }
   });
+
   smpi_init_options();
   smpi_global_init();
   smpi_check_options();
   TRACE_smpi_alloc();
-  simgrid::surf::surfExitCallbacks.connect(TRACE_smpi_release);
-  if(smpi_privatize_global_variables == SMPI_PRIVATIZE_MMAP)
+  simgrid::s4u::onSimulationEnd.connect(TRACE_smpi_release);
+  if (smpi_privatize_global_variables == SmpiPrivStrategies::Mmap)
     smpi_backup_global_memory_segment();
 }