Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[SMPI/LB] Small cleanups
[simgrid.git] / src / smpi / plugins / sampi_loadbalancer.cpp
index f367ede..e32317e 100644 (file)
@@ -9,11 +9,13 @@
 #include <smpi/smpi.h>
 #include <src/smpi/include/smpi_comm.hpp>
 #include <src/smpi/include/smpi_actor.hpp>
+#include <src/smpi/plugins/ampi/instr_ampi.hpp>
+#include <src/smpi/plugins/ampi/ampi.hpp>
 #include <xbt/replay.hpp>
 
 #include "src/kernel/activity/ExecImpl.hpp"
 #include "src/simix/ActorImpl.hpp"
-#include <simgrid/smpi/loadbalancer/load_balancer.hpp>
+#include "src/smpi/plugins/load_balancer/load_balancer.hpp" // This is not yet ready to be public
 
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(plugin_load_balancer, smpi, "Logging specific to the SMPI load balancing plugin");
 
@@ -47,12 +49,18 @@ public:
   explicit MigrateAction() : ReplayAction("Migrate") {}
   void kernel(simgrid::xbt::ReplayAction& action)
   {
-    static std::map<simgrid::s4u::ActorPtr, int> migration_counter;
+    static std::map<simgrid::s4u::ActorPtr, int> migration_call_counter;
     static simgrid::s4u::Barrier smpilb_bar(smpi_process_count());
     simgrid::s4u::Host* cur_host = simgrid::s4u::this_actor::get_host();
     simgrid::s4u::Host* migrate_to_host;
 
-    TRACE_migration_call(my_proc_id, NULL);
+    TRACE_migration_call(my_proc_id, nullptr);
+
+    // We only migrate every "cfg_migration_frequency"-times, not at every call
+    migration_call_counter[simgrid::s4u::Actor::self()]++;
+    if ((migration_call_counter[simgrid::s4u::Actor::self()] % simgrid::config::get_value<int>(cfg_migration_frequency.get_name())) != 0) {
+      return;
+    }
 
     // TODO cheinrich: Why do we need this barrier?
     smpilb_bar.wait();
@@ -60,8 +68,8 @@ public:
     static bool was_executed = false;
     if (not was_executed) {
       was_executed = true;
+      XBT_DEBUG("Process %u runs the load balancer", my_proc_id);
       smpi_bench_begin();
-      XBT_INFO("RUNNING THE LB");
       lb.run();
       smpi_bench_end();
     }
@@ -69,18 +77,13 @@ public:
     // This barrier is required to ensure that the mapping has been computed and is available
     smpilb_bar.wait();
     was_executed = false; // Must stay behind this barrier so that all processes have passed the if clause
-    migration_counter[simgrid::s4u::Actor::self()]++;
-    if ((migration_counter[simgrid::s4u::Actor::self()] % simgrid::config::get_value<int>(cfg_migration_frequency.get_name())) != 0) {
-      return;
-    }
 
     migrate_to_host = lb.get_mapping();
-
     if (cur_host != migrate_to_host) { // Origin and dest are not the same -> migrate
       sg_host_t migration_hosts[2] = {cur_host, migrate_to_host};
       // Changing this to double[2] ... will cause trouble with parallel_execute, because that fct is trying to call free().
       double* comp_amount  = new double[2]{0, 0};
-      double* comm_amount  = new double[4]{0, std::max(args.memory_consumption, 1.0), 0, 0};
+      double* comm_amount  = new double[4]{0, /*must not be 0*/std::max(args.memory_consumption, 1.0), 0, 0};
 
       xbt_os_timer_t timer = smpi_process()->timer();
       xbt_os_threadtimer_start(timer);
@@ -109,14 +112,15 @@ void action_iteration_in(simgrid::xbt::ReplayAction& action)
 {
   CHECK_ACTION_PARAMS(action, 0, 0)
   TRACE_Iteration_in(simgrid::s4u::this_actor::get_pid(), nullptr);
+  simgrid::smpi::plugin::ampi::on_iteration_in(MPI_COMM_WORLD->group()->actor(std::stol(action[0])));
 }
 
-// FIXME Move declaration
 XBT_PRIVATE void action_iteration_out(simgrid::xbt::ReplayAction& action);
 void action_iteration_out(simgrid::xbt::ReplayAction& action)
 {
   CHECK_ACTION_PARAMS(action, 0, 0)
   TRACE_Iteration_out(simgrid::s4u::this_actor::get_pid(), nullptr);
+  simgrid::smpi::plugin::ampi::on_iteration_out(MPI_COMM_WORLD->group()->actor(std::stol(action[0])));
 }
 }
 }