[Logo] AND — Algorithmique Numérique Distribuée

Public GIT Repository
New function SMPI_app_instance_start() to easily start a MPI instance in your S4U...
author: Martin Quinson <martin.quinson@ens-rennes.fr>
Wed, 1 Mar 2023 20:44:56 +0000 (21:44 +0100)
committer: Martin Quinson <martin.quinson@ens-rennes.fr>
Wed, 1 Mar 2023 20:50:07 +0000 (21:50 +0100)
ChangeLog
examples/smpi/smpi_s4u_masterworker/deployment_masterworker_mailbox_smpi.xml
examples/smpi/smpi_s4u_masterworker/masterworker_mailbox_smpi.cpp
examples/smpi/smpi_s4u_masterworker/s4u_smpi.tesh
include/smpi/smpi.h
src/smpi/internals/smpi_deployment.cpp

index a60dfcf..5e238de 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -26,6 +26,7 @@ Kernel:
 MPI:
  - New option smpi/barrier-collectives to add a barrier to some collectives
    to detect dangerous code that /may/ work on some MPI implems.
+ - New function SMPI_app_instance_start() to easily start a MPI instance in your S4U simulation.
 
 Models:
  - Write the section of the manual about models, at least.
index 1c5fac6..cfa7021 100644 (file)
     <prop id="instance_id" value="master_mpi"/>
     <prop id="rank" value="1"/>
   </actor>
-  <actor host="Ginette" function="alltoall_mpi">
-    <prop id="instance_id" value="alltoall_mpi"/>
-    <prop id="rank" value="0"/>
-  </actor>
-  <actor host="Bourassa" function="alltoall_mpi">
-    <prop id="instance_id" value="alltoall_mpi"/>
-    <prop id="rank" value="1"/>
-  </actor>
-  <actor host="Jupiter" function="alltoall_mpi">
-    <prop id="instance_id" value="alltoall_mpi"/>
-    <prop id="rank" value="2"/>
-  </actor>
-  <actor host="Fafard" function="alltoall_mpi">
-    <prop id="instance_id" value="alltoall_mpi"/>
-    <prop id="rank" value="3"/>
-  </actor>
 </platform>
index 43587a9..a76fa88 100644 (file)
@@ -80,9 +80,9 @@ static void master_mpi(int argc, char* argv[])
   XBT_INFO("After finalize %d %d", rank, test[0]);
 }
 
-static void alltoall_mpi(int argc, char* argv[])
+static void alltoall_mpi()
 {
-  MPI_Init(&argc, &argv);
+  MPI_Init();
 
   int rank;
   int size;
@@ -114,9 +114,12 @@ int main(int argc, char* argv[])
   e.register_function("worker", worker);
   // launch two MPI applications as well, one using master_mpi function as main on 2 nodes
   SMPI_app_instance_register("master_mpi", master_mpi, 2);
-  // the second performing an alltoall on 4 nodes
-  SMPI_app_instance_register("alltoall_mpi", alltoall_mpi, 4);
   e.load_deployment(argv[2]);
+  // the second performing an alltoall on 4 nodes, started directly, not from the deployment file
+  auto all_hosts = e.get_all_hosts();
+  SMPI_app_instance_start("alltoall_mpi", alltoall_mpi,
+                          {e.host_by_name_or_null("Ginette"), e.host_by_name_or_null("Bourassa"),
+                           e.host_by_name_or_null("Jupiter"), e.host_by_name_or_null("Fafard")});
 
   e.run();
 
index bf1b377..cb3d3a7 100644 (file)
@@ -4,21 +4,21 @@ $ ./masterworker_mailbox_smpi ${srcdir:=.}/../../platforms/small_platform_with_r
 > [0.000000] [xbt_cfg/INFO] Configuration change: Set 'smpi/simulate-computation' to 'no'
 > [0.000000] [smpi_config/INFO] You did not set the power of the host running the simulation.  The timings will certainly not be accurate.  Use the option "--cfg=smpi/host-speed:<flops>" to set its value.  Check https://simgrid.org/doc/latest/Configuring_SimGrid.html#automatic-benchmarking-of-smpi-code for more information.
 > [11.586581] [smpi_masterworkers/INFO] Simulation time 11.5866
-> [Bourassa:alltoall_mpi:(7) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 1
-> [Bourassa:alltoall_mpi:(7) 0.047272] [smpi_masterworkers/INFO] after alltoall 1
+> [Bourassa:alltoall_mpi#1:(7) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 1
+> [Bourassa:alltoall_mpi#1:(7) 0.047272] [smpi_masterworkers/INFO] after alltoall 1
 > [Bourassa:master_mpi:(5) 0.000000] [smpi_masterworkers/INFO] here for rank 1
 > [Bourassa:master_mpi:(5) 0.017245] [smpi_masterworkers/INFO] After comm 1
 > [Bourassa:master_mpi:(5) 0.017245] [smpi_masterworkers/INFO] After finalize 1 0
-> [Fafard:alltoall_mpi:(9) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 3
-> [Fafard:alltoall_mpi:(9) 0.047582] [smpi_masterworkers/INFO] after alltoall 3
-> [Ginette:alltoall_mpi:(6) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 0
-> [Ginette:alltoall_mpi:(6) 0.037258] [smpi_masterworkers/INFO] after alltoall 0
+> [Fafard:alltoall_mpi#3:(9) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 3
+> [Fafard:alltoall_mpi#3:(9) 0.047582] [smpi_masterworkers/INFO] after alltoall 3
+> [Ginette:alltoall_mpi#0:(6) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 0
+> [Ginette:alltoall_mpi#0:(6) 0.037258] [smpi_masterworkers/INFO] after alltoall 0
 > [Ginette:master_mpi:(4) 0.000000] [smpi_masterworkers/INFO] After comm 0
 > [Ginette:master_mpi:(4) 0.000000] [smpi_masterworkers/INFO] After finalize 0 0
 > [Ginette:master_mpi:(4) 0.000000] [smpi_masterworkers/INFO] here for rank 0
 > [Ginette:worker:(2) 11.567566] [smpi_masterworkers/INFO] Exiting now.
-> [Jupiter:alltoall_mpi:(8) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 2
-> [Jupiter:alltoall_mpi:(8) 0.047582] [smpi_masterworkers/INFO] after alltoall 2
+> [Jupiter:alltoall_mpi#2:(8) 0.000000] [smpi_masterworkers/INFO] alltoall for rank 2
+> [Jupiter:alltoall_mpi#2:(8) 0.047582] [smpi_masterworkers/INFO] after alltoall 2
 > [Jupiter:worker:(3) 11.586581] [smpi_masterworkers/INFO] Exiting now.
 > [Tremblay:master:(1) 0.000000] [smpi_masterworkers/INFO] Got 2 workers and 20 tasks to process
 > [Tremblay:master:(1) 0.000000] [smpi_masterworkers/INFO] Sending task 0 of 20 to mailbox 'Ginette'
index a2a83f0..6263334 100644 (file)
@@ -1232,8 +1232,17 @@ XBT_PUBLIC void SMPI_thread_create();
 
 SG_END_DECL
 
-/* C++ declarations for shared_malloc and default copy buffer callback */
 #ifdef __cplusplus
+XBT_PUBLIC void SMPI_app_instance_start(const char* name, std::function<void()> const& code,
+                                        std::vector<simgrid::s4u::Host*> const& hosts);
+
+/* This version without parameter is nice to use with SMPI_app_instance_start() */
+static void MPI_Init()
+{
+  MPI_Init(nullptr, nullptr);
+}
+
+/* C++ declarations for shared_malloc and default copy buffer callback */
 XBT_PUBLIC int smpi_is_shared(const void* ptr, std::vector<std::pair<size_t, size_t>>& private_blocks, size_t* offset);
 
 std::vector<std::pair<size_t, size_t>> shift_and_frame_private_blocks(const std::vector<std::pair<size_t, size_t>>& vec,
index 5e36060..d20de25 100644 (file)
@@ -54,6 +54,24 @@ void SMPI_app_instance_register(const char *name, xbt_main_func_t code, int num_
 
   smpi_instances.try_emplace(name, num_processes);
 }
+void SMPI_app_instance_start(const char* name, const std::function<void()>& code,
+                             std::vector<simgrid::s4u::Host*> const& hosts)
+{
+  xbt_assert(hosts.size(), "Cannot start a SMPI instance on 0 hosts");
+  smpi_instances.try_emplace(name, hosts.size());
+
+  int rank = 0;
+  for (auto* host : hosts) {
+    auto rank_str          = std::to_string(rank);
+    std::string actor_name = std::string(name) + "#" + rank_str;
+    auto actor             = simgrid::s4u::Actor::create(actor_name, host, code);
+    actor->set_property("instance_id", name);
+    actor->set_property("rank", rank_str);
+    smpi_deployment_register_process(name, rank, actor.get());
+
+    rank++;
+  }
+}
 
 void smpi_deployment_register_process(const std::string& instance_id, int rank, const simgrid::s4u::Actor* actor)
 {