New option to detect dangerous code mixing collectives and P2P in a deadlocking way
author     Martin Quinson <martin.quinson@ens-rennes.fr>
Mon, 10 Oct 2022 19:19:04 +0000 (21:19 +0200)
committer  Martin Quinson <martin.quinson@ens-rennes.fr>
Mon, 10 Oct 2022 19:20:31 +0000 (21:20 +0200)
ChangeLog
docs/source/Configuring_SimGrid.rst
src/smpi/bindings/smpi_pmpi.cpp
src/smpi/bindings/smpi_pmpi_coll.cpp
src/smpi/internals/smpi_config.cpp
src/smpi/internals/smpi_replay.cpp
src/smpi/smpirun.in
teshsuite/smpi/MBI/MBI.py
teshsuite/smpi/MBI/simgrid.py
teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh

index 51acd4e..7863f28 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,10 @@ S4U:
  - Activity::set_remaining() is not public anymore. Use for example
    Comm::set_payload_size() to change the size of the simulated data.
 
+MPI:
+ - New option smpi/barrier-collectives to add a barrier in all
+   collectives to detect dangerous code that /may/ work on some implementations.
+
 Models:
  - WiFi: the total capacity of a link depends on the amount of flows on that link.
    - Use the nonlinear callback feature of LMM to reflect this.
index 4ea9206..f5af4a0 100644 (file)
--- a/docs/source/Configuring_SimGrid.rst
+++ b/docs/source/Configuring_SimGrid.rst
@@ -147,6 +147,8 @@ Existing Configuration Items
 - **For collective operations of SMPI,** please refer to Section :ref:`cfg=smpi/coll-selector`
 - **smpi/auto-shared-malloc-thresh:** :ref:`cfg=smpi/auto-shared-malloc-thresh`
 - **smpi/async-small-thresh:** :ref:`cfg=smpi/async-small-thresh`
+- **smpi/barrier-finalization:** :ref:`cfg=smpi/barrier-finalization`
+- **smpi/barrier-collectives:** :ref:`cfg=smpi/barrier-collectives`
 - **smpi/buffering:** :ref:`cfg=smpi/buffering`
 - **smpi/bw-factor:** :ref:`cfg=smpi/bw-factor`
 - **smpi/coll-selector:** :ref:`cfg=smpi/coll-selector`
@@ -155,7 +157,6 @@ Existing Configuration Items
 - **smpi/display-allocs:** :ref:`cfg=smpi/display-allocs`
 - **smpi/display-timing:** :ref:`cfg=smpi/display-timing`
 - **smpi/errors-are-fatal:** :ref:`cfg=smpi/errors-are-fatal`
-- **smpi/finalization-barrier:** :ref:`cfg=smpi/finalization-barrier`
 - **smpi/grow-injected-times:** :ref:`cfg=smpi/grow-injected-times`
 - **smpi/host-speed:** :ref:`cfg=smpi/host-speed`
 - **smpi/IB-penalty-factors:** :ref:`cfg=smpi/IB-penalty-factors`
@@ -1368,7 +1369,32 @@ Each collective operation can be manually selected with a
 .. TODO:: All available collective algorithms will be made available
           via the ``smpirun --help-coll`` command.
 
-.. _cfg=smpi/finalization-barrier:
+.. _cfg=smpi/barrier-collectives:
+
+Add a barrier in all collectives
+................................
+
+**Option** ``smpi/barrier-collectives`` **default:** off
+
+This option adds a simple barrier in all collective operations to catch dangerous
+code that may or may not work depending on the MPI implementation. It is disabled
+by default, and activated by the ``-analyze`` flag of smpirun.
+
+For example, the following code works with OpenMPI but deadlocks with MPICH and
+Intel MPI, which suggests that OpenMPI uses a "fire and forget" implementation of
+Broadcast.
+
+.. code-block:: C
+
+  if (rank == 0) {
+    MPI_Bcast(buf1, buff_size, MPI_CHAR, 0, newcom);
+    MPI_Send(buf2, buff_size, MPI_CHAR, 1, tag, newcom);
+  } else if (rank == 1) {
+    MPI_Recv(buf2, buff_size, MPI_CHAR, 0, tag, newcom, MPI_STATUS_IGNORE);
+    MPI_Bcast(buf1, buff_size, MPI_CHAR, 0, newcom);
+  }
+
+.. _cfg=smpi/barrier-finalization:
 
 Add a barrier in MPI_Finalize
 .............................
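
With ``smpi/barrier-collectives`` on, the Bcast/Send example above deadlocks
deterministically under every implementation: rank 0 blocks in the barrier
entered from MPI_Bcast while rank 1 blocks in MPI_Recv, waiting for a message
that will never be sent. A portable rewrite simply orders the collective
identically on all ranks; a minimal sketch reusing the names of the example:

.. code-block:: C

  if (rank == 0) {
    MPI_Bcast(buf1, buff_size, MPI_CHAR, 0, newcom);  /* collective first, on every rank */
    MPI_Send(buf2, buff_size, MPI_CHAR, 1, tag, newcom);
  } else if (rank == 1) {
    MPI_Bcast(buf1, buff_size, MPI_CHAR, 0, newcom);  /* same call order as rank 0 */
    MPI_Recv(buf2, buff_size, MPI_CHAR, 0, tag, newcom, MPI_STATUS_IGNORE);
  }
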
index ef203b0..543f451 100644 (file)
--- a/src/smpi/bindings/smpi_pmpi.cpp
+++ b/src/smpi/bindings/smpi_pmpi.cpp
@@ -71,7 +71,7 @@ int PMPI_Finalize()
   smpi_process()->mark_as_finalizing();
   TRACE_smpi_comm_in(rank_traced, __func__, new simgrid::instr::NoOpTIData("finalize"));
 
-  if(simgrid::config::get_value<bool>("smpi/finalization-barrier"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-finalization"))
     simgrid::smpi::colls::barrier(MPI_COMM_WORLD);
 
   smpi_process()->finalize();
index a8da1d4..c626857 100644 (file)
--- a/src/smpi/bindings/smpi_pmpi_coll.cpp
+++ b/src/smpi/bindings/smpi_pmpi_coll.cpp
@@ -76,8 +76,8 @@ int PMPI_Ibcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm
                      new simgrid::instr::CollTIData(request == MPI_REQUEST_IGNORED ? "bcast" : "ibcast", root, -1.0,
                                                     count, 0,
                                                     simgrid::smpi::Datatype::encode(datatype), ""));
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   if (comm->size() > 1) {
     if (request == MPI_REQUEST_IGNORED)
@@ -138,8 +138,8 @@ int PMPI_Igather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
 
@@ -197,8 +197,8 @@ int PMPI_Igatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   const void* real_sendbuf   = sendbuf;
   int real_sendcount         = sendcount;
@@ -270,8 +270,8 @@ int PMPI_Iallgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, v
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
 
@@ -320,8 +320,8 @@ int PMPI_Iallgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype,
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   if (sendbuf == MPI_IN_PLACE) {
     sendbuf   = static_cast<char*>(recvbuf) + recvtype->get_extent() * displs[comm->rank()];
@@ -391,8 +391,8 @@ int PMPI_Iscatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, voi
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
 
@@ -450,8 +450,8 @@ int PMPI_Iscatterv(const void* sendbuf, const int* sendcounts, const int* displs
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid        = simgrid::s4u::this_actor::get_pid();
 
@@ -504,8 +504,8 @@ int PMPI_Ireduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
 
@@ -560,8 +560,8 @@ int PMPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   std::vector<unsigned char> tmp_sendbuf;
   const void* real_sendbuf = smpi_get_in_place_buf(sendbuf, recvbuf, tmp_sendbuf, count, datatype);
@@ -603,8 +603,8 @@ int PMPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datat
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
   std::vector<unsigned char> tmp_sendbuf;
@@ -644,8 +644,8 @@ int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype dat
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid = simgrid::s4u::this_actor::get_pid();
   std::vector<unsigned char> tmp_sendbuf;
@@ -691,8 +691,8 @@ int PMPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int *recvcoun
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   aid_t pid                          = simgrid::s4u::this_actor::get_pid();
   auto trace_recvcounts              = std::make_shared<std::vector<int>>();
@@ -744,8 +744,8 @@ int PMPI_Ireduce_scatter_block(const void* sendbuf, void* recvbuf, int recvcount
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   int count = comm->size();
 
@@ -814,8 +814,8 @@ int PMPI_Ialltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, vo
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   TRACE_smpi_comm_in(pid, request == MPI_REQUEST_IGNORED ? "PMPI_Alltoall" : "PMPI_Ialltoall",
                      new simgrid::instr::CollTIData(
@@ -870,8 +870,8 @@ int PMPI_Ialltoallv(const void* sendbuf, const int* sendcounts, const int* sendd
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   int send_size                      = 0;
   int recv_size                      = 0;
@@ -968,8 +968,8 @@ int PMPI_Ialltoallw(const void* sendbuf, const int* sendcounts, const int* sendd
 
   const SmpiBenchGuard suspend_bench;
 
-  if(simgrid::config::get_value<bool>("smpi/colls-inject-barrier"))
-    simgrid::smpi::colls::barrier(comm);
+  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
+    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
 
   int send_size                      = 0;
   int recv_size                      = 0;
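
Every hunk in this file makes the same two changes: the option is renamed from
``smpi/colls-inject-barrier`` to ``smpi/barrier-collectives``, and the injected
barrier switches from ``simgrid::smpi::colls::barrier(comm)`` (itself a full MPI
collective) to the per-instance startup barrier, presumably so that the injected
synchronization does not go through the very collective machinery being
instrumented; judging from the instance-id argument, it synchronizes the whole
SMPI instance rather than only ``comm``'s members. Schematically, each
(I)collective entry point now begins with the guard below (copied from the
hunks above):

.. code-block:: cpp

  // When the option is set, force a synchronization before the actual
  // collective starts, so that mismatched call orders deadlock reliably.
  if (simgrid::config::get_value<bool>("smpi/barrier-collectives"))
    smpi_deployment_startup_barrier(smpi_process()->get_instance_id());
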
index 2a0ec18..49bd376 100644 (file)
--- a/src/smpi/internals/smpi_config.cpp
+++ b/src/smpi/internals/smpi_config.cpp
@@ -152,9 +152,6 @@ simgrid::config::Flag<int> _smpi_cfg_list_leaks("smpi/list-leaks",
                                                 "Whether we should display the n first MPI handle leaks (addresses and type only) after simulation",
                                                 -1);
 
-simgrid::config::Flag<bool> _smpi_cfg_colls_inject_barrier{
-  "smpi/colls-inject-barrier", "Inject a barrier in each colllective operation, to detect some deadlocks in incorrect MPI codes, which may not be triggered in all cases", false };
-
 double smpi_cfg_host_speed(){
   return _smpi_cfg_host_speed;
 }
@@ -295,7 +292,12 @@ void smpi_init_options_internal(bool called_by_smpi_main)
   simgrid::config::declare_flag<std::string>(
       "smpi/or", "Small messages timings (MPI_Recv minimum time for small messages)", "0:0:0:0:0");
 
-  simgrid::config::declare_flag<bool>("smpi/finalization-barrier", "Do we add a barrier in MPI_Finalize or not", false);
+  simgrid::config::declare_flag<bool>("smpi/barrier-finalization", {"smpi/finalization-barrier"},
+                                      "Do we add a barrier in MPI_Finalize or not", false);
+  simgrid::config::declare_flag<bool>("smpi/barrier-collectives",
+                                      "Inject a barrier in each collective operation, to detect some deadlocks in "
+                                      "incorrect MPI codes, which may not be triggered in all cases",
+                                      false);
 
   smpi_options_initialized = true;
 }
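
The finalization flag is renamed without breaking existing scripts:
``declare_flag`` accepts a brace-list of deprecated aliases after the canonical
name, and both spellings resolve to the same flag. A minimal sketch of the
pattern, reusing the two call sites from the hunks above:

.. code-block:: cpp

  // Canonical name first, deprecated spellings in the alias list.
  simgrid::config::declare_flag<bool>("smpi/barrier-finalization", {"smpi/finalization-barrier"},
                                      "Do we add a barrier in MPI_Finalize or not", false);

  // Readers query the canonical name only; on the command line, both
  // --cfg=smpi/barrier-finalization:on and the legacy
  // --cfg=smpi/finalization-barrier:on select this flag.
  bool finalize_barrier = simgrid::config::get_value<bool>("smpi/barrier-finalization");
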
index 8c6a3a5..68570b3 100644 (file)
--- a/src/smpi/internals/smpi_replay.cpp
+++ b/src/smpi/internals/smpi_replay.cpp
@@ -928,7 +928,7 @@ void smpi_replay_main(int rank, const char* private_trace_filename)
     simgrid::smpi::Request::waitall(count_requests, requests.data(), MPI_STATUSES_IGNORE);
   }
 
-  if(simgrid::config::get_value<bool>("smpi/finalization-barrier"))
+  if (simgrid::config::get_value<bool>("smpi/barrier-finalization"))
     simgrid::smpi::colls::barrier(MPI_COMM_WORLD);
 
   active_processes--;
index 311973c..bbb3a82 100755 (executable)
--- a/src/smpi/smpirun.in
+++ b/src/smpi/smpirun.in
@@ -233,7 +233,7 @@ while true; do
             shift 1
             ;;
         "-analyze")
-            SIMOPTS="$SIMOPTS --cfg=smpi/display-timing:yes --cfg=smpi/display-allocs:yes --cfg=smpi/list-leaks:50 --cfg=smpi/pedantic:true --cfg=smpi/colls-inject-barrier:true"
+            SIMOPTS="$SIMOPTS --cfg=smpi/display-timing:yes --cfg=smpi/display-allocs:yes --cfg=smpi/list-leaks:50 --cfg=smpi/pedantic:true --cfg=smpi/barrier-collectives:true"
             shift 1
             ;;
         "-help" | "--help" | "-h")
index 9e2f904..23ed59b 100755 (executable)
--- a/teshsuite/smpi/MBI/MBI.py
+++ b/teshsuite/smpi/MBI/MBI.py
@@ -30,7 +30,7 @@ simgrid = sg.Tool()
 
 (name, path, binary, filename) = sys.argv
 for test in mbi.parse_one_code(filename):
-    execcmd = test['cmd'].replace("mpirun", f"{path}/smpi_script/bin/smpirun -wrapper '{path}/bin/simgrid-mc --log=mc_safety.t:info' -platform ./cluster.xml -analyze --cfg=smpi/finalization-barrier:on --cfg=smpi/list-leaks:10 --cfg=model-check/max-depth:10000")
+    execcmd = test['cmd'].replace("mpirun", f"{path}/smpi_script/bin/smpirun -wrapper '{path}/bin/simgrid-mc --log=mc_safety.t:info' -platform ./cluster.xml -analyze --cfg=smpi/barrier-finalization:on --cfg=smpi/list-leaks:10 --cfg=model-check/max-depth:10000")
     execcmd = execcmd.replace('${EXE}', binary)
     execcmd = execcmd.replace('$zero_buffer', "--cfg=smpi/buffering:zero")
     execcmd = execcmd.replace('$infty_buffer', "--cfg=smpi/buffering:infty")
index 2119d40..3c1ca88 100644 (file)
--- a/teshsuite/smpi/MBI/simgrid.py
+++ b/teshsuite/smpi/MBI/simgrid.py
@@ -49,7 +49,7 @@ class Tool(mbi.AbstractTool):
                 outfile.write(' <cluster id="acme" prefix="node-" radical="0-99" suffix="" speed="1Gf" bw="125MBps" lat="50us"/>\n')
                 outfile.write('</platform>\n')
 
-        execcmd = execcmd.replace("mpirun", "smpirun -wrapper simgrid-mc -platform ./cluster.xml -analyze --cfg=smpi/finalization-barrier:on --cfg=smpi/list-leaks:10 --cfg=model-check/max-depth:10000")
+        execcmd = execcmd.replace("mpirun", "smpirun -wrapper simgrid-mc -platform ./cluster.xml -analyze --cfg=smpi/barrier-finalization:on --cfg=smpi/list-leaks:10 --cfg=model-check/max-depth:10000")
         execcmd = execcmd.replace('${EXE}', binary)
         execcmd = execcmd.replace('$zero_buffer', "--cfg=smpi/buffering:zero")
         execcmd = execcmd.replace('$infty_buffer', "--cfg=smpi/buffering:infty")
index 11476da..303f73e 100644 (file)
--- a/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh
+++ b/teshsuite/smpi/pt2pt-dsend/pt2pt-dsend.tesh
@@ -1,6 +1,6 @@
 p Test dsend
 ! output sort
-$ ${bindir:=.}/../../../smpi_script/bin/smpirun -map -hostfile ${bindir:=.}/../hostfile -platform ${platfdir}/small_platform.xml -np 2 --log=no_loc ${bindir:=.}/pt2pt-dsend -s --long --log=smpi_config.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/simulate-computation:no --cfg=smpi/finalization-barrier:on
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -map -hostfile ${bindir:=.}/../hostfile -platform ${platfdir}/small_platform.xml -np 2 --log=no_loc ${bindir:=.}/pt2pt-dsend -s --long --log=smpi_config.thres:warning --log=xbt_cfg.thres:warning --cfg=smpi/simulate-computation:no --cfg=smpi/barrier-finalization:on
 > [Jupiter:1:(2) 0.000000] [dsend/INFO] rank 1: data exchanged
 > [Tremblay:0:(1) 0.005896] [dsend/INFO] rank 0: data exchanged
 > [0.000000] [smpi/INFO] [rank 0] -> Tremblay
@@ -11,7 +11,7 @@ p message size is 4 bytes
 p process 1 will finish at 0.5+2*4 (send) + 1+0.1*4 (isend) = 9.9s
 p process 2 will finish at 0.5+2*4 (time before first send) + 2*(1+0.5*4) (recv+irecv) + 0.005896 (network time, same as before) = 14.505896s
 ! output sort
-$ ${bindir:=.}/../../../smpi_script/bin/smpirun -map -hostfile ${bindir:=.}/../hostfile -platform ${platfdir}/small_platform.xml -np 2 --log=no_loc ${bindir:=.}/pt2pt-dsend -s --long --log=smpi_config.thres:warning --cfg=smpi/or:0:1:0.5 --cfg=smpi/os:0:0.5:2 --cfg=smpi/ois:0:1:0.1 --cfg=smpi/simulate-computation:no --cfg=smpi/finalization-barrier:on --log=xbt_cfg.thres:warning
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -map -hostfile ${bindir:=.}/../hostfile -platform ${platfdir}/small_platform.xml -np 2 --log=no_loc ${bindir:=.}/pt2pt-dsend -s --long --log=smpi_config.thres:warning --cfg=smpi/or:0:1:0.5 --cfg=smpi/os:0:0.5:2 --cfg=smpi/ois:0:1:0.1 --cfg=smpi/simulate-computation:no --cfg=smpi/barrier-finalization:on --log=xbt_cfg.thres:warning
 > [Jupiter:1:(2) 9.900000] [dsend/INFO] rank 1: data exchanged
 > [Tremblay:0:(1) 14.505896] [dsend/INFO] rank 0: data exchanged
 > [0.000000] [smpi/INFO] [rank 0] -> Tremblay