AND - Algorithmique Numérique Distribuée

Public GIT Repository
Upgrade MPICH collective selector to 3.3.
author    Augustin Degomme <degomme@wasabi>
Tue, 28 Aug 2018 14:23:24 +0000 (16:23 +0200)
committer Augustin Degomme <degomme@wasabi>
Tue, 28 Aug 2018 15:44:29 +0000 (17:44 +0200)
Add SMP variants of some algorithms, and protect against side effects: communicators created by init_smp() are flagged as SMP communicators so the selector does not take the SMP path on them again, and init_smp() itself now uses fixed algorithms (ring allgather, scatter-LR-allgather bcast) rather than the mpich selector that may have called it.
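
The new mpich_smp barrier is selected like any other SMPI collective implementation, through the smpi/barrier configuration option exercised by the test suite below. A minimal invocation sketch (platform file and application binary are placeholders):

    smpirun -np 16 -platform cluster.xml --cfg=smpi/barrier:mpich_smp ./my_app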

src/smpi/colls/barrier/barrier-mpich-smp.cpp [new file with mode: 0644]
src/smpi/colls/smpi_default_selector.cpp
src/smpi/colls/smpi_mpich_selector.cpp
src/smpi/include/smpi_coll.hpp
src/smpi/include/smpi_comm.hpp
src/smpi/mpi/smpi_comm.cpp
teshsuite/smpi/CMakeLists.txt
tools/cmake/DefinePackages.cmake

diff --git a/src/smpi/colls/barrier/barrier-mpich-smp.cpp b/src/smpi/colls/barrier/barrier-mpich-smp.cpp
new file mode 100644 (file)
index 0000000..6df4962
--- /dev/null
@@ -0,0 +1,62 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+
+#include "../coll_tuned_topo.hpp"
+#include "../colls_private.hpp"
+
+namespace simgrid{
+namespace smpi{
+int Coll_barrier_mpich_smp::barrier(MPI_Comm comm)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int mpi_errno_ret = MPI_SUCCESS;
+    MPI_Comm shmem_comm = MPI_COMM_NULL, leader_comm = MPI_COMM_NULL;
+    int local_rank = -1;
+    
+    if(comm->get_leaders_comm()==MPI_COMM_NULL){
+      comm->init_smp();
+    }
+
+    shmem_comm = comm->get_intra_comm();
+    local_rank = shmem_comm->rank();
+    /* do the intranode barrier on all nodes */
+    if (shmem_comm != NULL) {
+        mpi_errno = Coll_barrier_mpich::barrier(shmem_comm);
+        if (mpi_errno) {
+          mpi_errno_ret+=mpi_errno;
+        }
+    }
+
+    leader_comm = comm->get_leaders_comm();
+    /* do the barrier across roots of all nodes */
+    if (leader_comm != NULL && local_rank == 0) {
+        mpi_errno = Coll_barrier_mpich::barrier(leader_comm);
+        if (mpi_errno) {
+          mpi_errno_ret+=mpi_errno;
+        }
+    }
+
+    /* release the local processes on each node with a 1-byte
+     * broadcast (0-byte broadcast just returns without doing
+     * anything) */
+    if (shmem_comm != NULL) {
+        int i = 0;
+        mpi_errno = Coll_bcast_mpich::bcast(&i, 1, MPI_BYTE, 0, shmem_comm);
+        if (mpi_errno) {
+          mpi_errno_ret+=mpi_errno;
+        }
+    }
+
+    if (mpi_errno_ret)
+        mpi_errno = mpi_errno_ret;
+    return mpi_errno;
+}
+
+}
+}
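
For reference, this new file implements the classic three-phase hierarchical barrier. A minimal sketch of the same pattern in plain MPI (node_comm and leader_comm are hypothetical communicators, e.g. built with MPI_Comm_split_type and MPI_Comm_split; only node leaders hold a valid leader_comm):

    #include <mpi.h>

    /* Three-phase SMP barrier: node_comm groups the ranks of one node,
       leader_comm groups the first rank of each node. */
    static void smp_barrier(MPI_Comm node_comm, MPI_Comm leader_comm)
    {
        int node_rank;
        char dummy = 0;
        MPI_Comm_rank(node_comm, &node_rank);
        MPI_Barrier(node_comm);                       /* 1. barrier within each node */
        if (node_rank == 0)
            MPI_Barrier(leader_comm);                 /* 2. barrier across node leaders */
        MPI_Bcast(&dummy, 1, MPI_BYTE, 0, node_comm); /* 3. 1-byte bcast releases local ranks */
    }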
+
diff --git a/src/smpi/colls/smpi_default_selector.cpp b/src/smpi/colls/smpi_default_selector.cpp
index 2d96a01..f13079a 100644 (file)
@@ -276,9 +276,9 @@ int Coll_reduce_default::reduce(void *sendbuf, void *recvbuf, int count, MPI_Dat
 int Coll_allreduce_default::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
 {
   int ret;
-  ret = Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
+  ret = Coll_reduce_default::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
   if(ret==MPI_SUCCESS)
-    ret = Colls::bcast(recvbuf, count, datatype, 0, comm);
+    ret = Coll_bcast_default::bcast(recvbuf, count, datatype, 0, comm);
   return ret;
 }
 
diff --git a/src/smpi/colls/smpi_mpich_selector.cpp b/src/smpi/colls/smpi_mpich_selector.cpp
index 89440a1..2aab898 100644 (file)
@@ -68,6 +68,14 @@ int Coll_allreduce_mpich::allreduce(void *sbuf, void *rbuf, int count,
     dsize = dtype->size();
     block_dsize = dsize * count;
 
+    /*MPICH uses SMP algorithms for all commutative ops now*/
+    if(!comm->is_smp_comm()){
+      if(comm->get_leaders_comm()==MPI_COMM_NULL){
+        comm->init_smp();
+      }
+      if(op->is_commutative())
+        return Coll_allreduce_mvapich2_two_level::allreduce (sbuf, rbuf,count, dtype, op, comm);
+    }
 
     /* find nearest power-of-two less than or equal to comm_size */
     int pof2 = 1;
@@ -76,14 +84,10 @@ int Coll_allreduce_mpich::allreduce(void *sbuf, void *rbuf, int count,
 
     if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) {
       //for long messages
-       return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf,
-                                                                   count, dtype,
-                                                                   op, comm));
+       return Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm);
     }else {
       //for short ones and count < pof2
-      return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
-                                                                   count, dtype,
-                                                                   op, comm));
+      return Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm);
     }
 }
 
@@ -169,7 +173,7 @@ int Coll_alltoall_mpich::alltoall( void *sbuf, int scount,
                                                     comm);
 
     } else if (block_dsize < medium_size) {
-        return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
+        return Coll_alltoall_mvapich2_scatter_dest::alltoall(sbuf, scount, sdtype,
                                                            rbuf, rcount, rdtype,
                                                            comm);
     }else if (communicator_size%2){
@@ -259,6 +263,14 @@ int Coll_bcast_mpich::bcast(void *buff, int count,
     //int segsize = 0;
     size_t message_size, dsize;
 
+    if(!comm->is_smp_comm()){
+      if(comm->get_leaders_comm()==MPI_COMM_NULL){
+        comm->init_smp();
+      }
+      if(comm->is_uniform())
+        return Coll_bcast_SMP_binomial::bcast(buff, count, datatype, root, comm);
+    }
+
     communicator_size = comm->size();
 
     /* else we need data size for decision function */
@@ -348,8 +360,16 @@ int Coll_reduce_mpich::reduce( void *sendbuf, void *recvbuf,
                                             )
 {
     int communicator_size=0;
-    //int segsize = 0;
     size_t message_size, dsize;
+
+    if(!comm->is_smp_comm()){
+      if(comm->get_leaders_comm()==MPI_COMM_NULL){
+        comm->init_smp();
+      }
+      if (op->is_commutative() == 1)
+        return Coll_reduce_mvapich2_two_level::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+    }
+
     communicator_size = comm->size();
 
     /* need data size for decision function */
@@ -363,8 +383,7 @@ int Coll_reduce_mpich::reduce( void *sendbuf, void *recvbuf,
     if ((count < pof2) || (message_size < 2048) || (op != MPI_OP_NULL && not op->is_commutative())) {
       return Coll_reduce_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
     }
-        return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
-                                                     segsize, max_requests*/);
+        return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
 }
 
 
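A note on the guards added above: the two-level algorithms run collectives on the intra-node and leader communicators that init_smp() creates. Those communicators are flagged as SMP communicators, so is_smp_comm() returns true for them and the selector falls through to the flat algorithms instead of recursing. Condensed sketch of the pattern, using this patch's names:

    // Sketch only: the selector takes the SMP path at most one level deep.
    if (not comm->is_smp_comm()) {             // false on intra-/leader-comms built by init_smp()
      if (comm->get_leaders_comm() == MPI_COMM_NULL)
        comm->init_smp();                      // lazily build the intra-node and leader comms
      if (op->is_commutative())
        return Coll_allreduce_mvapich2_two_level::allreduce(sbuf, rbuf, count, dtype, op, comm);
    }
    // flat algorithms handle the sub-communicators and non-commutative operations
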
diff --git a/src/smpi/include/smpi_coll.hpp b/src/smpi/include/smpi_coll.hpp
index 7f468aa..d5fcef4 100644 (file)
@@ -398,6 +398,7 @@ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_tree)  COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, ompi_bruck)  COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, mpich_smp)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mpich)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2)   COLL_sep \
diff --git a/src/smpi/include/smpi_comm.hpp b/src/smpi/include/smpi_comm.hpp
index 5cb1b7a..6e30cb6 100644 (file)
@@ -28,7 +28,7 @@ class Comm : public F2C, public Keyval{
     int is_uniform_;
     int* non_uniform_map_; //set if smp nodes have a different number of processes allocated
     int is_blocked_;// are ranks allocated on the same smp node contiguous ?
-
+    int is_smp_comm_;// set to 0 in case this is already an intra-comm or a leader-comm to avoid recursivity
     std::list<MPI_Win> rma_wins_; // attached windows for synchronization.
 
   public:
@@ -36,7 +36,7 @@ class Comm : public F2C, public Keyval{
     static int keyval_id_;
 
     Comm() = default;
-    Comm(MPI_Group group, MPI_Topology topo);
+    Comm(MPI_Group group, MPI_Topology topo, int smp=0);
     int dup(MPI_Comm* newcomm);
     MPI_Group group();
     MPI_Topology topo();
@@ -51,6 +51,7 @@ class Comm : public F2C, public Keyval{
     MPI_Comm get_intra_comm();
     int is_uniform();
     int is_blocked();
+    int is_smp_comm();
     MPI_Comm split(int color, int key);
     void cleanup_smp();
     void ref();
diff --git a/src/smpi/mpi/smpi_comm.cpp b/src/smpi/mpi/smpi_comm.cpp
index d4ef053..57c61d2 100644 (file)
@@ -29,7 +29,7 @@ namespace smpi{
 std::unordered_map<int, smpi_key_elem> Comm::keyvals_;
 int Comm::keyval_id_=0;
 
-Comm::Comm(MPI_Group group, MPI_Topology topo) : group_(group), topo_(topo)
+Comm::Comm(MPI_Group group, MPI_Topology topo, int smp) : group_(group), topo_(topo),is_smp_comm_(smp)
 {
   refcount_        = 1;
   topoType_        = MPI_INVALID_TOPO;
@@ -169,6 +169,12 @@ int Comm::is_blocked(){
   return is_blocked_;
 }
 
+int Comm::is_smp_comm(){
+  if (this == MPI_COMM_UNINITIALIZED)
+    return smpi_process()->comm_world()->is_smp_comm();
+  return is_smp_comm_;
+}
+
 MPI_Comm Comm::split(int color, int key)
 {
   if (this == MPI_COMM_UNINITIALIZED)
@@ -321,7 +327,7 @@ void Comm::init_smp(){
     }
   }
 
-  MPI_Comm comm_intra = new  Comm(group_intra, nullptr);
+  MPI_Comm comm_intra = new  Comm(group_intra, nullptr, 1);
   leader=min_index;
 
   int* leaders_map = new int[comm_size];
@@ -329,7 +335,7 @@ void Comm::init_smp(){
   std::fill_n(leaders_map, comm_size, 0);
   std::fill_n(leader_list, comm_size, -1);
 
-  Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this);
+  Coll_allgather_ring::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this);
 
   if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) {
     // we need to switch as the called function may silently touch global variables
@@ -363,7 +369,7 @@ void Comm::init_smp(){
     //create leader_communicator
     for (i=0; i< leader_group_size;i++)
       leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i);
-    leader_comm = new  Comm(leaders_group, nullptr);
+    leader_comm = new  Comm(leaders_group, nullptr,1);
     this->set_leaders_comm(leader_comm);
     this->set_intra_comm(comm_intra);
 
@@ -373,7 +379,7 @@ void Comm::init_smp(){
       leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i);
 
     if(this->get_leaders_comm()==MPI_COMM_NULL){
-      leader_comm = new  Comm(leaders_group, nullptr);
+      leader_comm = new  Comm(leaders_group, nullptr,1);
       this->set_leaders_comm(leader_comm);
     }else{
       leader_comm=this->get_leaders_comm();
@@ -387,7 +393,7 @@ void Comm::init_smp(){
   if(comm_intra->rank()==0) {
     int is_uniform       = 1;
     int* non_uniform_map = xbt_new0(int,leader_group_size);
-    Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT,
+    Coll_allgather_ring::allgather(&my_local_size, 1, MPI_INT,
         non_uniform_map, 1, MPI_INT, leader_comm);
     for(i=0; i < leader_group_size; i++) {
       if(non_uniform_map[0] != non_uniform_map[i]) {
@@ -402,7 +408,7 @@ void Comm::init_smp(){
     }
     is_uniform_=is_uniform;
   }
-  Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra );
+  Coll_bcast_scatter_LR_allgather::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra );
 
   if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) {
     // we need to switch as the called function may silently touch global variables
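
The switch from Coll_allgather_mpich and Coll_bcast_mpich to fixed algorithms in init_smp() is the "protect against side effects" part of this change: the mpich selector itself may now call init_smp(), so init_smp() must not call back into that selector. A hypothetical re-entry chain this avoids:

    // Coll_bcast_mpich::bcast(comm)          selector entry
    //   -> comm->init_smp()                  builds intra-node and leader comms
    //     -> Coll_bcast_mpich::bcast(...)    if init_smp() still used the selector,
    //       -> comm->init_smp() ...          it could re-enter before setup completes
    // Hence the fixed ring allgather and scatter_LR_allgather bcast above.
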
diff --git a/teshsuite/smpi/CMakeLists.txt b/teshsuite/smpi/CMakeLists.txt
index 97e081c..85bf920 100644 (file)
@@ -93,7 +93,7 @@ if(enable_smpi)
     ADD_TESH(tesh-smpi-coll-alltoallv-${ALLTOALLV} --cfg smpi/alltoallv:${ALLTOALLV} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-alltoallv coll-alltoallv.tesh)
   endforeach()
 
-  foreach (BARRIER ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
+  foreach (BARRIER ompi mpich mpich_smp ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
       ADD_TESH(tesh-smpi-coll-barrier-${BARRIER} --cfg smpi/barrier:${BARRIER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-barrier coll-barrier.tesh)
   endforeach()
 
diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake
index f9c2dab..1ce6888 100644 (file)
@@ -173,6 +173,7 @@ set(SMPI_SRC
   src/smpi/colls/alltoallv/alltoallv-ring.cpp
   src/smpi/colls/barrier/barrier-ompi.cpp
   src/smpi/colls/barrier/barrier-mvapich2-pair.cpp
+  src/smpi/colls/barrier/barrier-mpich-smp.cpp
   src/smpi/colls/bcast/bcast-NTSB.cpp
   src/smpi/colls/bcast/bcast-NTSL-Isend.cpp
   src/smpi/colls/bcast/bcast-NTSL.cpp