Add SMP variants of some collective algorithms, and protect init_smp() against side effects such as recursive selector calls.
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+
+#include "../coll_tuned_topo.hpp"
+#include "../colls_private.hpp"
+
+namespace simgrid{
+namespace smpi{
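+/* SMP-aware barrier, in the spirit of the MPICH implementation: a barrier
+ * inside each node, then a barrier across the node leaders, and finally a
+ * 1-byte intra-node broadcast to release the processes waiting on each node */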
+int Coll_barrier_mpich_smp::barrier(MPI_Comm comm)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int mpi_errno_ret = MPI_SUCCESS;
+    MPI_Comm shmem_comm = MPI_COMM_NULL, leader_comm = MPI_COMM_NULL;
+    int local_rank = -1;
+
+    if (comm->get_leaders_comm() == MPI_COMM_NULL) {
+        comm->init_smp();
+    }
+
+    shmem_comm = comm->get_intra_comm();
+    /* do the intranode barrier on all nodes */
+    if (shmem_comm != MPI_COMM_NULL) {
+        local_rank = shmem_comm->rank();
+        mpi_errno  = Coll_barrier_mpich::barrier(shmem_comm);
+        if (mpi_errno) {
+            mpi_errno_ret += mpi_errno;
+        }
+    }
+
+    leader_comm = comm->get_leaders_comm();
+    /* do the barrier across roots of all nodes */
+    if (leader_comm != MPI_COMM_NULL && local_rank == 0) {
+        mpi_errno = Coll_barrier_mpich::barrier(leader_comm);
+        if (mpi_errno) {
+            mpi_errno_ret += mpi_errno;
+        }
+    }
+
+    /* release the local processes on each node with a 1-byte
+     * broadcast (0-byte broadcast just returns without doing
+     * anything) */
+    if (shmem_comm != MPI_COMM_NULL) {
+        int i = 0;
+        mpi_errno = Coll_bcast_mpich::bcast(&i, 1, MPI_BYTE, 0, shmem_comm);
+        if (mpi_errno) {
+            mpi_errno_ret += mpi_errno;
+        }
+    }
+
+    if (mpi_errno_ret)
+        mpi_errno = mpi_errno_ret;
+    return mpi_errno;
+}
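+
+/* like the other barrier algorithms, this one is selected at runtime,
+ * e.g. with --cfg=smpi/barrier:mpich_smp (see the tesh tests below) */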
+
+}
+}
+
int Coll_allreduce_default::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int ret;
- ret = Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
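+ /* call the default implementations directly rather than through the
+ * selector, so that the default allreduce neither depends on nor recurses
+ * into whatever reduce and bcast were selected globally */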
+ ret = Coll_reduce_default::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
if(ret==MPI_SUCCESS)
- ret = Colls::bcast(recvbuf, count, datatype, 0, comm);
+ ret = Coll_bcast_default::bcast(recvbuf, count, datatype, 0, comm);
return ret;
}
dsize = dtype->size();
block_dsize = dsize * count;
+ /* MPICH now uses SMP-aware algorithms for all commutative operations */
+ if(!comm->is_smp_comm()){
+ if(comm->get_leaders_comm()==MPI_COMM_NULL){
+ comm->init_smp();
+ }
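+ /* the two-level algorithm reduces inside each node first, then combines the
+ * partial results across node leaders: this reorders the reduction, so it
+ * is only applied to commutative operations */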
+ if(op != MPI_OP_NULL && op->is_commutative())
+ return Coll_allreduce_mvapich2_two_level::allreduce(sbuf, rbuf, count, dtype, op, comm);
+ }
/* find nearest power-of-two less than or equal to comm_size */
int pof2 = 1;
while (pof2 <= comm_size) pof2 <<= 1;
pof2 >>= 1;
if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) {
//for long messages
- return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf,
- count, dtype,
- op, comm));
+ return Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm);
}else {
//for short ones and count < pof2
- return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
- count, dtype,
- op, comm));
+ return Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm);
}
}
comm);
} else if (block_dsize < medium_size) {
- return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
+ return Coll_alltoall_mvapich2_scatter_dest::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}else if (communicator_size%2){
//int segsize = 0;
size_t message_size, dsize;
+ if(!comm->is_smp_comm()){
+ if(comm->get_leaders_comm()==MPI_COMM_NULL){
+ comm->init_smp();
+ }
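+ /* the SMP binomial broadcast is only used when every node hosts the same
+ * number of processes, as reported by is_uniform() */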
+ if(comm->is_uniform())
+ return Coll_bcast_SMP_binomial::bcast(buff, count, datatype, root, comm);
+ }
+
communicator_size = comm->size();
/* else we need data size for decision function */
)
{
int communicator_size=0;
- //int segsize = 0;
size_t message_size, dsize;
+
+ if(!comm->is_smp_comm()){
+ if(comm->get_leaders_comm()==MPI_COMM_NULL){
+ comm->init_smp();
+ }
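+ /* same constraint as for allreduce: the two-level algorithm reorders the
+ * reduction, so it requires a commutative operation */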
+ if (op != MPI_OP_NULL && op->is_commutative())
+ return Coll_reduce_mvapich2_two_level::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+ }
+
communicator_size = comm->size();
/* need data size for decision function */
if ((count < pof2) || (message_size < 2048) || (op != MPI_OP_NULL && not op->is_commutative())) {
return Coll_reduce_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
}
- return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
- segsize, max_requests*/);
+ return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
}
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_bruck) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, mpich_smp) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2) COLL_sep \
int is_uniform_;
int* non_uniform_map_; //set if smp nodes have a different number of processes allocated
int is_blocked_;// are ranks allocated on the same smp node contiguous ?
-
+ int is_smp_comm_;// set to 1 on intra-comms and leader-comms, so that SMP-aware algorithms don't recurse on them
std::list<MPI_Win> rma_wins_; // attached windows for synchronization.
public:
static int keyval_id_;
Comm() = default;
- Comm(MPI_Group group, MPI_Topology topo);
+ Comm(MPI_Group group, MPI_Topology topo, int smp=0);
int dup(MPI_Comm* newcomm);
MPI_Group group();
MPI_Topology topo();
MPI_Comm get_intra_comm();
int is_uniform();
int is_blocked();
+ int is_smp_comm();
MPI_Comm split(int color, int key);
void cleanup_smp();
void ref();
std::unordered_map<int, smpi_key_elem> Comm::keyvals_;
int Comm::keyval_id_=0;
-Comm::Comm(MPI_Group group, MPI_Topology topo) : group_(group), topo_(topo)
+Comm::Comm(MPI_Group group, MPI_Topology topo, int smp) : group_(group), topo_(topo), is_smp_comm_(smp)
{
refcount_ = 1;
topoType_ = MPI_INVALID_TOPO;
return is_blocked_;
}
+int Comm::is_smp_comm()
+{
+  if (this == MPI_COMM_UNINITIALIZED)
+    return smpi_process()->comm_world()->is_smp_comm();
+  return is_smp_comm_;
+}
+
MPI_Comm Comm::split(int color, int key)
{
if (this == MPI_COMM_UNINITIALIZED)
}
}
- MPI_Comm comm_intra = new Comm(group_intra, nullptr);
+ MPI_Comm comm_intra = new Comm(group_intra, nullptr, 1);
leader=min_index;
int* leaders_map = new int[comm_size];
std::fill_n(leaders_map, comm_size, 0);
std::fill_n(leader_list, comm_size, -1);
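+ /* the SMP structures are still being built at this point, so use a fixed
+ * algorithm rather than a selector that might rely on them or re-enter
+ * init_smp() */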
- Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this);
+ Coll_allgather_ring::allgather(&leader, 1, MPI_INT, leaders_map, 1, MPI_INT, this);
if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) {
// we need to switch as the called function may silently touch global variables
//create leader_communicator
for (i=0; i< leader_group_size;i++)
leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i);
- leader_comm = new Comm(leaders_group, nullptr);
+ leader_comm = new Comm(leaders_group, nullptr,1);
this->set_leaders_comm(leader_comm);
this->set_intra_comm(comm_intra);
leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i);
if(this->get_leaders_comm()==MPI_COMM_NULL){
- leader_comm = new Comm(leaders_group, nullptr);
+ leader_comm = new Comm(leaders_group, nullptr,1);
this->set_leaders_comm(leader_comm);
}else{
leader_comm=this->get_leaders_comm();
if(comm_intra->rank()==0) {
int is_uniform = 1;
int* non_uniform_map = xbt_new0(int,leader_group_size);
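+ /* fixed algorithm again, on the leaders' communicator this time */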
- Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT,
+ Coll_allgather_ring::allgather(&my_local_size, 1, MPI_INT,
non_uniform_map, 1, MPI_INT, leader_comm);
for(i=0; i < leader_group_size; i++) {
if(non_uniform_map[0] != non_uniform_map[i]) {
}
is_uniform_=is_uniform;
}
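+ /* fixed algorithm here too: a selector-based bcast could dispatch to an
+ * SMP-aware variant that needs the structures this function is still
+ * setting up */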
- Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra );
+ Coll_bcast_scatter_LR_allgather::bcast(&is_uniform_, 1, MPI_INT, 0, comm_intra);
if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) {
// we need to switch as the called function may silently touch global variables
ADD_TESH(tesh-smpi-coll-alltoallv-${ALLTOALLV} --cfg smpi/alltoallv:${ALLTOALLV} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-alltoallv coll-alltoallv.tesh)
endforeach()
- foreach (BARRIER ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
+ foreach (BARRIER ompi mpich mpich_smp ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
ADD_TESH(tesh-smpi-coll-barrier-${BARRIER} --cfg smpi/barrier:${BARRIER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-barrier coll-barrier.tesh)
endforeach()
src/smpi/colls/alltoallv/alltoallv-ring.cpp
src/smpi/colls/barrier/barrier-ompi.cpp
src/smpi/colls/barrier/barrier-mvapich2-pair.cpp
+ src/smpi/colls/barrier/barrier-mpich-smp.cpp
src/smpi/colls/bcast/bcast-NTSB.cpp
src/smpi/colls/bcast/bcast-NTSL-Isend.cpp
src/smpi/colls/bcast/bcast-NTSL.cpp