From: Augustin Degomme Date: Tue, 28 Aug 2018 14:23:24 +0000 (+0200) Subject: Upgrade MPICH collective selector to 3.3. X-Git-Tag: v3_21~150 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/c5544435bde9e10652726e77a8da39298d5064be Upgrade MPICH collective selector to 3.3. Add SMP variants of some algorithms, and protect against side effects. --- diff --git a/src/smpi/colls/barrier/barrier-mpich-smp.cpp b/src/smpi/colls/barrier/barrier-mpich-smp.cpp new file mode 100644 index 0000000000..6df4962690 --- /dev/null +++ b/src/smpi/colls/barrier/barrier-mpich-smp.cpp @@ -0,0 +1,62 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ +/* + * + * (C) 2001 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + + +#include "../coll_tuned_topo.hpp" +#include "../colls_private.hpp" + +namespace simgrid{ +namespace smpi{ +int Coll_barrier_mpich_smp::barrier(MPI_Comm comm) +{ + int mpi_errno = MPI_SUCCESS; + int mpi_errno_ret = MPI_SUCCESS; + MPI_Comm shmem_comm = MPI_COMM_NULL, leader_comm = MPI_COMM_NULL; + int local_rank = -1; + + if(comm->get_leaders_comm()==MPI_COMM_NULL){ + comm->init_smp(); + } + + shmem_comm = comm->get_intra_comm(); + local_rank = shmem_comm->rank(); + /* do the intranode barrier on all nodes */ + if (shmem_comm != NULL) { + mpi_errno = Coll_barrier_mpich::barrier(shmem_comm); + if (mpi_errno) { + mpi_errno_ret+=mpi_errno; + } + } + + leader_comm = comm->get_leaders_comm(); + /* do the barrier across roots of all nodes */ + if (leader_comm != NULL && local_rank == 0) { + mpi_errno = Coll_barrier_mpich::barrier(leader_comm); + if (mpi_errno) { + mpi_errno_ret+=mpi_errno; + } + } + + /* release the local processes on each node with a 1-byte + * broadcast (0-byte broadcast just returns without doing + * anything) */ + if (shmem_comm != NULL) { + int i = 0; + mpi_errno = Coll_bcast_mpich::bcast(&i, 1, MPI_BYTE, 0, shmem_comm); + if (mpi_errno) { + mpi_errno_ret+=mpi_errno; + } + } 
+ + if (mpi_errno_ret) + mpi_errno = mpi_errno_ret; + return mpi_errno; +} + +} +} + diff --git a/src/smpi/colls/smpi_default_selector.cpp b/src/smpi/colls/smpi_default_selector.cpp index 2d96a01764..f13079a2fc 100644 --- a/src/smpi/colls/smpi_default_selector.cpp +++ b/src/smpi/colls/smpi_default_selector.cpp @@ -276,9 +276,9 @@ int Coll_reduce_default::reduce(void *sendbuf, void *recvbuf, int count, MPI_Dat int Coll_allreduce_default::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int ret; - ret = Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); + ret = Coll_reduce_default::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); if(ret==MPI_SUCCESS) - ret = Colls::bcast(recvbuf, count, datatype, 0, comm); + ret = Coll_bcast_default::bcast(recvbuf, count, datatype, 0, comm); return ret; } diff --git a/src/smpi/colls/smpi_mpich_selector.cpp b/src/smpi/colls/smpi_mpich_selector.cpp index 89440a13e6..2aab898f87 100644 --- a/src/smpi/colls/smpi_mpich_selector.cpp +++ b/src/smpi/colls/smpi_mpich_selector.cpp @@ -68,6 +68,14 @@ int Coll_allreduce_mpich::allreduce(void *sbuf, void *rbuf, int count, dsize = dtype->size(); block_dsize = dsize * count; + /*MPICH uses SMP algorithms for all commutative ops now*/ + if(!comm->is_smp_comm()){ + if(comm->get_leaders_comm()==MPI_COMM_NULL){ + comm->init_smp(); + } + if(op->is_commutative()) + return Coll_allreduce_mvapich2_two_level::allreduce (sbuf, rbuf,count, dtype, op, comm); + } /* find nearest power-of-two less than or equal to comm_size */ int pof2 = 1; @@ -76,14 +84,10 @@ int Coll_allreduce_mpich::allreduce(void *sbuf, void *rbuf, int count, if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) { //for long messages - return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf, - count, dtype, - op, comm)); + return Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm); }else { //for short ones and 
count < pof2 - return (Coll_allreduce_rdb::allreduce (sbuf, rbuf, - count, dtype, - op, comm)); + return Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm); } } @@ -169,7 +173,7 @@ int Coll_alltoall_mpich::alltoall( void *sbuf, int scount, comm); } else if (block_dsize < medium_size) { - return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype, + return Coll_alltoall_mvapich2_scatter_dest::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); }else if (communicator_size%2){ @@ -259,6 +263,14 @@ int Coll_bcast_mpich::bcast(void *buff, int count, //int segsize = 0; size_t message_size, dsize; + if(!comm->is_smp_comm()){ + if(comm->get_leaders_comm()==MPI_COMM_NULL){ + comm->init_smp(); + } + if(comm->is_uniform()) + return Coll_bcast_SMP_binomial::bcast(buff, count, datatype, root, comm); + } + communicator_size = comm->size(); /* else we need data size for decision function */ @@ -348,8 +360,16 @@ int Coll_reduce_mpich::reduce( void *sendbuf, void *recvbuf, ) { int communicator_size=0; - //int segsize = 0; size_t message_size, dsize; + + if(!comm->is_smp_comm()){ + if(comm->get_leaders_comm()==MPI_COMM_NULL){ + comm->init_smp(); + } + if (op->is_commutative() == 1) + return Coll_reduce_mvapich2_two_level::reduce(sendbuf, recvbuf, count, datatype, op, root, comm); + } + communicator_size = comm->size(); /* need data size for decision function */ @@ -363,8 +383,7 @@ int Coll_reduce_mpich::reduce( void *sendbuf, void *recvbuf, if ((count < pof2) || (message_size < 2048) || (op != MPI_OP_NULL && not op->is_commutative())) { return Coll_reduce_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm); } - return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, - segsize, max_requests*/); + return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm); } diff --git a/src/smpi/include/smpi_coll.hpp b/src/smpi/include/smpi_coll.hpp index 
7f468aaa5b..d5fcef4ceb 100644 --- a/src/smpi/include/smpi_coll.hpp +++ b/src/smpi/include/smpi_coll.hpp @@ -398,6 +398,7 @@ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_tree) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_bruck) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \ +COLL_APPLY(action, COLL_BARRIER_SIG, mpich_smp) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, mpich) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2) COLL_sep \ diff --git a/src/smpi/include/smpi_comm.hpp b/src/smpi/include/smpi_comm.hpp index 5cb1b7a7ff..6e30cb62fa 100644 --- a/src/smpi/include/smpi_comm.hpp +++ b/src/smpi/include/smpi_comm.hpp @@ -28,7 +28,7 @@ class Comm : public F2C, public Keyval{ int is_uniform_; int* non_uniform_map_; //set if smp nodes have a different number of processes allocated int is_blocked_;// are ranks allocated on the same smp node contiguous ? - + int is_smp_comm_;// set to 1 when this is already an intra-comm or a leader-comm (0 otherwise), so that SMP-aware collectives are not selected again on it, to avoid recursion std::list rma_wins_; // attached windows for synchronization.
public: @@ -36,7 +36,7 @@ class Comm : public F2C, public Keyval{ static int keyval_id_; Comm() = default; - Comm(MPI_Group group, MPI_Topology topo); + Comm(MPI_Group group, MPI_Topology topo, int smp=0); int dup(MPI_Comm* newcomm); MPI_Group group(); MPI_Topology topo(); @@ -51,6 +51,7 @@ class Comm : public F2C, public Keyval{ MPI_Comm get_intra_comm(); int is_uniform(); int is_blocked(); + int is_smp_comm(); MPI_Comm split(int color, int key); void cleanup_smp(); void ref(); diff --git a/src/smpi/mpi/smpi_comm.cpp b/src/smpi/mpi/smpi_comm.cpp index d4ef0530d0..57c61d292b 100644 --- a/src/smpi/mpi/smpi_comm.cpp +++ b/src/smpi/mpi/smpi_comm.cpp @@ -29,7 +29,7 @@ namespace smpi{ std::unordered_map Comm::keyvals_; int Comm::keyval_id_=0; -Comm::Comm(MPI_Group group, MPI_Topology topo) : group_(group), topo_(topo) +Comm::Comm(MPI_Group group, MPI_Topology topo, int smp) : group_(group), topo_(topo),is_smp_comm_(smp) { refcount_ = 1; topoType_ = MPI_INVALID_TOPO; @@ -169,6 +169,12 @@ int Comm::is_blocked(){ return is_blocked_; } +int Comm::is_smp_comm(){ + if (this == MPI_COMM_UNINITIALIZED) + return smpi_process()->comm_world()->is_smp_comm(); + return is_smp_comm_; +} + MPI_Comm Comm::split(int color, int key) { if (this == MPI_COMM_UNINITIALIZED) @@ -321,7 +327,7 @@ void Comm::init_smp(){ } } - MPI_Comm comm_intra = new Comm(group_intra, nullptr); + MPI_Comm comm_intra = new Comm(group_intra, nullptr, 1); leader=min_index; int* leaders_map = new int[comm_size]; @@ -329,7 +335,7 @@ void Comm::init_smp(){ std::fill_n(leaders_map, comm_size, 0); std::fill_n(leader_list, comm_size, -1); - Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this); + Coll_allgather_ring::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this); if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) { // we need to switch as the called function may silently touch global variables @@ -363,7 +369,7 @@ void Comm::init_smp(){ //create 
leader_communicator for (i=0; i< leader_group_size;i++) leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i); - leader_comm = new Comm(leaders_group, nullptr); + leader_comm = new Comm(leaders_group, nullptr,1); this->set_leaders_comm(leader_comm); this->set_intra_comm(comm_intra); @@ -373,7 +379,7 @@ void Comm::init_smp(){ leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i); if(this->get_leaders_comm()==MPI_COMM_NULL){ - leader_comm = new Comm(leaders_group, nullptr); + leader_comm = new Comm(leaders_group, nullptr,1); this->set_leaders_comm(leader_comm); }else{ leader_comm=this->get_leaders_comm(); @@ -387,7 +393,7 @@ void Comm::init_smp(){ if(comm_intra->rank()==0) { int is_uniform = 1; int* non_uniform_map = xbt_new0(int,leader_group_size); - Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT, + Coll_allgather_ring::allgather(&my_local_size, 1, MPI_INT, non_uniform_map, 1, MPI_INT, leader_comm); for(i=0; i < leader_group_size; i++) { if(non_uniform_map[0] != non_uniform_map[i]) { @@ -402,7 +408,7 @@ void Comm::init_smp(){ } is_uniform_=is_uniform; } - Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra ); + Coll_bcast_scatter_LR_allgather::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra ); if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) { // we need to switch as the called function may silently touch global variables diff --git a/teshsuite/smpi/CMakeLists.txt b/teshsuite/smpi/CMakeLists.txt index 97e081c841..85bf920960 100644 --- a/teshsuite/smpi/CMakeLists.txt +++ b/teshsuite/smpi/CMakeLists.txt @@ -93,7 +93,7 @@ if(enable_smpi) ADD_TESH(tesh-smpi-coll-alltoallv-${ALLTOALLV} --cfg smpi/alltoallv:${ALLTOALLV} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-alltoallv coll-alltoallv.tesh) endforeach() - foreach (BARRIER ompi mpich ompi_basic_linear 
ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi) + foreach (BARRIER ompi mpich mpich_smp ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi) ADD_TESH(tesh-smpi-coll-barrier-${BARRIER} --cfg smpi/barrier:${BARRIER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-barrier coll-barrier.tesh) endforeach() diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index f9c2dabede..1ce6888840 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -173,6 +173,7 @@ set(SMPI_SRC src/smpi/colls/alltoallv/alltoallv-ring.cpp src/smpi/colls/barrier/barrier-ompi.cpp src/smpi/colls/barrier/barrier-mvapich2-pair.cpp + src/smpi/colls/barrier/barrier-mpich-smp.cpp src/smpi/colls/bcast/bcast-NTSB.cpp src/smpi/colls/bcast/bcast-NTSL-Isend.cpp src/smpi/colls/bcast/bcast-NTSL.cpp