Add SMP variants of some collective algorithms, and protect init_smp() against side effects such as recursive selector calls.
--- /dev/null
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+
+#include "../coll_tuned_topo.hpp"
+#include "../colls_private.hpp"
+
+namespace simgrid{
+namespace smpi{
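+/* SMP-aware barrier, in the spirit of the MPICH implementation: a barrier
+ * inside each node, then a barrier across the node leaders, and finally a
+ * 1-byte intra-node broadcast to release the processes waiting on each node */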
+int Coll_barrier_mpich_smp::barrier(MPI_Comm comm)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int mpi_errno_ret = MPI_SUCCESS;
+    MPI_Comm shmem_comm = MPI_COMM_NULL, leader_comm = MPI_COMM_NULL;
+    int local_rank = -1;
+
+    if (comm->get_leaders_comm() == MPI_COMM_NULL) {
+        comm->init_smp();
+    }
+
+    shmem_comm = comm->get_intra_comm();
+    /* do the intranode barrier on all nodes */
+    if (shmem_comm != MPI_COMM_NULL) {
+        local_rank = shmem_comm->rank();
+        mpi_errno  = Coll_barrier_mpich::barrier(shmem_comm);
+        if (mpi_errno) {
+            mpi_errno_ret += mpi_errno;
+        }
+    }
+
+    leader_comm = comm->get_leaders_comm();
+    /* do the barrier across roots of all nodes */
+    if (leader_comm != MPI_COMM_NULL && local_rank == 0) {
+        mpi_errno = Coll_barrier_mpich::barrier(leader_comm);
+        if (mpi_errno) {
+            mpi_errno_ret += mpi_errno;
+        }
+    }
+
+    /* release the local processes on each node with a 1-byte
+     * broadcast (0-byte broadcast just returns without doing
+     * anything) */
+    if (shmem_comm != MPI_COMM_NULL) {
+        int i = 0;
+        mpi_errno = Coll_bcast_mpich::bcast(&i, 1, MPI_BYTE, 0, shmem_comm);
+        if (mpi_errno) {
+            mpi_errno_ret += mpi_errno;
+        }
+    }
+
+    if (mpi_errno_ret)
+        mpi_errno = mpi_errno_ret;
+    return mpi_errno;
+}
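+
+/* like the other barrier algorithms, this one is selected at runtime,
+ * e.g. with --cfg=smpi/barrier:mpich_smp (see the tesh tests below) */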
+
+}
+}
+
int Coll_allreduce_default::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int ret;
- ret = Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
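+ /* call the default implementations directly rather than through the
+ * selector, so that the default allreduce neither depends on nor recurses
+ * into whatever reduce and bcast were selected globally */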
+ ret = Coll_reduce_default::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
if(ret==MPI_SUCCESS)
- ret = Colls::bcast(recvbuf, count, datatype, 0, comm);
+ ret = Coll_bcast_default::bcast(recvbuf, count, datatype, 0, comm);
return ret;
}
dsize = dtype->size();
block_dsize = dsize * count;
+ /* MPICH now uses SMP-aware algorithms for all commutative operations */
+ if(!comm->is_smp_comm()){
+ if(comm->get_leaders_comm()==MPI_COMM_NULL){
+ comm->init_smp();
+ }
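+ /* the two-level algorithm reduces inside each node first, then combines the
+ * partial results across node leaders: this reorders the reduction, so it
+ * is only applied to commutative operations */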
+ if(op != MPI_OP_NULL && op->is_commutative())
+ return Coll_allreduce_mvapich2_two_level::allreduce(sbuf, rbuf, count, dtype, op, comm);
+ }
/* find nearest power-of-two less than or equal to comm_size */
int pof2 = 1;
while (pof2 <= comm_size) pof2 <<= 1;
pof2 >>= 1;
if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) {
//for long messages
- return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf,
- count, dtype,
- op, comm));
+ return Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm);
}else {
//for short ones and count < pof2
- return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
- count, dtype,
- op, comm));
+ return Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm);
}
}
comm);
} else if (block_dsize < medium_size) {
- return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
+ return Coll_alltoall_mvapich2_scatter_dest::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}else if (communicator_size%2){
//int segsize = 0;
size_t message_size, dsize;
+ if(!comm->is_smp_comm()){
+ if(comm->get_leaders_comm()==MPI_COMM_NULL){
+ comm->init_smp();
+ }
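+ /* the SMP binomial broadcast is only used when every node hosts the same
+ * number of processes, as reported by is_uniform() */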
+ if(comm->is_uniform())
+ return Coll_bcast_SMP_binomial::bcast(buff, count, datatype, root, comm);
+ }
+
communicator_size = comm->size();
/* else we need data size for decision function */
)
{
int communicator_size=0;
- //int segsize = 0;
size_t message_size, dsize;
+
+ if(!comm->is_smp_comm()){
+ if(comm->get_leaders_comm()==MPI_COMM_NULL){
+ comm->init_smp();
+ }
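+ /* same constraint as for allreduce: the two-level algorithm reorders the
+ * reduction, so it requires a commutative operation */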
+ if (op != MPI_OP_NULL && op->is_commutative())
+ return Coll_reduce_mvapich2_two_level::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
+ }
+
communicator_size = comm->size();
/* need data size for decision function */
if ((count < pof2) || (message_size < 2048) || (op != MPI_OP_NULL && not op->is_commutative())) {
return Coll_reduce_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
}
- return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
- segsize, max_requests*/);
+ return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
}
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_bruck) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, mpich_smp) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2) COLL_sep \
int is_uniform_;
int* non_uniform_map_; //set if smp nodes have a different number of processes allocated
int is_blocked_;// are ranks allocated on the same smp node contiguous ?
-
+ int is_smp_comm_;// set to 1 on intra-comms and leader-comms, so that SMP-aware algorithms don't recurse on them
std::list<MPI_Win> rma_wins_; // attached windows for synchronization.
public:
static int keyval_id_;
Comm() = default;
- Comm(MPI_Group group, MPI_Topology topo);
+ Comm(MPI_Group group, MPI_Topology topo, int smp=0);
int dup(MPI_Comm* newcomm);
MPI_Group group();
MPI_Topology topo();
MPI_Comm get_intra_comm();
int is_uniform();
int is_blocked();
+ int is_smp_comm();
MPI_Comm split(int color, int key);
void cleanup_smp();
void ref();
std::unordered_map<int, smpi_key_elem> Comm::keyvals_;
int Comm::keyval_id_=0;
-Comm::Comm(MPI_Group group, MPI_Topology topo) : group_(group), topo_(topo)
+Comm::Comm(MPI_Group group, MPI_Topology topo, int smp) : group_(group), topo_(topo), is_smp_comm_(smp)
{
refcount_ = 1;
topoType_ = MPI_INVALID_TOPO;
return is_blocked_;
}
+int Comm::is_smp_comm()
+{
+  if (this == MPI_COMM_UNINITIALIZED)
+    return smpi_process()->comm_world()->is_smp_comm();
+  return is_smp_comm_;
+}
+
MPI_Comm Comm::split(int color, int key)
{
if (this == MPI_COMM_UNINITIALIZED)
}
}
- MPI_Comm comm_intra = new Comm(group_intra, nullptr);
+ MPI_Comm comm_intra = new Comm(group_intra, nullptr, 1);
leader=min_index;
int* leaders_map = new int[comm_size];
std::fill_n(leaders_map, comm_size, 0);
std::fill_n(leader_list, comm_size, -1);
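+ /* the SMP structures are still being built at this point, so use a fixed
+ * algorithm rather than a selector that might rely on them or re-enter
+ * init_smp() */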
- Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this);
+ Coll_allgather_ring::allgather(&leader, 1, MPI_INT, leaders_map, 1, MPI_INT, this);
if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) {
// we need to switch as the called function may silently touch global variables
//create leader_communicator
for (i=0; i< leader_group_size;i++)
leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i);
- leader_comm = new Comm(leaders_group, nullptr);
+ leader_comm = new Comm(leaders_group, nullptr,1);
this->set_leaders_comm(leader_comm);
this->set_intra_comm(comm_intra);
leaders_group->set_mapping(simgrid::s4u::Actor::by_pid(leader_list[i]), i);
if(this->get_leaders_comm()==MPI_COMM_NULL){
- leader_comm = new Comm(leaders_group, nullptr);
+ leader_comm = new Comm(leaders_group, nullptr,1);
this->set_leaders_comm(leader_comm);
}else{
leader_comm=this->get_leaders_comm();
if(comm_intra->rank()==0) {
int is_uniform = 1;
int* non_uniform_map = xbt_new0(int,leader_group_size);
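+ /* fixed algorithm again, on the leaders' communicator this time */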
- Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT,
+ Coll_allgather_ring::allgather(&my_local_size, 1, MPI_INT,
non_uniform_map, 1, MPI_INT, leader_comm);
for(i=0; i < leader_group_size; i++) {
if(non_uniform_map[0] != non_uniform_map[i]) {
}
is_uniform_=is_uniform;
}
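+ /* fixed algorithm here too: a selector-based bcast could dispatch to an
+ * SMP-aware variant that needs the structures this function is still
+ * setting up */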
- Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra );
+ Coll_bcast_scatter_LR_allgather::bcast(&is_uniform_, 1, MPI_INT, 0, comm_intra);
if (smpi_privatize_global_variables == SmpiPrivStrategies::MMAP) {
// we need to switch as the called function may silently touch global variables
ADD_TESH(tesh-smpi-coll-alltoallv-${ALLTOALLV} --cfg smpi/alltoallv:${ALLTOALLV} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-alltoallv coll-alltoallv.tesh)
endforeach()
- foreach (BARRIER ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
+ foreach (BARRIER ompi mpich mpich_smp ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
ADD_TESH(tesh-smpi-coll-barrier-${BARRIER} --cfg smpi/barrier:${BARRIER} --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/coll-barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/coll-barrier coll-barrier.tesh)
endforeach()
src/smpi/colls/alltoallv/alltoallv-ring.cpp
src/smpi/colls/barrier/barrier-ompi.cpp
src/smpi/colls/barrier/barrier-mvapich2-pair.cpp
+ src/smpi/colls/barrier/barrier-mpich-smp.cpp
src/smpi/colls/bcast/bcast-NTSB.cpp
src/smpi/colls/bcast/bcast-NTSL-Isend.cpp
src/smpi/colls/bcast/bcast-NTSL.cpp