From: degomme
Date: Tue, 14 Mar 2017 17:05:56 +0000 (+0100)
Subject: SMPI colls are not really C++. But cleaner than before.
X-Git-Tag: v3_15~117
X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/b7ed19dfcc221d7b3eca182abb5c4a3946671172

SMPI colls are not really C++. But cleaner than before.
Still needs a lot to be considered clean.
---
diff --git a/include/smpi/forward.hpp b/include/smpi/forward.hpp index 7788f55599..c1a210ccf8 100644 --- a/include/smpi/forward.hpp +++ b/include/smpi/forward.hpp @@ -32,6 +32,8 @@ class Type_Struct; class Type_Vector; class Win; +class Colls; + } } diff --git a/src/include/smpi/smpi_interface.h b/src/include/smpi/smpi_interface.h index d5669c4918..74df6907f7 100644 --- a/src/include/smpi/smpi_interface.h +++ b/src/include/smpi/smpi_interface.h @@ -23,66 +23,51 @@ typedef struct mpi_coll_description* mpi_coll_description_t; * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_gather_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, int, MPI_Comm)); /** \ingroup MPI allgather * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allgather_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_allgather_fun) (void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)); /** \ingroup MPI allgather * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allgatherv_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_allgatherv_fun) (void *, int, MPI_Datatype, void *, int*, int*, MPI_Datatype, MPI_Comm)); /** \ingroup MPI allreduce * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allreduce_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype,MPI_Op op, - MPI_Comm comm)); /** \ingroup MPI alltoall * \brief The list of all available alltoall collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_alltoall_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)); /** \ingroup MPI alltoallv * \brief The list of all available alltoallv collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_alltoallv_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void *, int*, int*, MPI_Datatype, - MPI_Comm)); /** \ingroup MPI bcast * \brief The list of all available bcast collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_bcast_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com)); /** \ingroup MPI reduce * \brief The list of all available reduce collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_reduce_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, - MPI_Op op, int root, MPI_Comm comm)); /** \ingroup MPI reduce_scatter * \brief The list of all available allgather collectives */ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_reduce_scatter_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_reduce_scatter_fun) (void *sbuf, void *rbuf, int *rcounts, - MPI_Datatype dtype, MPI_Op op,MPI_Comm comm)); /** \ingroup MPI scatter * \brief The list of all available allgather collectives */
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_scatter_description[]; -XBT_PUBLIC_DATA(int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)); /** \ingroup MPI barrier * \brief The list of all available allgather collectives @@ -91,7 +76,7 @@ XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_barrier_description[]; XBT_PUBLIC_DATA(int (*mpi_coll_barrier_fun)(MPI_Comm comm)); XBT_PUBLIC(void) coll_help(const char *category, s_mpi_coll_description_t * table); -XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table, char *name, const char *desc); +XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc); XBT_PUBLIC_DATA(void) (*smpi_coll_cleanup_callback)(); XBT_PUBLIC(void) smpi_coll_cleanup_mvapich2(void); diff --git a/src/smpi/colls/allgather/allgather-2dmesh.cpp b/src/smpi/colls/allgather/allgather-2dmesh.cpp index 21be3ac9e2..103b4452b3 100644 --- a/src/smpi/colls/allgather/allgather-2dmesh.cpp +++ b/src/smpi/colls/allgather/allgather-2dmesh.cpp @@ -105,8 +105,13 @@ static int is_2dmesh(int num, int *i, int *j) * "simple" * Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype +Coll_allgather_2dmesh::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) { @@ -192,3 +197,6 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgather/allgather-3dmesh.cpp b/src/smpi/colls/allgather/allgather-3dmesh.cpp index dfe154d01f..2c80059f85 100644 --- a/src/smpi/colls/allgather/allgather-3dmesh.cpp +++ b/src/smpi/colls/allgather/allgather-3dmesh.cpp @@ -92,7 +92,11 @@ static int is_3dmesh(int num, int *i, int *j, int *k) * follows "simple" * Auther: Ahmad Faraj ****************************************************************************/ -int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_3dmesh::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -206,3 +210,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-GB.cpp b/src/smpi/colls/allgather/allgather-GB.cpp index 1a107b02c9..f50e07f9fa 100644 --- a/src/smpi/colls/allgather/allgather-GB.cpp +++ b/src/smpi/colls/allgather/allgather-GB.cpp @@ -6,17 +6,19 @@ #include "../colls_private.h" +using namespace simgrid::smpi; + // Allgather - gather/bcast algorithm -int smpi_coll_tuned_allgather_GB(void *send_buff, int send_count, +int Coll_allgather_GB::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) { int num_procs; num_procs = comm->size(); - mpi_coll_gather_fun(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, + Colls::gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type, 0, comm); - mpi_coll_bcast_fun(recv_buff, (recv_count * num_procs), recv_type, 0, comm); + Colls::bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm); return 
MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp b/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp index dd1018a592..d714abfb1c 100644 --- a/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp +++ b/src/smpi/colls/allgather/allgather-NTSLR-NB.cpp @@ -6,9 +6,13 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm int -smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, +Coll_allgather_NTSLR_NB::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) { @@ -30,7 +34,7 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, // irregular case use default MPI fucntions if (scount * sextent != rcount * rextent) { XBT_WARN("MPI_allgather_NTSLR_NB use default MPI_allgather."); - smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); return MPI_SUCCESS; } @@ -67,3 +71,6 @@ smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgather/allgather-NTSLR.cpp b/src/smpi/colls/allgather/allgather-NTSLR.cpp index 5191e85e28..5c8faee997 100644 --- a/src/smpi/colls/allgather/allgather-NTSLR.cpp +++ b/src/smpi/colls/allgather/allgather-NTSLR.cpp @@ -6,9 +6,14 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + + // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm int -smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, +Coll_allgather_NTSLR::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) { @@ -26,7 +31,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, // irregular case use default MPI fucntions if (scount * sextent != rcount * rextent) { XBT_WARN("MPI_allgather_NTSLR use default MPI_allgather."); - smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); return MPI_SUCCESS; } @@ -54,3 +59,7 @@ smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-SMP-NTS.cpp b/src/smpi/colls/allgather/allgather-SMP-NTS.cpp index 0ea5ff7d24..571312063e 100644 --- a/src/smpi/colls/allgather/allgather-SMP-NTS.cpp +++ b/src/smpi/colls/allgather/allgather-SMP-NTS.cpp @@ -6,7 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_SMP_NTS::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) @@ -42,7 +46,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, /* for too small number of processes, use default implementation */ if (comm_size <= num_core) { XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather."); - smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); + Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm); return MPI_SUCCESS; } @@ -157,3 +161,7 @@ int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-bruck.cpp b/src/smpi/colls/allgather/allgather-bruck.cpp index 
68edee294d..7040938b52 100644 --- a/src/smpi/colls/allgather/allgather-bruck.cpp +++ b/src/smpi/colls/allgather/allgather-bruck.cpp @@ -67,7 +67,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Comment: Original bruck algorithm from MPICH is slightly modified by * Ahmad Faraj. ****************************************************************************/ -int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, + +namespace simgrid{ +namespace smpi{ + + + +int Coll_allgather_bruck::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -133,3 +139,7 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, smpi_free_tmp_buffer(tmp_buff); return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-loosely-lr.cpp b/src/smpi/colls/allgather/allgather-loosely-lr.cpp index 1d31885092..e7307433a2 100644 --- a/src/smpi/colls/allgather/allgather-loosely-lr.cpp +++ b/src/smpi/colls/allgather/allgather-loosely-lr.cpp @@ -6,7 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_loosely_lr::allgather(void *sbuf, int scount, MPI_Datatype stype, void *rbuf, int rcount, MPI_Datatype rtype, MPI_Comm comm) @@ -131,3 +135,7 @@ if(comm->get_leaders_comm()==MPI_COMM_NULL){ return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-mvapich-smp.cpp b/src/smpi/colls/allgather/allgather-mvapich-smp.cpp index 32d4aea198..9c5f5b1b80 100644 --- a/src/smpi/colls/allgather/allgather-mvapich-smp.cpp +++ b/src/smpi/colls/allgather/allgather-mvapich-smp.cpp @@ -36,9 +36,9 @@ */ #include "../colls_private.h" +using namespace simgrid::smpi; - -int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Datatype sendtype, +int Coll_allgather_mvapich2_smp::allgather(void *sendbuf,int sendcnt, MPI_Datatype sendtype, void *recvbuf, int recvcnt,MPI_Datatype recvtype, MPI_Comm comm) { @@ -82,7 +82,7 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty /*If there is just one node, after gather itself, * root has all the data and it can do bcast*/ if(local_rank == 0) { - mpi_errno = mpi_coll_gather_fun(sendbuf, sendcnt,sendtype, + mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype, (void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt, recvtype, 0, shmem_comm); @@ -90,12 +90,12 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty /*Since in allgather all the processes could have * its own data in place*/ if(sendbuf == MPI_IN_PLACE) { - mpi_errno = mpi_coll_gather_fun((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), + mpi_errno = Colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)), recvcnt , recvtype, recvbuf, recvcnt, recvtype, 0, shmem_comm); } else { - mpi_errno = mpi_coll_gather_fun(sendbuf, sendcnt,sendtype, + mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype, recvbuf, recvcnt, recvtype, 0, shmem_comm); } @@ -128,7 +128,7 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty void* sendbuf=((char*)recvbuf)+recvtype->get_extent()*displs[leader_comm->rank()]; - mpi_errno = mpi_coll_allgatherv_fun(sendbuf, + mpi_errno = Colls::allgatherv(sendbuf, (recvcnt*local_size), recvtype, recvbuf, recvcnts, @@ -141,7 +141,7 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, 
MPI_Dataty - mpi_errno = smpi_coll_tuned_allgather_mpich(sendtmpbuf, + mpi_errno = Coll_allgather_mpich::allgather(sendtmpbuf, (recvcnt*local_size), recvtype, recvbuf, (recvcnt*local_size), recvtype, @@ -151,6 +151,6 @@ int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Dataty } /*Bcast the entire data from node leaders to all other cores*/ - mpi_errno = mpi_coll_bcast_fun (recvbuf, recvcnt * size, recvtype, 0, shmem_comm); + mpi_errno = Colls::bcast (recvbuf, recvcnt * size, recvtype, 0, shmem_comm); return mpi_errno; } diff --git a/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp b/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp index d8f9e8d968..1bb0e53b35 100644 --- a/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp +++ b/src/smpi/colls/allgather/allgather-ompi-neighborexchange.cpp @@ -63,8 +63,12 @@ */ #include "../colls_private.h" + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, +Coll_allgather_ompi_neighborexchange::allgather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -86,7 +90,7 @@ smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, XBT_DEBUG( "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", size); - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } @@ -178,3 +182,7 @@ smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount, __FILE__, line, err, rank); return err; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-pair.cpp b/src/smpi/colls/allgather/allgather-pair.cpp index cfec7c89fb..553378d68c 100644 --- a/src/smpi/colls/allgather/allgather-pair.cpp +++ b/src/smpi/colls/allgather/allgather-pair.cpp @@ -64,8 +64,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * phases, nodes in pair communicate their data. 
* Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_pair(void *send_buff, int send_count, +Coll_allgather_pair::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -101,3 +106,7 @@ smpi_coll_tuned_allgather_pair(void *send_buff, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-rdb.cpp b/src/smpi/colls/allgather/allgather-rdb.cpp index 8fb6efa6ef..6d2950aa42 100644 --- a/src/smpi/colls/allgather/allgather-rdb.cpp +++ b/src/smpi/colls/allgather/allgather-rdb.cpp @@ -6,8 +6,12 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, +Coll_allgather_rdb::allgather(void *sbuf, int send_count, MPI_Datatype send_type, void *rbuf, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -127,3 +131,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, return success; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-rhv.cpp b/src/smpi/colls/allgather/allgather-rhv.cpp index 8e7f44bd28..12a929db11 100644 --- a/src/smpi/colls/allgather/allgather-rhv.cpp +++ b/src/smpi/colls/allgather/allgather-rhv.cpp @@ -6,10 +6,14 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + + // now only work with power of two processes int -smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, +Coll_allgather_rhv::allgather(void *sbuf, int send_count, MPI_Datatype send_type, void *rbuf, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -42,7 +46,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, if (send_chunk != recv_chunk) { XBT_WARN("MPI_allgather_rhv use default MPI_allgather."); - smpi_mpi_allgather(sbuf, send_count, send_type, rbuf, recv_count, + Coll_allgather_default::allgather(sbuf, send_count, send_type, rbuf, recv_count, recv_type, comm); return MPI_SUCCESS; } @@ -103,3 +107,7 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-ring.cpp b/src/smpi/colls/allgather/allgather-ring.cpp index c638b25bd1..14ffc97288 100644 --- a/src/smpi/colls/allgather/allgather-ring.cpp +++ b/src/smpi/colls/allgather/allgather-ring.cpp @@ -63,8 +63,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j+ i. 
* Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_ring(void *send_buff, int send_count, +Coll_allgather_ring::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -97,3 +102,7 @@ smpi_coll_tuned_allgather_ring(void *send_buff, int send_count, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-smp-simple.cpp b/src/smpi/colls/allgather/allgather-smp-simple.cpp index 8d7f190a86..38f1c5c45a 100644 --- a/src/smpi/colls/allgather/allgather-smp-simple.cpp +++ b/src/smpi/colls/allgather/allgather-smp-simple.cpp @@ -6,7 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_allgather_smp_simple::allgather(void *send_buf, int scount, MPI_Datatype stype, void *recv_buf, int rcount, MPI_Datatype rtype, MPI_Comm comm) @@ -121,3 +125,7 @@ int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount, return MPI_SUCCESS; } + + +} +} diff --git a/src/smpi/colls/allgather/allgather-spreading-simple.cpp b/src/smpi/colls/allgather/allgather-spreading-simple.cpp index 539358f2f1..57d0046b79 100644 --- a/src/smpi/colls/allgather/allgather-spreading-simple.cpp +++ b/src/smpi/colls/allgather/allgather-spreading-simple.cpp @@ -66,8 +66,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, +Coll_allgather_spreading_simple::allgather(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, @@ -117,3 +122,6 @@ smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-GB.cpp b/src/smpi/colls/allgatherv/allgatherv-GB.cpp index bd53450ae0..6c41685422 100644 --- a/src/smpi/colls/allgatherv/allgatherv-GB.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-GB.cpp @@ -6,13 +6,16 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + // Allgather - gather/bcast algorithm -int smpi_coll_tuned_allgatherv_GB(void *send_buff, int send_count, +int Coll_allgatherv_GB::allgatherv(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) { - smpi_mpi_gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, + Colls::gatherv(send_buff, send_count, send_type, recv_buff, recv_counts, recv_disps, recv_type, 0, comm); int num_procs, i, current, max = 0; num_procs = comm->size(); @@ -21,7 +24,10 @@ int smpi_coll_tuned_allgatherv_GB(void *send_buff, int send_count, if (current > max) max = current; } - mpi_coll_bcast_fun(recv_buff, max, recv_type, 0, comm); + Colls::bcast(recv_buff, max, recv_type, 0, comm); return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp b/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp index c224fbd7d8..5d5f94537d 100644 --- a/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-mpich-rdb.cpp @@ -7,7 +7,11 @@ /* Short or medium size message and power-of-two no. of processes. 
Use * recursive doubling algorithm */ #include "../colls_private.h" -int smpi_coll_tuned_allgatherv_mpich_rdb ( + +namespace simgrid{ +namespace smpi{ + +int Coll_allgatherv_mpich_rdb::allgatherv ( void *sendbuf, int sendcount, MPI_Datatype sendtype, @@ -214,3 +218,6 @@ int smpi_coll_tuned_allgatherv_mpich_rdb ( smpi_free_tmp_buffer(tmp_buf_rl); return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp b/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp index 2deb294b6d..273bf6246e 100644 --- a/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-mpich-ring.cpp @@ -24,8 +24,12 @@ * recv_type: data type of elements being received * comm: communication ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_mpich_ring(void *sendbuf, int sendcount, +Coll_allgatherv_mpich_ring::allgatherv(void *sendbuf, int sendcount, MPI_Datatype send_type, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm) @@ -124,3 +128,6 @@ smpi_coll_tuned_allgatherv_mpich_ring(void *sendbuf, int sendcount, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp b/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp index f93667533b..8d28d925d1 100644 --- a/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-ompi-bruck.cpp @@ -76,7 +76,11 @@ * [5] [5] [5] [5] [5] [5] [5] * [6] [6] [6] [6] [6] [6] [6] */ -int smpi_coll_tuned_allgatherv_ompi_bruck(void *sbuf, int scount, + +namespace simgrid{ +namespace smpi{ + +int Coll_allgatherv_ompi_bruck::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdispls, @@ -173,3 +177,6 @@ int smpi_coll_tuned_allgatherv_ompi_bruck(void *sbuf, int scount, } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp b/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp index 025c8b8779..c24e79e0bf 100644 --- a/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-ompi-neighborexchange.cpp @@ -65,8 +65,11 @@ #include "../colls_private.h" +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount, +Coll_allgatherv_ompi_neighborexchange::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int *rcounts, int *rdispls, MPI_Datatype rdtype, @@ -89,7 +92,7 @@ smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount, XBT_DEBUG( "coll:tuned:allgatherv_ompi_neighborexchange WARNING: odd size %d, switching to ring algorithm", size); - return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, + return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); @@ -216,3 +219,6 @@ smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount, __FILE__, line, err, rank); return err; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-pair.cpp b/src/smpi/colls/allgatherv/allgatherv-pair.cpp index 438c8634b4..1f6756c53d 100644 --- a/src/smpi/colls/allgatherv/allgatherv-pair.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-pair.cpp @@ -64,8 +64,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * phases, nodes in pair communicate their data. 
* Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_pair(void *send_buff, int send_count, +Coll_allgatherv_pair::allgatherv(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -100,3 +104,6 @@ smpi_coll_tuned_allgatherv_pair(void *send_buff, int send_count, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allgatherv/allgatherv-ring.cpp b/src/smpi/colls/allgatherv/allgatherv-ring.cpp index 7d7927ec7b..4712c557be 100644 --- a/src/smpi/colls/allgatherv/allgatherv-ring.cpp +++ b/src/smpi/colls/allgatherv/allgatherv-ring.cpp @@ -63,8 +63,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j+ i. * Auther: Ahmad Faraj ****************************************************************************/ + +namespace simgrid{ +namespace smpi{ + int -smpi_coll_tuned_allgatherv_ring(void *send_buff, int send_count, +Coll_allgatherv_ring::allgatherv(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -98,3 +102,6 @@ smpi_coll_tuned_allgatherv_ring(void *send_buff, int send_count, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/allreduce/allreduce-lr.cpp b/src/smpi/colls/allreduce/allreduce-lr.cpp index ab847ee411..5ff1b88ff5 100644 --- a/src/smpi/colls/allreduce/allreduce-lr.cpp +++ b/src/smpi/colls/allreduce/allreduce-lr.cpp @@ -20,7 +20,7 @@ //#include int -smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, +Coll_allreduce_lr::allreduce(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { int tag = COLL_TAG_ALLREDUCE; @@ -39,7 +39,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, /* when communication size is smaller than number of process (not support) */ if (rcount < size) { XBT_WARN("MPI_allreduce_lr use default MPI_allreduce."); - smpi_mpi_allreduce(sbuf, rbuf, rcount, dtype, op, comm); + Coll_allreduce_default::allreduce(sbuf, rbuf, rcount, dtype, op, comm); return MPI_SUCCESS; } @@ -97,7 +97,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, /* when communication size is not divisible by number of process: call the native implementation for the remain chunk at the end of the operation */ if (remainder_flag) { - return mpi_coll_allreduce_fun((char *) sbuf + remainder_offset, + return Colls::allreduce((char *) sbuf + remainder_offset, (char *) rbuf + remainder_offset, remainder, dtype, op, comm); } diff --git a/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp b/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp index e8789a11d9..c94bd90758 100644 --- a/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp +++ b/src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp @@ -23,7 +23,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_allreduce_mvapich2_rs(void *sendbuf, +int Coll_allreduce_mvapich2_rs::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, diff --git a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp index d2a2264c82..123de2ce44 100644 --- a/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp +++ b/src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp @@ -37,10 +37,10 @@ #include "../colls_private.h" 
-#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb -#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs +#define MPIR_Allreduce_pt2pt_rd_MV2 Coll_allreduce_rdb::allreduce +#define MPIR_Allreduce_pt2pt_rs_MV2 Coll_allreduce_mvapich2_rs::allreduce -extern int (*MV2_Allreduce_function)(void *sendbuf, +extern int (*MV2_Allreducection)(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -59,7 +59,7 @@ static int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } @@ -69,13 +69,13 @@ static int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } /* general two level allreduce helper function */ -int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, +int Coll_allreduce_mvapich2_two_level::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -89,9 +89,9 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Allreduce_intra_function==NULL) - MV2_Allreduce_intra_function = smpi_coll_tuned_allreduce_mpich; - if(MV2_Allreduce_function==NULL) - MV2_Allreduce_function = smpi_coll_tuned_allreduce_rdb; + MV2_Allreduce_intra_function = Coll_allreduce_mpich::allreduce; + if(MV2_Allreducection==NULL) + MV2_Allreducection = Coll_allreduce_rdb::allreduce; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -135,7 +135,7 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, void* sendtmpbuf = (char *)smpi_get_tmp_sendbuffer(count*datatype->get_extent()); Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); /* inter-node allreduce */ - if(MV2_Allreduce_function == &MPIR_Allreduce_pt2pt_rd_MV2){ + if(MV2_Allreducection == &MPIR_Allreduce_pt2pt_rd_MV2){ mpi_errno = MPIR_Allreduce_pt2pt_rd_MV2(sendtmpbuf, recvbuf, count, datatype, op, leader_comm); @@ -163,7 +163,7 @@ int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf, /* Broadcasting the mesage from leader to the rest */ /* Note: shared memory broadcast could improve the performance */ - mpi_errno = mpi_coll_bcast_fun(recvbuf, count, datatype, 0, shmem_comm); + mpi_errno = Colls::bcast(recvbuf, count, datatype, 0, shmem_comm); return (mpi_errno); diff --git a/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp b/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp index 595a189ff8..8a88359791 100644 --- a/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp +++ b/src/smpi/colls/allreduce/allreduce-ompi-ring-segmented.cpp @@ -155,7 +155,7 @@ #include "../colls_private.h" int -smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count, +Coll_allreduce_ompi_ring_segmented::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) @@ -202,7 +202,7 @@ smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count, /* Special case for count less than size * segcount - use regular ring */ if (count < size * segcount) { XBT_DEBUG( "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count); - return (smpi_coll_tuned_allreduce_lr(sbuf, 
rbuf, count, dtype, op, + return (Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype, op, comm)); } diff --git a/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp b/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp index f72175bd09..def46333ee 100644 --- a/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab-rdb.cpp @@ -6,7 +6,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, +int Coll_allreduce_rab_rdb::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-rab1.cpp b/src/smpi/colls/allreduce/allreduce-rab1.cpp index 96c71055af..120abebf46 100644 --- a/src/smpi/colls/allreduce/allreduce-rab1.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab1.cpp @@ -6,9 +6,10 @@ #include "../colls_private.h" //#include +using namespace simgrid::smpi; // NP pow of 2 for now -int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, +int Coll_allreduce_rab1::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { @@ -68,7 +69,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * extent); - mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); + Colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); smpi_free_tmp_buffer(recv); @@ -102,7 +103,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent); - mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); + Colls::allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); smpi_free_tmp_buffer(tmp_buf); } diff --git a/src/smpi/colls/allreduce/allreduce-rab2.cpp b/src/smpi/colls/allreduce/allreduce-rab2.cpp index 007a42b842..ac1e98f505 100644 --- a/src/smpi/colls/allreduce/allreduce-rab2.cpp +++ b/src/smpi/colls/allreduce/allreduce-rab2.cpp @@ -6,9 +6,10 @@ #include "../colls_private.h" //#include +using namespace simgrid::smpi; // this requires that count >= NP -int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, +int Coll_allreduce_rab2::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { @@ -46,14 +47,14 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, memcpy(send, sbuff, s_extent * count); - mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm); + Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); memcpy(tmp, recv, nbytes); for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes) if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, tmp, &send_size, dtype); - mpi_coll_allgather_fun(tmp, send_size, dtype, recv, send_size, dtype, comm); + Colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm); memcpy(rbuff, recv, count * s_extent); smpi_free_tmp_buffer(recv); @@ -67,7 +68,7 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, recv = (void *) smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs); - mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm); + Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm); memcpy((char *) rbuff + r_offset, recv, nbytes); @@ -75,7 +76,7 @@ int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff, if(op!=MPI_OP_NULL) op->apply( (char *) recv + 
s_offset, (char *) rbuff + r_offset, &send_size, dtype); - mpi_coll_allgather_fun((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, + Colls::allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size, dtype, comm); smpi_free_tmp_buffer(recv); } diff --git a/src/smpi/colls/allreduce/allreduce-rdb.cpp b/src/smpi/colls/allreduce/allreduce-rdb.cpp index cb99c01b54..82fc3fe9c8 100644 --- a/src/smpi/colls/allreduce/allreduce-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-rdb.cpp @@ -7,7 +7,7 @@ #include "../colls_private.h" //#include -int smpi_coll_tuned_allreduce_rdb(void *sbuff, void *rbuff, int count, +int Coll_allreduce_rdb::allreduce(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { int nprocs, rank, tag = COLL_TAG_ALLREDUCE; diff --git a/src/smpi/colls/allreduce/allreduce-redbcast.cpp b/src/smpi/colls/allreduce/allreduce-redbcast.cpp index 26b9585300..35e40f17d9 100644 --- a/src/smpi/colls/allreduce/allreduce-redbcast.cpp +++ b/src/smpi/colls/allreduce/allreduce-redbcast.cpp @@ -6,11 +6,11 @@ #include "../colls_private.h" -int smpi_coll_tuned_allreduce_redbcast(void *buf, void *buf2, int count, +int Coll_allreduce_redbcast::allreduce(void *buf, void *buf2, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(buf, buf2, count, datatype, op, 0, comm); - mpi_coll_bcast_fun(buf2, count, datatype, 0, comm); + Colls::reduce(buf, buf2, count, datatype, op, 0, comm); + Colls::bcast(buf2, count, datatype, 0, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp b/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp index 9484f2652f..9ac2c82281 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-binomial-pipeline.cpp @@ -38,7 +38,7 @@ This fucntion performs all-reduce operation as follow. ** in a pipeline fashion 3) binomial_tree bcast intra-communication between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, +int Coll_allreduce_smp_binomial_pipeline::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) diff --git a/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp b/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp index 2545da91f6..7b93bf60ab 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-binomial.cpp @@ -27,7 +27,7 @@ This fucntion performs all-reduce operation as follow. 3) binomial_tree bcast intra-communication between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, +int Coll_allreduce_smp_binomial::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp b/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp index a5b40f1707..285d354b26 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rdb.cpp @@ -26,7 +26,7 @@ This fucntion performs all-reduce operation as follow. 
2) Recursive doubling intra-communication between root of each SMP node 3) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count, +int Coll_allreduce_smp_rdb::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp index 23abf0fde2..7e62a835ee 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag-lr.cpp @@ -14,7 +14,7 @@ This fucntion performs all-reduce operation as follow. 3) allgather - inter between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf, +int Coll_allreduce_smp_rsag_lr::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp index 021279555e..53cde44a07 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag-rab.cpp @@ -18,7 +18,7 @@ This fucntion performs all-reduce operation as follow. 3) allgather - inter between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count, +int Coll_allreduce_smp_rsag_rab::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp b/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp index 4cb5d31b03..f5f5b75bf1 100644 --- a/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp +++ b/src/smpi/colls/allreduce/allreduce-smp-rsag.cpp @@ -13,7 +13,7 @@ This fucntion performs all-reduce operation as follow. 
3) allgather - inter between root of each SMP node 4) binomial_tree bcast inside each SMP node */ -int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf, +int Coll_allreduce_smp_rsag::allreduce(void *send_buf, void *recv_buf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { diff --git a/src/smpi/colls/alltoall/alltoall-2dmesh.cpp b/src/smpi/colls/alltoall/alltoall-2dmesh.cpp index ca0702735e..bfc1bc455b 100644 --- a/src/smpi/colls/alltoall/alltoall-2dmesh.cpp +++ b/src/smpi/colls/alltoall/alltoall-2dmesh.cpp @@ -53,7 +53,7 @@ static int alltoall_check_is_2dmesh(int num, int *i, int *j) return 0; } -int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count, +int Coll_alltoall_2dmesh::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-3dmesh.cpp b/src/smpi/colls/alltoall/alltoall-3dmesh.cpp index 742998e92c..85415517f4 100644 --- a/src/smpi/colls/alltoall/alltoall-3dmesh.cpp +++ b/src/smpi/colls/alltoall/alltoall-3dmesh.cpp @@ -46,7 +46,7 @@ static int alltoall_check_is_3dmesh(int num, int *i, int *j, int *k) return 0; } -int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count, +int Coll_alltoall_3dmesh::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-basic-linear.cpp b/src/smpi/colls/alltoall/alltoall-basic-linear.cpp new file mode 100644 index 0000000000..723c5b6121 --- /dev/null +++ b/src/smpi/colls/alltoall/alltoall-basic-linear.cpp @@ -0,0 +1,68 @@ +/* Copyright (c) 2013-2017. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + + #include "../colls_private.h" + +/*Naive and simple basic alltoall implementation. */ + + +namespace simgrid{ +namespace smpi{ + + +int Coll_alltoall_basic_linear::alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = 888; + int i; + int count; + MPI_Aint lb = 0, sendext = 0, recvext = 0; + MPI_Request *requests; + + /* Initialize. */ + int rank = comm->rank(); + int size = comm->size(); + XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank); + sendtype->extent(&lb, &sendext); + recvtype->extent(&lb, &recvext); + /* simple optimization */ + int err = Datatype::copy(static_cast<char*>(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, + static_cast<char*>(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); + if (err == MPI_SUCCESS && size > 1) { + /* Initiate all send/recv to/from others. */ + requests = xbt_new(MPI_Request, 2 * (size - 1)); + /* Post all receives first -- a simple optimization */ + count = 0; + for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) { + requests[count] = Request::irecv_init(static_cast<char*>(recvbuf) + i * recvcount * recvext, recvcount, + recvtype, i, system_tag, comm); + count++; + } + /* Now post all sends in reverse order + * - We would like to minimize the search time through message queue + * when messages actually arrive in the order in which they were posted.
+ TODO: check the previous assertion + */ + for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) { + requests[count] = Request::isend_init(static_cast<char*>(sendbuf) + i * sendcount * sendext, sendcount, + sendtype, i, system_tag, comm); + count++; + } + /* Wait for them all. */ + Request::startall(count, requests); + XBT_DEBUG("<%d> wait for %d requests", rank, count); + Request::waitall(count, requests, MPI_STATUS_IGNORE); + for(i = 0; i < count; i++) { + if(requests[i]!=MPI_REQUEST_NULL) + Request::unref(&requests[i]); + } + xbt_free(requests); + } + return err; +} + +} +} diff --git a/src/smpi/colls/alltoall/alltoall-bruck.cpp b/src/smpi/colls/alltoall/alltoall-bruck.cpp index 9c0e30cd2a..fa262b7ab2 100644 --- a/src/smpi/colls/alltoall/alltoall-bruck.cpp +++ b/src/smpi/colls/alltoall/alltoall-bruck.cpp @@ -24,8 +24,15 @@ * Auther: MPICH / modified by Ahmad Faraj ****************************************************************************/ + +#include "../colls_private.h" + +namespace simgrid{ +namespace smpi{ + + int -smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, +Coll_alltoall_bruck::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -35,7 +42,7 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, MPI_Datatype new_type; int *blocks_length, *disps; - int i, src, dst, rank, num_procs, count, remainder, block, position; + int i, src, dst, rank, num_procs, count, block, position; int pack_size, tag = COLL_TAG_ALLTOALL, pof2 = 1; @@ -79,7 +86,7 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, } MPI_Type_indexed(count, blocks_length, disps, recv_type, &new_type); - smpi_datatype_commit(&new_type); + new_type->commit(); position = 0; MPI_Pack(recv_buff, 1, new_type, tmp_buff, pack_size, &position, comm); @@ -113,3 +120,6 @@ smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count, smpi_free_tmp_buffer(tmp_buff); return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp b/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp index 047c876779..e812ea33d1 100644 --- a/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp +++ b/src/smpi/colls/alltoall/alltoall-mvapich-scatter-dest.cpp @@ -43,7 +43,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_alltoall_mvapich2_scatter_dest( +int Coll_alltoall_mvapich2_scatter_dest::alltoall( void *sendbuf, int sendcount, MPI_Datatype sendtype, diff --git a/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp index 6b5f3e8d39..2299e719d6 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_pair_light_barrier(void *send_buff, int send_count, +Coll_alltoall_pair_light_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp index 2493ca433e..9c04f878f3 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int
-smpi_coll_tuned_alltoall_pair_mpi_barrier(void *send_buff, int send_count, +Coll_alltoall_pair_mpi_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -54,7 +54,7 @@ smpi_coll_tuned_alltoall_pair_mpi_barrier(void *send_buff, int send_count, for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp b/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp index 6a12962759..c18406e5ef 100644 --- a/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp @@ -27,7 +27,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_pair_one_barrier(void *send_buff, int send_count, +Coll_alltoall_pair_one_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -53,7 +53,7 @@ smpi_coll_tuned_alltoall_pair_one_barrier(void *send_buff, int send_count, send_chunk *= send_count; recv_chunk *= recv_count; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, diff --git a/src/smpi/colls/alltoall/alltoall-pair.cpp b/src/smpi/colls/alltoall/alltoall-pair.cpp index 4d2956af3d..1f1f5ef9ba 100644 --- a/src/smpi/colls/alltoall/alltoall-pair.cpp +++ b/src/smpi/colls/alltoall/alltoall-pair.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ -int smpi_coll_tuned_alltoall_pair_rma(void *send_buff, int send_count, MPI_Datatype send_type, +int Coll_alltoall_pair_rma::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) { @@ -62,7 +62,7 @@ int smpi_coll_tuned_alltoall_pair_rma(void *send_buff, int send_count, MPI_Datat } -int smpi_coll_tuned_alltoall_pair(void *send_buff, int send_count, +int Coll_alltoall_pair::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-rdb.cpp b/src/smpi/colls/alltoall/alltoall-rdb.cpp index 92a0722e7d..f41ded7adf 100644 --- a/src/smpi/colls/alltoall/alltoall-rdb.cpp +++ b/src/smpi/colls/alltoall/alltoall-rdb.cpp @@ -27,7 +27,7 @@ * Auther: MPICH / slightly modified by Ahmad Faraj. 
****************************************************************************/ -int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count, +int Coll_alltoall_rdb::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp index f670da3b63..5292eeff23 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring_light_barrier(void *send_buff, int send_count, +Coll_alltoall_ring_light_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp index aefe70fe5b..9145378770 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-mpi-barrier.cpp @@ -27,7 +27,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count, +Coll_alltoall_ring_mpi_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -52,7 +52,7 @@ smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count, src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag, recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp b/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp index 7ad212aa86..7f168b9dab 100644 --- a/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring-one-barrier.cpp @@ -26,7 +26,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring_one_barrier(void *send_buff, int send_count, +Coll_alltoall_ring_one_barrier::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) @@ -47,7 +47,7 @@ smpi_coll_tuned_alltoall_ring_one_barrier(void *send_buff, int send_count, send_chunk *= send_count; recv_chunk *= recv_count; - mpi_coll_barrier_fun(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; diff --git a/src/smpi/colls/alltoall/alltoall-ring.cpp b/src/smpi/colls/alltoall/alltoall-ring.cpp index dea762e39b..9029408630 100644 --- a/src/smpi/colls/alltoall/alltoall-ring.cpp +++ b/src/smpi/colls/alltoall/alltoall-ring.cpp @@ -26,7 +26,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoall_ring(void *send_buff, int send_count, +Coll_alltoall_ring::alltoall(void *send_buff, int send_count, MPI_Datatype send_type, void *recv_buff, int recv_count, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoallv/alltoallv-bruck.cpp b/src/smpi/colls/alltoallv/alltoallv-bruck.cpp index 3a6f9e5a03..56d750d7e4 100644 
--- a/src/smpi/colls/alltoallv/alltoallv-bruck.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-bruck.cpp @@ -13,7 +13,7 @@ * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not * less... **/ -int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddisps, +int Coll_alltoallv_bruck::alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) @@ -80,7 +80,7 @@ int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddis count++; } /* Wait for them all. */ - //smpi_mpi_startall(count, requests); + //Colls::startall(count, requests); XBT_DEBUG("<%d> wait for %d requests", rank, count); Request::waitall(count, requests, MPI_STATUSES_IGNORE); xbt_free(requests); diff --git a/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp b/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp index 0dfa76eb26..0ef5fde063 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp @@ -15,7 +15,7 @@ * GEF Oct05 after asking Jeff. */ int -smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps, +Coll_alltoallv_ompi_basic_linear::alltoallv(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp index 13b0396291..4159fdea7c 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_pair_light_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_pair_light_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp index a24b0fb392..76b88726db 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-mpi-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_pair_mpi_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_pair_mpi_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -51,7 +51,7 @@ smpi_coll_tuned_alltoallv_pair_mpi_barrier(void *send_buff, int *send_counts, in for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; - smpi_mpi_barrier(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp index 2118658225..77187552ec 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair-one-barrier.cpp @@ -27,7 +27,7 @@ 
****************************************************************************/ int -smpi_coll_tuned_alltoallv_pair_one_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_pair_one_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) { @@ -49,7 +49,7 @@ smpi_coll_tuned_alltoallv_pair_one_barrier(void *send_buff, int *send_counts, in send_chunk = send_type->get_extent(); recv_chunk = recv_type->get_extent(); - smpi_mpi_barrier(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = dst = rank ^ i; Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, diff --git a/src/smpi/colls/alltoallv/alltoallv-pair.cpp b/src/smpi/colls/alltoallv/alltoallv-pair.cpp index c8e276c4a0..b2ebef67f0 100644 --- a/src/smpi/colls/alltoallv/alltoallv-pair.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-pair.cpp @@ -27,7 +27,7 @@ * Auther: Ahmad Faraj ****************************************************************************/ -int smpi_coll_tuned_alltoallv_pair(void *send_buff, int *send_counts, int *send_disps, +int Coll_alltoallv_pair::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp index eb9ef77798..076771ae17 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-light-barrier.cpp @@ -28,7 +28,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring_light_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring_light_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp index 840a9d5e0d..c4d616927f 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-mpi-barrier.cpp @@ -27,7 +27,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring_mpi_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring_mpi_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -49,7 +49,7 @@ smpi_coll_tuned_alltoallv_ring_mpi_barrier(void *send_buff, int *send_counts, in src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; - smpi_mpi_barrier(comm); + Colls::barrier(comm); Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst, tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type, src, tag, comm, &s); diff --git a/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp b/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp index 1bb68f7fe1..a73f029cf9 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring-one-barrier.cpp @@ -26,7 +26,7 @@ 
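
Note (not part of the patch): the ring alltoall/alltoallv variants renamed in the hunks around here all use the same schedule: at step i every rank receives from (rank - i + num_procs) % num_procs and sends to (rank + i) % num_procs. What differs is the synchronization, which is exactly where these hunks touch the code: the *_mpi_barrier variants now call Colls::barrier(comm) inside the loop, once per step, while the *_one_barrier variants call it a single time before the loop starts. Condensed from the surrounding context lines, the per-step exchange is:

    for (i = 0; i < num_procs; i++) {
      src = (rank - i + num_procs) % num_procs;  /* peer to receive from at step i */
      dst = (rank + i) % num_procs;              /* peer to send to at step i      */
      Colls::barrier(comm);                      /* per-step sync: *_mpi_barrier variants only */
      Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst, tag,
                        recv_ptr + src * recv_chunk, recv_count, recv_type, src, tag,
                        comm, &s);
    }
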
****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring_one_barrier(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring_one_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) @@ -44,7 +44,7 @@ smpi_coll_tuned_alltoallv_ring_one_barrier(void *send_buff, int *send_counts, in send_chunk = send_type->get_extent(); recv_chunk = recv_type->get_extent(); - smpi_mpi_barrier(comm); + Colls::barrier(comm); for (i = 0; i < num_procs; i++) { src = (rank - i + num_procs) % num_procs; dst = (rank + i) % num_procs; diff --git a/src/smpi/colls/alltoallv/alltoallv-ring.cpp b/src/smpi/colls/alltoallv/alltoallv-ring.cpp index 4ac63179aa..09efe30da5 100644 --- a/src/smpi/colls/alltoallv/alltoallv-ring.cpp +++ b/src/smpi/colls/alltoallv/alltoallv-ring.cpp @@ -26,7 +26,7 @@ ****************************************************************************/ int -smpi_coll_tuned_alltoallv_ring(void *send_buff, int *send_counts, int *send_disps, +Coll_alltoallv_ring::alltoallv(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, diff --git a/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp b/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp index bdcb4456d0..51bc6b6f04 100644 --- a/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp +++ b/src/smpi/colls/barrier/barrier-mvapich2-pair.cpp @@ -42,7 +42,7 @@ #include "../colls_private.h" #include "../coll_tuned_topo.h" -int smpi_coll_tuned_barrier_mvapich2_pair(MPI_Comm comm) +int Coll_barrier_mvapich2_pair::barrier(MPI_Comm comm) { int size, rank; diff --git a/src/smpi/colls/barrier/barrier-ompi.cpp b/src/smpi/colls/barrier/barrier-ompi.cpp index 2b08bdf772..f57b2307ed 100644 --- a/src/smpi/colls/barrier/barrier-ompi.cpp +++ b/src/smpi/colls/barrier/barrier-ompi.cpp @@ -43,7 +43,7 @@ * synchronous gurantee made by last ring of sends are synchronous * */ -int smpi_coll_tuned_barrier_ompi_doublering(MPI_Comm comm +int Coll_barrier_ompi_doublering::barrier(MPI_Comm comm ) { int rank, size; @@ -104,7 +104,7 @@ int smpi_coll_tuned_barrier_ompi_doublering(MPI_Comm comm * To make synchronous, uses sync sends and sync sendrecvs */ -int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm +int Coll_barrier_ompi_recursivedoubling::barrier(MPI_Comm comm ) { int rank, size, adjsize; @@ -179,7 +179,7 @@ int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm * To make synchronous, uses sync sends and sync sendrecvs */ -int smpi_coll_tuned_barrier_ompi_bruck(MPI_Comm comm +int Coll_barrier_ompi_bruck::barrier(MPI_Comm comm ) { int rank, size; @@ -212,7 +212,7 @@ int smpi_coll_tuned_barrier_ompi_bruck(MPI_Comm comm * To make synchronous, uses sync sends and sync sendrecvs */ /* special case for two processes */ -int smpi_coll_tuned_barrier_ompi_two_procs(MPI_Comm comm +int Coll_barrier_ompi_two_procs::barrier(MPI_Comm comm ) { int remote; @@ -245,7 +245,7 @@ int smpi_coll_tuned_barrier_ompi_two_procs(MPI_Comm comm /* copied function (with appropriate renaming) starts here */ -int smpi_coll_tuned_barrier_ompi_basic_linear(MPI_Comm comm) +int Coll_barrier_ompi_basic_linear::barrier(MPI_Comm comm) { int i; int size = comm->size(); @@ -297,7 +297,7 @@ int smpi_coll_tuned_barrier_ompi_basic_linear(MPI_Comm comm) * Another recursive doubling type algorithm, but in this case 
* we go up the tree and back down the tree. */ -int smpi_coll_tuned_barrier_ompi_tree(MPI_Comm comm) +int Coll_barrier_ompi_tree::barrier(MPI_Comm comm) { int rank, size, depth; int jump, partner; diff --git a/src/smpi/colls/bcast/bcast-NTSB.cpp b/src/smpi/colls/bcast/bcast-NTSB.cpp index 74225c12d3..e7746da2cd 100644 --- a/src/smpi/colls/bcast/bcast-NTSB.cpp +++ b/src/smpi/colls/bcast/bcast-NTSB.cpp @@ -8,7 +8,7 @@ int bcast_NTSB_segment_size_in_byte = 8192; -int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, +int Coll_bcast_NTSB::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { int tag = COLL_TAG_BCAST; @@ -177,7 +177,7 @@ int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_NTSB use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp b/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp index 78cb092044..ee37369f75 100644 --- a/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp +++ b/src/smpi/colls/bcast/bcast-NTSL-Isend.cpp @@ -11,7 +11,7 @@ static int bcast_NTSL_segment_size_in_byte = 8192; /* Non-topology-specific pipelined linear-bcast function 0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion */ -int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype, +int Coll_bcast_NTSL_Isend::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { int tag = COLL_TAG_BCAST; @@ -124,7 +124,7 @@ int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_NTSL_Isend_nb use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-NTSL.cpp b/src/smpi/colls/bcast/bcast-NTSL.cpp index 740efeb704..587e8da7d7 100644 --- a/src/smpi/colls/bcast/bcast-NTSL.cpp +++ b/src/smpi/colls/bcast/bcast-NTSL.cpp @@ -11,7 +11,7 @@ static int bcast_NTSL_segment_size_in_byte = 8192; /* Non-topology-specific pipelined linear-bcast function 0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion */ -int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, +int Coll_bcast_NTSL::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { int tag = COLL_TAG_BCAST; @@ -124,7 +124,7 @@ int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_NTSL use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-SMP-binary.cpp b/src/smpi/colls/bcast/bcast-SMP-binary.cpp index fa9cf451ae..02971613fd 100644 --- a/src/smpi/colls/bcast/bcast-SMP-binary.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-binary.cpp @@ -9,7 +9,7 @@ int 
bcast_SMP_binary_segment_byte = 8192; -int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, +int Coll_bcast_SMP_binary::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -33,7 +33,7 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, host_num_core = comm->get_intra_comm()->size(); }else{ //implementation buggy in this case - return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + return Coll_bcast_mpich::bcast( buf , count, datatype, root, comm); } @@ -222,7 +222,7 @@ int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count, // when count is not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-SMP-binomial.cpp b/src/smpi/colls/bcast/bcast-SMP-binomial.cpp index 5ac33b6cd5..6dfed6a42f 100644 --- a/src/smpi/colls/bcast/bcast-SMP-binomial.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-binomial.cpp @@ -6,7 +6,7 @@ #include "../colls_private.h" -int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, +int Coll_bcast_SMP_binomial::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -27,7 +27,7 @@ int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count, num_core = comm->get_intra_comm()->size(); }else{ //implementation buggy in this case - return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + return Coll_bcast_mpich::bcast( buf , count, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-SMP-linear.cpp b/src/smpi/colls/bcast/bcast-SMP-linear.cpp index 356c53ac2a..2b3aee4023 100644 --- a/src/smpi/colls/bcast/bcast-SMP-linear.cpp +++ b/src/smpi/colls/bcast/bcast-SMP-linear.cpp @@ -8,7 +8,7 @@ int bcast_SMP_linear_segment_byte = 8192; -int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, +int Coll_bcast_SMP_linear::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -32,7 +32,7 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, num_core = comm->get_intra_comm()->size(); }else{ //implementation buggy in this case - return smpi_coll_tuned_bcast_mpich( buf , count, datatype, + return Coll_bcast_mpich::bcast( buf , count, datatype, root, comm); } @@ -53,7 +53,7 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // call native when MPI communication size is too small if (size <= num_core) { XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast."); - smpi_mpi_bcast(buf, count, datatype, root, comm); + Coll_bcast_default::bcast(buf, count, datatype, root, comm); return MPI_SUCCESS; } // if root is not zero send to rank zero first @@ -170,7 +170,7 @@ int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count, // when count is not divisible by block size, use default BCAST for the remainder if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast."); - smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype, + Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype, root, comm); } diff --git a/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp b/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp index b070bef4e8..5baa7cf3e9 100644 --- a/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp +++ 
b/src/smpi/colls/bcast/bcast-arrival-pattern-aware-wait.cpp @@ -17,7 +17,7 @@ int bcast_arrival_pattern_aware_wait_segment_size_in_byte = 8192; #endif /* Non-topology-specific pipelined linear-bcast function */ -int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, +int Coll_bcast_arrival_pattern_aware_wait::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -247,7 +247,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_pattern_aware_wait use default MPI_bcast."); - smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp b/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp index 81eb7ae73f..9138d9290d 100644 --- a/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp +++ b/src/smpi/colls/bcast/bcast-arrival-pattern-aware.cpp @@ -12,7 +12,7 @@ static int bcast_NTSL_segment_size_in_byte = 8192; #define MAX_NODE 1024 /* Non-topology-specific pipelined linear-bcast function */ -int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, +int Coll_bcast_arrival_pattern_aware::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -357,7 +357,7 @@ int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_bcast_arrival_pattern_aware use default MPI_bcast."); - smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); + Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm); } return MPI_SUCCESS; diff --git a/src/smpi/colls/bcast/bcast-arrival-scatter.cpp b/src/smpi/colls/bcast/bcast-arrival-scatter.cpp index 2f0ab213c1..578b0f988a 100644 --- a/src/smpi/colls/bcast/bcast-arrival-scatter.cpp +++ b/src/smpi/colls/bcast/bcast-arrival-scatter.cpp @@ -15,7 +15,7 @@ #endif /* Non-topology-specific pipelined linear-bcast function */ -int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, +int Coll_bcast_arrival_scatter::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { @@ -59,7 +59,7 @@ int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count, /* message too small */ if (count < size) { XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast."); - smpi_mpi_bcast(buf, count, datatype, root, comm); + Colls::bcast(buf, count, datatype, root, comm); return MPI_SUCCESS; } diff --git a/src/smpi/colls/bcast/bcast-binomial-tree.cpp b/src/smpi/colls/bcast/bcast-binomial-tree.cpp index 0ab2538705..d73682e7fd 100644 --- a/src/smpi/colls/bcast/bcast-binomial-tree.cpp +++ b/src/smpi/colls/bcast/bcast-binomial-tree.cpp @@ -69,7 +69,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
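
Note (not part of the patch): the pipelined bcast implementations touched above (NTSB, NTSL, NTSL-Isend, SMP-binary, SMP-linear, arrival-pattern-aware, ...) share one idiom: the message is cut into fixed-size segments and, when count is not a multiple of the segment, the tail is delegated to the default broadcast, which is the call this commit rewrites from smpi_mpi_bcast() to Colls::bcast(). A hypothetical helper showing that shared tail handling (the segment/increment arithmetic is paraphrased from those files, not quoted):

    /* illustrative sketch only, not code from the patch */
    static void bcast_pipeline_tail(void* buf, int count, int segment, int increment,
                                    MPI_Datatype datatype, int root, MPI_Comm comm)
    {
      int pipe_length = count / segment;  /* full segments already sent through the pipeline */
      int remainder   = count % segment;  /* elements left over after the last full segment  */
      if ((remainder != 0) && (count > segment))
        Colls::bcast((char*)buf + pipe_length * increment, remainder, datatype, root, comm);
    }
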
****************************************************************************/ int -smpi_coll_tuned_bcast_binomial_tree(void *buff, int count, +Coll_bcast_binomial_tree::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp b/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp index bac37d28dd..3f283e91bd 100644 --- a/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp +++ b/src/smpi/colls/bcast/bcast-flattree-pipeline.cpp @@ -9,7 +9,7 @@ int flattree_segment_in_byte = 8192; int -smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count, +Coll_bcast_flattree_pipeline::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { @@ -25,7 +25,7 @@ smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count, int increment = segment * extent; if (pipe_length==0) { XBT_WARN("MPI_bcast_flattree_pipeline use default MPI_bcast_flattree."); - return smpi_coll_tuned_bcast_flattree(buff, count, data_type, root, comm); + return Coll_bcast_flattree::bcast(buff, count, data_type, root, comm); } rank = comm->rank(); num_procs = comm->size(); diff --git a/src/smpi/colls/bcast/bcast-flattree.cpp b/src/smpi/colls/bcast/bcast-flattree.cpp index 2a307f5f17..b77a1864fa 100644 --- a/src/smpi/colls/bcast/bcast-flattree.cpp +++ b/src/smpi/colls/bcast/bcast-flattree.cpp @@ -7,7 +7,7 @@ #include "../colls_private.h" int -smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type, +Coll_bcast_flattree::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { MPI_Request *req_ptr; diff --git a/src/smpi/colls/bcast/bcast-mvapich-smp.cpp b/src/smpi/colls/bcast/bcast-mvapich-smp.cpp index af641acf40..73e5f60d61 100644 --- a/src/smpi/colls/bcast/bcast-mvapich-smp.cpp +++ b/src/smpi/colls/bcast/bcast-mvapich-smp.cpp @@ -51,17 +51,17 @@ extern int mv2_intra_node_knomial_factor; extern int mv2_bcast_two_level_system_size; #define INTRA_NODE_ROOT 0 -#define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree -#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather -#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node -#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node +#define MPIR_Pipelined_Bcast_Zcpy_MV2 Coll_bcast_mpich::bcast +#define MPIR_Pipelined_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_binomial_MV2 Coll_bcast_binomial_tree::bcast +#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Bcast_scatter_doubling_allgather_MV2 Coll_bcast_scatter_rdb_allgather::bcast +#define MPIR_Bcast_scatter_ring_allgather_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Shmem_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_tune_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define MPIR_Bcast_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define 
MPIR_Knomial_Bcast_intra_node_MV2 Coll_bcast_mvapich2_knomial_intra_node::bcast +#define MPIR_Bcast_intra_MV2 Coll_bcast_mvapich2_intra_node::bcast extern int zcpy_knomial_factor; extern int mv2_pipelined_zcpy_knomial_factor; @@ -73,7 +73,7 @@ extern int mv2_intra_node_knomial_factor; #define mv2_bcast_large_msg 512*1024 #define mv2_knomial_intra_node_threshold 131072 #define mv2_scatter_rd_inter_leader_bcast 1 -int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer, +int Coll_bcast_mvapich2_inter_node::bcast(void *buffer, int count, MPI_Datatype datatype, int root, @@ -91,11 +91,11 @@ int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer, if (MV2_Bcast_function==NULL){ - MV2_Bcast_function=smpi_coll_tuned_bcast_mpich; + MV2_Bcast_function=Coll_bcast_mpich::bcast; } if (MV2_Bcast_intra_node_function==NULL){ - MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich; + MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast; } if(comm->get_leaders_comm()==MPI_COMM_NULL){ @@ -168,7 +168,7 @@ int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer, } -int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, +int Coll_bcast_mvapich2_knomial_intra_node::bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) @@ -180,11 +180,11 @@ int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, int src, dst, mask, relative_rank; int k; if (MV2_Bcast_function==NULL){ - MV2_Bcast_function=smpi_coll_tuned_bcast_mpich; + MV2_Bcast_function=Coll_bcast_mpich::bcast; } if (MV2_Bcast_intra_node_function==NULL){ - MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich; + MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast; } if(comm->get_leaders_comm()==MPI_COMM_NULL){ @@ -244,7 +244,7 @@ int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer, } -int smpi_coll_tuned_bcast_mvapich2_intra_node(void *buffer, +int Coll_bcast_mvapich2_intra_node::bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) @@ -261,11 +261,11 @@ int smpi_coll_tuned_bcast_mvapich2_intra_node(void *buffer, if (count == 0) return MPI_SUCCESS; if (MV2_Bcast_function==NULL){ - MV2_Bcast_function=smpi_coll_tuned_bcast_mpich; + MV2_Bcast_function=Coll_bcast_mpich::bcast; } if (MV2_Bcast_intra_node_function==NULL){ - MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich; + MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast; } if(comm->get_leaders_comm()==MPI_COMM_NULL){ diff --git a/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp b/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp index 7f7893b23c..fe0ef548b6 100644 --- a/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp +++ b/src/smpi/colls/bcast/bcast-ompi-pipeline.cpp @@ -10,7 +10,7 @@ #define MAXTREEFANOUT 32 -int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer, +int Coll_bcast_ompi_pipeline::bcast( void* buffer, int original_count, MPI_Datatype datatype, int root, diff --git a/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp b/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp index 332d6cdda5..e5dffb31dc 100644 --- a/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp +++ b/src/smpi/colls/bcast/bcast-ompi-split-bintree.cpp @@ -60,7 +60,7 @@ #define MAXTREEFANOUT 32 int -smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, +Coll_bcast_ompi_split_bintree::bcast ( void* buffer, int count, MPI_Datatype datatype, int root, @@ -134,7 +134,7 @@ smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer, (segsize > counts[0] * type_size) || (segsize > counts[1] * type_size) ) { /* call linear version 
here ! */ - return (smpi_coll_tuned_bcast_SMP_linear ( buffer, count, datatype, + return (Coll_bcast_SMP_linear::bcast ( buffer, count, datatype, root, comm)); } type_extent = datatype->get_extent(); diff --git a/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp b/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp index c9d2b20023..3649c038e5 100644 --- a/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp +++ b/src/smpi/colls/bcast/bcast-scatter-LR-allgather.cpp @@ -68,7 +68,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************/ int -smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count, +Coll_bcast_scatter_LR_allgather::bcast(void *buff, int count, MPI_Datatype data_type, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp b/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp index 7b98385f0b..9a94599edb 100644 --- a/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp +++ b/src/smpi/colls/bcast/bcast-scatter-rdb-allgather.cpp @@ -93,7 +93,7 @@ static int scatter_for_bcast( } int -smpi_coll_tuned_bcast_scatter_rdb_allgather ( +Coll_bcast_scatter_rdb_allgather::bcast ( void *buffer, int count, MPI_Datatype datatype, diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h index 816a0659d1..35fbe1959b 100644 --- a/src/smpi/colls/colls.h +++ b/src/smpi/colls/colls.h @@ -14,13 +14,21 @@ SG_BEGIN_DECL() + +namespace simgrid{ +namespace smpi{ + #define COLL_DESCRIPTION(cat, ret, args, name) \ {# name,\ # cat " " # name " collective",\ - (void*)smpi_coll_tuned_ ## cat ## _ ## name} + (void*) Coll_ ## cat ## _ ## name::cat } #define COLL_PROTO(cat, ret, args, name) \ - ret smpi_coll_tuned_ ## cat ## _ ## name(COLL_UNPAREN args); +class Coll_ ## cat ## _ ## name : public Coll_ ## cat { \ +public: \ +static ret cat (COLL_UNPAREN args); \ +}; + #define COLL_UNPAREN(...) 
__VA_ARGS__ #define COLL_APPLY(action, sig, name) action(sig, name) @@ -31,12 +39,14 @@ SG_BEGIN_DECL() /************* * GATHER * *************/ + #define COLL_GATHER_SIG gather, int, \ (void *send_buff, int send_count, MPI_Datatype send_type, \ void *recv_buff, int recv_count, MPI_Datatype recv_type, \ int root, MPI_Comm comm) #define COLL_GATHERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_GATHER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, ompi_basic_linear) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, ompi_binomial) COLL_sep \ @@ -47,8 +57,6 @@ COLL_APPLY(action, COLL_GATHER_SIG, mvapich2_two_level) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, impi) COLL_sep \ COLL_APPLY(action, COLL_GATHER_SIG, automatic) - - COLL_GATHERS(COLL_PROTO, COLL_NOsep) /************* @@ -60,6 +68,7 @@ COLL_GATHERS(COLL_PROTO, COLL_NOsep) MPI_Comm comm) #define COLL_ALLGATHERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLGATHER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, 2dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, 3dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, bruck) COLL_sep \ @@ -82,7 +91,6 @@ COLL_APPLY(action, COLL_ALLGATHER_SIG, mpich) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, impi) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, automatic) - COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep) /************** @@ -94,6 +102,7 @@ COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep) MPI_Datatype recv_type, MPI_Comm comm) #define COLL_ALLGATHERVS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLGATHERV_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, GB) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, pair) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, ring) COLL_sep \ @@ -117,6 +126,7 @@ COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep) MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) #define COLL_ALLREDUCES(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLREDUCE_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, lr) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab1) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab2) COLL_sep \ @@ -141,7 +151,6 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic) COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep) - /************ * ALLTOALL * ************/ @@ -151,6 +160,7 @@ COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep) MPI_Comm comm) #define COLL_ALLTOALLS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLTOALL_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALL_SIG, 2dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALL_SIG, 3dmesh) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALL_SIG, basic_linear) COLL_sep \ @@ -183,6 +193,7 @@ COLL_ALLTOALLS(COLL_PROTO, COLL_NOsep) MPI_Comm comm) #define COLL_ALLTOALLVS(action, COLL_sep) \ +COLL_APPLY(action, COLL_ALLTOALLV_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, bruck) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, pair) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, pair_light_barrier) COLL_sep \ @@ -209,6 +220,7 @@ COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep) int root, MPI_Comm comm) #define COLL_BCASTS(action, COLL_sep) \ +COLL_APPLY(action, COLL_BCAST_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_BCAST_SIG, arrival_pattern_aware) COLL_sep \ COLL_APPLY(action, COLL_BCAST_SIG, arrival_pattern_aware_wait) COLL_sep \ COLL_APPLY(action, COLL_BCAST_SIG, arrival_scatter) COLL_sep \ @@ -245,6 +257,7 @@ COLL_BCASTS(COLL_PROTO, COLL_NOsep) MPI_Op op, int root, MPI_Comm comm) #define 
COLL_REDUCES(action, COLL_sep) \ +COLL_APPLY(action, COLL_REDUCE_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SIG, arrival_pattern_aware) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SIG, binomial) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SIG, flat_tree) COLL_sep \ @@ -275,6 +288,7 @@ COLL_REDUCES(COLL_PROTO, COLL_NOsep) MPI_Datatype dtype,MPI_Op op,MPI_Comm comm) #define COLL_REDUCE_SCATTERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi_basic_recursivehalving) COLL_sep \ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi_ring) COLL_sep \ @@ -300,6 +314,7 @@ COLL_REDUCE_SCATTERS(COLL_PROTO, COLL_NOsep) int root, MPI_Comm comm) #define COLL_SCATTERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_SCATTER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_SCATTER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_SCATTER_SIG, ompi_basic_linear) COLL_sep \ COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial) COLL_sep \ @@ -319,6 +334,7 @@ COLL_SCATTERS(COLL_PROTO, COLL_NOsep) (MPI_Comm comm) #define COLL_BARRIERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_BARRIER_SIG, default) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_basic_linear) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_two_procs) COLL_sep \ @@ -334,6 +350,9 @@ COLL_APPLY(action, COLL_BARRIER_SIG, automatic) COLL_BARRIERS(COLL_PROTO, COLL_NOsep) +} +} + SG_END_DECL() #endif diff --git a/src/smpi/colls/gather/gather-mvapich.cpp b/src/smpi/colls/gather/gather-mvapich.cpp index 97e14116aa..9936d250fe 100644 --- a/src/smpi/colls/gather/gather-mvapich.cpp +++ b/src/smpi/colls/gather/gather-mvapich.cpp @@ -37,9 +37,13 @@ #include "../colls_private.h" -#define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear -#define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear -#define MPIR_Gather_intra smpi_coll_tuned_gather_mpich + + + + +#define MPIR_Gather_MV2_Direct Coll_gather_ompi_basic_linear::gather +#define MPIR_Gather_MV2_two_level_Direct Coll_gather_ompi_basic_linear::gather +#define MPIR_Gather_intra Coll_gather_mpich::gather typedef int (*MV2_Gather_function_ptr) (void *sendbuf, int sendcnt, MPI_Datatype sendtype, @@ -54,6 +58,10 @@ extern MV2_Gather_function_ptr MV2_Gather_intra_node_function; #define TEMP_BUF_HAS_NO_DATA (0) #define TEMP_BUF_HAS_DATA (1) + +namespace simgrid{ +namespace smpi{ + /* sendbuf - (in) sender's buffer * sendcnt - (in) sender's element count * sendtype - (in) sender's data type @@ -121,7 +129,8 @@ static int MPIR_pt_pt_intra_gather( void *sendbuf, int sendcnt, MPI_Datatype sen } -int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, + +int Coll_gather_mvapich2_two_level::gather(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -146,7 +155,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Gather_intra_node_function==NULL) - MV2_Gather_intra_node_function=smpi_coll_tuned_gather_mpich; + MV2_Gather_intra_node_function= Coll_gather_mpich::gather; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -324,7 +333,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, recvcnts[i] = node_sizes[i] * recvcnt; } } - smpi_mpi_gatherv(tmp_buf, + Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, recvbuf, recvcnts, displs, recvtype, 
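
Note (not part of the patch): the colls.h hunk a few chunks above is what generates all of the Coll_* classes used throughout this commit: COLL_PROTO now emits a class deriving from the per-category base instead of a free-function prototype, and COLL_DESCRIPTION stores a pointer to the class's static method. Hand-expanding one entry, COLL_APPLY(COLL_PROTO, COLL_BCAST_SIG, binomial_tree) should yield roughly:

    class Coll_bcast_binomial_tree : public Coll_bcast {
    public:
      static int bcast(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm);
    };

and the matching COLL_DESCRIPTION entry becomes {"binomial_tree", "bcast binomial_tree collective", (void*)Coll_bcast_binomial_tree::bcast}. This expansion is written out here for illustration; it is not text from the patch.
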
@@ -342,7 +351,7 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, recvcnts[i] = node_sizes[i] * nbytes; } } - smpi_mpi_gatherv(tmp_buf, local_size * nbytes, + Colls::gatherv(tmp_buf, local_size * nbytes, MPI_BYTE, leader_gather_buf, recvcnts, displs, MPI_BYTE, leader_root, leader_comm); @@ -410,4 +419,6 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf, return (mpi_errno); } +} +} diff --git a/src/smpi/colls/gather/gather-ompi.cpp b/src/smpi/colls/gather/gather-ompi.cpp index e5133fd925..ad99d3cae2 100644 --- a/src/smpi/colls/gather/gather-ompi.cpp +++ b/src/smpi/colls/gather/gather-ompi.cpp @@ -22,10 +22,11 @@ #include "../colls_private.h" #include "../coll_tuned_topo.h" -/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain, - * gather_intra_pipeline, segmentation? */ -int -smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, +namespace simgrid{ +namespace smpi{ + + +int Coll_gather_ompi_binomial::gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -194,8 +195,7 @@ smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, * Accepts: - same arguments as MPI_Gather(), first segment size * Returns: - MPI_SUCCESS or error code */ -int -smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, +int Coll_gather_ompi_linear_sync::gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -353,8 +353,7 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, * Accepts: - same arguments as MPI_Gather() * Returns: - MPI_SUCCESS or error code */ -int -smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, +int Coll_gather_ompi_basic_linear::gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -411,3 +410,6 @@ smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, return MPI_SUCCESS; } + +} +} diff --git a/src/smpi/colls/reduce/reduce-NTSL.cpp b/src/smpi/colls/reduce/reduce-NTSL.cpp index 22823123eb..167a2c326e 100644 --- a/src/smpi/colls/reduce/reduce-NTSL.cpp +++ b/src/smpi/colls/reduce/reduce-NTSL.cpp @@ -12,7 +12,7 @@ int reduce_NTSL_segment_size_in_byte = 8192; /* Non-topology-specific pipelined linear-bcast function 0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion */ -int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, +int Coll_reduce_NTSL::reduce(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { @@ -142,7 +142,7 @@ int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { XBT_WARN("MPI_reduce_NTSL use default MPI_reduce."); - smpi_mpi_reduce((char *)buf + (pipe_length * increment), + Coll_reduce_default::reduce((char *)buf + (pipe_length * increment), (char *)rbuf + (pipe_length * increment), remainder, datatype, op, root, comm); } diff --git a/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp b/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp index 8e68f07a94..277034fdce 100644 --- a/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp +++ b/src/smpi/colls/reduce/reduce-arrival-pattern-aware.cpp @@ -18,7 +18,7 @@ int reduce_arrival_pattern_aware_segment_size_in_byte = 8192; #endif /* Non-topology-specific pipelined linear-reduce function */ -int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, +int 
Coll_reduce_arrival_pattern_aware::reduce(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, @@ -344,7 +344,7 @@ int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf, /* when count is not divisible by block size, use default BCAST for the remainder */ if ((remainder != 0) && (count > segment)) { - smpi_mpi_reduce((char *)buf + (pipe_length * increment), + Coll_reduce_default::reduce((char *)buf + (pipe_length * increment), (char *)rbuf + (pipe_length * increment), remainder, datatype, op, root, comm); } diff --git a/src/smpi/colls/reduce/reduce-binomial.cpp b/src/smpi/colls/reduce/reduce-binomial.cpp index 05105b20f0..ac0b789fb4 100644 --- a/src/smpi/colls/reduce/reduce-binomial.cpp +++ b/src/smpi/colls/reduce/reduce-binomial.cpp @@ -8,7 +8,7 @@ //#include -int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count, +int Coll_reduce_binomial::reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/reduce/reduce-flat-tree.cpp b/src/smpi/colls/reduce/reduce-flat-tree.cpp index 66e5ebe1a1..76f3c5bc36 100644 --- a/src/smpi/colls/reduce/reduce-flat-tree.cpp +++ b/src/smpi/colls/reduce/reduce-flat-tree.cpp @@ -8,7 +8,7 @@ //#include int -smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count, +Coll_reduce_flat_tree::reduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp b/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp index c23f09451b..cbcc1e4b1d 100644 --- a/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp +++ b/src/smpi/colls/reduce/reduce-mvapich-knomial.cpp @@ -114,7 +114,7 @@ static int MPIR_Reduce_knomial_trace(int root, int reduce_knomial_factor, return 0; } -int smpi_coll_tuned_reduce_mvapich2_knomial ( +int Coll_reduce_mvapich2_knomial::reduce ( void *sendbuf, void *recvbuf, int count, diff --git a/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp b/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp index 4905ae0a68..d2c5b63820 100644 --- a/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp +++ b/src/smpi/colls/reduce/reduce-mvapich-two-level.cpp @@ -42,11 +42,11 @@ #define SHMEM_COLL_BLOCK_SIZE (local_size * mv2_g_shmem_coll_max_msg_size) #define mv2_use_knomial_reduce 1 -#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_binomial -#define MPIR_Reduce_redscat_gather_MV2 smpi_coll_tuned_reduce_scatter_gather -#define MPIR_Reduce_shmem_MV2 smpi_coll_tuned_reduce_ompi_basic_linear +#define MPIR_Reduce_inter_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_intra_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_binomial_MV2 Coll_reduce_binomial::reduce +#define MPIR_Reduce_redscat_gather_MV2 Coll_reduce_scatter_gather::reduce +#define MPIR_Reduce_shmem_MV2 Coll_reduce_ompi_basic_linear::reduce extern int (*MV2_Reduce_function)( void *sendbuf, void *recvbuf, @@ -72,7 +72,7 @@ static int (*reduce_fn)(void *sendbuf, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, +int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -92,9 +92,9 @@ int 
smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Reduce_function==NULL) - MV2_Reduce_function=smpi_coll_tuned_reduce_mpich; + MV2_Reduce_function=Coll_reduce_mpich::reduce; if(MV2_Reduce_intra_function==NULL) - MV2_Reduce_intra_function=smpi_coll_tuned_reduce_mpich; + MV2_Reduce_intra_function=Coll_reduce_mpich::reduce; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); diff --git a/src/smpi/colls/reduce/reduce-ompi.cpp b/src/smpi/colls/reduce/reduce-ompi.cpp index da244bc837..e7c465ca01 100644 --- a/src/smpi/colls/reduce/reduce-ompi.cpp +++ b/src/smpi/colls/reduce/reduce-ompi.cpp @@ -324,7 +324,7 @@ int smpi_coll_tuned_ompi_reduce_generic( void* sendbuf, void* recvbuf, int origi meaning that at least one datatype must fit in the segment ! */ -int smpi_coll_tuned_reduce_ompi_chain( void *sendbuf, void *recvbuf, int count, +int Coll_reduce_ompi_chain::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm @@ -352,7 +352,7 @@ int smpi_coll_tuned_reduce_ompi_chain( void *sendbuf, void *recvbuf, int count, } -int smpi_coll_tuned_reduce_ompi_pipeline( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_pipeline::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ) @@ -397,7 +397,7 @@ int smpi_coll_tuned_reduce_ompi_pipeline( void *sendbuf, void *recvbuf, segcount, 0); } -int smpi_coll_tuned_reduce_ompi_binary( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_binary::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) @@ -428,7 +428,7 @@ int smpi_coll_tuned_reduce_ompi_binary( void *sendbuf, void *recvbuf, segcount, 0); } -int smpi_coll_tuned_reduce_ompi_binomial( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_binomial::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) @@ -476,7 +476,7 @@ int smpi_coll_tuned_reduce_ompi_binomial( void *sendbuf, void *recvbuf, * Acecpts: same as MPI_Reduce() * Returns: MPI_SUCCESS or error code */ -int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, @@ -589,7 +589,7 @@ int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf, */ int -smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count, +Coll_reduce_ompi_basic_linear::reduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, diff --git a/src/smpi/colls/reduce/reduce-rab.cpp b/src/smpi/colls/reduce/reduce-rab.cpp index 3a0c2c084f..22f34a05d3 100644 --- a/src/smpi/colls/reduce/reduce-rab.cpp +++ b/src/smpi/colls/reduce/reduce-rab.cpp @@ -65,7 +65,7 @@ Exa.: size=13 ==> n=3, r=5 (i.e. 
size == 13 == 2**n+r == 2**3 + 5) - The algoritm needs for the execution of one mpi_coll_reduce_fun + The algoritm needs for the execution of one Colls::reduce - for r==0 exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d) @@ -207,7 +207,7 @@ Step 5.n) 7: { [(a+b)+(c+d)] + [(e+f)+(g+h)] } + { [(i+j)+k] + [l+m] } for H -For mpi_coll_allreduce_fun: +For Colls::allreduce: ------------------ Step 6.1) @@ -249,7 +249,7 @@ Step 7) on all nodes 0..12 -For mpi_coll_reduce_fun: +For Colls::reduce: --------------- Step 6.0) @@ -921,19 +921,19 @@ static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype } /* new_prot */ /*otherwise:*/ if (is_all) - return( mpi_coll_allreduce_fun(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) ); + return( Colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) ); else - return( mpi_coll_reduce_fun(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) ); + return( Colls::reduce(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) ); } #endif /*REDUCE_LIMITS*/ -int smpi_coll_tuned_reduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) +int Coll_reduce_rab::reduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, root, comm, 0) ); } -int smpi_coll_tuned_allreduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +int Coll_allreduce_rab::allreduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, -1, comm, 1) ); } diff --git a/src/smpi/colls/reduce/reduce-scatter-gather.cpp b/src/smpi/colls/reduce/reduce-scatter-gather.cpp index 105a490780..b8ac14235c 100644 --- a/src/smpi/colls/reduce/reduce-scatter-gather.cpp +++ b/src/smpi/colls/reduce/reduce-scatter-gather.cpp @@ -11,7 +11,7 @@ Author: MPICH */ -int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf, +int Coll_reduce_scatter_gather::reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { diff --git a/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp b/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp index f2387ee76b..e10e5b63a7 100644 --- a/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp +++ b/src/smpi/colls/reduce_scatter/reduce_scatter-mpich.cpp @@ -22,7 +22,7 @@ static inline int MPIU_Mirror_permutation(unsigned int x, int bits) } -int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int recvcounts[], +int Coll_reduce_scatter_mpich_pair::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int rank, comm_size, i; @@ -148,7 +148,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int } -int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, int recvcounts[], +int Coll_reduce_scatter_mpich_noncomm::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int mpi_errno = MPI_SUCCESS; @@ -266,7 +266,7 @@ int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, i -int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int recvcounts[], +int Coll_reduce_scatter_mpich_rdb::reduce_scatter(void *sendbuf, void *recvbuf, int 
recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { int rank, comm_size, i; diff --git a/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp b/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp index 0afa9b72cc..ed96d6f7f0 100644 --- a/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp +++ b/src/smpi/colls/reduce_scatter/reduce_scatter-ompi.cpp @@ -42,7 +42,7 @@ * Limitation: - Works only for commutative operations. */ int -smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, +Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, @@ -301,7 +301,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, /* - * smpi_coll_tuned_reduce_scatter_ompi_ring + * Coll_reduce_scatter_ompi_ring::reduce_scatter * * Function: Ring algorithm for reduce_scatter operation * Accepts: Same as MPI_Reduce_scatter() @@ -360,7 +360,7 @@ smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf, * */ int -smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts, +Coll_reduce_scatter_ompi_ring::reduce_scatter(void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm diff --git a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp index e76d75625d..d2cb88f90b 100644 --- a/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp +++ b/src/smpi/colls/scatter/scatter-mvapich-two-level.cpp @@ -36,14 +36,14 @@ */ #include "../colls_private.h" -#define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial -#define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear +#define MPIR_Scatter_MV2_Binomial Coll_scatter_ompi_binomial::scatter +#define MPIR_Scatter_MV2_Direct Coll_scatter_ompi_basic_linear::scatter extern int (*MV2_Scatter_intra_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); -int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, +int Coll_scatter_mvapich2_two_level_direct::scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -63,7 +63,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, MPI_Comm shmem_comm, leader_comm; //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Scatter_intra_function==NULL) - MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich; + MV2_Scatter_intra_function=Coll_scatter_mpich::scatter; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -158,7 +158,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, sendcnts[i] = node_sizes[i] * nbytes; } } - smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs, + Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { @@ -174,7 +174,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, sendcnts[i] = node_sizes[i] * sendcnt; } } - smpi_mpi_scatterv(sendbuf, sendcnts, displs, + Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); @@ -225,7 +225,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf, } -int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, +int Coll_scatter_mvapich2_two_level_binomial::scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -247,7 
+247,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, //if not set (use of the algo directly, without mvapich2 selector) if(MV2_Scatter_intra_function==NULL) - MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich; + MV2_Scatter_intra_function=Coll_scatter_mpich::scatter; if(comm->get_leaders_comm()==MPI_COMM_NULL){ comm->init_smp(); @@ -339,7 +339,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, sendcnts[i] = node_sizes[i] * nbytes; } } - smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs, + Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); } else { @@ -355,7 +355,7 @@ int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf, sendcnts[i] = node_sizes[i] * sendcnt; } } - smpi_mpi_scatterv(sendbuf, sendcnts, displs, + Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root, leader_comm); diff --git a/src/smpi/colls/scatter/scatter-ompi.cpp b/src/smpi/colls/scatter/scatter-ompi.cpp index 1aee0d7392..04bf1aefec 100644 --- a/src/smpi/colls/scatter/scatter-ompi.cpp +++ b/src/smpi/colls/scatter/scatter-ompi.cpp @@ -25,7 +25,7 @@ int -smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, +Coll_scatter_ompi_binomial::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, @@ -51,7 +51,7 @@ smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, rank = comm->rank(); XBT_DEBUG( - "smpi_coll_tuned_scatter_ompi_binomial rank %d", rank); + "Coll_scatter_ompi_binomial::scatter rank %d", rank); /* create the binomial tree */ @@ -191,7 +191,7 @@ smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount, +Coll_scatter_ompi_basic_linear::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, diff --git a/src/smpi/colls/smpi_automatic_selector.cpp b/src/smpi/colls/smpi_automatic_selector.cpp index f6e5240ffc..af06809efe 100644 --- a/src/smpi/colls/smpi_automatic_selector.cpp +++ b/src/smpi/colls/smpi_automatic_selector.cpp @@ -23,9 +23,8 @@ new_pajeNewEvent (SIMIX_get_clock(), PJ_container_get(cont_name), type, value);\ } - #define AUTOMATIC_COLL_BENCH(cat, ret, args, args2)\ - ret smpi_coll_tuned_ ## cat ## _ ## automatic(COLL_UNPAREN args)\ + ret Coll_ ## cat ## _automatic:: cat (COLL_UNPAREN args)\ {\ double time1, time2, time_min=DBL_MAX;\ int min_coll=-1, global_coll=-1;\ @@ -34,7 +33,7 @@ for (i = 0; mpi_coll_##cat##_description[i].name; i++){\ if(!strcmp(mpi_coll_##cat##_description[i].name, "automatic"))continue;\ if(!strcmp(mpi_coll_##cat##_description[i].name, "default"))continue;\ - smpi_mpi_barrier(comm);\ + Coll_barrier_default::barrier(comm);\ TRACE_AUTO_COLL(cat)\ time1 = SIMIX_get_clock();\ try {\ @@ -46,7 +45,7 @@ }\ time2 = SIMIX_get_clock();\ buf_out=time2-time1;\ - smpi_mpi_reduce((void*)&buf_out,(void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0,comm );\ + Coll_reduce_default::reduce((void*)&buf_out,(void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0,comm );\ if(time2-time1rank(), time_min);\ return (min_coll!=-1)?MPI_SUCCESS:MPI_ERR_INTERN;\ -}\ +} +namespace simgrid{ +namespace smpi{ COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm)); COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLREDUCE_SIG, (sbuf, rbuf, 
rcount, dtype, op, comm)); @@ -77,3 +78,6 @@ COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm)); COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm)); COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_BARRIER_SIG,(comm)); + +} +} diff --git a/src/smpi/colls/smpi_default_selector.cpp b/src/smpi/colls/smpi_default_selector.cpp new file mode 100644 index 0000000000..65911df302 --- /dev/null +++ b/src/smpi/colls/smpi_default_selector.cpp @@ -0,0 +1,349 @@ +/* selector with default/naive Simgrid algorithms. These should not be trusted for performance evaluations */ + +/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "colls_private.h" + +namespace simgrid{ +namespace smpi{ + +int Coll_bcast_default::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) +{ + return Coll_bcast_binomial_tree::bcast(buf, count, datatype, root, comm); +} + +int Coll_barrier_default::barrier(MPI_Comm comm) +{ + return Coll_barrier_ompi_basic_linear::barrier(comm); +} + + +int Coll_gather_default::gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) +{ + int system_tag = COLL_TAG_GATHER; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int rank = comm->rank(); + int size = comm->size(); + if(rank != root) { + // Send buffer to root + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); + } else { + recvtype->extent(&lb, &recvext); + // Local copy from root + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + root * recvcount * recvext, + recvcount, recvtype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if(src != root) { + requests[index] = Request::irecv_init(static_cast(recvbuf) + src * recvcount * recvext, recvcount, recvtype, + src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. 
+ Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int src = 0; src < size-1; src++) { + Request::unref(&requests[src]); + } + xbt_free(requests); + } + return MPI_SUCCESS; +} + +int Coll_reduce_scatter_default::reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, + MPI_Comm comm) +{ + int rank = comm->rank(); + + /* arbitrarily choose root as rank 0 */ + int size = comm->size(); + int count = 0; + int *displs = xbt_new(int, size); + for (int i = 0; i < size; i++) { + displs[i] = count; + count += recvcounts[i]; + } + void *tmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); + int ret = MPI_SUCCESS; + + ret = Coll_reduce_default::reduce(sendbuf, tmpbuf, count, datatype, op, 0, comm); + if(ret==MPI_SUCCESS) + ret = Colls::scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); + xbt_free(displs); + smpi_free_tmp_buffer(tmpbuf); + return ret; +} + + +int Coll_allgather_default::allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = COLL_TAG_ALLGATHER; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + MPI_Request *requests; + + int rank = comm->rank(); + int size = comm->size(); + // FIXME: check for errors + recvtype->extent(&lb, &recvext); + // Local copy from self + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + rank * recvcount * recvext, recvcount, + recvtype); + // Send/Recv buffers to/from others; + requests = xbt_new(MPI_Request, 2 * (size - 1)); + int index = 0; + for (int other = 0; other < size; other++) { + if(other != rank) { + requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm); + index++; + requests[index] = Request::irecv_init(static_cast(recvbuf) + other * recvcount * recvext, recvcount, recvtype, + other, system_tag, comm); + index++; + } + } + // Wait for completion of all comms. + Request::startall(2 * (size - 1), requests); + Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); + for (int other = 0; other < 2*(size-1); other++) { + Request::unref(&requests[other]); + } + xbt_free(requests); + return MPI_SUCCESS; +} + +int Coll_allgatherv_default::allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, + int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = COLL_TAG_ALLGATHERV; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int rank = comm->rank(); + int size = comm->size(); + recvtype->extent(&lb, &recvext); + // Local copy from self + Datatype::copy(sendbuf, sendcount, sendtype, + static_cast(recvbuf) + displs[rank] * recvext,recvcounts[rank], recvtype); + // Send buffers to others; + MPI_Request *requests = xbt_new(MPI_Request, 2 * (size - 1)); + int index = 0; + for (int other = 0; other < size; other++) { + if(other != rank) { + requests[index] = + Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm); + index++; + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[other] * recvext, recvcounts[other], + recvtype, other, system_tag, comm); + index++; + } + } + // Wait for completion of all comms. 
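
Note (not part of the patch): Coll_reduce_scatter_default, added just above, is a pure composition: it first reduces all count = sum(recvcounts) elements onto rank 0 with Coll_reduce_default::reduce, then scatters them back with Colls::scatterv using the displacements built in the loop. As a small worked example (values chosen only for illustration): with three ranks and recvcounts = {2, 3, 1}, the loop yields displs = {0, 2, 5} and count = 6, so rank 0 accumulates six elements into tmpbuf and then ships elements [0,2) to rank 0, [2,5) to rank 1 and [5,6) to rank 2.
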
+ Request::startall(2 * (size - 1), requests); + Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); + for (int other = 0; other < 2*(size-1); other++) { + Request::unref(&requests[other]); + } + xbt_free(requests); + return MPI_SUCCESS; +} + +int Coll_scatter_default::scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) +{ + int system_tag = COLL_TAG_SCATTER; + MPI_Aint lb = 0; + MPI_Aint sendext = 0; + MPI_Request *requests; + + int rank = comm->rank(); + int size = comm->size(); + if(rank != root) { + // Recv buffer from root + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + } else { + sendtype->extent(&lb, &sendext); + // Local copy from root + if(recvbuf!=MPI_IN_PLACE){ + Datatype::copy(static_cast(sendbuf) + root * sendcount * sendext, + sendcount, sendtype, recvbuf, recvcount, recvtype); + } + // Send buffers to receivers + requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for(int dst = 0; dst < size; dst++) { + if(dst != root) { + requests[index] = Request::isend_init(static_cast(sendbuf) + dst * sendcount * sendext, sendcount, sendtype, + dst, system_tag, comm); + index++; + } + } + // Wait for completion of isend's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int dst = 0; dst < size-1; dst++) { + Request::unref(&requests[dst]); + } + xbt_free(requests); + } + return MPI_SUCCESS; +} + + + +int Coll_reduce_default::reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, + MPI_Comm comm) +{ + int system_tag = COLL_TAG_REDUCE; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; + + char* sendtmpbuf = static_cast(sendbuf); + + int rank = comm->rank(); + int size = comm->size(); + //non commutative case, use a working algo from openmpi + if(op != MPI_OP_NULL && !op->is_commutative()){ + return Coll_reduce_ompi_basic_linear::reduce(sendtmpbuf, recvbuf, count, datatype, op, root, comm); + } + + if( sendbuf == MPI_IN_PLACE ) { + sendtmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); + Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); + } + + if(rank != root) { + // Send buffer to root + Request::send(sendtmpbuf, count, datatype, root, system_tag, comm); + } else { + datatype->extent(&lb, &dataext); + // Local copy from root + if (sendtmpbuf != nullptr && recvbuf != nullptr) + Datatype::copy(sendtmpbuf, count, datatype, recvbuf, count, datatype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if (src != root) { + if (!smpi_process_get_replaying()) + tmpbufs[index] = xbt_malloc(count * dataext); + else + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = + Request::irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. 
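The completion loop that follows accumulates each peer's contribution into recvbuf as soon as its receive finishes (waitany plus op->apply); the non-commutative case was already diverted to the Open MPI basic-linear variant at the top of the function, so arrival order does not matter here. In plain MPI the same pattern can be written with MPI_Waitany and MPI_Reduce_local, as in this sketch (names are illustrative):

#include <mpi.h>

// Fold incoming contributions into the result in completion order.
// MPI_Reduce_local stands in for SMPI's op->apply(); fine for commutative ops.
static void accumulate_as_they_arrive(void* recvbuf, int count, MPI_Datatype dtype,
                                      MPI_Op op, int nb_peers, MPI_Request* reqs,
                                      void** tmpbufs)
{
  for (int done = 0; done < nb_peers; done++) {
    int idx;
    MPI_Waitany(nb_peers, reqs, &idx, MPI_STATUS_IGNORE);
    if (idx == MPI_UNDEFINED)
      break;                              // no active request left
    MPI_Reduce_local(tmpbufs[idx], recvbuf, count, dtype, op); // recvbuf = tmpbufs[idx] op recvbuf
  }
}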
+ Request::startall(size - 1, requests); + for (int src = 0; src < size - 1; src++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + XBT_DEBUG("finished waiting any request with index %d", index); + if(index == MPI_UNDEFINED) { + break; + }else{ + Request::unref(&requests[index]); + } + if(op) /* op can be MPI_OP_NULL that does nothing */ + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); + } + for(index = 0; index < size - 1; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + + } + if( sendbuf == MPI_IN_PLACE ) { + smpi_free_tmp_buffer(sendtmpbuf); + } + return MPI_SUCCESS; +} + +int Coll_allreduce_default::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) +{ + int ret; + ret = Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); + if(ret==MPI_SUCCESS) + ret = Colls::bcast(recvbuf, count, datatype, 0, comm); + return ret; +} + +int Coll_alltoall_default::alltoall( void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, MPI_Comm comm) +{ + return Coll_alltoall_ompi::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); +} + + + +int Coll_alltoallv_default::alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, + void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) +{ + int system_tag = 889; + int i; + int count; + MPI_Aint lb = 0; + MPI_Aint sendext = 0; + MPI_Aint recvext = 0; + MPI_Request *requests; + + /* Initialize. */ + int rank = comm->rank(); + int size = comm->size(); + XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank); + sendtype->extent(&lb, &sendext); + recvtype->extent(&lb, &recvext); + /* Local copy from self */ + int err = Datatype::copy(static_cast(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype, + static_cast(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype); + if (err == MPI_SUCCESS && size > 1) { + /* Initiate all send/recv to/from others. */ + requests = xbt_new(MPI_Request, 2 * (size - 1)); + count = 0; + /* Create all receives that will be posted first */ + for (i = 0; i < size; ++i) { + if (i != rank && recvcounts[i] != 0) { + requests[count] = Request::irecv_init(static_cast(recvbuf) + recvdisps[i] * recvext, + recvcounts[i], recvtype, i, system_tag, comm); + count++; + }else{ + XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]); + } + } + /* Now create all sends */ + for (i = 0; i < size; ++i) { + if (i != rank && sendcounts[i] != 0) { + requests[count] = Request::isend_init(static_cast(sendbuf) + senddisps[i] * sendext, + sendcounts[i], sendtype, i, system_tag, comm); + count++; + }else{ + XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]); + } + } + /* Wait for them all. 
*/ + Request::startall(count, requests); + XBT_DEBUG("<%d> wait for %d requests", rank, count); + Request::waitall(count, requests, MPI_STATUS_IGNORE); + for(i = 0; i < count; i++) { + if(requests[i]!=MPI_REQUEST_NULL) + Request::unref(&requests[i]); + } + xbt_free(requests); + } + return err; +} + +} +} + diff --git a/src/smpi/colls/smpi_intel_mpi_selector.cpp b/src/smpi/colls/smpi_intel_mpi_selector.cpp index 574aeaab0e..350f2a56ce 100644 --- a/src/smpi/colls/smpi_intel_mpi_selector.cpp +++ b/src/smpi/colls/smpi_intel_mpi_selector.cpp @@ -53,14 +53,14 @@ int (*intel_allreduce_functions_table[])(void *sendbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) ={ - smpi_coll_tuned_allreduce_rdb, - smpi_coll_tuned_allreduce_rab1, - smpi_coll_tuned_allreduce_redbcast, - smpi_coll_tuned_allreduce_mvapich2_two_level, - smpi_coll_tuned_allreduce_smp_binomial, - smpi_coll_tuned_allreduce_mvapich2_two_level, - smpi_coll_tuned_allreduce_ompi_ring_segmented, - smpi_coll_tuned_allreduce_ompi_ring_segmented + Coll_allreduce_rdb::allreduce, + Coll_allreduce_rab1::allreduce, + Coll_allreduce_redbcast::allreduce, + Coll_allreduce_mvapich2_two_level::allreduce, + Coll_allreduce_smp_binomial::allreduce, + Coll_allreduce_mvapich2_two_level::allreduce, + Coll_allreduce_ompi_ring_segmented::allreduce, + Coll_allreduce_ompi_ring_segmented::allreduce }; intel_tuning_table_element intel_allreduce_table[] = @@ -637,10 +637,10 @@ int (*intel_alltoall_functions_table[])(void *sbuf, int scount, void* rbuf, int rcount, MPI_Datatype rdtype, MPI_Comm comm) ={ - smpi_coll_tuned_alltoall_bruck, - smpi_coll_tuned_alltoall_mvapich2_scatter_dest, - smpi_coll_tuned_alltoall_pair, - smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient) + Coll_alltoall_bruck::alltoall, + Coll_alltoall_mvapich2_scatter_dest::alltoall, + Coll_alltoall_pair::alltoall, + Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? 
(and super efficient) }; /*I_MPI_ADJUST_BARRIER @@ -657,15 +657,15 @@ MPI_Barrier */ static int intel_barrier_gather_scatter(MPI_Comm comm){ //our default barrier performs a antibcast/bcast - smpi_mpi_barrier(comm); + Coll_barrier_default::barrier(comm); return MPI_SUCCESS; } int (*intel_barrier_functions_table[])(MPI_Comm comm) ={ - smpi_coll_tuned_barrier_ompi_basic_linear, - smpi_coll_tuned_barrier_ompi_recursivedoubling, - smpi_coll_tuned_barrier_ompi_basic_linear, - smpi_coll_tuned_barrier_ompi_recursivedoubling, + Coll_barrier_ompi_basic_linear::barrier, + Coll_barrier_ompi_recursivedoubling::barrier, + Coll_barrier_ompi_basic_linear::barrier, + Coll_barrier_ompi_recursivedoubling::barrier, intel_barrier_gather_scatter, intel_barrier_gather_scatter }; @@ -799,15 +799,15 @@ MPI_Bcast int (*intel_bcast_functions_table[])(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm) ={ - smpi_coll_tuned_bcast_binomial_tree, - //smpi_coll_tuned_bcast_scatter_rdb_allgather, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_SMP_binomial, - //smpi_coll_tuned_bcast_scatter_rdb_allgather, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_SMP_linear, - smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo' + Coll_bcast_binomial_tree::bcast, + //Coll_bcast_scatter_rdb_allgather::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_SMP_binomial::bcast, + //Coll_bcast_scatter_rdb_allgather::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_SMP_linear::bcast, + Coll_bcast_mvapich2::bcast,//we don't know shumilin's algo' }; intel_tuning_table_element intel_bcast_table[] = @@ -969,12 +969,12 @@ int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) ={ - smpi_coll_tuned_reduce_mvapich2, - smpi_coll_tuned_reduce_binomial, - smpi_coll_tuned_reduce_mvapich2, - smpi_coll_tuned_reduce_mvapich2_two_level, - smpi_coll_tuned_reduce_rab, - smpi_coll_tuned_reduce_rab + Coll_reduce_mvapich2::reduce, + Coll_reduce_binomial::reduce, + Coll_reduce_mvapich2::reduce, + Coll_reduce_mvapich2_two_level::reduce, + Coll_reduce_rab::reduce, + Coll_reduce_rab::reduce }; intel_tuning_table_element intel_reduce_table[] = @@ -1059,7 +1059,7 @@ static int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf, MPI_Op op, MPI_Comm comm) { - smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); + Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); return MPI_SUCCESS; } @@ -1070,9 +1070,9 @@ static int intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf, MPI_Comm comm) { if(op==MPI_OP_NULL || op->is_commutative()) - return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm); + return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); else - return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm); + return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); } int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf, @@ -1082,8 +1082,8 @@ int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf, MPI_Comm comm ) ={ intel_reduce_scatter_recursivehalving, - smpi_coll_tuned_reduce_scatter_mpich_pair, - smpi_coll_tuned_reduce_scatter_mpich_rdb, + Coll_reduce_scatter_mpich_pair::reduce_scatter, + Coll_reduce_scatter_mpich_rdb::reduce_scatter, 
intel_reduce_scatter_reduce_scatterv, intel_reduce_scatter_reduce_scatterv }; @@ -1491,10 +1491,10 @@ int (*intel_allgather_functions_table[])(void *sbuf, int scount, MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_allgather_rdb, - smpi_coll_tuned_allgather_bruck, - smpi_coll_tuned_allgather_ring, - smpi_coll_tuned_allgather_GB + Coll_allgather_rdb::allgather, + Coll_allgather_bruck::allgather, + Coll_allgather_ring::allgather, + Coll_allgather_GB::allgather }; intel_tuning_table_element intel_allgather_table[] = @@ -1661,10 +1661,10 @@ int (*intel_allgatherv_functions_table[])(void *sbuf, int scount, MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_allgatherv_mpich_rdb, - smpi_coll_tuned_allgatherv_ompi_bruck, - smpi_coll_tuned_allgatherv_ring, - smpi_coll_tuned_allgatherv_GB + Coll_allgatherv_mpich_rdb::allgatherv, + Coll_allgatherv_ompi_bruck::allgatherv, + Coll_allgatherv_ring::allgatherv, + Coll_allgatherv_GB::allgatherv }; intel_tuning_table_element intel_allgatherv_table[] = @@ -1872,9 +1872,9 @@ int (*intel_gather_functions_table[])(void *sbuf, int scount, int root, MPI_Comm comm ) ={ - smpi_coll_tuned_gather_ompi_binomial, - smpi_coll_tuned_gather_ompi_binomial, - smpi_coll_tuned_gather_mvapich2 + Coll_gather_ompi_binomial::gather, + Coll_gather_ompi_binomial::gather, + Coll_gather_mvapich2::gather }; intel_tuning_table_element intel_gather_table[] = @@ -1975,9 +1975,9 @@ int (*intel_scatter_functions_table[])(void *sbuf, int scount, MPI_Datatype rdtype, int root, MPI_Comm comm ) ={ - smpi_coll_tuned_scatter_ompi_binomial, - smpi_coll_tuned_scatter_ompi_binomial, - smpi_coll_tuned_scatter_mvapich2 + Coll_scatter_ompi_binomial::scatter, + Coll_scatter_ompi_binomial::scatter, + Coll_scatter_mvapich2::scatter }; intel_tuning_table_element intel_scatter_table[] = @@ -2149,8 +2149,8 @@ int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps, MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_alltoallv_ompi_basic_linear, - smpi_coll_tuned_alltoallv_bruck + Coll_alltoallv_ompi_basic_linear::alltoallv, + Coll_alltoallv_bruck::alltoallv }; intel_tuning_table_element intel_alltoallv_table[] = @@ -2261,7 +2261,7 @@ intel_tuning_table_element intel_alltoallv_table[] = size_t block_dsize = 1; #define IMPI_COLL_SELECT(cat, ret, args, args2)\ -ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\ +ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\ {\ int comm_size = comm->size();\ int i =0;\ @@ -2289,6 +2289,9 @@ ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\ args2);\ } +namespace simgrid{ +namespace smpi{ + COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm)); @@ -2301,3 +2304,5 @@ COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype, COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm)); +} +} diff --git a/src/smpi/colls/smpi_mpich_selector.cpp b/src/smpi/colls/smpi_mpich_selector.cpp index f29b79c500..8a9ffcd40e 100644 --- a/src/smpi/colls/smpi_mpich_selector.cpp +++ b/src/smpi/colls/smpi_mpich_selector.cpp @@ -56,7 +56,7 @@ End Algorithm: MPI_Allreduce */ -int 
smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count, +int Coll_allreduce_mpich::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { size_t dsize, block_dsize; @@ -74,12 +74,12 @@ int smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count, if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) { //for long messages - return (smpi_coll_tuned_allreduce_rab_rdb (sbuf, rbuf, + return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm)); }else { //for short ones and count < pof2 - return (smpi_coll_tuned_allreduce_rdb (sbuf, rbuf, + return (Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm)); } @@ -132,7 +132,7 @@ int smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count, End Algorithm: MPI_Alltoall */ -int smpi_coll_tuned_alltoall_mpich( void *sbuf, int scount, +int Coll_alltoall_mpich::alltoall( void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -162,26 +162,26 @@ int smpi_coll_tuned_alltoall_mpich( void *sbuf, int scount, block_dsize = dsize * scount; if ((block_dsize < short_size) && (communicator_size >= 8)) { - return smpi_coll_tuned_alltoall_bruck(sbuf, scount, sdtype, + return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (block_dsize < medium_size) { - return smpi_coll_tuned_alltoall_basic_linear(sbuf, scount, sdtype, + return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); }else if (communicator_size%2){ - return smpi_coll_tuned_alltoall_ring(sbuf, scount, sdtype, + return Coll_alltoall_ring::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_alltoall_ring (sbuf, scount, sdtype, + return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } -int smpi_coll_tuned_alltoallv_mpich(void *sbuf, int *scounts, int *sdisps, +int Coll_alltoallv_mpich::alltoallv(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, @@ -189,15 +189,15 @@ int smpi_coll_tuned_alltoallv_mpich(void *sbuf, int *scounts, int *sdisps, ) { /* For starters, just keep the original algorithm. */ - return smpi_coll_tuned_alltoallv_bruck(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_bruck::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } -int smpi_coll_tuned_barrier_mpich(MPI_Comm comm) +int Coll_barrier_mpich::barrier(MPI_Comm comm) { - return smpi_coll_tuned_barrier_ompi_bruck(comm); + return Coll_barrier_ompi_bruck::barrier(comm); } /* This is the default implementation of broadcast. 
The algorithm is: @@ -243,7 +243,7 @@ int smpi_coll_tuned_barrier_mpich(MPI_Comm comm) */ -int smpi_coll_tuned_bcast_mpich(void *buff, int count, +int Coll_bcast_mpich::bcast(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm ) @@ -267,17 +267,17 @@ int smpi_coll_tuned_bcast_mpich(void *buff, int count, single-element broadcasts */ if ((message_size < small_message_size) || (communicator_size <= 8)) { /* Binomial without segmentation */ - return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype, + return Coll_bcast_binomial_tree::bcast (buff, count, datatype, root, comm); } else if (message_size < intermediate_message_size && !(communicator_size%2)) { // SplittedBinary with 1KB segments - return smpi_coll_tuned_bcast_scatter_rdb_allgather(buff, count, datatype, + return Coll_bcast_scatter_rdb_allgather::bcast(buff, count, datatype, root, comm); } //Handle large message sizes - return smpi_coll_tuned_bcast_scatter_LR_allgather (buff, count, datatype, + return Coll_bcast_scatter_LR_allgather::bcast (buff, count, datatype, root, comm); } @@ -339,7 +339,7 @@ int smpi_coll_tuned_bcast_mpich(void *buff, int count, */ -int smpi_coll_tuned_reduce_mpich( void *sendbuf, void *recvbuf, +int Coll_reduce_mpich::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm @@ -360,9 +360,9 @@ int smpi_coll_tuned_reduce_mpich( void *sendbuf, void *recvbuf, if ((count < pof2) || (message_size < 2048) || (op!=MPI_OP_NULL && !op->is_commutative())) { - return smpi_coll_tuned_reduce_binomial (sendbuf, recvbuf, count, datatype, op, root, comm); + return Coll_reduce_binomial::reduce (sendbuf, recvbuf, count, datatype, op, root, comm); } - return smpi_coll_tuned_reduce_scatter_gather(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } @@ -416,7 +416,7 @@ int smpi_coll_tuned_reduce_mpich( void *sendbuf, void *recvbuf, */ -int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, +int Coll_reduce_scatter_mpich::reduce_scatter( void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, MPI_Op op, @@ -428,7 +428,7 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, if(sbuf==rbuf)sbuf=MPI_IN_PLACE; //restore MPI_IN_PLACE as these algorithms handle it - XBT_DEBUG("smpi_coll_tuned_reduce_scatter_mpich"); + XBT_DEBUG("Coll_reduce_scatter_mpich::reduce"); comm_size = comm->size(); // We need data size for decision function @@ -438,7 +438,7 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, } if( (op==MPI_OP_NULL || op->is_commutative()) && total_message_size > 524288) { - return smpi_coll_tuned_reduce_scatter_mpich_pair (sbuf, rbuf, rcounts, + return Coll_reduce_scatter_mpich_pair::reduce_scatter (sbuf, rbuf, rcounts, dtype, op, comm); }else if ((op!=MPI_OP_NULL && !op->is_commutative())) { @@ -456,12 +456,12 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, if (pof2 == comm_size && is_block_regular) { /* noncommutative, pof2 size, and block regular */ - return smpi_coll_tuned_reduce_scatter_mpich_noncomm(sbuf, rbuf, rcounts, dtype, op, comm); + return Coll_reduce_scatter_mpich_noncomm::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); } - return smpi_coll_tuned_reduce_scatter_mpich_rdb(sbuf, rbuf, rcounts, dtype, op, comm); + return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); }else{ - return 
smpi_coll_tuned_reduce_scatter_mpich_rdb(sbuf, rbuf, rcounts, dtype, op, comm); + return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); } } @@ -511,7 +511,7 @@ int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf, End Algorithm: MPI_Allgather */ -int smpi_coll_tuned_allgather_mpich(void *sbuf, int scount, +int Coll_allgather_mpich::allgather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -539,15 +539,15 @@ int smpi_coll_tuned_allgather_mpich(void *sbuf, int scount, - for everything else use ring. */ if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, + return Coll_allgather_rdb::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (total_dsize <= 81920) { - return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, + return Coll_allgather_bruck::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } @@ -589,7 +589,7 @@ int smpi_coll_tuned_allgather_mpich(void *sbuf, int scount, End Algorithm: MPI_Allgatherv */ -int smpi_coll_tuned_allgatherv_mpich(void *sbuf, int scount, +int Coll_allgatherv_mpich::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int *rcounts, int *rdispls, @@ -612,15 +612,15 @@ int smpi_coll_tuned_allgatherv_mpich(void *sbuf, int scount, for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1); if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return smpi_coll_tuned_allgatherv_mpich_rdb(sbuf, scount, sdtype, + return Coll_allgatherv_mpich_rdb::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } else if (total_dsize <= 81920) { - return smpi_coll_tuned_allgatherv_ompi_bruck(sbuf, scount, sdtype, + return Coll_allgatherv_ompi_bruck::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } - return smpi_coll_tuned_allgatherv_mpich_ring(sbuf, scount, sdtype, + return Coll_allgatherv_mpich_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } @@ -646,8 +646,10 @@ int smpi_coll_tuned_allgatherv_mpich(void *sbuf, int scount, End Algorithm: MPI_Gather */ +namespace simgrid{ +namespace smpi{ -int smpi_coll_tuned_gather_mpich(void *sbuf, int scount, +int Coll_gather_mpich::gather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -655,11 +657,14 @@ int smpi_coll_tuned_gather_mpich(void *sbuf, int scount, MPI_Comm comm ) { - return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype, + return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } +} +} + /* This is the default implementation of scatter. 
The algorithm is: Algorithm: MPI_Scatter @@ -682,7 +687,7 @@ int smpi_coll_tuned_gather_mpich(void *sbuf, int scount, */ -int smpi_coll_tuned_scatter_mpich(void *sbuf, int scount, +int Coll_scatter_mpich::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -694,7 +699,7 @@ int smpi_coll_tuned_scatter_mpich(void *sbuf, int scount, scount=rcount; sdtype=rdtype; } - int ret= smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype, + int ret= Coll_scatter_ompi_binomial::scatter (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); if(comm->rank()!=root){ diff --git a/src/smpi/colls/smpi_mvapich2_selector.cpp b/src/smpi/colls/smpi_mvapich2_selector.cpp index 1d6f4c430e..bda8db4287 100644 --- a/src/smpi/colls/smpi_mvapich2_selector.cpp +++ b/src/smpi/colls/smpi_mvapich2_selector.cpp @@ -11,8 +11,11 @@ #include "smpi_mvapich2_selector_stampede.h" +namespace simgrid{ +namespace smpi{ -int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount, + +int Coll_alltoall_mvapich2::alltoall( void *sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, @@ -82,7 +85,7 @@ int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount, return (mpi_errno); } -int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype, +int Coll_allgather_mvapich2::allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { @@ -149,9 +152,9 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp } /* Set inter-leader pt */ - MV2_Allgather_function = + MV2_Allgatherction = mv2_allgather_thresholds_table[conf_index][range].inter_leader[range_threshold]. 
- MV2_pt_Allgather_function; + MV2_pt_Allgatherction; is_two_level = mv2_allgather_thresholds_table[conf_index][range].two_level[range_threshold]; @@ -163,7 +166,7 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp recvbuf, recvcount, recvtype, comm); }else{ - mpi_errno = smpi_coll_tuned_allgather_mpich(sendbuf, sendcount, sendtype, + mpi_errno = Coll_allgather_mpich::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); } @@ -172,10 +175,10 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp recvbuf, recvcount, recvtype, comm); } - } else if(MV2_Allgather_function == &MPIR_Allgather_Bruck_MV2 - || MV2_Allgather_function == &MPIR_Allgather_RD_MV2 - || MV2_Allgather_function == &MPIR_Allgather_Ring_MV2) { - mpi_errno = MV2_Allgather_function(sendbuf, sendcount, sendtype, + } else if(MV2_Allgatherction == &MPIR_Allgather_Bruck_MV2 + || MV2_Allgatherction == &MPIR_Allgather_RD_MV2 + || MV2_Allgatherction == &MPIR_Allgather_Ring_MV2) { + mpi_errno = MV2_Allgatherction(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); }else{ @@ -185,8 +188,7 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp return mpi_errno; } - -int smpi_coll_tuned_gather_mvapich2(void *sendbuf, +int Coll_gather_mvapich2::gather(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -255,7 +257,7 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf, } else { // Indeed, direct (non SMP-aware)gather is MPICH one - mpi_errno = smpi_coll_tuned_gather_mpich(sendbuf, sendcnt, sendtype, + mpi_errno = Coll_gather_mpich::gather(sendbuf, sendcnt, sendtype, recvbuf, recvcnt, recvtype, root, comm); } @@ -263,8 +265,7 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf, return mpi_errno; } - -int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype, +int Coll_allgatherv_mvapich2::allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm ) { @@ -330,7 +331,7 @@ int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Dataty -int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, +int Coll_allreduce_mvapich2::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -378,10 +379,10 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, if(mv2_allreduce_thresholds_table[range].mcast_enabled != 1){ while ((range_threshold < (mv2_allreduce_thresholds_table[range].size_inter_table - 1)) && ((mv2_allreduce_thresholds_table[range]. - inter_leader[range_threshold].MV2_pt_Allreduce_function + inter_leader[range_threshold].MV2_pt_Allreducection == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2) || (mv2_allreduce_thresholds_table[range]. 
- inter_leader[range_threshold].MV2_pt_Allreduce_function + inter_leader[range_threshold].MV2_pt_Allreducection == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2) )) { range_threshold++; @@ -406,20 +407,20 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, range_threshold_intra++; } - MV2_Allreduce_function = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold] - .MV2_pt_Allreduce_function; + MV2_Allreducection = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold] + .MV2_pt_Allreducection; MV2_Allreduce_intra_function = mv2_allreduce_thresholds_table[range].intra_node[range_threshold_intra] - .MV2_pt_Allreduce_function; + .MV2_pt_Allreducection; /* check if mcast is ready, otherwise replace mcast with other algorithm */ - if((MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)|| - (MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){ + if((MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)|| + (MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){ { - MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2; + MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2; } if(is_two_level != 1) { - MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2; + MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2; } } @@ -436,7 +437,7 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, datatype, op, comm); } } else { - mpi_errno = MV2_Allreduce_function(sendbuf, recvbuf, count, + mpi_errno = MV2_Allreducection(sendbuf, recvbuf, count, datatype, op, comm); } } @@ -449,7 +450,7 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, } -int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps, +int Coll_alltoallv_mvapich2::alltoallv(void *sbuf, int *scounts, int *sdisps, MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, @@ -458,25 +459,25 @@ int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps, { if (sbuf == MPI_IN_PLACE) { - return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } else /* For starters, just keep the original algorithm. 
*/ - return smpi_coll_tuned_alltoallv_ring(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_ring::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } -int smpi_coll_tuned_barrier_mvapich2(MPI_Comm comm) +int Coll_barrier_mvapich2::barrier(MPI_Comm comm) { - return smpi_coll_tuned_barrier_mvapich2_pair(comm); + return Coll_barrier_mvapich2_pair::barrier(comm); } -int smpi_coll_tuned_bcast_mvapich2(void *buffer, +int Coll_bcast_mvapich2::bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) @@ -678,7 +679,7 @@ int smpi_coll_tuned_bcast_mvapich2(void *buffer, -int smpi_coll_tuned_reduce_mvapich2( void *sendbuf, +int Coll_reduce_mvapich2::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -796,7 +797,7 @@ int smpi_coll_tuned_reduce_mvapich2( void *sendbuf, } -int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *recvcnts, +int Coll_reduce_scatter_mvapich2::reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { @@ -860,7 +861,7 @@ int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *r recvcnts, datatype, op, comm); } - mpi_errno = smpi_coll_tuned_reduce_scatter_mpich_rdb(sendbuf, recvbuf, + mpi_errno = Coll_reduce_scatter_mpich_rdb::reduce_scatter(sendbuf, recvbuf, recvcnts, datatype, op, comm); } @@ -871,7 +872,7 @@ int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *r -int smpi_coll_tuned_scatter_mvapich2(void *sendbuf, +int Coll_scatter_mvapich2::scatter(void *sendbuf, int sendcnt, MPI_Datatype sendtype, void *recvbuf, @@ -1004,6 +1005,8 @@ int smpi_coll_tuned_scatter_mvapich2(void *sendbuf, return (mpi_errno); } +} +} void smpi_coll_cleanup_mvapich2(void){ int i=0; if(mv2_alltoall_thresholds_table) diff --git a/src/smpi/colls/smpi_mvapich2_selector_stampede.h b/src/smpi/colls/smpi_mvapich2_selector_stampede.h index e0f2156377..c6a28b8029 100644 --- a/src/smpi/colls/smpi_mvapich2_selector_stampede.h +++ b/src/smpi/colls/smpi_mvapich2_selector_stampede.h @@ -10,6 +10,7 @@ #define MV2_MAX_NB_THRESHOLDS 32 +using namespace simgrid::smpi; typedef struct { int min; @@ -36,11 +37,11 @@ int *mv2_size_alltoall_tuning_table = NULL; mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL; -#define MPIR_Alltoall_bruck_MV2 smpi_coll_tuned_alltoall_bruck -#define MPIR_Alltoall_RD_MV2 smpi_coll_tuned_alltoall_rdb -#define MPIR_Alltoall_Scatter_dest_MV2 smpi_coll_tuned_alltoall_mvapich2_scatter_dest -#define MPIR_Alltoall_pairwise_MV2 smpi_coll_tuned_alltoall_pair -#define MPIR_Alltoall_inplace_MV2 smpi_coll_tuned_alltoall_ring +#define MPIR_Alltoall_bruck_MV2 Coll_alltoall_bruck::alltoall +#define MPIR_Alltoall_RD_MV2 Coll_alltoall_rdb::alltoall +#define MPIR_Alltoall_Scatter_dest_MV2 Coll_alltoall_mvapich2_scatter_dest::alltoall +#define MPIR_Alltoall_pairwise_MV2 Coll_alltoall_pair::alltoall +#define MPIR_Alltoall_inplace_MV2 Coll_alltoall_ring::alltoall static void init_mv2_alltoall_tables_stampede(){ @@ -294,7 +295,7 @@ static void init_mv2_alltoall_tables_stampede(){ typedef struct { int min; int max; - int (*MV2_pt_Allgather_function)(void *sendbuf, + int (*MV2_pt_Allgatherction)(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, @@ -309,7 +310,7 @@ typedef struct { mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS]; } mv2_allgather_tuning_table; -int (*MV2_Allgather_function)(void *sendbuf, +int (*MV2_Allgatherction)(void *sendbuf, int sendcount, 
MPI_Datatype sendtype, void *recvbuf, @@ -331,10 +332,10 @@ static int MPIR_Allgather_RD_Allgather_Comm_MV2( void *sendbuf, return 0; } -#define MPIR_Allgather_Bruck_MV2 smpi_coll_tuned_allgather_bruck -#define MPIR_Allgather_RD_MV2 smpi_coll_tuned_allgather_rdb -#define MPIR_Allgather_Ring_MV2 smpi_coll_tuned_allgather_ring -#define MPIR_2lvl_Allgather_MV2 smpi_coll_tuned_allgather_mvapich2_smp +#define MPIR_Allgather_Bruck_MV2 Coll_allgather_bruck::allgather +#define MPIR_Allgather_RD_MV2 Coll_allgather_rdb::allgather +#define MPIR_Allgather_Ring_MV2 Coll_allgather_ring::allgather +#define MPIR_2lvl_Allgather_MV2 Coll_allgather_mvapich2_smp::allgather static void init_mv2_allgather_tables_stampede(){ int i; @@ -590,9 +591,10 @@ MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL; MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL; -#define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear -#define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_mvapich2_two_level -#define MPIR_Gather_intra smpi_coll_tuned_gather_mpich + +#define MPIR_Gather_MV2_Direct Coll_gather_ompi_basic_linear::gather +#define MPIR_Gather_MV2_two_level_Direct Coll_gather_mvapich2_two_level::gather +#define MPIR_Gather_intra Coll_gather_mpich::gather static void init_mv2_gather_tables_stampede(){ @@ -678,9 +680,9 @@ int (*MV2_Allgatherv_function)(void *sendbuf, int mv2_size_allgatherv_tuning_table = 0; mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL; -#define MPIR_Allgatherv_Rec_Doubling_MV2 smpi_coll_tuned_allgatherv_mpich_rdb -#define MPIR_Allgatherv_Bruck_MV2 smpi_coll_tuned_allgatherv_ompi_bruck -#define MPIR_Allgatherv_Ring_MV2 smpi_coll_tuned_allgatherv_mpich_ring +#define MPIR_Allgatherv_Rec_Doubling_MV2 Coll_allgatherv_mpich_rdb::allgatherv +#define MPIR_Allgatherv_Bruck_MV2 Coll_allgatherv_ompi_bruck::allgatherv +#define MPIR_Allgatherv_Ring_MV2 Coll_allgatherv_mpich_ring::allgatherv static void init_mv2_allgatherv_tables_stampede(){ @@ -750,7 +752,7 @@ static void init_mv2_allgatherv_tables_stampede(){ typedef struct { int min; int max; - int (*MV2_pt_Allreduce_function)(void *sendbuf, + int (*MV2_pt_Allreducection)(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -768,7 +770,7 @@ typedef struct { } mv2_allreduce_tuning_table; -int (*MV2_Allreduce_function)(void *sendbuf, +int (*MV2_Allreducection)(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, @@ -812,7 +814,7 @@ static int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } @@ -822,13 +824,13 @@ static int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm); return MPI_SUCCESS; } -#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb -#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs -#define MPIR_Allreduce_two_level_MV2 smpi_coll_tuned_allreduce_mvapich2_two_level +#define MPIR_Allreduce_pt2pt_rd_MV2 Coll_allreduce_rdb::allreduce +#define MPIR_Allreduce_pt2pt_rs_MV2 Coll_allreduce_mvapich2_rs::allreduce +#define MPIR_Allreduce_two_level_MV2 Coll_allreduce_mvapich2_two_level::allreduce static void init_mv2_allreduce_tables_stampede(){ @@ -1018,17 +1020,17 @@ int 
mv2_intra_node_knomial_factor = 4; #define INTRA_NODE_ROOT 0 -#define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree -#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather -#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather -#define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node -#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node -#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node +#define MPIR_Pipelined_Bcast_Zcpy_MV2 Coll_bcast_mpich::bcast +#define MPIR_Pipelined_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_binomial_MV2 Coll_bcast_binomial_tree::bcast +#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Bcast_scatter_doubling_allgather_MV2 Coll_bcast_scatter_rdb_allgather::bcast +#define MPIR_Bcast_scatter_ring_allgather_MV2 Coll_bcast_scatter_LR_allgather::bcast +#define MPIR_Shmem_Bcast_MV2 Coll_bcast_mpich::bcast +#define MPIR_Bcast_tune_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define MPIR_Bcast_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast +#define MPIR_Knomial_Bcast_intra_node_MV2 Coll_bcast_mvapich2_knomial_intra_node::bcast +#define MPIR_Bcast_intra_MV2 Coll_bcast_mvapich2_intra_node::bcast static void init_mv2_bcast_tables_stampede(){ //Stampede, @@ -1291,12 +1293,12 @@ int (*MV2_Reduce_intra_function)( void *sendbuf, MPI_Comm comm_ptr)=NULL; -#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial -#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_binomial -#define MPIR_Reduce_redscat_gather_MV2 smpi_coll_tuned_reduce_scatter_gather -#define MPIR_Reduce_shmem_MV2 smpi_coll_tuned_reduce_ompi_basic_linear -#define MPIR_Reduce_two_level_helper_MV2 smpi_coll_tuned_reduce_mvapich2_two_level +#define MPIR_Reduce_inter_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_intra_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce +#define MPIR_Reduce_binomial_MV2 Coll_reduce_binomial::reduce +#define MPIR_Reduce_redscat_gather_MV2 Coll_reduce_scatter_gather::reduce +#define MPIR_Reduce_shmem_MV2 Coll_reduce_ompi_basic_linear::reduce +#define MPIR_Reduce_two_level_helper_MV2 Coll_reduce_mvapich2_two_level::reduce static void init_mv2_reduce_tables_stampede(){ @@ -1533,12 +1535,12 @@ static int MPIR_Reduce_Scatter_Basic_MV2(void *sendbuf, MPI_Op op, MPI_Comm comm) { - smpi_mpi_reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm); + Coll_reduce_scatter_default::reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm); return MPI_SUCCESS; } -#define MPIR_Reduce_scatter_non_comm_MV2 smpi_coll_tuned_reduce_scatter_mpich_noncomm -#define MPIR_Reduce_scatter_Rec_Halving_MV2 smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving -#define MPIR_Reduce_scatter_Pair_Wise_MV2 smpi_coll_tuned_reduce_scatter_mpich_pair +#define MPIR_Reduce_scatter_non_comm_MV2 Coll_reduce_scatter_mpich_noncomm::reduce_scatter 
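These MPIR_* aliases (the remaining reduce-scatter ones follow just below) keep the MVAPICH2-derived tuning tables unchanged: the tables still store plain C function pointers, and static member functions such as Coll_bcast_binomial_tree::bcast are valid targets for them because they take no implicit this. A small self-contained illustration of that mapping, with made-up class and table names:

#include <mpi.h>

// Static member functions decay to ordinary function pointers, so they can be
// stored in C-style dispatch tables exactly like the old free functions.
// Everything below is illustrative; both variants just forward to MPI_Bcast.
namespace demo {
struct Coll_bcast_binomial {
  static int bcast(void* buf, int count, MPI_Datatype dt, int root, MPI_Comm comm)
  { return MPI_Bcast(buf, count, dt, root, comm); }
};
struct Coll_bcast_flattree {
  static int bcast(void* buf, int count, MPI_Datatype dt, int root, MPI_Comm comm)
  { return MPI_Bcast(buf, count, dt, root, comm); }
};
}

typedef int (*bcast_fn)(void*, int, MPI_Datatype, int, MPI_Comm);

static bcast_fn bcast_table[] = {
    demo::Coll_bcast_binomial::bcast, // no object needed: a static method is a plain function
    demo::Coll_bcast_flattree::bcast,
};

static int dispatch_bcast(int algo, void* buf, int count, MPI_Datatype dt, int root, MPI_Comm comm)
{
  return bcast_table[algo](buf, count, dt, root, comm);
}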
+#define MPIR_Reduce_scatter_Rec_Halving_MV2 Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter +#define MPIR_Reduce_scatter_Pair_Wise_MV2 Coll_reduce_scatter_mpich_pair::reduce_scatter @@ -1661,10 +1663,10 @@ int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf, return 0; } -#define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial -#define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear -#define MPIR_Scatter_MV2_two_level_Binomial smpi_coll_tuned_scatter_mvapich2_two_level_binomial -#define MPIR_Scatter_MV2_two_level_Direct smpi_coll_tuned_scatter_mvapich2_two_level_direct +#define MPIR_Scatter_MV2_Binomial Coll_scatter_ompi_binomial::scatter +#define MPIR_Scatter_MV2_Direct Coll_scatter_ompi_basic_linear::scatter +#define MPIR_Scatter_MV2_two_level_Binomial Coll_scatter_mvapich2_two_level_binomial::scatter +#define MPIR_Scatter_MV2_two_level_Direct Coll_scatter_mvapich2_two_level_direct::scatter diff --git a/src/smpi/colls/smpi_openmpi_selector.cpp b/src/smpi/colls/smpi_openmpi_selector.cpp index 67c7a39263..d6d85a2c27 100644 --- a/src/smpi/colls/smpi_openmpi_selector.cpp +++ b/src/smpi/colls/smpi_openmpi_selector.cpp @@ -8,8 +8,10 @@ #include "colls_private.h" +namespace simgrid{ +namespace smpi{ -int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count, +int Coll_allreduce_ompi::allreduce(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { size_t dsize, block_dsize; @@ -27,7 +29,7 @@ int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count, block_dsize = dsize * count; if (block_dsize < intermediate_message) { - return (smpi_coll_tuned_allreduce_rdb (sbuf, rbuf, + return (Coll_allreduce_rdb::allreduce (sbuf, rbuf, count, dtype, op, comm)); } @@ -37,23 +39,23 @@ int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count, if ((comm_size * segment_size >= block_dsize)) { //FIXME: ok, these are not the right algorithms, try to find closer ones // lr is a good match for allreduce_ring (difference is mainly the use of sendrecv) - return smpi_coll_tuned_allreduce_lr(sbuf, rbuf, count, dtype, + return Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype, op, comm); } else { - return (smpi_coll_tuned_allreduce_ompi_ring_segmented (sbuf, rbuf, + return (Coll_allreduce_ompi_ring_segmented::allreduce (sbuf, rbuf, count, dtype, op, comm /*segment_size*/)); } } - return (smpi_coll_tuned_allreduce_redbcast(sbuf, rbuf, count, + return (Coll_allreduce_redbcast::allreduce(sbuf, rbuf, count, dtype, op, comm)); } -int smpi_coll_tuned_alltoall_ompi( void *sbuf, int scount, +int Coll_alltoall_ompi::alltoall( void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -71,22 +73,22 @@ int smpi_coll_tuned_alltoall_ompi( void *sbuf, int scount, block_dsize = dsize * scount; if ((block_dsize < 200) && (communicator_size > 12)) { - return smpi_coll_tuned_alltoall_bruck(sbuf, scount, sdtype, + return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (block_dsize < 3000) { - return smpi_coll_tuned_alltoall_basic_linear(sbuf, scount, sdtype, + return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_alltoall_ring (sbuf, scount, sdtype, + return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } -int smpi_coll_tuned_alltoallv_ompi(void *sbuf, int *scounts, int *sdisps, +int Coll_alltoallv_ompi::alltoallv(void *sbuf, int *scounts, int *sdisps, 
MPI_Datatype sdtype, void *rbuf, int *rcounts, int *rdisps, MPI_Datatype rdtype, @@ -94,17 +96,17 @@ int smpi_coll_tuned_alltoallv_ompi(void *sbuf, int *scounts, int *sdisps, ) { /* For starters, just keep the original algorithm. */ - return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype, + return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps,rdtype, comm); } -int smpi_coll_tuned_barrier_ompi(MPI_Comm comm) +int Coll_barrier_ompi::barrier(MPI_Comm comm) { int communicator_size = comm->size(); if( 2 == communicator_size ) - return smpi_coll_tuned_barrier_ompi_two_procs(comm); + return Coll_barrier_ompi_two_procs::barrier(comm); /* * Basic optimisation. If we have a power of 2 number of nodes*/ /* * the use the recursive doubling algorithm, otherwise*/ /* * bruck is the one we want.*/ @@ -113,15 +115,15 @@ int smpi_coll_tuned_barrier_ompi(MPI_Comm comm) for( ; communicator_size > 0; communicator_size >>= 1 ) { if( communicator_size & 0x1 ) { if( has_one ) - return smpi_coll_tuned_barrier_ompi_bruck(comm); + return Coll_barrier_ompi_bruck::barrier(comm); has_one = 1; } } } - return smpi_coll_tuned_barrier_ompi_recursivedoubling(comm); + return Coll_barrier_ompi_recursivedoubling::barrier(comm); } -int smpi_coll_tuned_bcast_ompi(void *buff, int count, +int Coll_bcast_ompi::bcast(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm ) @@ -151,12 +153,12 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count, single-element broadcasts */ if ((message_size < small_message_size) || (count <= 1)) { /* Binomial without segmentation */ - return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype, + return Coll_bcast_binomial_tree::bcast (buff, count, datatype, root, comm); } else if (message_size < intermediate_message_size) { // SplittedBinary with 1KB segments - return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, + return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype, root, comm); } @@ -164,65 +166,65 @@ int smpi_coll_tuned_bcast_ompi(void *buff, int count, else if (communicator_size < (a_p128 * message_size + b_p128)) { //Pipeline with 128KB segments //segsize = 1024 << 7; - return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype, root, comm); } else if (communicator_size < 13) { // Split Binary with 8KB segments - return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype, + return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype, root, comm); } else if (communicator_size < (a_p64 * message_size + b_p64)) { // Pipeline with 64KB segments //segsize = 1024 << 6; - return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype, root, comm); } else if (communicator_size < (a_p16 * message_size + b_p16)) { //Pipeline with 16KB segments //segsize = 1024 << 4; - return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype, + return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype, root, comm); } /* Pipeline with 8KB segments */ //segsize = 1024 << 3; - return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype, + return Coll_bcast_flattree_pipeline::bcast (buff, count, datatype, root, comm /*segsize*/); #if 0 /* this is based on gige measurements */ if (communicator_size < 4) { - return smpi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module); + return 
Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module); } if (communicator_size == 4) { if (message_size < 524288) segsize = 0; else segsize = 16384; - return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize); + return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize); } if (communicator_size <= 8 && message_size < 4096) { - return smpi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module); + return Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module); } if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) { segsize = 16384; - return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize); + return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize); } if (message_size >= 524288) { segsize = 16384; - return smpi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, module, segsize); + return Coll_bcast_intra_pipeline::bcast (buff, count, datatype, root, comm, module, segsize); } segsize = 0; /* once tested can swap this back in */ - /* return smpi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */ - return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize); + /* return Coll_bcast_intra_bmtree::bcast (buff, count, datatype, root, comm, segsize); */ + return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize); #endif /* 0 */ } -int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, +int Coll_reduce_ompi::reduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm @@ -255,35 +257,35 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, */ if( (op!=MPI_OP_NULL) && !op->is_commutative() ) { if ((communicator_size < 12) && (message_size < 2048)) { - return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/); + return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/); } - return smpi_coll_tuned_reduce_ompi_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_in_order_binary::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, 0, max_requests*/); } if ((communicator_size < 8) && (message_size < 512)){ /* Linear_0K */ - return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); + return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm); } else if (((communicator_size < 8) && (message_size < 20480)) || (message_size < 2048) || (count <= 1)) { /* Binomial_0K */ //segsize = 0; - return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } else if (communicator_size > (a1 * message_size + b1)) { // Binomial_1K //segsize = 1024; - return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } else if (communicator_size > (a2 * message_size + b2)) { // Pipeline_1K //segsize = 1024; - return 
smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } else if (communicator_size > (a3 * message_size + b3)) { // Binary_32K //segsize = 32*1024; - return smpi_coll_tuned_reduce_ompi_binary( sendbuf, recvbuf, count, datatype, op, root, + return Coll_reduce_ompi_binary::reduce( sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); } // if (communicator_size > (a4 * message_size + b4)) { @@ -293,7 +295,7 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, // Pipeline_64K // segsize = 64*1024; // } - return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, + return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module, segsize, max_requests*/); #if 0 @@ -303,8 +305,8 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, fanout = communicator_size - 1; /* when linear implemented or taken from basic put here, right now using chain as a linear system */ /* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */ - return smpi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module); - /* return smpi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */ + return Coll_reduce_intra_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module); + /* return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */ } if (message_size < 524288) { if (message_size <= 65536 ) { @@ -316,16 +318,16 @@ int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf, } /* later swap this for a binary tree */ /* fanout = 2; */ - return smpi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, module, + return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module, segsize, fanout, max_requests); } segsize = 1024; - return smpi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, + return Coll_reduce_intra_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module, segsize, max_requests); #endif /* 0 */ } -int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, +int Coll_reduce_scatter_ompi::reduce_scatter( void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, MPI_Op op, @@ -340,7 +342,7 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, const size_t large_message_size = 256 * 1024; int zerocounts = 0; - XBT_DEBUG("smpi_coll_tuned_reduce_scatter_ompi"); + XBT_DEBUG("Coll_reduce_scatter_ompi::reduce_scatter"); comm_size = comm->size(); // We need data size for decision function @@ -354,7 +356,7 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, } if( ((op!=MPI_OP_NULL) && !op->is_commutative()) || (zerocounts)) { - smpi_mpi_reduce_scatter (sbuf, rbuf, rcounts, + Coll_reduce_scatter_default::reduce_scatter (sbuf, rbuf, rcounts, dtype, op, comm); return MPI_SUCCESS; @@ -369,11 +371,11 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, ((total_message_size <= large_message_size) && (pow2 == comm_size)) || (comm_size >= a * total_message_size + b)) { return - 
smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts, + Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); } - return smpi_coll_tuned_reduce_scatter_ompi_ring(sbuf, rbuf, rcounts, + return Coll_reduce_scatter_ompi_ring::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm); @@ -381,7 +383,7 @@ int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf, } -int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, +int Coll_allgather_ompi::allgather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -395,7 +397,7 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, /* Special case for 2 processes */ if (communicator_size == 2) { - return smpi_coll_tuned_allgather_pair (sbuf, scount, sdtype, + return Coll_allgather_pair::allgather (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm/*, module*/); } @@ -416,21 +418,21 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, */ if (total_dsize < 50000) { if (pow2_size == communicator_size) { - return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, + return Coll_allgather_rdb::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else { - return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, + return Coll_allgather_bruck::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } } else { if (communicator_size % 2) { - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else { - return smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype, + return Coll_allgather_ompi_neighborexchange::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } @@ -447,21 +449,21 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount, - for everything else use ring. */ if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype, + return Coll_allgather_rdb::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } else if (total_dsize <= 81920) { - return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype, + return Coll_allgather_bruck::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); } - return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype, + return Coll_allgather_ring::allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); #endif /* defined(USE_MPICH2_DECISION) */ } -int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount, +int Coll_allgatherv_ompi::allgatherv(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int *rcounts, int *rdispls, @@ -477,7 +479,7 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount, /* Special case for 2 processes */ if (communicator_size == 2) { - return smpi_coll_tuned_allgatherv_pair(sbuf, scount, sdtype, + return Coll_allgatherv_pair::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } @@ -491,27 +493,27 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount, /* Decision based on allgather decision. 
*/ if (total_dsize < 50000) { -/* return smpi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, +/* return Coll_allgatherv_intra_bruck::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm, module);*/ - return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, + return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } else { if (communicator_size % 2) { - return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype, + return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } else { - return smpi_coll_tuned_allgatherv_ompi_neighborexchange(sbuf, scount, sdtype, + return Coll_allgatherv_ompi_neighborexchange::allgatherv(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm); } } } -int smpi_coll_tuned_gather_ompi(void *sbuf, int scount, +int Coll_gather_ompi::gather(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -552,25 +554,26 @@ int smpi_coll_tuned_gather_ompi(void *sbuf, int scount, /* root, comm);*/ /* } else*/ if (block_size > intermediate_block_size) { - return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, + return Coll_gather_ompi_linear_sync::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } else if ((communicator_size > large_communicator_size) || ((communicator_size > small_communicator_size) && (block_size < small_block_size))) { - return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype, + return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } // Otherwise, use basic linear - return smpi_coll_tuned_gather_ompi_basic_linear (sbuf, scount, sdtype, + return Coll_gather_ompi_basic_linear::gather (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } -int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, + +int Coll_scatter_ompi::scatter(void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, @@ -582,7 +585,7 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, int communicator_size, rank; size_t dsize, block_size; - XBT_DEBUG("smpi_coll_tuned_scatter_ompi"); + XBT_DEBUG("Coll_scatter_ompi::scatter"); communicator_size = comm->size(); rank = comm->rank(); @@ -602,7 +605,7 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, scount=rcount; sdtype=rdtype; } - int ret=smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype, + int ret=Coll_scatter_ompi_binomial::scatter (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); if(rank!=root){ @@ -610,8 +613,10 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, } return ret; } - return smpi_coll_tuned_scatter_ompi_basic_linear (sbuf, scount, sdtype, + return Coll_scatter_ompi_basic_linear::scatter (sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm); } +} +} diff --git a/src/smpi/private.h b/src/smpi/private.h index e2c0cda3d6..a324d2de05 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -17,6 +17,7 @@ #include "xbt/xbt_os_time.h" #include "src/smpi/smpi_f2c.hpp" #include "src/smpi/smpi_group.hpp" +#include "src/smpi/smpi_coll.hpp" #include "src/smpi/smpi_comm.hpp" #include "src/smpi/smpi_info.hpp" #include "src/smpi/smpi_op.hpp" @@ -134,43 +135,7 @@ XBT_PRIVATE void smpi_mpi_init(); XBT_PRIVATE void smpi_empty_status(MPI_Status * status); XBT_PRIVATE int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype); -XBT_PRIVATE int smpi_info_c2f(MPI_Info info); -XBT_PRIVATE int smpi_info_add_f(MPI_Info 
info); -XBT_PRIVATE MPI_Info smpi_info_f2c(int info); - - -XBT_PRIVATE void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_barrier(MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, - MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, - void *recvbuf, int recvcount,MPI_Datatype recvtype, int root, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, - MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_allreduce(void *sendbuf, void *recvbuf, int count,MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count,MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); -XBT_PRIVATE void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm); - -XBT_PRIVATE int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); -XBT_PRIVATE int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm); + XBT_PRIVATE int smpi_comm_keyval_create(MPI_Comm_copy_attr_function* copy_fn, MPI_Comm_delete_attr_function* delete_fn, int* keyval, void* extra_state); XBT_PRIVATE int smpi_comm_keyval_free(int* keyval); diff --git a/src/smpi/smpi_base.cpp b/src/smpi/smpi_base.cpp index 27d4471fce..86f22315cc 100644 --- a/src/smpi/smpi_base.cpp +++ b/src/smpi/smpi_base.cpp @@ -52,460 +52,6 @@ double smpi_mpi_wtime(){ return time; } -void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) -{ - smpi_coll_tuned_bcast_binomial_tree(buf, count, datatype, root, comm); -} - -void smpi_mpi_barrier(MPI_Comm comm) -{ - smpi_coll_tuned_barrier_ompi_basic_linear(comm); -} - -void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_GATHER; - MPI_Aint lb = 0; - MPI_Aint 
recvext = 0; - - int rank = comm->rank(); - int size = comm->size(); - if(rank != root) { - // Send buffer to root - Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); - } else { - recvtype->extent(&lb, &recvext); - // Local copy from root - Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + root * recvcount * recvext, - recvcount, recvtype); - // Receive buffers from senders - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for (int src = 0; src < size; src++) { - if(src != root) { - requests[index] = Request::irecv_init(static_cast(recvbuf) + src * recvcount * recvext, recvcount, recvtype, - src, system_tag, comm); - index++; - } - } - // Wait for completion of irecv's. - Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int src = 0; src < size-1; src++) { - Request::unref(&requests[src]); - } - xbt_free(requests); - } -} - -void smpi_mpi_reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op, - MPI_Comm comm) -{ - int rank = comm->rank(); - - /* arbitrarily choose root as rank 0 */ - int size = comm->size(); - int count = 0; - int *displs = xbt_new(int, size); - for (int i = 0; i < size; i++) { - displs[i] = count; - count += recvcounts[i]; - } - void *tmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); - - mpi_coll_reduce_fun(sendbuf, tmpbuf, count, datatype, op, 0, comm); - smpi_mpi_scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm); - xbt_free(displs); - smpi_free_tmp_buffer(tmpbuf); -} - -void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, - MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_GATHERV; - MPI_Aint lb = 0; - MPI_Aint recvext = 0; - - int rank = comm->rank(); - int size = comm->size(); - if (rank != root) { - // Send buffer to root - Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); - } else { - recvtype->extent(&lb, &recvext); - // Local copy from root - Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + displs[root] * recvext, - recvcounts[root], recvtype); - // Receive buffers from senders - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for (int src = 0; src < size; src++) { - if(src != root) { - requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[src] * recvext, - recvcounts[src], recvtype, src, system_tag, comm); - index++; - } - } - // Wait for completion of irecv's. 
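
The default implementations removed from smpi_base.cpp in this hunk are not lost: code that needs the plain default explicitly now calls the matching Coll_*_default class (as smpi_comm.cpp does further down with Coll_gather_default and Coll_allreduce_default), while generic code goes through the configurable Colls:: pointers. A hedged sketch of the two call styles, assuming the include path used by private.h in this patch:

#include "src/smpi/smpi_coll.hpp"   // Colls and the generated Coll_* classes
using namespace simgrid::smpi;

// Sketch only; buffers and communicator are assumed valid.
static int gather_two_ints(void* sendbuf, void* recvbuf, MPI_Comm comm) {
  // Generic code goes through the pointer set up by Colls::set_collectives():
  return Colls::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, comm);
  // Code that must not depend on the user's smpi/gather choice pins the algorithm instead:
  //   return Coll_gather_default::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, comm);
}
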
- Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int src = 0; src < size-1; src++) { - Request::unref(&requests[src]); - } - xbt_free(requests); - } -} - -void smpi_mpi_allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm) -{ - int system_tag = COLL_TAG_ALLGATHER; - MPI_Aint lb = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - - int rank = comm->rank(); - int size = comm->size(); - // FIXME: check for errors - recvtype->extent(&lb, &recvext); - // Local copy from self - Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + rank * recvcount * recvext, recvcount, - recvtype); - // Send/Recv buffers to/from others; - requests = xbt_new(MPI_Request, 2 * (size - 1)); - int index = 0; - for (int other = 0; other < size; other++) { - if(other != rank) { - requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm); - index++; - requests[index] = Request::irecv_init(static_cast(recvbuf) + other * recvcount * recvext, recvcount, recvtype, - other, system_tag, comm); - index++; - } - } - // Wait for completion of all comms. - Request::startall(2 * (size - 1), requests); - Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); - for (int other = 0; other < 2*(size-1); other++) { - Request::unref(&requests[other]); - } - xbt_free(requests); -} - -void smpi_mpi_allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm) -{ - int system_tag = COLL_TAG_ALLGATHERV; - MPI_Aint lb = 0; - MPI_Aint recvext = 0; - - int rank = comm->rank(); - int size = comm->size(); - recvtype->extent(&lb, &recvext); - // Local copy from self - Datatype::copy(sendbuf, sendcount, sendtype, - static_cast(recvbuf) + displs[rank] * recvext,recvcounts[rank], recvtype); - // Send buffers to others; - MPI_Request *requests = xbt_new(MPI_Request, 2 * (size - 1)); - int index = 0; - for (int other = 0; other < size; other++) { - if(other != rank) { - requests[index] = - Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm); - index++; - requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[other] * recvext, recvcounts[other], - recvtype, other, system_tag, comm); - index++; - } - } - // Wait for completion of all comms. 
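
All of the implementations being moved in these hunks compute buffer offsets the same way: query the datatype's extent and advance a char pointer by count times extent bytes, never by sizeof. A small sketch of that idiom with invented names (the *v variants use displs[src] * recvext instead of src * recvcount * recvext):

// Where rank `src`'s block lands in a gather/allgather receive buffer.
static void* block_for(void* recvbuf, int src, int recvcount, MPI_Datatype recvtype) {
  MPI_Aint lb = 0;
  MPI_Aint recvext = 0;
  recvtype->extent(&lb, &recvext);   // extent (not size), so datatype padding is honoured
  return static_cast<char*>(recvbuf) + static_cast<MPI_Aint>(src) * recvcount * recvext;
}
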
- Request::startall(2 * (size - 1), requests); - Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE); - for (int other = 0; other < 2*(size-1); other++) { - Request::unref(&requests[other]); - } - xbt_free(requests); -} - -void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_SCATTER; - MPI_Aint lb = 0; - MPI_Aint sendext = 0; - MPI_Request *requests; - - int rank = comm->rank(); - int size = comm->size(); - if(rank != root) { - // Recv buffer from root - Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); - } else { - sendtype->extent(&lb, &sendext); - // Local copy from root - if(recvbuf!=MPI_IN_PLACE){ - Datatype::copy(static_cast(sendbuf) + root * sendcount * sendext, - sendcount, sendtype, recvbuf, recvcount, recvtype); - } - // Send buffers to receivers - requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for(int dst = 0; dst < size; dst++) { - if(dst != root) { - requests[index] = Request::isend_init(static_cast(sendbuf) + dst * sendcount * sendext, sendcount, sendtype, - dst, system_tag, comm); - index++; - } - } - // Wait for completion of isend's. - Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int dst = 0; dst < size-1; dst++) { - Request::unref(&requests[dst]); - } - xbt_free(requests); - } -} - -void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, - MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - int system_tag = COLL_TAG_SCATTERV; - MPI_Aint lb = 0; - MPI_Aint sendext = 0; - - int rank = comm->rank(); - int size = comm->size(); - if(rank != root) { - // Recv buffer from root - Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); - } else { - sendtype->extent(&lb, &sendext); - // Local copy from root - if(recvbuf!=MPI_IN_PLACE){ - Datatype::copy(static_cast(sendbuf) + displs[root] * sendext, sendcounts[root], - sendtype, recvbuf, recvcount, recvtype); - } - // Send buffers to receivers - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - int index = 0; - for (int dst = 0; dst < size; dst++) { - if (dst != root) { - requests[index] = Request::isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], - sendtype, dst, system_tag, comm); - index++; - } - } - // Wait for completion of isend's. 
- Request::startall(size - 1, requests); - Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); - for (int dst = 0; dst < size-1; dst++) { - Request::unref(&requests[dst]); - } - xbt_free(requests); - } -} - -void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, - MPI_Comm comm) -{ - int system_tag = COLL_TAG_REDUCE; - MPI_Aint lb = 0; - MPI_Aint dataext = 0; - - char* sendtmpbuf = static_cast(sendbuf); - - int rank = comm->rank(); - int size = comm->size(); - //non commutative case, use a working algo from openmpi - if(op != MPI_OP_NULL && !op->is_commutative()){ - smpi_coll_tuned_reduce_ompi_basic_linear(sendtmpbuf, recvbuf, count, datatype, op, root, comm); - return; - } - - if( sendbuf == MPI_IN_PLACE ) { - sendtmpbuf = static_cast(smpi_get_tmp_sendbuffer(count*datatype->get_extent())); - Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype); - } - - if(rank != root) { - // Send buffer to root - Request::send(sendtmpbuf, count, datatype, root, system_tag, comm); - } else { - datatype->extent(&lb, &dataext); - // Local copy from root - if (sendtmpbuf != nullptr && recvbuf != nullptr) - Datatype::copy(sendtmpbuf, count, datatype, recvbuf, count, datatype); - // Receive buffers from senders - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - void **tmpbufs = xbt_new(void *, size - 1); - int index = 0; - for (int src = 0; src < size; src++) { - if (src != root) { - if (!smpi_process_get_replaying()) - tmpbufs[index] = xbt_malloc(count * dataext); - else - tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = - Request::irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm); - index++; - } - } - // Wait for completion of irecv's. - Request::startall(size - 1, requests); - for (int src = 0; src < size - 1; src++) { - index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); - XBT_DEBUG("finished waiting any request with index %d", index); - if(index == MPI_UNDEFINED) { - break; - }else{ - Request::unref(&requests[index]); - } - if (op != MPI_OP_NULL) /* op can be MPI_OP_NULL that does nothing */ - op->apply(tmpbufs[index], recvbuf, &count, datatype); - } - for(index = 0; index < size - 1; index++) { - smpi_free_tmp_buffer(tmpbufs[index]); - } - xbt_free(tmpbufs); - xbt_free(requests); - - } - if( sendbuf == MPI_IN_PLACE ) { - smpi_free_tmp_buffer(sendtmpbuf); - } -} - -void smpi_mpi_allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, 0, comm); - smpi_mpi_bcast(recvbuf, count, datatype, 0, comm); -} - -void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - int system_tag = -888; - MPI_Aint lb = 0; - MPI_Aint dataext = 0; - - int rank = comm->rank(); - int size = comm->size(); - - datatype->extent(&lb, &dataext); - - // Local copy from self - Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype); - - // Send/Recv buffers to/from others; - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - void **tmpbufs = xbt_new(void *, rank); - int index = 0; - for (int other = 0; other < rank; other++) { - tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); - index++; - } - for (int other = rank + 1; other < size; other++) { - requests[index] = Request::isend_init(sendbuf, count, datatype, other, 
system_tag, comm); - index++; - } - // Wait for completion of all comms. - Request::startall(size - 1, requests); - - if(op != MPI_OP_NULL && op->is_commutative()){ - for (int other = 0; other < size - 1; other++) { - index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); - if(index == MPI_UNDEFINED) { - break; - } - if (index < rank) - // #Request is below rank: it's a irecv. - op->apply(tmpbufs[index], recvbuf, &count, datatype); - } - }else{ - //non commutative case, wait in order - for (int other = 0; other < size - 1; other++) { - Request::wait(&(requests[other]), MPI_STATUS_IGNORE); - if (index < rank && op != MPI_OP_NULL) - op->apply(tmpbufs[other], recvbuf, &count, datatype); - } - } - for(index = 0; index < rank; index++) { - smpi_free_tmp_buffer(tmpbufs[index]); - } - for(index = 0; index < size-1; index++) { - Request::unref(&requests[index]); - } - xbt_free(tmpbufs); - xbt_free(requests); -} - -void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) -{ - int system_tag = -888; - MPI_Aint lb = 0; - MPI_Aint dataext = 0; - int recvbuf_is_empty=1; - int rank = comm->rank(); - int size = comm->size(); - - datatype->extent(&lb, &dataext); - - // Send/Recv buffers to/from others; - MPI_Request *requests = xbt_new(MPI_Request, size - 1); - void **tmpbufs = xbt_new(void *, rank); - int index = 0; - for (int other = 0; other < rank; other++) { - tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); - requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); - index++; - } - for (int other = rank + 1; other < size; other++) { - requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); - index++; - } - // Wait for completion of all comms. 
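
The scan and exscan bodies deleted here reappear essentially unchanged as Colls::scan and Colls::exscan in smpi_coll.cpp below; the only semantic difference between the two is whether a rank's own contribution is included. A small worked sketch with MPI_SUM (the unqualified Colls assumes a using-directive for simgrid::smpi):

static void prefix_sum_demo(int rank) {
  int in = rank + 1;                 // ranks 0..3 would hold 1, 2, 3, 4
  int incl = 0;
  int excl = 0;
  Colls::scan(&in, &incl, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  // incl per rank: 1, 3, 6, 10 -- own value included
  Colls::exscan(&in, &excl, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  // excl per rank: (untouched), 1, 3, 6 -- rank 0 receives nothing, recvbuf_is_empty stays set
}
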
- Request::startall(size - 1, requests); - - if(op != MPI_OP_NULL && op->is_commutative()){ - for (int other = 0; other < size - 1; other++) { - index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); - if(index == MPI_UNDEFINED) { - break; - } - if(index < rank) { - if(recvbuf_is_empty){ - Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype); - recvbuf_is_empty=0; - } else - // #Request is below rank: it's a irecv - op->apply(tmpbufs[index], recvbuf, &count, datatype); - } - } - }else{ - //non commutative case, wait in order - for (int other = 0; other < size - 1; other++) { - Request::wait(&(requests[other]), MPI_STATUS_IGNORE); - if(index < rank) { - if (recvbuf_is_empty) { - Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype); - recvbuf_is_empty = 0; - } else - if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); - } - } - } - for(index = 0; index < rank; index++) { - smpi_free_tmp_buffer(tmpbufs[index]); - } - for(index = 0; index < size-1; index++) { - Request::unref(&requests[index]); - } - xbt_free(tmpbufs); - xbt_free(requests); -} - void smpi_empty_status(MPI_Status * status) { if(status != MPI_STATUS_IGNORE) { diff --git a/src/smpi/smpi_coll.cpp b/src/smpi/smpi_coll.cpp index 15d583b86c..37a3612e43 100644 --- a/src/smpi/smpi_coll.cpp +++ b/src/smpi/smpi_coll.cpp @@ -17,51 +17,38 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI (coll)"); s_mpi_coll_description_t mpi_coll_gather_description[] = { - {"default", "gather default collective", reinterpret_cast(&smpi_mpi_gather)}, COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_allgather_description[] = { {"default", "allgather default collective", - reinterpret_cast(&smpi_mpi_allgather)}, COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_allgather_description[] = { + COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} +}; -s_mpi_coll_description_t mpi_coll_allgatherv_description[] = {{"default", "allgatherv default collective", - reinterpret_cast(&smpi_mpi_allgatherv)}, COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_allgatherv_description[] = { COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_allreduce_description[] = { {"default", "allreduce default collective", - reinterpret_cast(&smpi_mpi_allreduce)}, COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_allreduce_description[] ={ COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {{"default", "reduce_scatter default collective", - reinterpret_cast(&smpi_mpi_reduce_scatter)}, COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_scatter_description[] = { {"default", "scatter default collective", - reinterpret_cast(&smpi_mpi_scatter)}, COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_scatter_description[] 
={COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_barrier_description[] = { {"default", "barrier default collective", - reinterpret_cast(&smpi_mpi_barrier)}, COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_barrier_description[] ={COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_alltoall_description[] = { {"default", "Ompi alltoall default collective", - reinterpret_cast(&smpi_coll_tuned_alltoall_ompi2)}, COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), - {"bruck", "Alltoall Bruck (SG) collective", - reinterpret_cast(&smpi_coll_tuned_alltoall_bruck)}, - {"basic_linear", "Alltoall basic linear (SG) collective", - reinterpret_cast(&smpi_coll_tuned_alltoall_basic_linear)}, {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_alltoall_description[] = {COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_alltoallv_description[] = { {"default", "Ompi alltoallv default collective", - reinterpret_cast(&smpi_coll_basic_alltoallv)}, COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), +s_mpi_coll_description_t mpi_coll_alltoallv_description[] = {COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_bcast_description[] = { {"default", "bcast default collective ", - reinterpret_cast(&smpi_mpi_bcast)}, COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +s_mpi_coll_description_t mpi_coll_bcast_description[] = {COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_reduce_description[] = { {"default", "reduce default collective", - reinterpret_cast(&smpi_mpi_reduce)}, COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t mpi_coll_reduce_description[] = {COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; @@ -73,7 +60,7 @@ void coll_help(const char *category, s_mpi_coll_description_t * table) printf(" %s: %s\n", table[i].name, table[i].description); } -int find_coll_description(s_mpi_coll_description_t * table, char *name, const char *desc) +int find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc) { char *name_list = nullptr; int selector_on=0; @@ -108,204 +95,329 @@ int find_coll_description(s_mpi_coll_description_t * table, char *name, const ch return -1; } -int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); -int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allgatherv_fun)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); -int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); -int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -int (*mpi_coll_reduce_scatter_fun)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op 
op,MPI_Comm comm); -int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); -int (*mpi_coll_barrier_fun)(MPI_Comm comm); void (*smpi_coll_cleanup_callback)(); +namespace simgrid{ +namespace smpi{ + +int (*Colls::gather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); +int (*Colls::allgather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +int (*Colls::allgatherv)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +int (*Colls::allreduce)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); +int (*Colls::alltoall)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +int (*Colls::alltoallv)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +int (*Colls::bcast)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); +int (*Colls::reduce)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +int (*Colls::reduce_scatter)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); +int (*Colls::scatter)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); +int (*Colls::barrier)(MPI_Comm comm); + + +#define COLL_SETTER(cat, ret, args, args2)\ +void Colls::set_##cat (const char * name){\ + int id = find_coll_description(mpi_coll_## cat ##_description,\ + name,#cat);\ + cat = reinterpret_cast\ + (mpi_coll_## cat ##_description[id].coll);\ +} + +COLL_APPLY(COLL_SETTER,COLL_GATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHERV_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLREDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BARRIER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BCAST_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALL_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALLV_SIG,""); + + +void Colls::set_collectives(){ + const char* selector_name = static_cast(xbt_cfg_get_string("smpi/coll-selector")); + if (selector_name==nullptr || selector_name[0] == '\0') + selector_name = "default"; + + const char* name = xbt_cfg_get_string("smpi/gather"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_gather(name); + + name = xbt_cfg_get_string("smpi/allgather"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allgather(name); + + name = xbt_cfg_get_string("smpi/allgatherv"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allgatherv(name); + + name = xbt_cfg_get_string("smpi/allreduce"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allreduce(name); + + name = xbt_cfg_get_string("smpi/alltoall"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_alltoall(name); + + name = xbt_cfg_get_string("smpi/alltoallv"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_alltoallv(name); + + name = xbt_cfg_get_string("smpi/reduce"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_reduce(name); -int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + name = xbt_cfg_get_string("smpi/reduce-scatter"); + if 
(name==nullptr || name[0] == '\0') + name = selector_name; + + set_reduce_scatter(name); + + name = xbt_cfg_get_string("smpi/scatter"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_scatter(name); + + name = xbt_cfg_get_string("smpi/bcast"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_bcast(name); + + name = xbt_cfg_get_string("smpi/barrier"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_barrier(name); +} + + +int Colls::gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, + MPI_Datatype recvtype, int root, MPI_Comm comm) { + int system_tag = COLL_TAG_GATHERV; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int rank = comm->rank(); int size = comm->size(); - int sendsize = sendtype->size() * sendcount; - if (sendsize < 200 && size > 12) { - return smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - } else if (sendsize < 3000) { - return smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + if (rank != root) { + // Send buffer to root + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); } else { - return smpi_coll_tuned_alltoall_ring(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + recvtype->extent(&lb, &recvext); + // Local copy from root + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + displs[root] * recvext, + recvcounts[root], recvtype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if(src != root) { + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[src] * recvext, + recvcounts[src], recvtype, src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int src = 0; src < size-1; src++) { + Request::unref(&requests[src]); + } + xbt_free(requests); } + return MPI_SUCCESS; } -/** - * Alltoall Bruck - * - * Openmpi calls this routine when the message size sent to each rank < 2000 bytes and size < 12 - * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not less... - **/ -int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + +int Colls::scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm) { - int system_tag = 777; - int i; - int count; - MPI_Aint lb; + int system_tag = COLL_TAG_SCATTERV; + MPI_Aint lb = 0; MPI_Aint sendext = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - // FIXME: check implementation int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm alltoall_bruck() called.", rank); - sendtype->extent(&lb, &sendext); - recvtype->extent(&lb, &recvext); - /* Local copy from self */ - int err = Datatype::copy(static_cast(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, - static_cast(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. 
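
The COLL_SETTER macro defined a few lines above generates one Colls::set_<collective>() per collective. Since preprocessor output is hard to read in a diff, here is roughly what the reduce instance expands to; the cast's target type is spelled out here from the Colls::reduce declaration, the macro itself builds it from its ret/args parameters:

void Colls::set_reduce(const char* name) {
  int id = find_coll_description(mpi_coll_reduce_description, name, "reduce");
  reduce = reinterpret_cast<int (*)(void*, void*, int, MPI_Datatype, MPI_Op, int, MPI_Comm)>(
      mpi_coll_reduce_description[id].coll);
}
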
*/ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i != rank) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [src = %d, recvcount = %d]", rank, i, recvcount); - } + if(rank != root) { + // Recv buffer from root + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + } else { + sendtype->extent(&lb, &sendext); + // Local copy from root + if(recvbuf!=MPI_IN_PLACE){ + Datatype::copy(static_cast(sendbuf) + displs[root] * sendext, sendcounts[root], + sendtype, recvbuf, recvcount, recvtype); } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i != rank) { - requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [dst = %d, sendcount = %d]", rank, i, sendcount); + // Send buffers to receivers + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int dst = 0; dst < size; dst++) { + if (dst != root) { + requests[index] = Request::isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], + sendtype, dst, system_tag, comm); + index++; } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unref(&requests[i]); + // Wait for completion of isend's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int dst = 0; dst < size-1; dst++) { + Request::unref(&requests[dst]); } xbt_free(requests); } return MPI_SUCCESS; } -/** - * Alltoall basic_linear (STARMPI:alltoall-simple) - **/ -int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + +int Colls::scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 888; - int i; - int count; - MPI_Aint lb = 0, sendext = 0, recvext = 0; - MPI_Request *requests; + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; - /* Initialize. */ int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank); - sendtype->extent(&lb, &sendext); - recvtype->extent(&lb, &recvext); - /* simple optimization */ - int err = Datatype::copy(static_cast(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, - static_cast(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. */ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - /* Post all receives first -- a simple optimization */ - count = 0; - for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; - } - /* Now post all sends in reverse order - * - We would like to minimize the search time through message queue - * when messages actually arrive in the order in which they were posted. 
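
The hand-written bruck and basic_linear alltoalls dropped from smpi_coll.cpp are meant to stay reachable through the description table rather than through private helpers, so forcing a particular variant becomes a configuration matter. A hedged usage sketch, assuming "bruck" is among the names generated by COLL_ALLTOALLS as it was listed in the old table:

// Pick the alltoall algorithm by name, then dispatch through the pointer.
static int alltoall_with_bruck(void* sendbuf, void* recvbuf, int count, MPI_Comm comm) {
  Colls::set_alltoall("bruck");      // name assumed to exist in the generated table
  return Colls::alltoall(sendbuf, count, MPI_INT, recvbuf, count, MPI_INT, comm);
}
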
- * TODO: check the previous assertion - */ - for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) { - requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; + + datatype->extent(&lb, &dataext); + + // Local copy from self + Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype); + + // Send/Recv buffers to/from others; + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; + } + if(index < rank) { + // #Request is below rank: it's a irecv + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); + } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unref(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank) { + if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; } -int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) +int Colls::exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 889; - int i; - int count; - MPI_Aint lb = 0; - MPI_Aint sendext = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - - /* Initialize. */ + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; + int recvbuf_is_empty=1; int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank); - sendtype->extent(&lb, &sendext); - recvtype->extent(&lb, &recvext); - /* Local copy from self */ - int err = Datatype::copy(static_cast(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype, - static_cast(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. 
*/ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i != rank && recvcounts[i] != 0) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + recvdisps[i] * recvext, - recvcounts[i], recvtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]); + + datatype->extent(&lb, &dataext); + + // Send/Recv buffers to/from others; + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; } - } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i != rank && sendcounts[i] != 0) { - requests[count] = Request::isend_init(static_cast(sendbuf) + senddisps[i] * sendext, - sendcounts[i], sendtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]); + if(index < rank) { + if(recvbuf_is_empty){ + Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty=0; + } else + // #Request is below rank: it's a irecv + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unref(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank) { + if (recvbuf_is_empty) { + Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty = 0; + } else + if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; +} + +} } + + + + + diff --git a/src/smpi/smpi_coll.hpp b/src/smpi/smpi_coll.hpp new file mode 100644 index 0000000000..7728984e27 --- /dev/null +++ b/src/smpi/smpi_coll.hpp @@ -0,0 +1,133 @@ +/*High level handling of collective algorithms*/ +/* Copyright (c) 2009-2010, 2012-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. 
*/ + +#ifndef SMPI_COLL_HPP +#define SMPI_COLL_HPP + +#include + +#include "private.h" + +namespace simgrid{ +namespace smpi{ + +class Colls{ + private: + public: + static void set_collectives(); + static void set_gather(const char* name); + static void set_allgather(const char* name); + static void set_allgatherv(const char* name); + static void set_alltoall(const char* name); + static void set_alltoallv(const char* name); + static void set_allreduce(const char* name); + static void set_reduce(const char* name); + static void set_reduce_scatter(const char* name); + static void set_scatter(const char* name); + static void set_barrier(const char* name); + static void set_bcast(const char* name); + + static void coll_help(const char *category, s_mpi_coll_description_t * table); + + static int (*gather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); + static int (*allgather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); + static int (*allgatherv)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); + static int (*allreduce)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); + static int (*alltoall)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); + static int (*alltoallv)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); + static int (*bcast)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); + static int (*reduce)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); + static int (*reduce_scatter)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); + static int (*scatter)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); + static int (*barrier)(MPI_Comm comm); + +//These fairly unused collectives only have one implementation in SMPI + + static int gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm); + static int scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); + static int scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); + static int exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +}; + +class Coll_algo{ + private: + char* description_; + public: + char* description(); +}; + +class Coll_gather : public Coll_algo { + private: + public: + static int gather (void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); +}; + +class Coll_allgather : public Coll_algo { + private: + public: + static int allgather (void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +}; + +class Coll_allgatherv : public Coll_algo { + private: + public: + static int allgatherv (void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +}; + +class Coll_allreduce : public Coll_algo { + private: + public: + static int allreduce (void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); +}; + +class Coll_alltoall : public Coll_algo { + private: + public: + static int alltoall (void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +}; + +class Coll_alltoallv : public Coll_algo { + private: + public: + static int alltoallv (void *, int*, int*, 
MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +}; + +class Coll_bcast : public Coll_algo { + private: + public: + static int bcast (void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); +}; + +class Coll_reduce : public Coll_algo { + private: + public: + static int reduce (void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +}; + +class Coll_reduce_scatter : public Coll_algo { + private: + public: + static int reduce_scatter (void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); +}; + +class Coll_scatter : public Coll_algo { + private: + public: + static int scatter (void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); +}; + +class Coll_barrier : public Coll_algo { + private: + public: + static int barrier (MPI_Comm); +}; + + +} +} + +#endif diff --git a/src/smpi/smpi_comm.cpp b/src/smpi/smpi_comm.cpp index aa6c13e920..179708675f 100644 --- a/src/smpi/smpi_comm.cpp +++ b/src/smpi/smpi_comm.cpp @@ -219,7 +219,7 @@ MPI_Comm Comm::split(int color, int key) } else { recvbuf = nullptr; } - smpi_mpi_gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, this); + Coll_gather_default::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, this); xbt_free(sendbuf); /* Do the actual job */ if(rank == 0) { @@ -393,7 +393,7 @@ void Comm::init_smp(){ leader_list[i]=-1; } - smpi_coll_tuned_allgather_mpich(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this); + Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this); if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables smpi_switch_data_segment(smpi_process_index()); @@ -452,7 +452,7 @@ void Comm::init_smp(){ int my_local_size=comm_intra->size(); if(comm_intra->rank()==0) { int* non_uniform_map = xbt_new0(int,leader_group_size); - smpi_coll_tuned_allgather_mpich(&my_local_size, 1, MPI_INT, + Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT, non_uniform_map, 1, MPI_INT, leader_comm); for(i=0; i < leader_group_size; i++) { if(non_uniform_map[0] != non_uniform_map[i]) { @@ -467,7 +467,7 @@ void Comm::init_smp(){ } is_uniform_=is_uniform; } - smpi_coll_tuned_bcast_mpich(&(is_uniform_),1, MPI_INT, 0, comm_intra ); + Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra ); if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables smpi_switch_data_segment(smpi_process_index()); @@ -485,7 +485,7 @@ void Comm::init_smp(){ } int global_blocked; - smpi_mpi_allreduce(&is_blocked, &(global_blocked), 1, MPI_INT, MPI_LAND, this); + Coll_allreduce_default::allreduce(&is_blocked, &(global_blocked), 1, MPI_INT, MPI_LAND, this); if(MPI_COMM_WORLD==MPI_COMM_UNINITIALIZED || this==MPI_COMM_WORLD){ if(this->rank()==0){ diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index 34065aacaf..bda2de80d2 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -715,57 +715,8 @@ static void smpi_init_logs(){ } static void smpi_init_options(){ - int gather_id = find_coll_description(mpi_coll_gather_description, xbt_cfg_get_string("smpi/gather"),"gather"); - mpi_coll_gather_fun = reinterpret_cast - (mpi_coll_gather_description[gather_id].coll); - - int allgather_id = find_coll_description(mpi_coll_allgather_description, - xbt_cfg_get_string("smpi/allgather"),"allgather"); - mpi_coll_allgather_fun = reinterpret_cast - 
      (mpi_coll_allgather_description[allgather_id].coll);
-
-  int allgatherv_id = find_coll_description(mpi_coll_allgatherv_description,
-                                            xbt_cfg_get_string("smpi/allgatherv"),"allgatherv");
-  mpi_coll_allgatherv_fun = reinterpret_cast
-      (mpi_coll_allgatherv_description[allgatherv_id].coll);
-
-  int allreduce_id = find_coll_description(mpi_coll_allreduce_description,
-                                           xbt_cfg_get_string("smpi/allreduce"),"allreduce");
-  mpi_coll_allreduce_fun = reinterpret_cast
-      (mpi_coll_allreduce_description[allreduce_id].coll);
-
-  int alltoall_id = find_coll_description(mpi_coll_alltoall_description,
-                                          xbt_cfg_get_string("smpi/alltoall"),"alltoall");
-  mpi_coll_alltoall_fun = reinterpret_cast
-      (mpi_coll_alltoall_description[alltoall_id].coll);
-
-  int alltoallv_id = find_coll_description(mpi_coll_alltoallv_description,
-                                           xbt_cfg_get_string("smpi/alltoallv"),"alltoallv");
-  mpi_coll_alltoallv_fun = reinterpret_cast
-      (mpi_coll_alltoallv_description[alltoallv_id].coll);
-
-  int bcast_id = find_coll_description(mpi_coll_bcast_description, xbt_cfg_get_string("smpi/bcast"),"bcast");
-  mpi_coll_bcast_fun = reinterpret_cast
-      (mpi_coll_bcast_description[bcast_id].coll);
-
-  int reduce_id = find_coll_description(mpi_coll_reduce_description, xbt_cfg_get_string("smpi/reduce"),"reduce");
-  mpi_coll_reduce_fun = reinterpret_cast
-      (mpi_coll_reduce_description[reduce_id].coll);
-
-  int reduce_scatter_id =
-      find_coll_description(mpi_coll_reduce_scatter_description,
-                            xbt_cfg_get_string("smpi/reduce-scatter"),"reduce_scatter");
-  mpi_coll_reduce_scatter_fun = reinterpret_cast
-      (mpi_coll_reduce_scatter_description[reduce_scatter_id].coll);
-
-  int scatter_id = find_coll_description(mpi_coll_scatter_description, xbt_cfg_get_string("smpi/scatter"),"scatter");
-  mpi_coll_scatter_fun = reinterpret_cast
-      (mpi_coll_scatter_description[scatter_id].coll);
-
-  int barrier_id = find_coll_description(mpi_coll_barrier_description, xbt_cfg_get_string("smpi/barrier"),"barrier");
-  mpi_coll_barrier_fun = reinterpret_cast
-      (mpi_coll_barrier_description[barrier_id].coll);
+  Colls::set_collectives();
   smpi_coll_cleanup_callback=nullptr;
   smpi_cpu_threshold = xbt_cfg_get_double("smpi/cpu-threshold");
   smpi_host_speed = xbt_cfg_get_double("smpi/host-speed");
diff --git a/src/smpi/smpi_pmpi.cpp b/src/smpi/smpi_pmpi.cpp
index 6e89a6d9a6..853c980c3b 100644
--- a/src/smpi/smpi_pmpi.cpp
+++ b/src/smpi/smpi_pmpi.cpp
@@ -1386,7 +1386,7 @@ int PMPI_Bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm c
       extra->send_size = count * dt_size_send;
       TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
       if (comm->size() > 1)
-        mpi_coll_bcast_fun(buf, count, datatype, root, comm);
+        Colls::bcast(buf, count, datatype, root, comm);
       retval = MPI_SUCCESS;
       TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -1409,7 +1409,7 @@ int PMPI_Barrier(MPI_Comm comm)
     extra->type = TRACING_BARRIER;
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_barrier_fun(comm);
+    Colls::barrier(comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -1461,7 +1461,7 @@ int PMPI_Gather(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbu
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    mpi_coll_gather_fun(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, root, comm);
+    Colls::gather(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, root, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -1521,8 +1521,7 @@ int PMPI_Gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recv
     }
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    smpi_mpi_gatherv(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcounts, displs, recvtype, root, comm);
-    retval = MPI_SUCCESS;
+    retval = Colls::gatherv(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcounts, displs, recvtype, root, comm);
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   }
@@ -1568,7 +1567,7 @@ int PMPI_Allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_allgather_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+    Colls::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   }
@@ -1620,7 +1619,7 @@ int PMPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_allgatherv_fun(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
+    Colls::allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   }
@@ -1668,7 +1667,7 @@ int PMPI_Scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
     extra->recv_size = recvcount * dt_size_recv;
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    mpi_coll_scatter_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
+    Colls::scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   }
@@ -1721,9 +1720,8 @@ int PMPI_Scatterv(void *sendbuf, int *sendcounts, int *displs,
     extra->recv_size = recvcount * dt_size_recv;
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    smpi_mpi_scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
+    retval = Colls::scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
-    retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   }
@@ -1756,7 +1754,7 @@ int PMPI_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
     TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
-    mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, root, comm);
+    Colls::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -1811,7 +1809,7 @@ int PMPI_Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatyp
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_allreduce_fun(sendtmpbuf, recvbuf, count, datatype, op, comm);
+    Colls::allreduce(sendtmpbuf, recvbuf, count, datatype, op, comm);
     if( sendbuf == MPI_IN_PLACE )
       xbt_free(sendtmpbuf);
@@ -1849,9 +1847,8 @@ int PMPI_Scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MP
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    smpi_mpi_scan(sendbuf, recvbuf, count, datatype, op, comm);
+    retval = Colls::scan(sendbuf, recvbuf, count, datatype, op, comm);
-    retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   }
@@ -1887,8 +1884,8 @@ int PMPI_Exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
     }
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    smpi_mpi_exscan(sendtmpbuf, recvbuf, count, datatype, op, comm);
-    retval = MPI_SUCCESS;
+    retval = Colls::exscan(sendtmpbuf, recvbuf, count, datatype, op, comm);
+
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
     if (sendbuf == MPI_IN_PLACE)
       xbt_free(sendtmpbuf);
@@ -1938,7 +1935,7 @@ int PMPI_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datat
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    mpi_coll_reduce_scatter_fun(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
+    Colls::reduce_scatter(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
     retval = MPI_SUCCESS;
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -1991,7 +1988,7 @@ int PMPI_Reduce_scatter_block(void *sendbuf, void *recvbuf, int recvcount,
     int* recvcounts = static_cast(xbt_malloc(count * sizeof(int)));
     for (int i = 0; i < count; i++)
       recvcounts[i] = recvcount;
-    mpi_coll_reduce_scatter_fun(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
+    Colls::reduce_scatter(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
     xbt_free(recvcounts);
     retval = MPI_SUCCESS;
@@ -2044,7 +2041,7 @@ int PMPI_Alltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* rec
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    retval = mpi_coll_alltoall_fun(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, comm);
+    retval = Colls::alltoall(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, comm);
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -2117,7 +2114,7 @@ int PMPI_Alltoallv(void* sendbuf, int* sendcounts, int* senddisps, MPI_Datatype
     }
     extra->num_processes = size;
     TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-    retval = mpi_coll_alltoallv_fun(sendtmpbuf, sendtmpcounts, sendtmpdisps, sendtmptype, recvbuf, recvcounts,
+    retval = Colls::alltoallv(sendtmpbuf, sendtmpcounts, sendtmpdisps, sendtmptype, recvbuf, recvcounts,
                                     recvdisps, recvtype, comm);
     TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
diff --git a/src/smpi/smpi_replay.cpp b/src/smpi/smpi_replay.cpp
index 282eb70e7d..3292e7c398 100644
--- a/src/smpi/smpi_replay.cpp
+++ b/src/smpi/smpi_replay.cpp
@@ -10,6 +10,8 @@
 #define KEY_SIZE (sizeof(int) * 2 + 1)
+using namespace simgrid::smpi;
+
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_replay,smpi,"Trace Replay with SMPI");
 int communicator_size = 0;
@@ -445,7 +447,7 @@ static void action_barrier(const char *const *action){
   extra->type = TRACING_BARRIER;
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
-  mpi_coll_barrier_fun(MPI_COMM_WORLD);
+  Colls::barrier(MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -477,7 +479,7 @@ static void action_bcast(const char *const *action)
   TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
   void *sendbuf = smpi_get_tmp_sendbuffer(size* MPI_CURRENT_TYPE->size());
-  mpi_coll_bcast_fun(sendbuf, size, MPI_CURRENT_TYPE, root, MPI_COMM_WORLD);
+  Colls::bcast(sendbuf, size, MPI_CURRENT_TYPE, root, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
   log_timed_action (action, clock);
@@ -511,7 +513,7 @@ static void action_reduce(const char *const *action)
   void *recvbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
   void *sendbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
-  mpi_coll_reduce_fun(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, root, MPI_COMM_WORLD);
+  Colls::reduce(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, root, MPI_COMM_WORLD);
   smpi_execute_flops(comp_size);
   TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
@@ -539,7 +541,7 @@ static void action_allReduce(const char *const *action) {
   void *recvbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
   void *sendbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
-  mpi_coll_allreduce_fun(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
+  Colls::allreduce(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
   smpi_execute_flops(comp_size);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -574,7 +576,7 @@ static void action_allToAll(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_alltoall_fun(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
+  Colls::alltoall(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -622,7 +624,7 @@ static void action_gather(const char *const *action) {
   TRACE_smpi_collective_in(smpi_process_index(), root, __FUNCTION__, extra);
-  mpi_coll_gather_fun(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
+  Colls::gather(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(smpi_process_index(), -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -680,7 +682,7 @@ static void action_gatherv(const char *const *action) {
   TRACE_smpi_collective_in(smpi_process_index(), root, __FUNCTION__, extra);
-  smpi_mpi_gatherv(send, send_size, MPI_CURRENT_TYPE, recv, recvcounts, disps, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
+  Colls::gatherv(send, send_size, MPI_CURRENT_TYPE, recv, recvcounts, disps, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(smpi_process_index(), -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -726,7 +728,7 @@ static void action_reducescatter(const char *const *action) {
   void *sendbuf = smpi_get_tmp_sendbuffer(size* MPI_CURRENT_TYPE->size());
   void *recvbuf = smpi_get_tmp_recvbuffer(size* MPI_CURRENT_TYPE->size());
-  mpi_coll_reduce_scatter_fun(sendbuf, recvbuf, recvcounts, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
+  Colls::reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
   smpi_execute_flops(comp_size);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -769,7 +771,7 @@ static void action_allgather(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_allgather_fun(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcount, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
+  Colls::allgather(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcount, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
   log_timed_action (action, clock);
@@ -821,7 +823,7 @@ static void action_allgatherv(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_allgatherv_fun(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcounts, disps, MPI_CURRENT_TYPE2,
+  Colls::allgatherv(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcounts, disps, MPI_CURRENT_TYPE2,
                     MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
@@ -885,7 +887,7 @@ static void action_allToAllv(const char *const *action) {
   TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
-  mpi_coll_alltoallv_fun(sendbuf, sendcounts, senddisps, MPI_CURRENT_TYPE,recvbuf, recvcounts, recvdisps,
+  Colls::alltoallv(sendbuf, sendcounts, senddisps, MPI_CURRENT_TYPE,recvbuf, recvcounts, recvdisps,
                    MPI_CURRENT_TYPE, MPI_COMM_WORLD);
   TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
diff --git a/src/smpi/smpi_win.cpp b/src/smpi/smpi_win.cpp
index de5d18cc33..c12c4581b5 100644
--- a/src/smpi/smpi_win.cpp
+++ b/src/smpi/smpi_win.cpp
@@ -29,12 +29,12 @@ Win::Win(void *base, MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm)
   if(rank==0){
     bar_ = MSG_barrier_init(comm_size);
   }
-  mpi_coll_allgather_fun(&(connected_wins_[rank]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win),
+  Colls::allgather(&(connected_wins_[rank]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win),
                          MPI_BYTE, comm);
-  mpi_coll_bcast_fun(&(bar_), sizeof(msg_bar_t), MPI_BYTE, 0, comm);
+  Colls::bcast(&(bar_), sizeof(msg_bar_t), MPI_BYTE, 0, comm);
-  mpi_coll_barrier_fun(comm);
+  Colls::barrier(comm);
 }
 Win::~Win(){
@@ -51,7 +51,7 @@ Win::~Win(){
     MPI_Info_free(&info_);
   }
-  mpi_coll_barrier_fun(comm_);
+  Colls::barrier(comm_);
   int rank=comm_->rank();
   if(rank == 0)
     MSG_barrier_destroy(bar_);
diff --git a/teshsuite/smpi/coll-allgather/coll-allgather.tesh b/teshsuite/smpi/coll-allgather/coll-allgather.tesh
index 6778ee1c5c..5c99a9d730 100644
--- a/teshsuite/smpi/coll-allgather/coll-allgather.tesh
+++ b/teshsuite/smpi/coll-allgather/coll-allgather.tesh
@@ -2,7 +2,7 @@
 ! setenv LD_LIBRARY_PATH=../../lib
 ! output sort
-p Test all to all
+p Test allgather
 $ ${bindir:=.}/../../../bin/smpirun -map -hostfile ../hostfile_coll -platform ../../../examples/platforms/small_platform.xml -np 16 --log=xbt_cfg.thres:critical ${bindir:=.}/coll-allgather --log=smpi_kernel.thres:warning --log=smpi_coll.thres:error
 > [rank 0] -> Tremblay
 > [rank 1] -> Tremblay
diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake
index 8b9365bad1..9c63deb29c 100644
--- a/tools/cmake/DefinePackages.cmake
+++ b/tools/cmake/DefinePackages.cmake
@@ -140,9 +140,10 @@ set(SMPI_SRC
   src/smpi/colls/allreduce/allreduce-smp-rsag.cpp
   src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp
   src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp
+  src/smpi/colls/alltoall/alltoall-basic-linear.cpp
   src/smpi/colls/alltoall/alltoall-2dmesh.cpp
   src/smpi/colls/alltoall/alltoall-3dmesh.cpp
-#  src/smpi/colls/alltoall/alltoall-bruck.cpp
+  src/smpi/colls/alltoall/alltoall-bruck.cpp
   src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp
   src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp
   src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp
@@ -200,6 +201,7 @@ set(SMPI_SRC
   src/smpi/colls/scatter/scatter-ompi.cpp
   src/smpi/colls/scatter/scatter-mvapich-two-level.cpp
   src/smpi/colls/smpi_automatic_selector.cpp
+  src/smpi/colls/smpi_default_selector.cpp
   src/smpi/colls/smpi_mpich_selector.cpp
   src/smpi/colls/smpi_intel_mpi_selector.cpp
   src/smpi/colls/smpi_openmpi_selector.cpp
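
Note on the pattern used throughout this patch: every call site now goes through static methods of the Colls class, and the per-collective function-pointer lookups are folded into a single Colls::set_collectives() done at initialization. The following stand-alone sketch is illustrative only, not SimGrid code: the toy Comm type, the bcast_flat/bcast_binomial algorithms, the bcast_impl pointer and the std::map lookup are all assumptions made to show how such a facade can replace per-collective global function pointers.

// Minimal sketch (hypothetical names), C++11.
#include <cstdio>
#include <map>
#include <string>

struct Comm { int size; };            // stand-in for MPI_Comm

class Colls {
  // one settable implementation pointer per collective, hidden behind a static method
  static int (*bcast_impl)(void* buf, int count, int root, Comm* comm);
public:
  // pick an implementation by name, e.g. from a "smpi/bcast"-style config entry
  static void set_collectives(const std::string& bcast_name);
  // every caller uses the same entry point, whatever algorithm was selected
  static int bcast(void* buf, int count, int root, Comm* comm) {
    return bcast_impl(buf, count, root, comm);
  }
};

// two toy algorithms playing the role of the real per-algorithm source files
static int bcast_flat(void*, int count, int root, Comm* comm) {
  std::printf("flat bcast: %d elements, root %d, %d ranks\n", count, root, comm->size);
  return 0;
}
static int bcast_binomial(void*, int count, int root, Comm* comm) {
  std::printf("binomial bcast: %d elements, root %d, %d ranks\n", count, root, comm->size);
  return 0;
}

int (*Colls::bcast_impl)(void*, int, int, Comm*) = bcast_flat;

void Colls::set_collectives(const std::string& bcast_name) {
  // the name-to-function table replaces a scan over a description array
  static const std::map<std::string, int (*)(void*, int, int, Comm*)> table = {
      {"flat", bcast_flat}, {"binomial", bcast_binomial}};
  bcast_impl = table.at(bcast_name);
}

int main() {
  Comm world{16};
  Colls::set_collectives("binomial");     // done once at startup
  Colls::bcast(nullptr, 1024, 0, &world); // call sites never change
}

Because call sites only ever name Colls::bcast(), swapping the selected algorithm touches a single place, which is the effect the "+ Colls::set_collectives();" line above has on the old block of reinterpret_cast assignments.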