Still needs a lot to be considered clean.
class Type_Vector;
class Win;
+class Colls;
+
}
}
* \brief The list of all available gather collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_gather_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, int, MPI_Comm));
/** \ingroup MPI allgather
* \brief The list of all available allgather collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allgather_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_allgather_fun) (void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm));
/** \ingroup MPI allgather
* \brief The list of all available allgather collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allgatherv_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_allgatherv_fun) (void *, int, MPI_Datatype, void *, int*, int*, MPI_Datatype, MPI_Comm));
/** \ingroup MPI allreduce
* \brief The list of all available allreduce collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_allreduce_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype,MPI_Op op,
- MPI_Comm comm));
/** \ingroup MPI alltoall
* \brief The list of all available alltoall collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_alltoall_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm));
/** \ingroup MPI alltoallv
* \brief The list of all available alltoallv collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_alltoallv_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void *, int*, int*, MPI_Datatype,
- MPI_Comm));
/** \ingroup MPI bcast
* \brief The list of all available bcast collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_bcast_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com));
/** \ingroup MPI reduce
* \brief The list of all available reduce collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_reduce_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype,
- MPI_Op op, int root, MPI_Comm comm));
/** \ingroup MPI reduce_scatter
* \brief The list of all available reduce_scatter collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_reduce_scatter_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_reduce_scatter_fun) (void *sbuf, void *rbuf, int *rcounts,
- MPI_Datatype dtype, MPI_Op op,MPI_Comm comm));
/** \ingroup MPI scatter
* \brief The list of all available scatter collectives
*/
XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_scatter_description[];
-XBT_PUBLIC_DATA(int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm));
/** \ingroup MPI barrier
* \brief The list of all available barrier collectives
XBT_PUBLIC_DATA(int (*mpi_coll_barrier_fun)(MPI_Comm comm));
XBT_PUBLIC(void) coll_help(const char *category, s_mpi_coll_description_t * table);
-XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table, char *name, const char *desc);
+XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc);
XBT_PUBLIC_DATA(void) (*smpi_coll_cleanup_callback)();
XBT_PUBLIC(void) smpi_coll_cleanup_mvapich2(void);
* "simple"
* Author: Ahmad Faraj
****************************************************************************/
+
+namespace simgrid{
+namespace smpi{
+
+
int
-smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype
+Coll_allgather_2dmesh::allgather(void *send_buff, int send_count, MPI_Datatype
send_type, void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
{
return MPI_SUCCESS;
}
+
+}
+}
* follows "simple"
* Author: Ahmad Faraj
****************************************************************************/
-int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count,
+namespace simgrid{
+namespace smpi{
+
+
+int Coll_allgather_3dmesh::allgather(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+
+}
+}
#include "../colls_private.h"
+using namespace simgrid::smpi;
+
// Allgather - gather/bcast algorithm
-int smpi_coll_tuned_allgather_GB(void *send_buff, int send_count,
+int Coll_allgather_GB::allgather(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
{
int num_procs;
num_procs = comm->size();
- mpi_coll_gather_fun(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,
+ Colls::gather(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,
0, comm);
- mpi_coll_bcast_fun(recv_buff, (recv_count * num_procs), recv_type, 0, comm);
+ Colls::bcast(recv_buff, (recv_count * num_procs), recv_type, 0, comm);
return MPI_SUCCESS;
}
#include "../colls_private.h"
+namespace simgrid{
+namespace smpi{
+
+
// Allgather-Non-Topology-Specific-Logical-Ring algorithm
int
-smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype,
+Coll_allgather_NTSLR_NB::allgather(void *sbuf, int scount, MPI_Datatype stype,
void *rbuf, int rcount, MPI_Datatype rtype,
MPI_Comm comm)
{
// irregular case: use default MPI functions
if (scount * sextent != rcount * rextent) {
XBT_WARN("MPI_allgather_NTSLR_NB use default MPI_allgather.");
- smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
return MPI_SUCCESS;
}
return MPI_SUCCESS;
}
+
+}
+}
#include "../colls_private.h"
+namespace simgrid{
+namespace smpi{
+
+
+
// Allgather-Non-Topology-Specific-Logical-Ring algorithm
int
-smpi_coll_tuned_allgather_NTSLR(void *sbuf, int scount, MPI_Datatype stype,
+Coll_allgather_NTSLR::allgather(void *sbuf, int scount, MPI_Datatype stype,
void *rbuf, int rcount, MPI_Datatype rtype,
MPI_Comm comm)
{
// irregular case: use default MPI functions
if (scount * sextent != rcount * rextent) {
XBT_WARN("MPI_allgather_NTSLR use default MPI_allgather.");
- smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
return MPI_SUCCESS;
}
return MPI_SUCCESS;
}
+
+
+}
+}
#include "../colls_private.h"
-int smpi_coll_tuned_allgather_SMP_NTS(void *sbuf, int scount,
+namespace simgrid{
+namespace smpi{
+
+
+int Coll_allgather_SMP_NTS::allgather(void *sbuf, int scount,
MPI_Datatype stype, void *rbuf,
int rcount, MPI_Datatype rtype,
MPI_Comm comm)
/* for too small number of processes, use default implementation */
if (comm_size <= num_core) {
XBT_WARN("MPI_allgather_SMP_NTS use default MPI_allgather.");
- smpi_mpi_allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
+ Coll_allgather_default::allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
return MPI_SUCCESS;
}
return MPI_SUCCESS;
}
+
+
+}
+}
* Comment: Original bruck algorithm from MPICH is slightly modified by
* Ahmad Faraj.
****************************************************************************/
-int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count,
+
+namespace simgrid{
+namespace smpi{
+
+
+
+int Coll_allgather_bruck::allgather(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
smpi_free_tmp_buffer(tmp_buff);
return MPI_SUCCESS;
}
+
+
+}
+}
#include "../colls_private.h"
-int smpi_coll_tuned_allgather_loosely_lr(void *sbuf, int scount,
+namespace simgrid{
+namespace smpi{
+
+
+int Coll_allgather_loosely_lr::allgather(void *sbuf, int scount,
MPI_Datatype stype, void *rbuf,
int rcount, MPI_Datatype rtype,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+
+}
+}
*/
#include "../colls_private.h"
+using namespace simgrid::smpi;
-
-int smpi_coll_tuned_allgather_mvapich2_smp(void *sendbuf,int sendcnt, MPI_Datatype sendtype,
+int Coll_allgather_mvapich2_smp::allgather(void *sendbuf,int sendcnt, MPI_Datatype sendtype,
void *recvbuf, int recvcnt,MPI_Datatype recvtype,
MPI_Comm comm)
{
/*If there is just one node, after gather itself,
* root has all the data and it can do bcast*/
if(local_rank == 0) {
- mpi_errno = mpi_coll_gather_fun(sendbuf, sendcnt,sendtype,
+ mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
(void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
recvcnt, recvtype,
0, shmem_comm);
/*Since in allgather all the processes could have
* its own data in place*/
if(sendbuf == MPI_IN_PLACE) {
- mpi_errno = mpi_coll_gather_fun((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
+ mpi_errno = Colls::gather((void*)((char*)recvbuf + (rank * recvcnt * recvtype_extent)),
recvcnt , recvtype,
recvbuf, recvcnt, recvtype,
0, shmem_comm);
} else {
- mpi_errno = mpi_coll_gather_fun(sendbuf, sendcnt,sendtype,
+ mpi_errno = Colls::gather(sendbuf, sendcnt,sendtype,
recvbuf, recvcnt, recvtype,
0, shmem_comm);
}
void* sendbuf=((char*)recvbuf)+recvtype->get_extent()*displs[leader_comm->rank()];
- mpi_errno = mpi_coll_allgatherv_fun(sendbuf,
+ mpi_errno = Colls::allgatherv(sendbuf,
(recvcnt*local_size),
recvtype,
recvbuf, recvcnts,
- mpi_errno = smpi_coll_tuned_allgather_mpich(sendtmpbuf,
+ mpi_errno = Coll_allgather_mpich::allgather(sendtmpbuf,
(recvcnt*local_size),
recvtype,
recvbuf, (recvcnt*local_size), recvtype,
}
/*Bcast the entire data from node leaders to all other cores*/
- mpi_errno = mpi_coll_bcast_fun (recvbuf, recvcnt * size, recvtype, 0, shmem_comm);
+ mpi_errno = Colls::bcast (recvbuf, recvcnt * size, recvtype, 0, shmem_comm);
return mpi_errno;
}
*/
#include "../colls_private.h"
+
+namespace simgrid{
+namespace smpi{
+
int
-smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount,
+Coll_allgather_ompi_neighborexchange::allgather(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
XBT_DEBUG(
"coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size);
- return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
+ return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
__FILE__, line, err, rank);
return err;
}
+
+
+}
+}
* phases, nodes in pair communicate their data.
* Author: Ahmad Faraj
****************************************************************************/
+
+namespace simgrid{
+namespace smpi{
+
+
int
-smpi_coll_tuned_allgather_pair(void *send_buff, int send_count,
+Coll_allgather_pair::allgather(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+
+}
+}
#include "../colls_private.h"
+namespace simgrid{
+namespace smpi{
+
+
int
-smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count,
+Coll_allgather_rdb::allgather(void *sbuf, int send_count,
MPI_Datatype send_type, void *rbuf,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
return success;
}
+
+
+}
+}
#include "../colls_private.h"
+namespace simgrid{
+namespace smpi{
+
+
// now only work with power of two processes
int
-smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count,
+Coll_allgather_rhv::allgather(void *sbuf, int send_count,
MPI_Datatype send_type, void *rbuf,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
if (send_chunk != recv_chunk) {
XBT_WARN("MPI_allgather_rhv use default MPI_allgather.");
- smpi_mpi_allgather(sbuf, send_count, send_type, rbuf, recv_count,
+ Coll_allgather_default::allgather(sbuf, send_count, send_type, rbuf, recv_count,
recv_type, comm);
return MPI_SUCCESS;
}
return MPI_SUCCESS;
}
+
+
+}
+}
* Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j+ i.
* Author: Ahmad Faraj
****************************************************************************/
+
+namespace simgrid{
+namespace smpi{
+
+
int
-smpi_coll_tuned_allgather_ring(void *send_buff, int send_count,
+Coll_allgather_ring::allgather(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+
+}
+}
#include "../colls_private.h"
-int smpi_coll_tuned_allgather_smp_simple(void *send_buf, int scount,
+namespace simgrid{
+namespace smpi{
+
+
+int Coll_allgather_smp_simple::allgather(void *send_buf, int scount,
MPI_Datatype stype, void *recv_buf,
int rcount, MPI_Datatype rtype,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+
+}
+}
*
* Author: Ahmad Faraj
****************************************************************************/
+
+namespace simgrid{
+namespace smpi{
+
+
int
-smpi_coll_tuned_allgather_spreading_simple(void *send_buff, int send_count,
+Coll_allgather_spreading_simple::allgather(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type,
return MPI_SUCCESS;
}
+
+}
+}
#include "../colls_private.h"
+namespace simgrid{
+namespace smpi{
+
// Allgather - gather/bcast algorithm
-int smpi_coll_tuned_allgatherv_GB(void *send_buff, int send_count,
+int Coll_allgatherv_GB::allgatherv(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int *recv_counts, int *recv_disps, MPI_Datatype recv_type,
MPI_Comm comm)
{
- smpi_mpi_gatherv(send_buff, send_count, send_type, recv_buff, recv_counts,
+ Colls::gatherv(send_buff, send_count, send_type, recv_buff, recv_counts,
recv_disps, recv_type, 0, comm);
int num_procs, i, current, max = 0;
num_procs = comm->size();
if (current > max)
max = current;
}
- mpi_coll_bcast_fun(recv_buff, max, recv_type, 0, comm);
+ Colls::bcast(recv_buff, max, recv_type, 0, comm);
return MPI_SUCCESS;
}
+
+}
+}
/* Short or medium size message and power-of-two no. of processes. Use
* recursive doubling algorithm */
#include "../colls_private.h"
-int smpi_coll_tuned_allgatherv_mpich_rdb (
+
+namespace simgrid{
+namespace smpi{
+
+int Coll_allgatherv_mpich_rdb::allgatherv (
void *sendbuf,
int sendcount,
MPI_Datatype sendtype,
smpi_free_tmp_buffer(tmp_buf_rl);
return MPI_SUCCESS;
}
+
+}
+}
* recv_type: data type of elements being received
* comm: communication
****************************************************************************/
+
+namespace simgrid{
+namespace smpi{
+
int
-smpi_coll_tuned_allgatherv_mpich_ring(void *sendbuf, int sendcount,
+Coll_allgatherv_mpich_ring::allgatherv(void *sendbuf, int sendcount,
MPI_Datatype send_type, void *recvbuf,
int *recvcounts, int *displs, MPI_Datatype recvtype,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+}
+}
* [5] [5] [5] [5] [5] [5] [5]
* [6] [6] [6] [6] [6] [6] [6]
*/
-int smpi_coll_tuned_allgatherv_ompi_bruck(void *sbuf, int scount,
+
+namespace simgrid{
+namespace smpi{
+
+int Coll_allgatherv_ompi_bruck::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
void *rbuf, int *rcounts,
int *rdispls,
}
+
+}
+}
#include "../colls_private.h"
+namespace simgrid{
+namespace smpi{
+
int
-smpi_coll_tuned_allgatherv_ompi_neighborexchange(void *sbuf, int scount,
+Coll_allgatherv_ompi_neighborexchange::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int *rcounts, int *rdispls,
MPI_Datatype rdtype,
XBT_DEBUG(
"coll:tuned:allgatherv_ompi_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size);
- return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype,
+ return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts,
rdispls, rdtype,
comm);
__FILE__, line, err, rank);
return err;
}
+
+}
+}
* phases, nodes in pair communicate their data.
* Author: Ahmad Faraj
****************************************************************************/
+
+namespace simgrid{
+namespace smpi{
+
int
-smpi_coll_tuned_allgatherv_pair(void *send_buff, int send_count,
+Coll_allgatherv_pair::allgatherv(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int *recv_counts, int *recv_disps, MPI_Datatype recv_type,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+}
+}
* Descrp: Function works in P - 1 steps. In step i, node j - i -> j -> j+ i.
* Author: Ahmad Faraj
****************************************************************************/
+
+namespace simgrid{
+namespace smpi{
+
int
-smpi_coll_tuned_allgatherv_ring(void *send_buff, int send_count,
+Coll_allgatherv_ring::allgatherv(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int *recv_counts, int *recv_disps, MPI_Datatype recv_type,
MPI_Comm comm)
return MPI_SUCCESS;
}
+
+}
+}
//#include <star-reduction.c>
int
-smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount,
+Coll_allreduce_lr::allreduce(void *sbuf, void *rbuf, int rcount,
MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
{
int tag = COLL_TAG_ALLREDUCE;
/* when communication size is smaller than number of process (not support) */
if (rcount < size) {
XBT_WARN("MPI_allreduce_lr use default MPI_allreduce.");
- smpi_mpi_allreduce(sbuf, rbuf, rcount, dtype, op, comm);
+ Coll_allreduce_default::allreduce(sbuf, rbuf, rcount, dtype, op, comm);
return MPI_SUCCESS;
}
/* when communication size is not divisible by number of process:
call the native implementation for the remain chunk at the end of the operation */
if (remainder_flag) {
- return mpi_coll_allreduce_fun((char *) sbuf + remainder_offset,
+ return Colls::allreduce((char *) sbuf + remainder_offset,
(char *) rbuf + remainder_offset, remainder, dtype, op,
comm);
}
#include "../colls_private.h"
-int smpi_coll_tuned_allreduce_mvapich2_rs(void *sendbuf,
+int Coll_allreduce_mvapich2_rs::allreduce(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
#include "../colls_private.h"
-#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb
-#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs
+#define MPIR_Allreduce_pt2pt_rd_MV2 Coll_allreduce_rdb::allreduce
+#define MPIR_Allreduce_pt2pt_rs_MV2 Coll_allreduce_mvapich2_rs::allreduce
-extern int (*MV2_Allreduce_function)(void *sendbuf,
+extern int (*MV2_Allreducection)(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm)
{
- mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm);
+ Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
return MPI_SUCCESS;
}
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm)
{
- mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm);
+ Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
return MPI_SUCCESS;
}
/* general two level allreduce helper function */
-int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf,
+int Coll_allreduce_mvapich2_two_level::allreduce(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Allreduce_intra_function==NULL)
- MV2_Allreduce_intra_function = smpi_coll_tuned_allreduce_mpich;
- if(MV2_Allreduce_function==NULL)
- MV2_Allreduce_function = smpi_coll_tuned_allreduce_rdb;
+ MV2_Allreduce_intra_function = Coll_allreduce_mpich::allreduce;
+ if(MV2_Allreducection==NULL)
+ MV2_Allreducection = Coll_allreduce_rdb::allreduce;
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
void* sendtmpbuf = (char *)smpi_get_tmp_sendbuffer(count*datatype->get_extent());
Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype);
/* inter-node allreduce */
- if(MV2_Allreduce_function == &MPIR_Allreduce_pt2pt_rd_MV2){
+ if(MV2_Allreducection == &MPIR_Allreduce_pt2pt_rd_MV2){
mpi_errno =
MPIR_Allreduce_pt2pt_rd_MV2(sendtmpbuf, recvbuf, count, datatype, op,
leader_comm);
/* Broadcasting the message from leader to the rest */
/* Note: shared memory broadcast could improve the performance */
- mpi_errno = mpi_coll_bcast_fun(recvbuf, count, datatype, 0, shmem_comm);
+ mpi_errno = Colls::bcast(recvbuf, count, datatype, 0, shmem_comm);
return (mpi_errno);
#include "../colls_private.h"
int
-smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count,
+Coll_allreduce_ompi_ring_segmented::allreduce(void *sbuf, void *rbuf, int count,
MPI_Datatype dtype,
MPI_Op op,
MPI_Comm comm)
/* Special case for count less than size * segcount - use regular ring */
if (count < size * segcount) {
XBT_DEBUG( "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count);
- return (smpi_coll_tuned_allreduce_lr(sbuf, rbuf, count, dtype, op,
+ return (Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype, op,
comm));
}
#include "../colls_private.h"
-int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count,
+int Coll_allreduce_rab_rdb::allreduce(void *sbuff, void *rbuff, int count,
MPI_Datatype dtype, MPI_Op op,
MPI_Comm comm)
{
#include "../colls_private.h"
//#include <star-reduction.c>
+using namespace simgrid::smpi;
// NP pow of 2 for now
-int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff,
+int Coll_allreduce_rab1::allreduce(void *sbuff, void *rbuff,
int count, MPI_Datatype dtype,
MPI_Op op, MPI_Comm comm)
{
}
memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * extent);
- mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm);
+ Colls::allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm);
memcpy(rbuff, recv, count * extent);
smpi_free_tmp_buffer(recv);
}
memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent);
- mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm);
+ Colls::allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm);
smpi_free_tmp_buffer(tmp_buf);
}
#include "../colls_private.h"
//#include <star-reduction.c>
+using namespace simgrid::smpi;
// this requires that count >= NP
-int smpi_coll_tuned_allreduce_rab2(void *sbuff, void *rbuff,
+int Coll_allreduce_rab2::allreduce(void *sbuff, void *rbuff,
int count, MPI_Datatype dtype,
MPI_Op op, MPI_Comm comm)
{
memcpy(send, sbuff, s_extent * count);
- mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm);
+ Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
memcpy(tmp, recv, nbytes);
for (i = 1, s_offset = nbytes; i < nprocs; i++, s_offset = i * nbytes)
if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, tmp, &send_size, dtype);
- mpi_coll_allgather_fun(tmp, send_size, dtype, recv, send_size, dtype, comm);
+ Colls::allgather(tmp, send_size, dtype, recv, send_size, dtype, comm);
memcpy(rbuff, recv, count * s_extent);
smpi_free_tmp_buffer(recv);
recv = (void *) smpi_get_tmp_recvbuffer(s_extent * send_size * nprocs);
- mpi_coll_alltoall_fun(send, send_size, dtype, recv, send_size, dtype, comm);
+ Colls::alltoall(send, send_size, dtype, recv, send_size, dtype, comm);
memcpy((char *) rbuff + r_offset, recv, nbytes);
if(op!=MPI_OP_NULL) op->apply( (char *) recv + s_offset, (char *) rbuff + r_offset,
&send_size, dtype);
- mpi_coll_allgather_fun((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size,
+ Colls::allgather((char *) rbuff + r_offset, send_size, dtype, rbuff, send_size,
dtype, comm);
smpi_free_tmp_buffer(recv);
}
#include "../colls_private.h"
//#include <star-reduction.c>
-int smpi_coll_tuned_allreduce_rdb(void *sbuff, void *rbuff, int count,
+int Coll_allreduce_rdb::allreduce(void *sbuff, void *rbuff, int count,
MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
{
int nprocs, rank, tag = COLL_TAG_ALLREDUCE;
#include "../colls_private.h"
-int smpi_coll_tuned_allreduce_redbcast(void *buf, void *buf2, int count,
+int Coll_allreduce_redbcast::allreduce(void *buf, void *buf2, int count,
MPI_Datatype datatype, MPI_Op op,
MPI_Comm comm)
{
- mpi_coll_reduce_fun(buf, buf2, count, datatype, op, 0, comm);
- mpi_coll_bcast_fun(buf2, count, datatype, 0, comm);
+ Colls::reduce(buf, buf2, count, datatype, op, 0, comm);
+ Colls::bcast(buf2, count, datatype, 0, comm);
return MPI_SUCCESS;
}
3) binomial_tree bcast intra-communication between root of each SMP node
4) binomial_tree bcast inside each SMP node
*/
-int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf,
+int Coll_allreduce_smp_binomial_pipeline::allreduce(void *send_buf,
void *recv_buf, int count,
MPI_Datatype dtype,
MPI_Op op, MPI_Comm comm)
3) binomial_tree bcast intra-communication between root of each SMP node
4) binomial_tree bcast inside each SMP node
*/
-int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf,
+int Coll_allreduce_smp_binomial::allreduce(void *send_buf, void *recv_buf,
int count, MPI_Datatype dtype,
MPI_Op op, MPI_Comm comm)
{
2) Recursive doubling intra-communication between root of each SMP node
3) binomial_tree bcast inside each SMP node
*/
-int smpi_coll_tuned_allreduce_smp_rdb(void *send_buf, void *recv_buf, int count,
+int Coll_allreduce_smp_rdb::allreduce(void *send_buf, void *recv_buf, int count,
MPI_Datatype dtype, MPI_Op op,
MPI_Comm comm)
{
3) allgather - inter between root of each SMP node
4) binomial_tree bcast inside each SMP node
*/
-int smpi_coll_tuned_allreduce_smp_rsag_lr(void *send_buf, void *recv_buf,
+int Coll_allreduce_smp_rsag_lr::allreduce(void *send_buf, void *recv_buf,
int count, MPI_Datatype dtype,
MPI_Op op, MPI_Comm comm)
{
3) allgather - inter between root of each SMP node
4) binomial_tree bcast inside each SMP node
*/
-int smpi_coll_tuned_allreduce_smp_rsag_rab(void *sbuf, void *rbuf, int count,
+int Coll_allreduce_smp_rsag_rab::allreduce(void *sbuf, void *rbuf, int count,
MPI_Datatype dtype, MPI_Op op,
MPI_Comm comm)
{
3) allgather - inter between root of each SMP node
4) binomial_tree bcast inside each SMP node
*/
-int smpi_coll_tuned_allreduce_smp_rsag(void *send_buf, void *recv_buf,
+int Coll_allreduce_smp_rsag::allreduce(void *send_buf, void *recv_buf,
int count, MPI_Datatype dtype, MPI_Op op,
MPI_Comm comm)
{
return 0;
}
-int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count,
+int Coll_alltoall_2dmesh::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
return 0;
}
-int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count,
+int Coll_alltoall_3dmesh::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
--- /dev/null
+/* Copyright (c) 2013-2017. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+ #include "../colls_private.h"
+
+/*Naive and simple basic alltoall implementation. */
+
+
+namespace simgrid{
+namespace smpi{
+
+
+/* Naive linear alltoall: copy the local block in place, then post a
+ * non-blocking receive from and a non-blocking send to every other rank,
+ * and wait on all 2*(size-1) requests at once.
+ * Returns MPI_SUCCESS on success, or the error code from the local copy. */
+int Coll_alltoall_basic_linear::alltoall(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                                          void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
+{
+  /* fixed tag shared by every pairwise exchange of this collective call */
+  int system_tag = 888;
+  int i;
+  int count;
+  MPI_Aint lb = 0, sendext = 0, recvext = 0;
+  MPI_Request *requests;
+
+  /* Initialize. */
+  int rank = comm->rank();
+  int size = comm->size();
+  XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank);
+  sendtype->extent(&lb, &sendext);
+  recvtype->extent(&lb, &recvext);
+  /* simple optimization */
+  /* The rank -> rank block never goes over the network: copy it locally. */
+  int err = Datatype::copy(static_cast<char *>(sendbuf) + rank * sendcount * sendext, sendcount, sendtype,
+                           static_cast<char *>(recvbuf) + rank * recvcount * recvext, recvcount, recvtype);
+  if (err == MPI_SUCCESS && size > 1) {
+    /* Initiate all send/recv to/from others. */
+    requests = xbt_new(MPI_Request, 2 * (size - 1));
+    /* Post all receives first -- a simple optimization */
+    count = 0;
+    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) {
+      requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + i * recvcount * recvext, recvcount,
+                                            recvtype, i, system_tag, comm);
+      count++;
+    }
+    /* Now post all sends in reverse order
+     *   - We would like to minimize the search time through message queue
+     *     when messages actually arrive in the order in which they were posted.
+     * TODO: check the previous assertion
+     */
+    for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) {
+      requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + i * sendcount * sendext, sendcount,
+                                            sendtype, i, system_tag, comm);
+      count++;
+    }
+    /* Wait for them all. */
+    /* Requests were only initialized above; start them, then block until
+     * every send and receive has completed before releasing them. */
+    Request::startall(count, requests);
+    XBT_DEBUG("<%d> wait for %d requests", rank, count);
+    Request::waitall(count, requests, MPI_STATUS_IGNORE);
+    for(i = 0; i < count; i++) {
+      if(requests[i]!=MPI_REQUEST_NULL)
+        Request::unref(&requests[i]);
+    }
+    xbt_free(requests);
+  }
+  return err;
+}
+
+}
+}
* Author: MPICH / modified by Ahmad Faraj
****************************************************************************/
+
+#include "../colls_private.h"
+
+namespace simgrid{
+namespace smpi{
+
+
int
-smpi_coll_tuned_alltoall_bruck(void *send_buff, int send_count,
+Coll_alltoall_bruck::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
MPI_Datatype new_type;
int *blocks_length, *disps;
- int i, src, dst, rank, num_procs, count, remainder, block, position;
+ int i, src, dst, rank, num_procs, count, block, position;
int pack_size, tag = COLL_TAG_ALLTOALL, pof2 = 1;
}
MPI_Type_indexed(count, blocks_length, disps, recv_type, &new_type);
- smpi_datatype_commit(&new_type);
+ new_type->commit();
position = 0;
MPI_Pack(recv_buff, 1, new_type, tmp_buff, pack_size, &position, comm);
smpi_free_tmp_buffer(tmp_buff);
return MPI_SUCCESS;
}
+
+}
+}
#include "../colls_private.h"
-int smpi_coll_tuned_alltoall_mvapich2_scatter_dest(
+int Coll_alltoall_mvapich2_scatter_dest::alltoall(
void *sendbuf,
int sendcount,
MPI_Datatype sendtype,
****************************************************************************/
int
-smpi_coll_tuned_alltoall_pair_light_barrier(void *send_buff, int send_count,
+Coll_alltoall_pair_light_barrier::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type,
****************************************************************************/
int
-smpi_coll_tuned_alltoall_pair_mpi_barrier(void *send_buff, int send_count,
+Coll_alltoall_pair_mpi_barrier::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
- mpi_coll_barrier_fun(comm);
+ Colls::barrier(comm);
Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
****************************************************************************/
int
-smpi_coll_tuned_alltoall_pair_one_barrier(void *send_buff, int send_count,
+Coll_alltoall_pair_one_barrier::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
send_chunk *= send_count;
recv_chunk *= recv_count;
- mpi_coll_barrier_fun(comm);
+ Colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
****************************************************************************/
-int smpi_coll_tuned_alltoall_pair_rma(void *send_buff, int send_count, MPI_Datatype send_type,
+int Coll_alltoall_pair_rma::alltoall(void *send_buff, int send_count, MPI_Datatype send_type,
void *recv_buff, int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
{
}
-int smpi_coll_tuned_alltoall_pair(void *send_buff, int send_count,
+int Coll_alltoall_pair::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
* Author: MPICH / slightly modified by Ahmad Faraj.
****************************************************************************/
-int smpi_coll_tuned_alltoall_rdb(void *send_buff, int send_count,
+int Coll_alltoall_rdb::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
****************************************************************************/
int
-smpi_coll_tuned_alltoall_ring_light_barrier(void *send_buff, int send_count,
+Coll_alltoall_ring_light_barrier::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type,
****************************************************************************/
int
-smpi_coll_tuned_alltoall_ring_mpi_barrier(void *send_buff, int send_count,
+Coll_alltoall_ring_mpi_barrier::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- mpi_coll_barrier_fun(comm);
+ Colls::barrier(comm);
Request::sendrecv(send_ptr + dst * send_chunk, send_count, send_type, dst,
tag, recv_ptr + src * recv_chunk, recv_count, recv_type,
src, tag, comm, &s);
****************************************************************************/
int
-smpi_coll_tuned_alltoall_ring_one_barrier(void *send_buff, int send_count,
+Coll_alltoall_ring_one_barrier::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type,
void *recv_buff, int recv_count,
MPI_Datatype recv_type, MPI_Comm comm)
send_chunk *= send_count;
recv_chunk *= recv_count;
- mpi_coll_barrier_fun(comm);
+ Colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
****************************************************************************/
int
-smpi_coll_tuned_alltoall_ring(void *send_buff, int send_count,
+Coll_alltoall_ring::alltoall(void *send_buff, int send_count,
MPI_Datatype send_type, void *recv_buff,
int recv_count, MPI_Datatype recv_type,
MPI_Comm comm)
* FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not
* less...
**/
-int smpi_coll_tuned_alltoallv_bruck(void *sendbuf, int *sendcounts, int *senddisps,
+int Coll_alltoallv_bruck::alltoallv(void *sendbuf, int *sendcounts, int *senddisps,
MPI_Datatype sendtype, void *recvbuf,
int *recvcounts, int *recvdisps, MPI_Datatype recvtype,
MPI_Comm comm)
count++;
}
/* Wait for them all. */
- //smpi_mpi_startall(count, requests);
+ //Colls::startall(count, requests);
XBT_DEBUG("<%d> wait for %d requests", rank, count);
Request::waitall(count, requests, MPI_STATUSES_IGNORE);
xbt_free(requests);
* GEF Oct05 after asking Jeff.
*/
int
-smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps,
+Coll_alltoallv_ompi_basic_linear::alltoallv(void *sbuf, int *scounts, int *sdisps,
MPI_Datatype sdtype,
void *rbuf, int *rcounts, int *rdisps,
MPI_Datatype rdtype,
****************************************************************************/
int
-smpi_coll_tuned_alltoallv_pair_light_barrier(void *send_buff, int *send_counts, int *send_disps,
+Coll_alltoallv_pair_light_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type,
****************************************************************************/
int
-smpi_coll_tuned_alltoallv_pair_mpi_barrier(void *send_buff, int *send_counts, int *send_disps,
+Coll_alltoallv_pair_mpi_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type, MPI_Comm comm)
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
- smpi_mpi_barrier(comm);
+ Colls::barrier(comm);
Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst,
tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type,
src, tag, comm, &s);
****************************************************************************/
int
-smpi_coll_tuned_alltoallv_pair_one_barrier(void *send_buff, int *send_counts, int *send_disps,
+Coll_alltoallv_pair_one_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm)
{
send_chunk = send_type->get_extent();
recv_chunk = recv_type->get_extent();
- smpi_mpi_barrier(comm);
+ Colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = dst = rank ^ i;
Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst,
 * Author: Ahmad Faraj
****************************************************************************/
-int smpi_coll_tuned_alltoallv_pair(void *send_buff, int *send_counts, int *send_disps,
+int Coll_alltoallv_pair::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type, MPI_Comm comm)
****************************************************************************/
int
-smpi_coll_tuned_alltoallv_ring_light_barrier(void *send_buff, int *send_counts, int *send_disps,
+Coll_alltoallv_ring_light_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type,
****************************************************************************/
int
-smpi_coll_tuned_alltoallv_ring_mpi_barrier(void *send_buff, int *send_counts, int *send_disps,
+Coll_alltoallv_ring_mpi_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type, MPI_Comm comm)
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
- smpi_mpi_barrier(comm);
+ Colls::barrier(comm);
Request::sendrecv(send_ptr + send_disps[dst] * send_chunk, send_counts[dst], send_type, dst,
tag, recv_ptr + recv_disps[src] * recv_chunk, recv_counts[src], recv_type,
src, tag, comm, &s);
****************************************************************************/
int
-smpi_coll_tuned_alltoallv_ring_one_barrier(void *send_buff, int *send_counts, int *send_disps,
+Coll_alltoallv_ring_one_barrier::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type, MPI_Comm comm)
send_chunk = send_type->get_extent();
recv_chunk = recv_type->get_extent();
- smpi_mpi_barrier(comm);
+ Colls::barrier(comm);
for (i = 0; i < num_procs; i++) {
src = (rank - i + num_procs) % num_procs;
dst = (rank + i) % num_procs;
****************************************************************************/
int
-smpi_coll_tuned_alltoallv_ring(void *send_buff, int *send_counts, int *send_disps,
+Coll_alltoallv_ring::alltoallv(void *send_buff, int *send_counts, int *send_disps,
MPI_Datatype send_type,
void *recv_buff, int *recv_counts, int *recv_disps,
MPI_Datatype recv_type,
#include "../colls_private.h"
#include "../coll_tuned_topo.h"
-int smpi_coll_tuned_barrier_mvapich2_pair(MPI_Comm comm)
+int Coll_barrier_mvapich2_pair::barrier(MPI_Comm comm)
{
int size, rank;
 * synchronous guarantee made by last ring of sends being synchronous
*
*/
-int smpi_coll_tuned_barrier_ompi_doublering(MPI_Comm comm
+int Coll_barrier_ompi_doublering::barrier(MPI_Comm comm
)
{
int rank, size;
* To make synchronous, uses sync sends and sync sendrecvs
*/
-int smpi_coll_tuned_barrier_ompi_recursivedoubling(MPI_Comm comm
+int Coll_barrier_ompi_recursivedoubling::barrier(MPI_Comm comm
)
{
int rank, size, adjsize;
* To make synchronous, uses sync sends and sync sendrecvs
*/
-int smpi_coll_tuned_barrier_ompi_bruck(MPI_Comm comm
+int Coll_barrier_ompi_bruck::barrier(MPI_Comm comm
)
{
int rank, size;
* To make synchronous, uses sync sends and sync sendrecvs
*/
/* special case for two processes */
-int smpi_coll_tuned_barrier_ompi_two_procs(MPI_Comm comm
+int Coll_barrier_ompi_two_procs::barrier(MPI_Comm comm
)
{
int remote;
/* copied function (with appropriate renaming) starts here */
-int smpi_coll_tuned_barrier_ompi_basic_linear(MPI_Comm comm)
+int Coll_barrier_ompi_basic_linear::barrier(MPI_Comm comm)
{
int i;
int size = comm->size();
* Another recursive doubling type algorithm, but in this case
* we go up the tree and back down the tree.
*/
-int smpi_coll_tuned_barrier_ompi_tree(MPI_Comm comm)
+int Coll_barrier_ompi_tree::barrier(MPI_Comm comm)
{
int rank, size, depth;
int jump, partner;
int bcast_NTSB_segment_size_in_byte = 8192;
-int smpi_coll_tuned_bcast_NTSB(void *buf, int count, MPI_Datatype datatype,
+int Coll_bcast_NTSB::bcast(void *buf, int count, MPI_Datatype datatype,
int root, MPI_Comm comm)
{
int tag = COLL_TAG_BCAST;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_NTSB use default MPI_bcast.");
- smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
/* Non-topology-specific pipelined linear-bcast function
0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion
*/
-int smpi_coll_tuned_bcast_NTSL_Isend(void *buf, int count, MPI_Datatype datatype,
+int Coll_bcast_NTSL_Isend::bcast(void *buf, int count, MPI_Datatype datatype,
int root, MPI_Comm comm)
{
int tag = COLL_TAG_BCAST;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_NTSL_Isend_nb use default MPI_bcast.");
- smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
/* Non-topology-specific pipelined linear-bcast function
0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion
*/
-int smpi_coll_tuned_bcast_NTSL(void *buf, int count, MPI_Datatype datatype,
+int Coll_bcast_NTSL::bcast(void *buf, int count, MPI_Datatype datatype,
int root, MPI_Comm comm)
{
int tag = COLL_TAG_BCAST;
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_arrival_NTSL use default MPI_bcast.");
- smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
int bcast_SMP_binary_segment_byte = 8192;
-int smpi_coll_tuned_bcast_SMP_binary(void *buf, int count,
+int Coll_bcast_SMP_binary::bcast(void *buf, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm)
{
host_num_core = comm->get_intra_comm()->size();
}else{
//implementation buggy in this case
- return smpi_coll_tuned_bcast_mpich( buf , count, datatype,
+ return Coll_bcast_mpich::bcast( buf , count, datatype,
root, comm);
}
// when count is not divisible by block size, use default BCAST for the remainder
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_SMP_binary use default MPI_bcast.");
- smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
#include "../colls_private.h"
-int smpi_coll_tuned_bcast_SMP_binomial(void *buf, int count,
+int Coll_bcast_SMP_binomial::bcast(void *buf, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm)
{
num_core = comm->get_intra_comm()->size();
}else{
//implementation buggy in this case
- return smpi_coll_tuned_bcast_mpich( buf , count, datatype,
+ return Coll_bcast_mpich::bcast( buf , count, datatype,
root, comm);
}
int bcast_SMP_linear_segment_byte = 8192;
-int smpi_coll_tuned_bcast_SMP_linear(void *buf, int count,
+int Coll_bcast_SMP_linear::bcast(void *buf, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm)
{
num_core = comm->get_intra_comm()->size();
}else{
//implementation buggy in this case
- return smpi_coll_tuned_bcast_mpich( buf , count, datatype,
+ return Coll_bcast_mpich::bcast( buf , count, datatype,
root, comm);
}
// call native when MPI communication size is too small
if (size <= num_core) {
XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast.");
- smpi_mpi_bcast(buf, count, datatype, root, comm);
+ Coll_bcast_default::bcast(buf, count, datatype, root, comm);
return MPI_SUCCESS;
}
// if root is not zero send to rank zero first
// when count is not divisible by block size, use default BCAST for the remainder
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_SMP_linear use default MPI_bcast.");
- smpi_mpi_bcast((char *) buf + (pipe_length * increment), remainder, datatype,
+ Colls::bcast((char *) buf + (pipe_length * increment), remainder, datatype,
root, comm);
}
#endif
/* Non-topology-specific pipelined linear-bcast function */
-int smpi_coll_tuned_bcast_arrival_pattern_aware_wait(void *buf, int count,
+int Coll_bcast_arrival_pattern_aware_wait::bcast(void *buf, int count,
MPI_Datatype datatype,
int root, MPI_Comm comm)
{
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_arrival_pattern_aware_wait use default MPI_bcast.");
- smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
+ Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
#define MAX_NODE 1024
/* Non-topology-specific pipelined linear-bcast function */
-int smpi_coll_tuned_bcast_arrival_pattern_aware(void *buf, int count,
+int Coll_bcast_arrival_pattern_aware::bcast(void *buf, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm)
{
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_bcast_arrival_pattern_aware use default MPI_bcast.");
- smpi_mpi_bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
+ Colls::bcast((char *)buf + (pipe_length * increment), remainder, datatype, root, comm);
}
return MPI_SUCCESS;
#endif
/* Non-topology-specific pipelined linear-bcast function */
-int smpi_coll_tuned_bcast_arrival_scatter(void *buf, int count,
+int Coll_bcast_arrival_scatter::bcast(void *buf, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm)
{
/* message too small */
if (count < size) {
XBT_WARN("MPI_bcast_arrival_scatter use default MPI_bcast.");
- smpi_mpi_bcast(buf, count, datatype, root, comm);
+ Colls::bcast(buf, count, datatype, root, comm);
return MPI_SUCCESS;
}
****************************************************************************/
int
-smpi_coll_tuned_bcast_binomial_tree(void *buff, int count,
+Coll_bcast_binomial_tree::bcast(void *buff, int count,
MPI_Datatype data_type, int root,
MPI_Comm comm)
{
int flattree_segment_in_byte = 8192;
int
-smpi_coll_tuned_bcast_flattree_pipeline(void *buff, int count,
+Coll_bcast_flattree_pipeline::bcast(void *buff, int count,
MPI_Datatype data_type, int root,
MPI_Comm comm)
{
int increment = segment * extent;
if (pipe_length==0) {
XBT_WARN("MPI_bcast_flattree_pipeline use default MPI_bcast_flattree.");
- return smpi_coll_tuned_bcast_flattree(buff, count, data_type, root, comm);
+ return Coll_bcast_flattree::bcast(buff, count, data_type, root, comm);
}
rank = comm->rank();
num_procs = comm->size();
#include "../colls_private.h"
int
-smpi_coll_tuned_bcast_flattree(void *buff, int count, MPI_Datatype data_type,
+Coll_bcast_flattree::bcast(void *buff, int count, MPI_Datatype data_type,
int root, MPI_Comm comm)
{
MPI_Request *req_ptr;
extern int mv2_bcast_two_level_system_size;
#define INTRA_NODE_ROOT 0
-#define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich
-#define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich
-#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree
-#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather
-#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather
-#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather
-#define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich
-#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node
-#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node
-#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node
-#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node
+#define MPIR_Pipelined_Bcast_Zcpy_MV2 Coll_bcast_mpich::bcast
+#define MPIR_Pipelined_Bcast_MV2 Coll_bcast_mpich::bcast
+#define MPIR_Bcast_binomial_MV2 Coll_bcast_binomial_tree::bcast
+#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 Coll_bcast_scatter_LR_allgather::bcast
+#define MPIR_Bcast_scatter_doubling_allgather_MV2 Coll_bcast_scatter_rdb_allgather::bcast
+#define MPIR_Bcast_scatter_ring_allgather_MV2 Coll_bcast_scatter_LR_allgather::bcast
+#define MPIR_Shmem_Bcast_MV2 Coll_bcast_mpich::bcast
+#define MPIR_Bcast_tune_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast
+#define MPIR_Bcast_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast
+#define MPIR_Knomial_Bcast_intra_node_MV2 Coll_bcast_mvapich2_knomial_intra_node::bcast
+#define MPIR_Bcast_intra_MV2 Coll_bcast_mvapich2_intra_node::bcast
extern int zcpy_knomial_factor;
extern int mv2_pipelined_zcpy_knomial_factor;
#define mv2_bcast_large_msg 512*1024
#define mv2_knomial_intra_node_threshold 131072
#define mv2_scatter_rd_inter_leader_bcast 1
-int smpi_coll_tuned_bcast_mvapich2_inter_node(void *buffer,
+int Coll_bcast_mvapich2_inter_node::bcast(void *buffer,
int count,
MPI_Datatype datatype,
int root,
if (MV2_Bcast_function==NULL){
- MV2_Bcast_function=smpi_coll_tuned_bcast_mpich;
+ MV2_Bcast_function=Coll_bcast_mpich::bcast;
}
if (MV2_Bcast_intra_node_function==NULL){
- MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich;
+ MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast;
}
if(comm->get_leaders_comm()==MPI_COMM_NULL){
}
-int smpi_coll_tuned_bcast_mvapich2_knomial_intra_node(void *buffer,
+int Coll_bcast_mvapich2_knomial_intra_node::bcast(void *buffer,
int count,
MPI_Datatype datatype,
int root, MPI_Comm comm)
int src, dst, mask, relative_rank;
int k;
if (MV2_Bcast_function==NULL){
- MV2_Bcast_function=smpi_coll_tuned_bcast_mpich;
+ MV2_Bcast_function=Coll_bcast_mpich::bcast;
}
if (MV2_Bcast_intra_node_function==NULL){
- MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich;
+ MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast;
}
if(comm->get_leaders_comm()==MPI_COMM_NULL){
}
-int smpi_coll_tuned_bcast_mvapich2_intra_node(void *buffer,
+int Coll_bcast_mvapich2_intra_node::bcast(void *buffer,
int count,
MPI_Datatype datatype,
int root, MPI_Comm comm)
if (count == 0)
return MPI_SUCCESS;
if (MV2_Bcast_function==NULL){
- MV2_Bcast_function=smpi_coll_tuned_bcast_mpich;
+ MV2_Bcast_function=Coll_bcast_mpich::bcast;
}
if (MV2_Bcast_intra_node_function==NULL){
- MV2_Bcast_intra_node_function= smpi_coll_tuned_bcast_mpich;
+ MV2_Bcast_intra_node_function= Coll_bcast_mpich::bcast;
}
if(comm->get_leaders_comm()==MPI_COMM_NULL){
#define MAXTREEFANOUT 32
-int smpi_coll_tuned_bcast_ompi_pipeline( void* buffer,
+int Coll_bcast_ompi_pipeline::bcast( void* buffer,
int original_count,
MPI_Datatype datatype,
int root,
#define MAXTREEFANOUT 32
int
-smpi_coll_tuned_bcast_ompi_split_bintree ( void* buffer,
+Coll_bcast_ompi_split_bintree::bcast ( void* buffer,
int count,
MPI_Datatype datatype,
int root,
(segsize > counts[0] * type_size) ||
(segsize > counts[1] * type_size) ) {
/* call linear version here ! */
- return (smpi_coll_tuned_bcast_SMP_linear ( buffer, count, datatype,
+ return (Coll_bcast_SMP_linear::bcast ( buffer, count, datatype,
root, comm));
}
type_extent = datatype->get_extent();
****************************************************************************/
int
-smpi_coll_tuned_bcast_scatter_LR_allgather(void *buff, int count,
+Coll_bcast_scatter_LR_allgather::bcast(void *buff, int count,
MPI_Datatype data_type, int root,
MPI_Comm comm)
{
}
int
-smpi_coll_tuned_bcast_scatter_rdb_allgather (
+Coll_bcast_scatter_rdb_allgather::bcast (
void *buffer,
int count,
MPI_Datatype datatype,
SG_BEGIN_DECL()
+
+namespace simgrid{
+namespace smpi{
+
#define COLL_DESCRIPTION(cat, ret, args, name) \
{# name,\
# cat " " # name " collective",\
- (void*)smpi_coll_tuned_ ## cat ## _ ## name}
+ (void*) Coll_ ## cat ## _ ## name::cat }
#define COLL_PROTO(cat, ret, args, name) \
- ret smpi_coll_tuned_ ## cat ## _ ## name(COLL_UNPAREN args);
+class Coll_ ## cat ## _ ## name : public Coll_ ## cat { \
+public: \
+static ret cat (COLL_UNPAREN args); \
+};
+
#define COLL_UNPAREN(...) __VA_ARGS__
#define COLL_APPLY(action, sig, name) action(sig, name)
/*************
* GATHER *
*************/
+
#define COLL_GATHER_SIG gather, int, \
(void *send_buff, int send_count, MPI_Datatype send_type, \
void *recv_buff, int recv_count, MPI_Datatype recv_type, \
int root, MPI_Comm comm)
#define COLL_GATHERS(action, COLL_sep) \
+COLL_APPLY(action, COLL_GATHER_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, ompi_basic_linear) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, ompi_binomial) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, automatic)
-
-
COLL_GATHERS(COLL_PROTO, COLL_NOsep)
/*************
MPI_Comm comm)
#define COLL_ALLGATHERS(action, COLL_sep) \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, 2dmesh) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, 3dmesh) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, bruck) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, impi) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, automatic)
-
COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep)
/**************
MPI_Datatype recv_type, MPI_Comm comm)
#define COLL_ALLGATHERVS(action, COLL_sep) \
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, GB) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, pair) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, ring) COLL_sep \
MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
#define COLL_ALLREDUCES(action, COLL_sep) \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, lr) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab1) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab2) COLL_sep \
COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep)
-
/************
* ALLTOALL *
************/
MPI_Comm comm)
#define COLL_ALLTOALLS(action, COLL_sep) \
+COLL_APPLY(action, COLL_ALLTOALL_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, 2dmesh) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, 3dmesh) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, basic_linear) COLL_sep \
MPI_Comm comm)
#define COLL_ALLTOALLVS(action, COLL_sep) \
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, bruck) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, pair) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, pair_light_barrier) COLL_sep \
int root, MPI_Comm comm)
#define COLL_BCASTS(action, COLL_sep) \
+COLL_APPLY(action, COLL_BCAST_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, arrival_pattern_aware) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, arrival_pattern_aware_wait) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, arrival_scatter) COLL_sep \
MPI_Op op, int root, MPI_Comm comm)
#define COLL_REDUCES(action, COLL_sep) \
+COLL_APPLY(action, COLL_REDUCE_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, arrival_pattern_aware) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, binomial) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, flat_tree) COLL_sep \
MPI_Datatype dtype,MPI_Op op,MPI_Comm comm)
#define COLL_REDUCE_SCATTERS(action, COLL_sep) \
+COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi_basic_recursivehalving) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi_ring) COLL_sep \
int root, MPI_Comm comm)
#define COLL_SCATTERS(action, COLL_sep) \
+COLL_APPLY(action, COLL_SCATTER_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, ompi_basic_linear) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial) COLL_sep \
(MPI_Comm comm)
#define COLL_BARRIERS(action, COLL_sep) \
+COLL_APPLY(action, COLL_BARRIER_SIG, default) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_basic_linear) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_two_procs) COLL_sep \
COLL_BARRIERS(COLL_PROTO, COLL_NOsep)
+}
+}
+
SG_END_DECL()
#endif
#include "../colls_private.h"
-#define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear
-#define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear
-#define MPIR_Gather_intra smpi_coll_tuned_gather_mpich
+
+
+
+
+#define MPIR_Gather_MV2_Direct Coll_gather_ompi_basic_linear::gather
+#define MPIR_Gather_MV2_two_level_Direct Coll_gather_ompi_basic_linear::gather
+#define MPIR_Gather_intra Coll_gather_mpich::gather
typedef int (*MV2_Gather_function_ptr) (void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
#define TEMP_BUF_HAS_NO_DATA (0)
#define TEMP_BUF_HAS_DATA (1)
+
+namespace simgrid{
+namespace smpi{
+
/* sendbuf - (in) sender's buffer
* sendcnt - (in) sender's element count
* sendtype - (in) sender's data type
}
-int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf,
+
+int Coll_gather_mvapich2_two_level::gather(void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
void *recvbuf,
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Gather_intra_node_function==NULL)
- MV2_Gather_intra_node_function=smpi_coll_tuned_gather_mpich;
+ MV2_Gather_intra_node_function= Coll_gather_mpich::gather;
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
recvcnts[i] = node_sizes[i] * recvcnt;
}
}
- smpi_mpi_gatherv(tmp_buf,
+ Colls::gatherv(tmp_buf,
local_size * nbytes,
MPI_BYTE, recvbuf, recvcnts,
displs, recvtype,
recvcnts[i] = node_sizes[i] * nbytes;
}
}
- smpi_mpi_gatherv(tmp_buf, local_size * nbytes,
+ Colls::gatherv(tmp_buf, local_size * nbytes,
MPI_BYTE, leader_gather_buf,
recvcnts, displs, MPI_BYTE,
leader_root, leader_comm);
return (mpi_errno);
}
+}
+}
#include "../colls_private.h"
#include "../coll_tuned_topo.h"
-/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain,
- * gather_intra_pipeline, segmentation? */
-int
-smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount,
+namespace simgrid{
+namespace smpi{
+
+
+int Coll_gather_ompi_binomial::gather(void *sbuf, int scount,
MPI_Datatype sdtype,
void *rbuf, int rcount,
MPI_Datatype rdtype,
* Accepts: - same arguments as MPI_Gather(), first segment size
* Returns: - MPI_SUCCESS or error code
*/
-int
-smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount,
+int Coll_gather_ompi_linear_sync::gather(void *sbuf, int scount,
MPI_Datatype sdtype,
void *rbuf, int rcount,
MPI_Datatype rdtype,
* Accepts: - same arguments as MPI_Gather()
* Returns: - MPI_SUCCESS or error code
*/
-int
-smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount,
+int Coll_gather_ompi_basic_linear::gather(void *sbuf, int scount,
MPI_Datatype sdtype,
void *rbuf, int rcount,
MPI_Datatype rdtype,
return MPI_SUCCESS;
}
+
+}
+}
/* Non-topology-specific pipelined linear-bcast function
0->1, 1->2 ,2->3, ....., ->last node : in a pipeline fashion
*/
-int smpi_coll_tuned_reduce_NTSL(void *buf, void *rbuf, int count,
+int Coll_reduce_NTSL::reduce(void *buf, void *rbuf, int count,
MPI_Datatype datatype, MPI_Op op, int root,
MPI_Comm comm)
{
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
XBT_WARN("MPI_reduce_NTSL use default MPI_reduce.");
- smpi_mpi_reduce((char *)buf + (pipe_length * increment),
+ Coll_reduce_default::reduce((char *)buf + (pipe_length * increment),
(char *)rbuf + (pipe_length * increment), remainder, datatype, op, root,
comm);
}
#endif
/* Non-topology-specific pipelined linear-reduce function */
-int smpi_coll_tuned_reduce_arrival_pattern_aware(void *buf, void *rbuf,
+int Coll_reduce_arrival_pattern_aware::reduce(void *buf, void *rbuf,
int count,
MPI_Datatype datatype,
MPI_Op op, int root,
/* when count is not divisible by block size, use default BCAST for the remainder */
if ((remainder != 0) && (count > segment)) {
- smpi_mpi_reduce((char *)buf + (pipe_length * increment),
+ Coll_reduce_default::reduce((char *)buf + (pipe_length * increment),
(char *)rbuf + (pipe_length * increment), remainder, datatype, op, root,
comm);
}
//#include <star-reduction.c>
-int smpi_coll_tuned_reduce_binomial(void *sendbuf, void *recvbuf, int count,
+int Coll_reduce_binomial::reduce(void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, int root,
MPI_Comm comm)
{
//#include <star-reduction.c>
int
-smpi_coll_tuned_reduce_flat_tree(void *sbuf, void *rbuf, int count,
+Coll_reduce_flat_tree::reduce(void *sbuf, void *rbuf, int count,
MPI_Datatype dtype, MPI_Op op,
int root, MPI_Comm comm)
{
return 0;
}
-int smpi_coll_tuned_reduce_mvapich2_knomial (
+int Coll_reduce_mvapich2_knomial::reduce (
void *sendbuf,
void *recvbuf,
int count,
#define SHMEM_COLL_BLOCK_SIZE (local_size * mv2_g_shmem_coll_max_msg_size)
#define mv2_use_knomial_reduce 1
-#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial
-#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial
-#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_binomial
-#define MPIR_Reduce_redscat_gather_MV2 smpi_coll_tuned_reduce_scatter_gather
-#define MPIR_Reduce_shmem_MV2 smpi_coll_tuned_reduce_ompi_basic_linear
+#define MPIR_Reduce_inter_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce
+#define MPIR_Reduce_intra_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce
+#define MPIR_Reduce_binomial_MV2 Coll_reduce_binomial::reduce
+#define MPIR_Reduce_redscat_gather_MV2 Coll_reduce_scatter_gather::reduce
+#define MPIR_Reduce_shmem_MV2 Coll_reduce_ompi_basic_linear::reduce
extern int (*MV2_Reduce_function)( void *sendbuf,
void *recvbuf,
MPI_Datatype datatype,
MPI_Op op, int root, MPI_Comm comm);
-int smpi_coll_tuned_reduce_mvapich2_two_level( void *sendbuf,
+int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Reduce_function==NULL)
- MV2_Reduce_function=smpi_coll_tuned_reduce_mpich;
+ MV2_Reduce_function=Coll_reduce_mpich::reduce;
if(MV2_Reduce_intra_function==NULL)
- MV2_Reduce_intra_function=smpi_coll_tuned_reduce_mpich;
+ MV2_Reduce_intra_function=Coll_reduce_mpich::reduce;
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
meaning that at least one datatype must fit in the segment !
*/
-int smpi_coll_tuned_reduce_ompi_chain( void *sendbuf, void *recvbuf, int count,
+int Coll_reduce_ompi_chain::reduce( void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm
}
-int smpi_coll_tuned_reduce_ompi_pipeline( void *sendbuf, void *recvbuf,
+int Coll_reduce_ompi_pipeline::reduce( void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm )
segcount, 0);
}
-int smpi_coll_tuned_reduce_ompi_binary( void *sendbuf, void *recvbuf,
+int Coll_reduce_ompi_binary::reduce( void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm)
segcount, 0);
}
-int smpi_coll_tuned_reduce_ompi_binomial( void *sendbuf, void *recvbuf,
+int Coll_reduce_ompi_binomial::reduce( void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm)
* Acecpts: same as MPI_Reduce()
* Returns: MPI_SUCCESS or error code
*/
-int smpi_coll_tuned_reduce_ompi_in_order_binary( void *sendbuf, void *recvbuf,
+int Coll_reduce_ompi_in_order_binary::reduce( void *sendbuf, void *recvbuf,
int count,
MPI_Datatype datatype,
MPI_Op op, int root,
*/
int
-smpi_coll_tuned_reduce_ompi_basic_linear(void *sbuf, void *rbuf, int count,
+Coll_reduce_ompi_basic_linear::reduce(void *sbuf, void *rbuf, int count,
MPI_Datatype dtype,
MPI_Op op,
int root,
Exa.: size=13 ==> n=3, r=5 (i.e. size == 13 == 2**n+r == 2**3 + 5)
- The algoritm needs for the execution of one mpi_coll_reduce_fun
+ The algorithm needs for the execution of one Colls::reduce
- for r==0
exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d)
7: { [(a+b)+(c+d)] + [(e+f)+(g+h)] } + { [(i+j)+k] + [l+m] } for H
-For mpi_coll_allreduce_fun:
+For Colls::allreduce:
------------------
Step 6.1)
on all nodes 0..12
-For mpi_coll_reduce_fun:
+For Colls::reduce:
---------------
Step 6.0)
} /* new_prot */
/*otherwise:*/
if (is_all)
- return( mpi_coll_allreduce_fun(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) );
+ return( Colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) );
else
- return( mpi_coll_reduce_fun(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) );
+ return( Colls::reduce(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) );
}
#endif /*REDUCE_LIMITS*/
-int smpi_coll_tuned_reduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
+int Coll_reduce_rab::reduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
{
return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, root, comm, 0) );
}
-int smpi_coll_tuned_allreduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+int Coll_allreduce_rab::allreduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, -1, comm, 1) );
}
Author: MPICH
*/
-int smpi_coll_tuned_reduce_scatter_gather(void *sendbuf, void *recvbuf,
+int Coll_reduce_scatter_gather::reduce(void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype,
MPI_Op op, int root, MPI_Comm comm)
{
}
-int smpi_coll_tuned_reduce_scatter_mpich_pair(void *sendbuf, void *recvbuf, int recvcounts[],
+int Coll_reduce_scatter_mpich_pair::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[],
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int rank, comm_size, i;
}
-int smpi_coll_tuned_reduce_scatter_mpich_noncomm(void *sendbuf, void *recvbuf, int recvcounts[],
+int Coll_reduce_scatter_mpich_noncomm::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[],
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int mpi_errno = MPI_SUCCESS;
-int smpi_coll_tuned_reduce_scatter_mpich_rdb(void *sendbuf, void *recvbuf, int recvcounts[],
+int Coll_reduce_scatter_mpich_rdb::reduce_scatter(void *sendbuf, void *recvbuf, int recvcounts[],
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
int rank, comm_size, i;
* Limitation: - Works only for commutative operations.
*/
int
-smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(void *sbuf,
+Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(void *sbuf,
void *rbuf,
int *rcounts,
MPI_Datatype dtype,
/*
- * smpi_coll_tuned_reduce_scatter_ompi_ring
+ * Coll_reduce_scatter_ompi_ring::reduce_scatter
*
* Function: Ring algorithm for reduce_scatter operation
* Accepts: Same as MPI_Reduce_scatter()
*
*/
int
-smpi_coll_tuned_reduce_scatter_ompi_ring(void *sbuf, void *rbuf, int *rcounts,
+Coll_reduce_scatter_ompi_ring::reduce_scatter(void *sbuf, void *rbuf, int *rcounts,
MPI_Datatype dtype,
MPI_Op op,
MPI_Comm comm
*/
#include "../colls_private.h"
-#define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial
-#define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear
+#define MPIR_Scatter_MV2_Binomial Coll_scatter_ompi_binomial::scatter
+#define MPIR_Scatter_MV2_Direct Coll_scatter_ompi_basic_linear::scatter
extern int (*MV2_Scatter_intra_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
int root, MPI_Comm comm);
-int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf,
+int Coll_scatter_mvapich2_two_level_direct::scatter(void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
void *recvbuf,
MPI_Comm shmem_comm, leader_comm;
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Scatter_intra_function==NULL)
- MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich;
+ MV2_Scatter_intra_function=Coll_scatter_mpich::scatter;
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
sendcnts[i] = node_sizes[i] * nbytes;
}
}
- smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs,
+ Colls::scatterv(leader_scatter_buf, sendcnts, displs,
MPI_BYTE, tmp_buf, nbytes * local_size,
MPI_BYTE, leader_root, leader_comm);
} else {
sendcnts[i] = node_sizes[i] * sendcnt;
}
}
- smpi_mpi_scatterv(sendbuf, sendcnts, displs,
+ Colls::scatterv(sendbuf, sendcnts, displs,
sendtype, tmp_buf,
nbytes * local_size, MPI_BYTE,
leader_root, leader_comm);
}
-int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf,
+int Coll_scatter_mvapich2_two_level_binomial::scatter(void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
void *recvbuf,
//if not set (use of the algo directly, without mvapich2 selector)
if(MV2_Scatter_intra_function==NULL)
- MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich;
+ MV2_Scatter_intra_function=Coll_scatter_mpich::scatter;
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
sendcnts[i] = node_sizes[i] * nbytes;
}
}
- smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs,
+ Colls::scatterv(leader_scatter_buf, sendcnts, displs,
MPI_BYTE, tmp_buf, nbytes * local_size,
MPI_BYTE, leader_root, leader_comm);
} else {
sendcnts[i] = node_sizes[i] * sendcnt;
}
}
- smpi_mpi_scatterv(sendbuf, sendcnts, displs,
+ Colls::scatterv(sendbuf, sendcnts, displs,
sendtype, tmp_buf,
nbytes * local_size, MPI_BYTE,
leader_root, leader_comm);
int
-smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount,
+Coll_scatter_ompi_binomial::scatter(void *sbuf, int scount,
MPI_Datatype sdtype,
void *rbuf, int rcount,
MPI_Datatype rdtype,
rank = comm->rank();
XBT_DEBUG(
- "smpi_coll_tuned_scatter_ompi_binomial rank %d", rank);
+ "Coll_scatter_ompi_binomial::scatter rank %d", rank);
/* create the binomial tree */
* Returns: - MPI_SUCCESS or error code
*/
int
-smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount,
+Coll_scatter_ompi_basic_linear::scatter(void *sbuf, int scount,
MPI_Datatype sdtype,
void *rbuf, int rcount,
MPI_Datatype rdtype,
new_pajeNewEvent (SIMIX_get_clock(), PJ_container_get(cont_name), type, value);\
}
-
#define AUTOMATIC_COLL_BENCH(cat, ret, args, args2)\
- ret smpi_coll_tuned_ ## cat ## _ ## automatic(COLL_UNPAREN args)\
+ ret Coll_ ## cat ## _automatic:: cat (COLL_UNPAREN args)\
{\
double time1, time2, time_min=DBL_MAX;\
int min_coll=-1, global_coll=-1;\
for (i = 0; mpi_coll_##cat##_description[i].name; i++){\
if(!strcmp(mpi_coll_##cat##_description[i].name, "automatic"))continue;\
if(!strcmp(mpi_coll_##cat##_description[i].name, "default"))continue;\
- smpi_mpi_barrier(comm);\
+ Coll_barrier_default::barrier(comm);\
TRACE_AUTO_COLL(cat)\
time1 = SIMIX_get_clock();\
try {\
}\
time2 = SIMIX_get_clock();\
buf_out=time2-time1;\
- smpi_mpi_reduce((void*)&buf_out,(void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0,comm );\
+ Coll_reduce_default::reduce((void*)&buf_out,(void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0,comm );\
if(time2-time1<time_min){\
min_coll=i;\
time_min=time2-time1;\
}else\
XBT_WARN("The quickest %s was %s on rank %d and took %f",#cat,mpi_coll_##cat##_description[min_coll].name, comm->rank(), time_min);\
return (min_coll!=-1)?MPI_SUCCESS:MPI_ERR_INTERN;\
-}\
+}
+namespace simgrid{
+namespace smpi{
COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm));
COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_BARRIER_SIG,(comm));
+
+}
+}
--- /dev/null
+/* selector with default/naive SimGrid algorithms. These should not be trusted for performance evaluations */
+
+/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "colls_private.h"
+
+namespace simgrid{
+namespace smpi{
+
+int Coll_bcast_default::bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
+{
+ return Coll_bcast_binomial_tree::bcast(buf, count, datatype, root, comm);
+}
+
+int Coll_barrier_default::barrier(MPI_Comm comm)
+{
+ return Coll_barrier_ompi_basic_linear::barrier(comm);
+}
+
+
+int Coll_gather_default::gather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+ void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
+{
+ int system_tag = COLL_TAG_GATHER;
+ MPI_Aint lb = 0;
+ MPI_Aint recvext = 0;
+
+ int rank = comm->rank();
+ int size = comm->size();
+ if(rank != root) {
+ // Send buffer to root
+ Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm);
+ } else {
+ recvtype->extent(&lb, &recvext);
+ // Local copy from root
+ Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char*>(recvbuf) + root * recvcount * recvext,
+ recvcount, recvtype);
+ // Receive buffers from senders
+ MPI_Request *requests = xbt_new(MPI_Request, size - 1);
+ int index = 0;
+ for (int src = 0; src < size; src++) {
+ if(src != root) {
+ requests[index] = Request::irecv_init(static_cast<char*>(recvbuf) + src * recvcount * recvext, recvcount, recvtype,
+ src, system_tag, comm);
+ index++;
+ }
+ }
+ // Wait for completion of irecv's.
+ Request::startall(size - 1, requests);
+ Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
+ for (int src = 0; src < size-1; src++) {
+ Request::unref(&requests[src]);
+ }
+ xbt_free(requests);
+ }
+ return MPI_SUCCESS;
+}
+
+int Coll_reduce_scatter_default::reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op,
+ MPI_Comm comm)
+{
+ int rank = comm->rank();
+
+ /* arbitrarily choose root as rank 0 */
+ int size = comm->size();
+ int count = 0;
+ int *displs = xbt_new(int, size);
+ for (int i = 0; i < size; i++) {
+ displs[i] = count;
+ count += recvcounts[i];
+ }
+ void *tmpbuf = static_cast<void*>(smpi_get_tmp_sendbuffer(count*datatype->get_extent()));
+ int ret = MPI_SUCCESS;
+
+ ret = Coll_reduce_default::reduce(sendbuf, tmpbuf, count, datatype, op, 0, comm);
+ if(ret==MPI_SUCCESS)
+ ret = Colls::scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm);
+ xbt_free(displs);
+ smpi_free_tmp_buffer(tmpbuf);
+ return ret;
+}
+
+
+int Coll_allgather_default::allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+ void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
+{
+ int system_tag = COLL_TAG_ALLGATHER;
+ MPI_Aint lb = 0;
+ MPI_Aint recvext = 0;
+ MPI_Request *requests;
+
+ int rank = comm->rank();
+ int size = comm->size();
+ // FIXME: check for errors
+ recvtype->extent(&lb, &recvext);
+ // Local copy from self
+ Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char *>(recvbuf) + rank * recvcount * recvext, recvcount,
+ recvtype);
+ // Send/Recv buffers to/from others;
+ requests = xbt_new(MPI_Request, 2 * (size - 1));
+ int index = 0;
+ for (int other = 0; other < size; other++) {
+ if(other != rank) {
+ requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm);
+ index++;
+ requests[index] = Request::irecv_init(static_cast<char *>(recvbuf) + other * recvcount * recvext, recvcount, recvtype,
+ other, system_tag, comm);
+ index++;
+ }
+ }
+ // Wait for completion of all comms.
+ Request::startall(2 * (size - 1), requests);
+ Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE);
+ for (int other = 0; other < 2*(size-1); other++) {
+ Request::unref(&requests[other]);
+ }
+ xbt_free(requests);
+ return MPI_SUCCESS;
+}
+
+int Coll_allgatherv_default::allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
+ int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm)
+{
+ int system_tag = COLL_TAG_ALLGATHERV;
+ MPI_Aint lb = 0;
+ MPI_Aint recvext = 0;
+
+ int rank = comm->rank();
+ int size = comm->size();
+ recvtype->extent(&lb, &recvext);
+ // Local copy from self
+ Datatype::copy(sendbuf, sendcount, sendtype,
+ static_cast<char *>(recvbuf) + displs[rank] * recvext,recvcounts[rank], recvtype);
+ // Send buffers to others;
+ MPI_Request *requests = xbt_new(MPI_Request, 2 * (size - 1));
+ int index = 0;
+ for (int other = 0; other < size; other++) {
+ if(other != rank) {
+ requests[index] =
+ Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm);
+ index++;
+ requests[index] = Request::irecv_init(static_cast<char *>(recvbuf) + displs[other] * recvext, recvcounts[other],
+ recvtype, other, system_tag, comm);
+ index++;
+ }
+ }
+ // Wait for completion of all comms.
+ Request::startall(2 * (size - 1), requests);
+ Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE);
+ for (int other = 0; other < 2*(size-1); other++) {
+ Request::unref(&requests[other]);
+ }
+ xbt_free(requests);
+ return MPI_SUCCESS;
+}
+
+int Coll_scatter_default::scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+ void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
+{
+ int system_tag = COLL_TAG_SCATTER;
+ MPI_Aint lb = 0;
+ MPI_Aint sendext = 0;
+ MPI_Request *requests;
+
+ int rank = comm->rank();
+ int size = comm->size();
+ if(rank != root) {
+ // Recv buffer from root
+ Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE);
+ } else {
+ sendtype->extent(&lb, &sendext);
+ // Local copy from root
+ if(recvbuf!=MPI_IN_PLACE){
+ Datatype::copy(static_cast<char *>(sendbuf) + root * sendcount * sendext,
+ sendcount, sendtype, recvbuf, recvcount, recvtype);
+ }
+ // Send buffers to receivers
+ requests = xbt_new(MPI_Request, size - 1);
+ int index = 0;
+ for(int dst = 0; dst < size; dst++) {
+ if(dst != root) {
+ requests[index] = Request::isend_init(static_cast<char *>(sendbuf) + dst * sendcount * sendext, sendcount, sendtype,
+ dst, system_tag, comm);
+ index++;
+ }
+ }
+ // Wait for completion of isend's.
+ Request::startall(size - 1, requests);
+ Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
+ for (int dst = 0; dst < size-1; dst++) {
+ Request::unref(&requests[dst]);
+ }
+ xbt_free(requests);
+ }
+ return MPI_SUCCESS;
+}
+
+
+
+int Coll_reduce_default::reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
+ MPI_Comm comm)
+{
+ int system_tag = COLL_TAG_REDUCE;
+ MPI_Aint lb = 0;
+ MPI_Aint dataext = 0;
+
+ char* sendtmpbuf = static_cast<char *>(sendbuf);
+
+ int rank = comm->rank();
+ int size = comm->size();
+  //Non-commutative case: fall back to a working algorithm from OpenMPI
+ if(op != MPI_OP_NULL && !op->is_commutative()){
+ return Coll_reduce_ompi_basic_linear::reduce(sendtmpbuf, recvbuf, count, datatype, op, root, comm);
+ }
+
+ if( sendbuf == MPI_IN_PLACE ) {
+ sendtmpbuf = static_cast<char *>(smpi_get_tmp_sendbuffer(count*datatype->get_extent()));
+ Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype);
+ }
+
+ if(rank != root) {
+ // Send buffer to root
+ Request::send(sendtmpbuf, count, datatype, root, system_tag, comm);
+ } else {
+ datatype->extent(&lb, &dataext);
+ // Local copy from root
+ if (sendtmpbuf != nullptr && recvbuf != nullptr)
+ Datatype::copy(sendtmpbuf, count, datatype, recvbuf, count, datatype);
+ // Receive buffers from senders
+ MPI_Request *requests = xbt_new(MPI_Request, size - 1);
+ void **tmpbufs = xbt_new(void *, size - 1);
+ int index = 0;
+ for (int src = 0; src < size; src++) {
+ if (src != root) {
+ if (!smpi_process_get_replaying())
+ tmpbufs[index] = xbt_malloc(count * dataext);
+ else
+ tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
+ requests[index] =
+ Request::irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm);
+ index++;
+ }
+ }
+ // Wait for completion of irecv's.
+ Request::startall(size - 1, requests);
+ for (int src = 0; src < size - 1; src++) {
+ index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
+ XBT_DEBUG("finished waiting any request with index %d", index);
+ if(index == MPI_UNDEFINED) {
+ break;
+ }else{
+ Request::unref(&requests[index]);
+ }
+ if(op) /* op can be MPI_OP_NULL that does nothing */
+ if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype);
+ }
+ for(index = 0; index < size - 1; index++) {
+ smpi_free_tmp_buffer(tmpbufs[index]);
+ }
+ xbt_free(tmpbufs);
+ xbt_free(requests);
+
+ }
+ if( sendbuf == MPI_IN_PLACE ) {
+ smpi_free_tmp_buffer(sendtmpbuf);
+ }
+ return MPI_SUCCESS;
+}
+
+int Coll_allreduce_default::allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+{
+ int ret;
+ ret = Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
+ if(ret==MPI_SUCCESS)
+ ret = Colls::bcast(recvbuf, count, datatype, 0, comm);
+ return ret;
+}
+
+int Coll_alltoall_default::alltoall( void *sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, MPI_Datatype rdtype, MPI_Comm comm)
+{
+ return Coll_alltoall_ompi::alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
+}
+
+
+
+int Coll_alltoallv_default::alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype,
+ void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm)
+{
+ int system_tag = 889;
+ int i;
+ int count;
+ MPI_Aint lb = 0;
+ MPI_Aint sendext = 0;
+ MPI_Aint recvext = 0;
+ MPI_Request *requests;
+
+ /* Initialize. */
+ int rank = comm->rank();
+ int size = comm->size();
+ XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank);
+ sendtype->extent(&lb, &sendext);
+ recvtype->extent(&lb, &recvext);
+ /* Local copy from self */
+ int err = Datatype::copy(static_cast<char *>(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype,
+ static_cast<char *>(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype);
+ if (err == MPI_SUCCESS && size > 1) {
+ /* Initiate all send/recv to/from others. */
+ requests = xbt_new(MPI_Request, 2 * (size - 1));
+ count = 0;
+ /* Create all receives that will be posted first */
+ for (i = 0; i < size; ++i) {
+ if (i != rank && recvcounts[i] != 0) {
+ requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + recvdisps[i] * recvext,
+ recvcounts[i], recvtype, i, system_tag, comm);
+ count++;
+ }else{
+ XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]);
+ }
+ }
+ /* Now create all sends */
+ for (i = 0; i < size; ++i) {
+ if (i != rank && sendcounts[i] != 0) {
+ requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + senddisps[i] * sendext,
+ sendcounts[i], sendtype, i, system_tag, comm);
+ count++;
+ }else{
+ XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]);
+ }
+ }
+ /* Wait for them all. */
+ Request::startall(count, requests);
+ XBT_DEBUG("<%d> wait for %d requests", rank, count);
+ Request::waitall(count, requests, MPI_STATUS_IGNORE);
+ for(i = 0; i < count; i++) {
+ if(requests[i]!=MPI_REQUEST_NULL)
+ Request::unref(&requests[i]);
+ }
+ xbt_free(requests);
+ }
+ return err;
+}
+
+}
+}
+
int count,
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm) ={
- smpi_coll_tuned_allreduce_rdb,
- smpi_coll_tuned_allreduce_rab1,
- smpi_coll_tuned_allreduce_redbcast,
- smpi_coll_tuned_allreduce_mvapich2_two_level,
- smpi_coll_tuned_allreduce_smp_binomial,
- smpi_coll_tuned_allreduce_mvapich2_two_level,
- smpi_coll_tuned_allreduce_ompi_ring_segmented,
- smpi_coll_tuned_allreduce_ompi_ring_segmented
+ Coll_allreduce_rdb::allreduce,
+ Coll_allreduce_rab1::allreduce,
+ Coll_allreduce_redbcast::allreduce,
+ Coll_allreduce_mvapich2_two_level::allreduce,
+ Coll_allreduce_smp_binomial::allreduce,
+ Coll_allreduce_mvapich2_two_level::allreduce,
+ Coll_allreduce_ompi_ring_segmented::allreduce,
+ Coll_allreduce_ompi_ring_segmented::allreduce
};
intel_tuning_table_element intel_allreduce_table[] =
void* rbuf, int rcount,
MPI_Datatype rdtype,
MPI_Comm comm) ={
- smpi_coll_tuned_alltoall_bruck,
- smpi_coll_tuned_alltoall_mvapich2_scatter_dest,
- smpi_coll_tuned_alltoall_pair,
- smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient)
+ Coll_alltoall_bruck::alltoall,
+ Coll_alltoall_mvapich2_scatter_dest::alltoall,
+ Coll_alltoall_pair::alltoall,
+ Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient)
};
/*I_MPI_ADJUST_BARRIER
*/
static int intel_barrier_gather_scatter(MPI_Comm comm){
//our default barrier performs a antibcast/bcast
- smpi_mpi_barrier(comm);
+ Coll_barrier_default::barrier(comm);
return MPI_SUCCESS;
}
int (*intel_barrier_functions_table[])(MPI_Comm comm) ={
- smpi_coll_tuned_barrier_ompi_basic_linear,
- smpi_coll_tuned_barrier_ompi_recursivedoubling,
- smpi_coll_tuned_barrier_ompi_basic_linear,
- smpi_coll_tuned_barrier_ompi_recursivedoubling,
+ Coll_barrier_ompi_basic_linear::barrier,
+ Coll_barrier_ompi_recursivedoubling::barrier,
+ Coll_barrier_ompi_basic_linear::barrier,
+ Coll_barrier_ompi_recursivedoubling::barrier,
intel_barrier_gather_scatter,
intel_barrier_gather_scatter
};
int (*intel_bcast_functions_table[])(void *buff, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm) ={
- smpi_coll_tuned_bcast_binomial_tree,
- //smpi_coll_tuned_bcast_scatter_rdb_allgather,
- smpi_coll_tuned_bcast_NTSL,
- smpi_coll_tuned_bcast_NTSL,
- smpi_coll_tuned_bcast_SMP_binomial,
- //smpi_coll_tuned_bcast_scatter_rdb_allgather,
- smpi_coll_tuned_bcast_NTSL,
- smpi_coll_tuned_bcast_SMP_linear,
- smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo'
+ Coll_bcast_binomial_tree::bcast,
+ //Coll_bcast_scatter_rdb_allgather::bcast,
+ Coll_bcast_NTSL::bcast,
+ Coll_bcast_NTSL::bcast,
+ Coll_bcast_SMP_binomial::bcast,
+ //Coll_bcast_scatter_rdb_allgather::bcast,
+ Coll_bcast_NTSL::bcast,
+ Coll_bcast_SMP_linear::bcast,
+ Coll_bcast_mvapich2::bcast,//we don't know Shumilin's algorithm
};
intel_tuning_table_element intel_bcast_table[] =
int count, MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm) ={
- smpi_coll_tuned_reduce_mvapich2,
- smpi_coll_tuned_reduce_binomial,
- smpi_coll_tuned_reduce_mvapich2,
- smpi_coll_tuned_reduce_mvapich2_two_level,
- smpi_coll_tuned_reduce_rab,
- smpi_coll_tuned_reduce_rab
+ Coll_reduce_mvapich2::reduce,
+ Coll_reduce_binomial::reduce,
+ Coll_reduce_mvapich2::reduce,
+ Coll_reduce_mvapich2_two_level::reduce,
+ Coll_reduce_rab::reduce,
+ Coll_reduce_rab::reduce
};
intel_tuning_table_element intel_reduce_table[] =
MPI_Op op,
MPI_Comm comm)
{
- smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+ Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
return MPI_SUCCESS;
}
MPI_Comm comm)
{
if(op==MPI_OP_NULL || op->is_commutative())
- return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
+ return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
else
- return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
+ return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
}
int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
MPI_Comm comm
) ={
intel_reduce_scatter_recursivehalving,
- smpi_coll_tuned_reduce_scatter_mpich_pair,
- smpi_coll_tuned_reduce_scatter_mpich_rdb,
+ Coll_reduce_scatter_mpich_pair::reduce_scatter,
+ Coll_reduce_scatter_mpich_rdb::reduce_scatter,
intel_reduce_scatter_reduce_scatterv,
intel_reduce_scatter_reduce_scatterv
};
MPI_Datatype rdtype,
MPI_Comm comm
) ={
- smpi_coll_tuned_allgather_rdb,
- smpi_coll_tuned_allgather_bruck,
- smpi_coll_tuned_allgather_ring,
- smpi_coll_tuned_allgather_GB
+ Coll_allgather_rdb::allgather,
+ Coll_allgather_bruck::allgather,
+ Coll_allgather_ring::allgather,
+ Coll_allgather_GB::allgather
};
intel_tuning_table_element intel_allgather_table[] =
MPI_Datatype rdtype,
MPI_Comm comm
) ={
- smpi_coll_tuned_allgatherv_mpich_rdb,
- smpi_coll_tuned_allgatherv_ompi_bruck,
- smpi_coll_tuned_allgatherv_ring,
- smpi_coll_tuned_allgatherv_GB
+ Coll_allgatherv_mpich_rdb::allgatherv,
+ Coll_allgatherv_ompi_bruck::allgatherv,
+ Coll_allgatherv_ring::allgatherv,
+ Coll_allgatherv_GB::allgatherv
};
intel_tuning_table_element intel_allgatherv_table[] =
int root,
MPI_Comm comm
) ={
- smpi_coll_tuned_gather_ompi_binomial,
- smpi_coll_tuned_gather_ompi_binomial,
- smpi_coll_tuned_gather_mvapich2
+ Coll_gather_ompi_binomial::gather,
+ Coll_gather_ompi_binomial::gather,
+ Coll_gather_mvapich2::gather
};
intel_tuning_table_element intel_gather_table[] =
MPI_Datatype rdtype,
int root, MPI_Comm comm
) ={
- smpi_coll_tuned_scatter_ompi_binomial,
- smpi_coll_tuned_scatter_ompi_binomial,
- smpi_coll_tuned_scatter_mvapich2
+ Coll_scatter_ompi_binomial::scatter,
+ Coll_scatter_ompi_binomial::scatter,
+ Coll_scatter_mvapich2::scatter
};
intel_tuning_table_element intel_scatter_table[] =
MPI_Datatype rdtype,
MPI_Comm comm
) ={
- smpi_coll_tuned_alltoallv_ompi_basic_linear,
- smpi_coll_tuned_alltoallv_bruck
+ Coll_alltoallv_ompi_basic_linear::alltoallv,
+ Coll_alltoallv_bruck::alltoallv
};
intel_tuning_table_element intel_alltoallv_table[] =
size_t block_dsize = 1;
#define IMPI_COLL_SELECT(cat, ret, args, args2)\
-ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
+ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
{\
int comm_size = comm->size();\
int i =0;\
args2);\
}
+namespace simgrid{
+namespace smpi{
+
COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm));
+}
+}
End Algorithm: MPI_Allreduce
*/
-int smpi_coll_tuned_allreduce_mpich(void *sbuf, void *rbuf, int count,
+int Coll_allreduce_mpich::allreduce(void *sbuf, void *rbuf, int count,
MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
{
size_t dsize, block_dsize;
if (block_dsize > large_message && count >= pof2 && (op==MPI_OP_NULL || op->is_commutative())) {
//for long messages
- return (smpi_coll_tuned_allreduce_rab_rdb (sbuf, rbuf,
+ return (Coll_allreduce_rab_rdb::allreduce (sbuf, rbuf,
count, dtype,
op, comm));
}else {
//for short ones and count < pof2
- return (smpi_coll_tuned_allreduce_rdb (sbuf, rbuf,
+ return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
count, dtype,
op, comm));
}
End Algorithm: MPI_Alltoall
*/
-int smpi_coll_tuned_alltoall_mpich( void *sbuf, int scount,
+int Coll_alltoall_mpich::alltoall( void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
block_dsize = dsize * scount;
if ((block_dsize < short_size) && (communicator_size >= 8)) {
- return smpi_coll_tuned_alltoall_bruck(sbuf, scount, sdtype,
+ return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else if (block_dsize < medium_size) {
- return smpi_coll_tuned_alltoall_basic_linear(sbuf, scount, sdtype,
+ return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}else if (communicator_size%2){
- return smpi_coll_tuned_alltoall_ring(sbuf, scount, sdtype,
+ return Coll_alltoall_ring::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
- return smpi_coll_tuned_alltoall_ring (sbuf, scount, sdtype,
+ return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
-int smpi_coll_tuned_alltoallv_mpich(void *sbuf, int *scounts, int *sdisps,
+int Coll_alltoallv_mpich::alltoallv(void *sbuf, int *scounts, int *sdisps,
MPI_Datatype sdtype,
void *rbuf, int *rcounts, int *rdisps,
MPI_Datatype rdtype,
)
{
/* For starters, just keep the original algorithm. */
- return smpi_coll_tuned_alltoallv_bruck(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_bruck::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
}
-int smpi_coll_tuned_barrier_mpich(MPI_Comm comm)
+int Coll_barrier_mpich::barrier(MPI_Comm comm)
{
- return smpi_coll_tuned_barrier_ompi_bruck(comm);
+ return Coll_barrier_ompi_bruck::barrier(comm);
}
/* This is the default implementation of broadcast. The algorithm is:
*/
-int smpi_coll_tuned_bcast_mpich(void *buff, int count,
+int Coll_bcast_mpich::bcast(void *buff, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm
)
single-element broadcasts */
if ((message_size < small_message_size) || (communicator_size <= 8)) {
/* Binomial without segmentation */
- return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype,
+ return Coll_bcast_binomial_tree::bcast (buff, count, datatype,
root, comm);
} else if (message_size < intermediate_message_size && !(communicator_size%2)) {
// SplittedBinary with 1KB segments
- return smpi_coll_tuned_bcast_scatter_rdb_allgather(buff, count, datatype,
+ return Coll_bcast_scatter_rdb_allgather::bcast(buff, count, datatype,
root, comm);
}
//Handle large message sizes
- return smpi_coll_tuned_bcast_scatter_LR_allgather (buff, count, datatype,
+ return Coll_bcast_scatter_LR_allgather::bcast (buff, count, datatype,
root, comm);
}
*/
-int smpi_coll_tuned_reduce_mpich( void *sendbuf, void *recvbuf,
+int Coll_reduce_mpich::reduce( void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm
if ((count < pof2) || (message_size < 2048) || (op!=MPI_OP_NULL && !op->is_commutative())) {
- return smpi_coll_tuned_reduce_binomial (sendbuf, recvbuf, count, datatype, op, root, comm);
+ return Coll_reduce_binomial::reduce (sendbuf, recvbuf, count, datatype, op, root, comm);
}
- return smpi_coll_tuned_reduce_scatter_gather(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_scatter_gather::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
}
*/
-int smpi_coll_tuned_reduce_scatter_mpich( void *sbuf, void *rbuf,
+int Coll_reduce_scatter_mpich::reduce_scatter( void *sbuf, void *rbuf,
int *rcounts,
MPI_Datatype dtype,
MPI_Op op,
if(sbuf==rbuf)sbuf=MPI_IN_PLACE; //restore MPI_IN_PLACE as these algorithms handle it
- XBT_DEBUG("smpi_coll_tuned_reduce_scatter_mpich");
+ XBT_DEBUG("Coll_reduce_scatter_mpich::reduce_scatter");
comm_size = comm->size();
// We need data size for decision function
}
if( (op==MPI_OP_NULL || op->is_commutative()) && total_message_size > 524288) {
- return smpi_coll_tuned_reduce_scatter_mpich_pair (sbuf, rbuf, rcounts,
+ return Coll_reduce_scatter_mpich_pair::reduce_scatter (sbuf, rbuf, rcounts,
dtype, op,
comm);
}else if ((op!=MPI_OP_NULL && !op->is_commutative())) {
if (pof2 == comm_size && is_block_regular) {
/* noncommutative, pof2 size, and block regular */
- return smpi_coll_tuned_reduce_scatter_mpich_noncomm(sbuf, rbuf, rcounts, dtype, op, comm);
+ return Coll_reduce_scatter_mpich_noncomm::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
}
- return smpi_coll_tuned_reduce_scatter_mpich_rdb(sbuf, rbuf, rcounts, dtype, op, comm);
+ return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
}else{
- return smpi_coll_tuned_reduce_scatter_mpich_rdb(sbuf, rbuf, rcounts, dtype, op, comm);
+ return Coll_reduce_scatter_mpich_rdb::reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm);
}
}
End Algorithm: MPI_Allgather
*/
-int smpi_coll_tuned_allgather_mpich(void *sbuf, int scount,
+int Coll_allgather_mpich::allgather(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
- for everything else use ring.
*/
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype,
+ return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else if (total_dsize <= 81920) {
- return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype,
+ return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
- return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
+ return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
End Algorithm: MPI_Allgatherv
*/
-int smpi_coll_tuned_allgatherv_mpich(void *sbuf, int scount,
+int Coll_allgatherv_mpich::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int *rcounts,
int *rdispls,
for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1);
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return smpi_coll_tuned_allgatherv_mpich_rdb(sbuf, scount, sdtype,
+ return Coll_allgatherv_mpich_rdb::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
} else if (total_dsize <= 81920) {
- return smpi_coll_tuned_allgatherv_ompi_bruck(sbuf, scount, sdtype,
+ return Coll_allgatherv_ompi_bruck::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
}
- return smpi_coll_tuned_allgatherv_mpich_ring(sbuf, scount, sdtype,
+ return Coll_allgatherv_mpich_ring::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
}
End Algorithm: MPI_Gather
*/
+namespace simgrid{
+namespace smpi{
-int smpi_coll_tuned_gather_mpich(void *sbuf, int scount,
+int Coll_gather_mpich::gather(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
MPI_Comm comm
)
{
- return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype,
+ return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm);
}
+}
+}
+
/* This is the default implementation of scatter. The algorithm is:
Algorithm: MPI_Scatter
*/
-int smpi_coll_tuned_scatter_mpich(void *sbuf, int scount,
+int Coll_scatter_mpich::scatter(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
scount=rcount;
sdtype=rdtype;
}
- int ret= smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype,
+ int ret= Coll_scatter_ompi_binomial::scatter (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm);
if(comm->rank()!=root){
#include "smpi_mvapich2_selector_stampede.h"
+namespace simgrid{
+namespace smpi{
-int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount,
+
+int Coll_alltoall_mvapich2::alltoall( void *sendbuf, int sendcount,
MPI_Datatype sendtype,
void* recvbuf, int recvcount,
MPI_Datatype recvtype,
return (mpi_errno);
}
-int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+int Coll_allgather_mvapich2::allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
MPI_Comm comm)
{
}
/* Set inter-leader pt */
- MV2_Allgather_function =
+ MV2_Allgatherction =
mv2_allgather_thresholds_table[conf_index][range].inter_leader[range_threshold].
- MV2_pt_Allgather_function;
+ MV2_pt_Allgatherction;
is_two_level = mv2_allgather_thresholds_table[conf_index][range].two_level[range_threshold];
recvbuf, recvcount, recvtype,
comm);
}else{
- mpi_errno = smpi_coll_tuned_allgather_mpich(sendbuf, sendcount, sendtype,
+ mpi_errno = Coll_allgather_mpich::allgather(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
comm);
}
recvbuf, recvcount, recvtype,
comm);
}
- } else if(MV2_Allgather_function == &MPIR_Allgather_Bruck_MV2
- || MV2_Allgather_function == &MPIR_Allgather_RD_MV2
- || MV2_Allgather_function == &MPIR_Allgather_Ring_MV2) {
- mpi_errno = MV2_Allgather_function(sendbuf, sendcount, sendtype,
+ } else if(MV2_Allgatherction == &MPIR_Allgather_Bruck_MV2
+ || MV2_Allgatherction == &MPIR_Allgather_RD_MV2
+ || MV2_Allgatherction == &MPIR_Allgather_Ring_MV2) {
+ mpi_errno = MV2_Allgatherction(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
comm);
}else{
return mpi_errno;
}
-
-int smpi_coll_tuned_gather_mvapich2(void *sendbuf,
+int Coll_gather_mvapich2::gather(void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
void *recvbuf,
} else {
// Indeed, direct (non SMP-aware)gather is MPICH one
- mpi_errno = smpi_coll_tuned_gather_mpich(sendbuf, sendcnt, sendtype,
+ mpi_errno = Coll_gather_mpich::gather(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype,
root, comm);
}
return mpi_errno;
}
-
-int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+int Coll_allgatherv_mvapich2::allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs,
MPI_Datatype recvtype, MPI_Comm comm )
{
-int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf,
+int Coll_allreduce_mvapich2::allreduce(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
if(mv2_allreduce_thresholds_table[range].mcast_enabled != 1){
while ((range_threshold < (mv2_allreduce_thresholds_table[range].size_inter_table - 1))
&& ((mv2_allreduce_thresholds_table[range].
- inter_leader[range_threshold].MV2_pt_Allreduce_function
+ inter_leader[range_threshold].MV2_pt_Allreducection
== &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2) ||
(mv2_allreduce_thresholds_table[range].
- inter_leader[range_threshold].MV2_pt_Allreduce_function
+ inter_leader[range_threshold].MV2_pt_Allreducection
== &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)
)) {
range_threshold++;
range_threshold_intra++;
}
- MV2_Allreduce_function = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold]
- .MV2_pt_Allreduce_function;
+ MV2_Allreducection = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold]
+ .MV2_pt_Allreducection;
MV2_Allreduce_intra_function = mv2_allreduce_thresholds_table[range].intra_node[range_threshold_intra]
- .MV2_pt_Allreduce_function;
+ .MV2_pt_Allreducection;
/* check if mcast is ready, otherwise replace mcast with other algorithm */
- if((MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)||
- (MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){
+ if((MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)||
+ (MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){
{
- MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2;
+ MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2;
}
if(is_two_level != 1) {
- MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2;
+ MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2;
}
}
datatype, op, comm);
}
} else {
- mpi_errno = MV2_Allreduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Allreducection(sendbuf, recvbuf, count,
datatype, op, comm);
}
}
}
-int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps,
+int Coll_alltoallv_mvapich2::alltoallv(void *sbuf, int *scounts, int *sdisps,
MPI_Datatype sdtype,
void *rbuf, int *rcounts, int *rdisps,
MPI_Datatype rdtype,
{
if (sbuf == MPI_IN_PLACE) {
- return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
} else /* For starters, just keep the original algorithm. */
- return smpi_coll_tuned_alltoallv_ring(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_ring::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
}
-int smpi_coll_tuned_barrier_mvapich2(MPI_Comm comm)
+int Coll_barrier_mvapich2::barrier(MPI_Comm comm)
{
- return smpi_coll_tuned_barrier_mvapich2_pair(comm);
+ return Coll_barrier_mvapich2_pair::barrier(comm);
}
-int smpi_coll_tuned_bcast_mvapich2(void *buffer,
+int Coll_bcast_mvapich2::bcast(void *buffer,
int count,
MPI_Datatype datatype,
int root, MPI_Comm comm)
-int smpi_coll_tuned_reduce_mvapich2( void *sendbuf,
+int Coll_reduce_mvapich2::reduce( void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
}
-int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *recvcnts,
+int Coll_reduce_scatter_mvapich2::reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts,
MPI_Datatype datatype, MPI_Op op,
MPI_Comm comm)
{
recvcnts, datatype,
op, comm);
}
- mpi_errno = smpi_coll_tuned_reduce_scatter_mpich_rdb(sendbuf, recvbuf,
+ mpi_errno = Coll_reduce_scatter_mpich_rdb::reduce_scatter(sendbuf, recvbuf,
recvcnts, datatype,
op, comm);
}
-int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
+int Coll_scatter_mvapich2::scatter(void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
void *recvbuf,
return (mpi_errno);
}
+}
+}
void smpi_coll_cleanup_mvapich2(void){
int i=0;
if(mv2_alltoall_thresholds_table)
#define MV2_MAX_NB_THRESHOLDS 32
+using namespace simgrid::smpi;
typedef struct {
int min;
mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL;
-#define MPIR_Alltoall_bruck_MV2 smpi_coll_tuned_alltoall_bruck
-#define MPIR_Alltoall_RD_MV2 smpi_coll_tuned_alltoall_rdb
-#define MPIR_Alltoall_Scatter_dest_MV2 smpi_coll_tuned_alltoall_mvapich2_scatter_dest
-#define MPIR_Alltoall_pairwise_MV2 smpi_coll_tuned_alltoall_pair
-#define MPIR_Alltoall_inplace_MV2 smpi_coll_tuned_alltoall_ring
+#define MPIR_Alltoall_bruck_MV2 Coll_alltoall_bruck::alltoall
+#define MPIR_Alltoall_RD_MV2 Coll_alltoall_rdb::alltoall
+#define MPIR_Alltoall_Scatter_dest_MV2 Coll_alltoall_mvapich2_scatter_dest::alltoall
+#define MPIR_Alltoall_pairwise_MV2 Coll_alltoall_pair::alltoall
+#define MPIR_Alltoall_inplace_MV2 Coll_alltoall_ring::alltoall
static void init_mv2_alltoall_tables_stampede(){
typedef struct {
int min;
int max;
- int (*MV2_pt_Allgather_function)(void *sendbuf,
+ int (*MV2_pt_Allgatherction)(void *sendbuf,
int sendcount,
MPI_Datatype sendtype,
void *recvbuf,
mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
} mv2_allgather_tuning_table;
-int (*MV2_Allgather_function)(void *sendbuf,
+int (*MV2_Allgatherction)(void *sendbuf,
int sendcount,
MPI_Datatype sendtype,
void *recvbuf,
return 0;
}
-#define MPIR_Allgather_Bruck_MV2 smpi_coll_tuned_allgather_bruck
-#define MPIR_Allgather_RD_MV2 smpi_coll_tuned_allgather_rdb
-#define MPIR_Allgather_Ring_MV2 smpi_coll_tuned_allgather_ring
-#define MPIR_2lvl_Allgather_MV2 smpi_coll_tuned_allgather_mvapich2_smp
+#define MPIR_Allgather_Bruck_MV2 Coll_allgather_bruck::allgather
+#define MPIR_Allgather_RD_MV2 Coll_allgather_rdb::allgather
+#define MPIR_Allgather_Ring_MV2 Coll_allgather_ring::allgather
+#define MPIR_2lvl_Allgather_MV2 Coll_allgather_mvapich2_smp::allgather
static void init_mv2_allgather_tables_stampede(){
int i;
MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL;
-#define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear
-#define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_mvapich2_two_level
-#define MPIR_Gather_intra smpi_coll_tuned_gather_mpich
+
+#define MPIR_Gather_MV2_Direct Coll_gather_ompi_basic_linear::gather
+#define MPIR_Gather_MV2_two_level_Direct Coll_gather_mvapich2_two_level::gather
+#define MPIR_Gather_intra Coll_gather_mpich::gather
static void init_mv2_gather_tables_stampede(){
int mv2_size_allgatherv_tuning_table = 0;
mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL;
-#define MPIR_Allgatherv_Rec_Doubling_MV2 smpi_coll_tuned_allgatherv_mpich_rdb
-#define MPIR_Allgatherv_Bruck_MV2 smpi_coll_tuned_allgatherv_ompi_bruck
-#define MPIR_Allgatherv_Ring_MV2 smpi_coll_tuned_allgatherv_mpich_ring
+#define MPIR_Allgatherv_Rec_Doubling_MV2 Coll_allgatherv_mpich_rdb::allgatherv
+#define MPIR_Allgatherv_Bruck_MV2 Coll_allgatherv_ompi_bruck::allgatherv
+#define MPIR_Allgatherv_Ring_MV2 Coll_allgatherv_mpich_ring::allgatherv
static void init_mv2_allgatherv_tables_stampede(){
typedef struct {
int min;
int max;
- int (*MV2_pt_Allreduce_function)(void *sendbuf,
+ int (*MV2_pt_Allreducection)(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
} mv2_allreduce_tuning_table;
-int (*MV2_Allreduce_function)(void *sendbuf,
+int (*MV2_Allreducection)(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm)
{
- mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm);
+ Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
return MPI_SUCCESS;
}
MPI_Datatype datatype,
MPI_Op op, MPI_Comm comm)
{
- mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm);
+ Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
return MPI_SUCCESS;
}
-#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb
-#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs
-#define MPIR_Allreduce_two_level_MV2 smpi_coll_tuned_allreduce_mvapich2_two_level
+#define MPIR_Allreduce_pt2pt_rd_MV2 Coll_allreduce_rdb::allreduce
+#define MPIR_Allreduce_pt2pt_rs_MV2 Coll_allreduce_mvapich2_rs::allreduce
+#define MPIR_Allreduce_two_level_MV2 Coll_allreduce_mvapich2_two_level::allreduce
static void init_mv2_allreduce_tables_stampede(){
#define INTRA_NODE_ROOT 0
-#define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich
-#define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich
-#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree
-#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather
-#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather
-#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather
-#define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich
-#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node
-#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node
-#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node
-#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node
+#define MPIR_Pipelined_Bcast_Zcpy_MV2 Coll_bcast_mpich::bcast
+#define MPIR_Pipelined_Bcast_MV2 Coll_bcast_mpich::bcast
+#define MPIR_Bcast_binomial_MV2 Coll_bcast_binomial_tree::bcast
+#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 Coll_bcast_scatter_LR_allgather::bcast
+#define MPIR_Bcast_scatter_doubling_allgather_MV2 Coll_bcast_scatter_rdb_allgather::bcast
+#define MPIR_Bcast_scatter_ring_allgather_MV2 Coll_bcast_scatter_LR_allgather::bcast
+#define MPIR_Shmem_Bcast_MV2 Coll_bcast_mpich::bcast
+#define MPIR_Bcast_tune_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast
+#define MPIR_Bcast_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast
+#define MPIR_Knomial_Bcast_intra_node_MV2 Coll_bcast_mvapich2_knomial_intra_node::bcast
+#define MPIR_Bcast_intra_MV2 Coll_bcast_mvapich2_intra_node::bcast
static void init_mv2_bcast_tables_stampede(){
//Stampede,
MPI_Comm comm_ptr)=NULL;
-#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial
-#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_mvapich2_knomial
-#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_binomial
-#define MPIR_Reduce_redscat_gather_MV2 smpi_coll_tuned_reduce_scatter_gather
-#define MPIR_Reduce_shmem_MV2 smpi_coll_tuned_reduce_ompi_basic_linear
-#define MPIR_Reduce_two_level_helper_MV2 smpi_coll_tuned_reduce_mvapich2_two_level
+#define MPIR_Reduce_inter_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce
+#define MPIR_Reduce_intra_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce
+#define MPIR_Reduce_binomial_MV2 Coll_reduce_binomial::reduce
+#define MPIR_Reduce_redscat_gather_MV2 Coll_reduce_scatter_gather::reduce
+#define MPIR_Reduce_shmem_MV2 Coll_reduce_ompi_basic_linear::reduce
+#define MPIR_Reduce_two_level_helper_MV2 Coll_reduce_mvapich2_two_level::reduce
static void init_mv2_reduce_tables_stampede(){
MPI_Op op,
MPI_Comm comm)
{
- smpi_mpi_reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm);
+ Coll_reduce_scatter_default::reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm);
return MPI_SUCCESS;
}
-#define MPIR_Reduce_scatter_non_comm_MV2 smpi_coll_tuned_reduce_scatter_mpich_noncomm
-#define MPIR_Reduce_scatter_Rec_Halving_MV2 smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving
-#define MPIR_Reduce_scatter_Pair_Wise_MV2 smpi_coll_tuned_reduce_scatter_mpich_pair
+#define MPIR_Reduce_scatter_non_comm_MV2 Coll_reduce_scatter_mpich_noncomm::reduce_scatter
+#define MPIR_Reduce_scatter_Rec_Halving_MV2 Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter
+#define MPIR_Reduce_scatter_Pair_Wise_MV2 Coll_reduce_scatter_mpich_pair::reduce_scatter
return 0;
}
-#define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial
-#define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear
-#define MPIR_Scatter_MV2_two_level_Binomial smpi_coll_tuned_scatter_mvapich2_two_level_binomial
-#define MPIR_Scatter_MV2_two_level_Direct smpi_coll_tuned_scatter_mvapich2_two_level_direct
+#define MPIR_Scatter_MV2_Binomial Coll_scatter_ompi_binomial::scatter
+#define MPIR_Scatter_MV2_Direct Coll_scatter_ompi_basic_linear::scatter
+#define MPIR_Scatter_MV2_two_level_Binomial Coll_scatter_mvapich2_two_level_binomial::scatter
+#define MPIR_Scatter_MV2_two_level_Direct Coll_scatter_mvapich2_two_level_direct::scatter
#include "colls_private.h"
+namespace simgrid{
+namespace smpi{
-int smpi_coll_tuned_allreduce_ompi(void *sbuf, void *rbuf, int count,
+int Coll_allreduce_ompi::allreduce(void *sbuf, void *rbuf, int count,
MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
{
size_t dsize, block_dsize;
block_dsize = dsize * count;
if (block_dsize < intermediate_message) {
- return (smpi_coll_tuned_allreduce_rdb (sbuf, rbuf,
+ return (Coll_allreduce_rdb::allreduce (sbuf, rbuf,
count, dtype,
op, comm));
}
if ((comm_size * segment_size >= block_dsize)) {
//FIXME: ok, these are not the right algorithms, try to find closer ones
// lr is a good match for allreduce_ring (difference is mainly the use of sendrecv)
- return smpi_coll_tuned_allreduce_lr(sbuf, rbuf, count, dtype,
+ return Coll_allreduce_lr::allreduce(sbuf, rbuf, count, dtype,
op, comm);
} else {
- return (smpi_coll_tuned_allreduce_ompi_ring_segmented (sbuf, rbuf,
+ return (Coll_allreduce_ompi_ring_segmented::allreduce (sbuf, rbuf,
count, dtype,
op, comm
/*segment_size*/));
}
}
- return (smpi_coll_tuned_allreduce_redbcast(sbuf, rbuf, count,
+ return (Coll_allreduce_redbcast::allreduce(sbuf, rbuf, count,
dtype, op, comm));
}
-int smpi_coll_tuned_alltoall_ompi( void *sbuf, int scount,
+int Coll_alltoall_ompi::alltoall( void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
block_dsize = dsize * scount;
if ((block_dsize < 200) && (communicator_size > 12)) {
- return smpi_coll_tuned_alltoall_bruck(sbuf, scount, sdtype,
+ return Coll_alltoall_bruck::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else if (block_dsize < 3000) {
- return smpi_coll_tuned_alltoall_basic_linear(sbuf, scount, sdtype,
+ return Coll_alltoall_basic_linear::alltoall(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
- return smpi_coll_tuned_alltoall_ring (sbuf, scount, sdtype,
+ return Coll_alltoall_ring::alltoall (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
-int smpi_coll_tuned_alltoallv_ompi(void *sbuf, int *scounts, int *sdisps,
+int Coll_alltoallv_ompi::alltoallv(void *sbuf, int *scounts, int *sdisps,
MPI_Datatype sdtype,
void *rbuf, int *rcounts, int *rdisps,
MPI_Datatype rdtype,
)
{
/* For starters, just keep the original algorithm. */
- return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
}
-int smpi_coll_tuned_barrier_ompi(MPI_Comm comm)
+int Coll_barrier_ompi::barrier(MPI_Comm comm)
{ int communicator_size = comm->size();
if( 2 == communicator_size )
- return smpi_coll_tuned_barrier_ompi_two_procs(comm);
+ return Coll_barrier_ompi_two_procs::barrier(comm);
/* * Basic optimisation. If we have a power of 2 number of nodes*/
/* * the use the recursive doubling algorithm, otherwise*/
/* * bruck is the one we want.*/
for( ; communicator_size > 0; communicator_size >>= 1 ) {
if( communicator_size & 0x1 ) {
if( has_one )
- return smpi_coll_tuned_barrier_ompi_bruck(comm);
+ return Coll_barrier_ompi_bruck::barrier(comm);
has_one = 1;
}
}
}
- return smpi_coll_tuned_barrier_ompi_recursivedoubling(comm);
+ return Coll_barrier_ompi_recursivedoubling::barrier(comm);
}
-int smpi_coll_tuned_bcast_ompi(void *buff, int count,
+int Coll_bcast_ompi::bcast(void *buff, int count,
MPI_Datatype datatype, int root,
MPI_Comm comm
)
single-element broadcasts */
if ((message_size < small_message_size) || (count <= 1)) {
/* Binomial without segmentation */
- return smpi_coll_tuned_bcast_binomial_tree (buff, count, datatype,
+ return Coll_bcast_binomial_tree::bcast (buff, count, datatype,
root, comm);
} else if (message_size < intermediate_message_size) {
// SplittedBinary with 1KB segments
- return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype,
+ return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
root, comm);
}
else if (communicator_size < (a_p128 * message_size + b_p128)) {
//Pipeline with 128KB segments
//segsize = 1024 << 7;
- return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype,
+ return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
root, comm);
} else if (communicator_size < 13) {
// Split Binary with 8KB segments
- return smpi_coll_tuned_bcast_ompi_split_bintree(buff, count, datatype,
+ return Coll_bcast_ompi_split_bintree::bcast(buff, count, datatype,
root, comm);
} else if (communicator_size < (a_p64 * message_size + b_p64)) {
// Pipeline with 64KB segments
//segsize = 1024 << 6;
- return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype,
+ return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
root, comm);
} else if (communicator_size < (a_p16 * message_size + b_p16)) {
//Pipeline with 16KB segments
//segsize = 1024 << 4;
- return smpi_coll_tuned_bcast_ompi_pipeline (buff, count, datatype,
+ return Coll_bcast_ompi_pipeline::bcast (buff, count, datatype,
root, comm);
}
/* Pipeline with 8KB segments */
//segsize = 1024 << 3;
- return smpi_coll_tuned_bcast_flattree_pipeline (buff, count, datatype,
+ return Coll_bcast_flattree_pipeline::bcast (buff, count, datatype,
root, comm
/*segsize*/);
#if 0
/* this is based on gige measurements */
if (communicator_size < 4) {
- return smpi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
+ return Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module);
}
if (communicator_size == 4) {
if (message_size < 524288) segsize = 0;
else segsize = 16384;
- return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
+ return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize);
}
if (communicator_size <= 8 && message_size < 4096) {
- return smpi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
+ return Coll_bcast_intra_basic_linear::bcast (buff, count, datatype, root, comm, module);
}
if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) {
segsize = 16384;
- return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
+ return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize);
}
if (message_size >= 524288) {
segsize = 16384;
- return smpi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, module, segsize);
+ return Coll_bcast_intra_pipeline::bcast (buff, count, datatype, root, comm, module, segsize);
}
segsize = 0;
/* once tested can swap this back in */
- /* return smpi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */
- return smpi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
+ /* return Coll_bcast_intra_bmtree::bcast (buff, count, datatype, root, comm, segsize); */
+ return Coll_bcast_intra_bintree::bcast (buff, count, datatype, root, comm, module, segsize);
#endif /* 0 */
}
-int smpi_coll_tuned_reduce_ompi( void *sendbuf, void *recvbuf,
+int Coll_reduce_ompi::reduce( void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype,
MPI_Op op, int root,
MPI_Comm comm
*/
if( (op!=MPI_OP_NULL) && !op->is_commutative() ) {
if ((communicator_size < 12) && (message_size < 2048)) {
- return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/);
+ return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module*/);
}
- return smpi_coll_tuned_reduce_ompi_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_ompi_in_order_binary::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
0, max_requests*/);
}
if ((communicator_size < 8) && (message_size < 512)){
/* Linear_0K */
- return smpi_coll_tuned_reduce_ompi_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm);
+ return Coll_reduce_ompi_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm);
} else if (((communicator_size < 8) && (message_size < 20480)) ||
(message_size < 2048) || (count <= 1)) {
/* Binomial_0K */
//segsize = 0;
- return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
} else if (communicator_size > (a1 * message_size + b1)) {
// Binomial_1K
//segsize = 1024;
- return smpi_coll_tuned_reduce_ompi_binomial(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_ompi_binomial::reduce(sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
} else if (communicator_size > (a2 * message_size + b2)) {
// Pipeline_1K
//segsize = 1024;
- return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
} else if (communicator_size > (a3 * message_size + b3)) {
// Binary_32K
//segsize = 32*1024;
- return smpi_coll_tuned_reduce_ompi_binary( sendbuf, recvbuf, count, datatype, op, root,
+ return Coll_reduce_ompi_binary::reduce( sendbuf, recvbuf, count, datatype, op, root,
comm/*, module, segsize, max_requests*/);
}
// if (communicator_size > (a4 * message_size + b4)) {
// Pipeline_64K
// segsize = 64*1024;
// }
- return smpi_coll_tuned_reduce_ompi_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
+ return Coll_reduce_ompi_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm/*, module,
segsize, max_requests*/);
#if 0
fanout = communicator_size - 1;
/* when linear implemented or taken from basic put here, right now using chain as a linear system */
/* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */
- return smpi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
- /* return smpi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
+ return Coll_reduce_intra_basic_linear::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module);
+ /* return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
}
if (message_size < 524288) {
if (message_size <= 65536 ) {
}
/* later swap this for a binary tree */
/* fanout = 2; */
- return smpi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, module,
+ return Coll_reduce_intra_chain::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, fanout, max_requests);
}
segsize = 1024;
- return smpi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module,
+ return Coll_reduce_intra_pipeline::reduce (sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
#endif /* 0 */
}
-int smpi_coll_tuned_reduce_scatter_ompi( void *sbuf, void *rbuf,
+int Coll_reduce_scatter_ompi::reduce_scatter( void *sbuf, void *rbuf,
int *rcounts,
MPI_Datatype dtype,
MPI_Op op,
const size_t large_message_size = 256 * 1024;
int zerocounts = 0;
- XBT_DEBUG("smpi_coll_tuned_reduce_scatter_ompi");
+ XBT_DEBUG("Coll_reduce_scatter_ompi::reduce_scatter");
comm_size = comm->size();
// We need data size for decision function
}
if( ((op!=MPI_OP_NULL) && !op->is_commutative()) || (zerocounts)) {
- smpi_mpi_reduce_scatter (sbuf, rbuf, rcounts,
+ Coll_reduce_scatter_default::reduce_scatter (sbuf, rbuf, rcounts,
dtype, op,
comm);
return MPI_SUCCESS;
((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
(comm_size >= a * total_message_size + b)) {
return
- smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,
+ Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,
dtype, op,
comm);
}
- return smpi_coll_tuned_reduce_scatter_ompi_ring(sbuf, rbuf, rcounts,
+ return Coll_reduce_scatter_ompi_ring::reduce_scatter(sbuf, rbuf, rcounts,
dtype, op,
comm);
}
-int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount,
+int Coll_allgather_ompi::allgather(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
/* Special case for 2 processes */
if (communicator_size == 2) {
- return smpi_coll_tuned_allgather_pair (sbuf, scount, sdtype,
+ return Coll_allgather_pair::allgather (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm/*, module*/);
}
*/
if (total_dsize < 50000) {
if (pow2_size == communicator_size) {
- return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype,
+ return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else {
- return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype,
+ return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
} else {
if (communicator_size % 2) {
- return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
+ return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else {
- return smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype,
+ return Coll_allgather_ompi_neighborexchange::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
- for everything else use ring.
*/
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
- return smpi_coll_tuned_allgather_rdb(sbuf, scount, sdtype,
+ return Coll_allgather_rdb::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
} else if (total_dsize <= 81920) {
- return smpi_coll_tuned_allgather_bruck(sbuf, scount, sdtype,
+ return Coll_allgather_bruck::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
}
- return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
+ return Coll_allgather_ring::allgather(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm);
#endif /* defined(USE_MPICH2_DECISION) */
}
-int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount,
+int Coll_allgatherv_ompi::allgatherv(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int *rcounts,
int *rdispls,
/* Special case for 2 processes */
if (communicator_size == 2) {
- return smpi_coll_tuned_allgatherv_pair(sbuf, scount, sdtype,
+ return Coll_allgatherv_pair::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
}
/* Decision based on allgather decision. */
if (total_dsize < 50000) {
-/* return smpi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype,
+/* return Coll_allgatherv_intra_bruck::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);*/
- return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype,
+ return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
} else {
if (communicator_size % 2) {
- return smpi_coll_tuned_allgatherv_ring(sbuf, scount, sdtype,
+ return Coll_allgatherv_ring::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
} else {
- return smpi_coll_tuned_allgatherv_ompi_neighborexchange(sbuf, scount, sdtype,
+ return Coll_allgatherv_ompi_neighborexchange::allgatherv(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm);
}
}
}
-int smpi_coll_tuned_gather_ompi(void *sbuf, int scount,
+int Coll_gather_ompi::gather(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
/* root, comm);*/
/* } else*/ if (block_size > intermediate_block_size) {
- return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype,
+ return Coll_gather_ompi_linear_sync::gather (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm);
} else if ((communicator_size > large_communicator_size) ||
((communicator_size > small_communicator_size) &&
(block_size < small_block_size))) {
- return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype,
+ return Coll_gather_ompi_binomial::gather (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm);
}
// Otherwise, use basic linear
- return smpi_coll_tuned_gather_ompi_basic_linear (sbuf, scount, sdtype,
+ return Coll_gather_ompi_basic_linear::gather (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm);
}
-int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount,
+
+int Coll_scatter_ompi::scatter(void *sbuf, int scount,
MPI_Datatype sdtype,
void* rbuf, int rcount,
MPI_Datatype rdtype,
int communicator_size, rank;
size_t dsize, block_size;
- XBT_DEBUG("smpi_coll_tuned_scatter_ompi");
+ XBT_DEBUG("Coll_scatter_ompi::scatter");
communicator_size = comm->size();
rank = comm->rank();
scount=rcount;
sdtype=rdtype;
}
- int ret=smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype,
+ int ret=Coll_scatter_ompi_binomial::scatter (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm);
if(rank!=root){
}
return ret;
}
- return smpi_coll_tuned_scatter_ompi_basic_linear (sbuf, scount, sdtype,
+ return Coll_scatter_ompi_basic_linear::scatter (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm);
}
+}
+}
#include "xbt/xbt_os_time.h"
#include "src/smpi/smpi_f2c.hpp"
#include "src/smpi/smpi_group.hpp"
+#include "src/smpi/smpi_coll.hpp"
#include "src/smpi/smpi_comm.hpp"
#include "src/smpi/smpi_info.hpp"
#include "src/smpi/smpi_op.hpp"
XBT_PRIVATE void smpi_empty_status(MPI_Status * status);
XBT_PRIVATE int smpi_mpi_get_count(MPI_Status * status, MPI_Datatype datatype);
-XBT_PRIVATE int smpi_info_c2f(MPI_Info info);
-XBT_PRIVATE int smpi_info_add_f(MPI_Info info);
-XBT_PRIVATE MPI_Info smpi_info_f2c(int info);
-
-
-XBT_PRIVATE void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_barrier(MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts,
- MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
- int recvcount, MPI_Datatype recvtype, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
- int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype,
- void *recvbuf, int recvcount,MPI_Datatype recvtype, int root, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
- MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_allreduce(void *sendbuf, void *recvbuf, int count,MPI_Datatype datatype, MPI_Op op,
- MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count,MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
-XBT_PRIVATE void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
- MPI_Comm comm);
-
-XBT_PRIVATE int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
- int recvcount, MPI_Datatype recvtype, MPI_Comm comm);
-XBT_PRIVATE int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
- int recvcount, MPI_Datatype recvtype, MPI_Comm comm);
-XBT_PRIVATE int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm);
-XBT_PRIVATE int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype,
- void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm);
+
XBT_PRIVATE int smpi_comm_keyval_create(MPI_Comm_copy_attr_function* copy_fn, MPI_Comm_delete_attr_function* delete_fn,
int* keyval, void* extra_state);
XBT_PRIVATE int smpi_comm_keyval_free(int* keyval);
return time;
}
-void smpi_mpi_bcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
-{
- smpi_coll_tuned_bcast_binomial_tree(buf, count, datatype, root, comm);
-}
-
-void smpi_mpi_barrier(MPI_Comm comm)
-{
- smpi_coll_tuned_barrier_ompi_basic_linear(comm);
-}
-
-void smpi_mpi_gather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
-{
- int system_tag = COLL_TAG_GATHER;
- MPI_Aint lb = 0;
- MPI_Aint recvext = 0;
-
- int rank = comm->rank();
- int size = comm->size();
- if(rank != root) {
- // Send buffer to root
- Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm);
- } else {
- recvtype->extent(&lb, &recvext);
- // Local copy from root
- Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char*>(recvbuf) + root * recvcount * recvext,
- recvcount, recvtype);
- // Receive buffers from senders
- MPI_Request *requests = xbt_new(MPI_Request, size - 1);
- int index = 0;
- for (int src = 0; src < size; src++) {
- if(src != root) {
- requests[index] = Request::irecv_init(static_cast<char*>(recvbuf) + src * recvcount * recvext, recvcount, recvtype,
- src, system_tag, comm);
- index++;
- }
- }
- // Wait for completion of irecv's.
- Request::startall(size - 1, requests);
- Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
- for (int src = 0; src < size-1; src++) {
- Request::unref(&requests[src]);
- }
- xbt_free(requests);
- }
-}
-
-void smpi_mpi_reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts, MPI_Datatype datatype, MPI_Op op,
- MPI_Comm comm)
-{
- int rank = comm->rank();
-
- /* arbitrarily choose root as rank 0 */
- int size = comm->size();
- int count = 0;
- int *displs = xbt_new(int, size);
- for (int i = 0; i < size; i++) {
- displs[i] = count;
- count += recvcounts[i];
- }
- void *tmpbuf = static_cast<void*>(smpi_get_tmp_sendbuffer(count*datatype->get_extent()));
-
- mpi_coll_reduce_fun(sendbuf, tmpbuf, count, datatype, op, 0, comm);
- smpi_mpi_scatterv(tmpbuf, recvcounts, displs, datatype, recvbuf, recvcounts[rank], datatype, 0, comm);
- xbt_free(displs);
- smpi_free_tmp_buffer(tmpbuf);
-}
-
-void smpi_mpi_gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs,
- MPI_Datatype recvtype, int root, MPI_Comm comm)
-{
- int system_tag = COLL_TAG_GATHERV;
- MPI_Aint lb = 0;
- MPI_Aint recvext = 0;
-
- int rank = comm->rank();
- int size = comm->size();
- if (rank != root) {
- // Send buffer to root
- Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm);
- } else {
- recvtype->extent(&lb, &recvext);
- // Local copy from root
- Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char*>(recvbuf) + displs[root] * recvext,
- recvcounts[root], recvtype);
- // Receive buffers from senders
- MPI_Request *requests = xbt_new(MPI_Request, size - 1);
- int index = 0;
- for (int src = 0; src < size; src++) {
- if(src != root) {
- requests[index] = Request::irecv_init(static_cast<char*>(recvbuf) + displs[src] * recvext,
- recvcounts[src], recvtype, src, system_tag, comm);
- index++;
- }
- }
- // Wait for completion of irecv's.
- Request::startall(size - 1, requests);
- Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
- for (int src = 0; src < size-1; src++) {
- Request::unref(&requests[src]);
- }
- xbt_free(requests);
- }
-}
-
-void smpi_mpi_allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
-{
- int system_tag = COLL_TAG_ALLGATHER;
- MPI_Aint lb = 0;
- MPI_Aint recvext = 0;
- MPI_Request *requests;
-
- int rank = comm->rank();
- int size = comm->size();
- // FIXME: check for errors
- recvtype->extent(&lb, &recvext);
- // Local copy from self
- Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char *>(recvbuf) + rank * recvcount * recvext, recvcount,
- recvtype);
- // Send/Recv buffers to/from others;
- requests = xbt_new(MPI_Request, 2 * (size - 1));
- int index = 0;
- for (int other = 0; other < size; other++) {
- if(other != rank) {
- requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm);
- index++;
- requests[index] = Request::irecv_init(static_cast<char *>(recvbuf) + other * recvcount * recvext, recvcount, recvtype,
- other, system_tag, comm);
- index++;
- }
- }
- // Wait for completion of all comms.
- Request::startall(2 * (size - 1), requests);
- Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE);
- for (int other = 0; other < 2*(size-1); other++) {
- Request::unref(&requests[other]);
- }
- xbt_free(requests);
-}
-
-void smpi_mpi_allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
- int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm)
-{
- int system_tag = COLL_TAG_ALLGATHERV;
- MPI_Aint lb = 0;
- MPI_Aint recvext = 0;
-
- int rank = comm->rank();
- int size = comm->size();
- recvtype->extent(&lb, &recvext);
- // Local copy from self
- Datatype::copy(sendbuf, sendcount, sendtype,
- static_cast<char *>(recvbuf) + displs[rank] * recvext,recvcounts[rank], recvtype);
- // Send buffers to others;
- MPI_Request *requests = xbt_new(MPI_Request, 2 * (size - 1));
- int index = 0;
- for (int other = 0; other < size; other++) {
- if(other != rank) {
- requests[index] =
- Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm);
- index++;
- requests[index] = Request::irecv_init(static_cast<char *>(recvbuf) + displs[other] * recvext, recvcounts[other],
- recvtype, other, system_tag, comm);
- index++;
- }
- }
- // Wait for completion of all comms.
- Request::startall(2 * (size - 1), requests);
- Request::waitall(2 * (size - 1), requests, MPI_STATUS_IGNORE);
- for (int other = 0; other < 2*(size-1); other++) {
- Request::unref(&requests[other]);
- }
- xbt_free(requests);
-}
-
-void smpi_mpi_scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
-{
- int system_tag = COLL_TAG_SCATTER;
- MPI_Aint lb = 0;
- MPI_Aint sendext = 0;
- MPI_Request *requests;
-
- int rank = comm->rank();
- int size = comm->size();
- if(rank != root) {
- // Recv buffer from root
- Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE);
- } else {
- sendtype->extent(&lb, &sendext);
- // Local copy from root
- if(recvbuf!=MPI_IN_PLACE){
- Datatype::copy(static_cast<char *>(sendbuf) + root * sendcount * sendext,
- sendcount, sendtype, recvbuf, recvcount, recvtype);
- }
- // Send buffers to receivers
- requests = xbt_new(MPI_Request, size - 1);
- int index = 0;
- for(int dst = 0; dst < size; dst++) {
- if(dst != root) {
- requests[index] = Request::isend_init(static_cast<char *>(sendbuf) + dst * sendcount * sendext, sendcount, sendtype,
- dst, system_tag, comm);
- index++;
- }
- }
- // Wait for completion of isend's.
- Request::startall(size - 1, requests);
- Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
- for (int dst = 0; dst < size-1; dst++) {
- Request::unref(&requests[dst]);
- }
- xbt_free(requests);
- }
-}
-
-void smpi_mpi_scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount,
- MPI_Datatype recvtype, int root, MPI_Comm comm)
-{
- int system_tag = COLL_TAG_SCATTERV;
- MPI_Aint lb = 0;
- MPI_Aint sendext = 0;
-
- int rank = comm->rank();
- int size = comm->size();
- if(rank != root) {
- // Recv buffer from root
- Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE);
- } else {
- sendtype->extent(&lb, &sendext);
- // Local copy from root
- if(recvbuf!=MPI_IN_PLACE){
- Datatype::copy(static_cast<char *>(sendbuf) + displs[root] * sendext, sendcounts[root],
- sendtype, recvbuf, recvcount, recvtype);
- }
- // Send buffers to receivers
- MPI_Request *requests = xbt_new(MPI_Request, size - 1);
- int index = 0;
- for (int dst = 0; dst < size; dst++) {
- if (dst != root) {
- requests[index] = Request::isend_init(static_cast<char *>(sendbuf) + displs[dst] * sendext, sendcounts[dst],
- sendtype, dst, system_tag, comm);
- index++;
- }
- }
- // Wait for completion of isend's.
- Request::startall(size - 1, requests);
- Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
- for (int dst = 0; dst < size-1; dst++) {
- Request::unref(&requests[dst]);
- }
- xbt_free(requests);
- }
-}
-
-void smpi_mpi_reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
- MPI_Comm comm)
-{
- int system_tag = COLL_TAG_REDUCE;
- MPI_Aint lb = 0;
- MPI_Aint dataext = 0;
-
- char* sendtmpbuf = static_cast<char *>(sendbuf);
-
- int rank = comm->rank();
- int size = comm->size();
- //non commutative case, use a working algo from openmpi
- if(op != MPI_OP_NULL && !op->is_commutative()){
- smpi_coll_tuned_reduce_ompi_basic_linear(sendtmpbuf, recvbuf, count, datatype, op, root, comm);
- return;
- }
-
- if( sendbuf == MPI_IN_PLACE ) {
- sendtmpbuf = static_cast<char *>(smpi_get_tmp_sendbuffer(count*datatype->get_extent()));
- Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype);
- }
-
- if(rank != root) {
- // Send buffer to root
- Request::send(sendtmpbuf, count, datatype, root, system_tag, comm);
- } else {
- datatype->extent(&lb, &dataext);
- // Local copy from root
- if (sendtmpbuf != nullptr && recvbuf != nullptr)
- Datatype::copy(sendtmpbuf, count, datatype, recvbuf, count, datatype);
- // Receive buffers from senders
- MPI_Request *requests = xbt_new(MPI_Request, size - 1);
- void **tmpbufs = xbt_new(void *, size - 1);
- int index = 0;
- for (int src = 0; src < size; src++) {
- if (src != root) {
- if (!smpi_process_get_replaying())
- tmpbufs[index] = xbt_malloc(count * dataext);
- else
- tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
- requests[index] =
- Request::irecv_init(tmpbufs[index], count, datatype, src, system_tag, comm);
- index++;
- }
- }
- // Wait for completion of irecv's.
- Request::startall(size - 1, requests);
- for (int src = 0; src < size - 1; src++) {
- index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
- XBT_DEBUG("finished waiting any request with index %d", index);
- if(index == MPI_UNDEFINED) {
- break;
- }else{
- Request::unref(&requests[index]);
- }
- if (op != MPI_OP_NULL) /* op can be MPI_OP_NULL that does nothing */
- op->apply(tmpbufs[index], recvbuf, &count, datatype);
- }
- for(index = 0; index < size - 1; index++) {
- smpi_free_tmp_buffer(tmpbufs[index]);
- }
- xbt_free(tmpbufs);
- xbt_free(requests);
-
- }
- if( sendbuf == MPI_IN_PLACE ) {
- smpi_free_tmp_buffer(sendtmpbuf);
- }
-}
-
-void smpi_mpi_allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
-{
- smpi_mpi_reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
- smpi_mpi_bcast(recvbuf, count, datatype, 0, comm);
-}
-
-void smpi_mpi_scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
-{
- int system_tag = -888;
- MPI_Aint lb = 0;
- MPI_Aint dataext = 0;
-
- int rank = comm->rank();
- int size = comm->size();
-
- datatype->extent(&lb, &dataext);
-
- // Local copy from self
- Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype);
-
- // Send/Recv buffers to/from others;
- MPI_Request *requests = xbt_new(MPI_Request, size - 1);
- void **tmpbufs = xbt_new(void *, rank);
- int index = 0;
- for (int other = 0; other < rank; other++) {
- tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
- requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm);
- index++;
- }
- for (int other = rank + 1; other < size; other++) {
- requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm);
- index++;
- }
- // Wait for completion of all comms.
- Request::startall(size - 1, requests);
-
- if(op != MPI_OP_NULL && op->is_commutative()){
- for (int other = 0; other < size - 1; other++) {
- index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
- if(index == MPI_UNDEFINED) {
- break;
- }
- if (index < rank)
- // #Request is below rank: it's a irecv.
- op->apply(tmpbufs[index], recvbuf, &count, datatype);
- }
- }else{
- //non commutative case, wait in order
- for (int other = 0; other < size - 1; other++) {
- Request::wait(&(requests[other]), MPI_STATUS_IGNORE);
- if (index < rank && op != MPI_OP_NULL)
- op->apply(tmpbufs[other], recvbuf, &count, datatype);
- }
- }
- for(index = 0; index < rank; index++) {
- smpi_free_tmp_buffer(tmpbufs[index]);
- }
- for(index = 0; index < size-1; index++) {
- Request::unref(&requests[index]);
- }
- xbt_free(tmpbufs);
- xbt_free(requests);
-}
-
-void smpi_mpi_exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
-{
- int system_tag = -888;
- MPI_Aint lb = 0;
- MPI_Aint dataext = 0;
- int recvbuf_is_empty=1;
- int rank = comm->rank();
- int size = comm->size();
-
- datatype->extent(&lb, &dataext);
-
- // Send/Recv buffers to/from others;
- MPI_Request *requests = xbt_new(MPI_Request, size - 1);
- void **tmpbufs = xbt_new(void *, rank);
- int index = 0;
- for (int other = 0; other < rank; other++) {
- tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
- requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm);
- index++;
- }
- for (int other = rank + 1; other < size; other++) {
- requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm);
- index++;
- }
- // Wait for completion of all comms.
- Request::startall(size - 1, requests);
-
- if(op != MPI_OP_NULL && op->is_commutative()){
- for (int other = 0; other < size - 1; other++) {
- index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
- if(index == MPI_UNDEFINED) {
- break;
- }
- if(index < rank) {
- if(recvbuf_is_empty){
- Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype);
- recvbuf_is_empty=0;
- } else
- // #Request is below rank: it's a irecv
- op->apply(tmpbufs[index], recvbuf, &count, datatype);
- }
- }
- }else{
- //non commutative case, wait in order
- for (int other = 0; other < size - 1; other++) {
- Request::wait(&(requests[other]), MPI_STATUS_IGNORE);
- if(index < rank) {
- if (recvbuf_is_empty) {
- Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype);
- recvbuf_is_empty = 0;
- } else
- if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype);
- }
- }
- }
- for(index = 0; index < rank; index++) {
- smpi_free_tmp_buffer(tmpbufs[index]);
- }
- for(index = 0; index < size-1; index++) {
- Request::unref(&requests[index]);
- }
- xbt_free(tmpbufs);
- xbt_free(requests);
-}
-
void smpi_empty_status(MPI_Status * status)
{
if(status != MPI_STATUS_IGNORE) {
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI (coll)");
/* Descriptor tables listing the available implementations of each collective.
 * Every entry is generated from the corresponding COLL_* x-macro list, and
 * every table ends with a {nullptr, nullptr, nullptr} sentinel: the lookup
 * code (find_coll_description) relies on that terminator to stop scanning. */
s_mpi_coll_description_t mpi_coll_gather_description[] = {
    COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_allgather_description[] = {
    COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_allgatherv_description[] = {
    COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_allreduce_description[] = {
    COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {
    COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_scatter_description[] = {
    COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_barrier_description[] = {
    COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_alltoall_description[] = {
    COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_alltoallv_description[] = {
    COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_bcast_description[] = {
    COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
s_mpi_coll_description_t mpi_coll_reduce_description[] = {
    COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
printf(" %s: %s\n", table[i].name, table[i].description);
}
-int find_coll_description(s_mpi_coll_description_t * table, char *name, const char *desc)
+int find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc)
{
char *name_list = nullptr;
int selector_on=0;
return -1;
}
-int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm);
-int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
-int (*mpi_coll_allgatherv_fun)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
-int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm);
-int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
-int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
-int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com);
-int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
-int (*mpi_coll_reduce_scatter_fun)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm);
-int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm);
-int (*mpi_coll_barrier_fun)(MPI_Comm comm);
void (*smpi_coll_cleanup_callback)();
+namespace simgrid{
+namespace smpi{
+
+int (*Colls::gather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm);
+int (*Colls::allgather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+int (*Colls::allgatherv)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
+int (*Colls::allreduce)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm);
+int (*Colls::alltoall)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+int (*Colls::alltoallv)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
+int (*Colls::bcast)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com);
+int (*Colls::reduce)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
+int (*Colls::reduce_scatter)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm);
+int (*Colls::scatter)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm);
+int (*Colls::barrier)(MPI_Comm comm);
+
+
+#define COLL_SETTER(cat, ret, args, args2)\
+void Colls::set_##cat (const char * name){\
+ int id = find_coll_description(mpi_coll_## cat ##_description,\
+ name,#cat);\
+ cat = reinterpret_cast<ret (*) args>\
+ (mpi_coll_## cat ##_description[id].coll);\
+}
+
+COLL_APPLY(COLL_SETTER,COLL_GATHER_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_ALLGATHER_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_ALLGATHERV_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_REDUCE_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_ALLREDUCE_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_REDUCE_SCATTER_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_SCATTER_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_BARRIER_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_BCAST_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_ALLTOALL_SIG,"");
+COLL_APPLY(COLL_SETTER,COLL_ALLTOALLV_SIG,"");
+
+
+void Colls::set_collectives(){
+ const char* selector_name = static_cast<char*>(xbt_cfg_get_string("smpi/coll-selector"));
+ if (selector_name==nullptr || selector_name[0] == '\0')
+ selector_name = "default";
+
+ const char* name = xbt_cfg_get_string("smpi/gather");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_gather(name);
+
+ name = xbt_cfg_get_string("smpi/allgather");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_allgather(name);
+
+ name = xbt_cfg_get_string("smpi/allgatherv");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_allgatherv(name);
+
+ name = xbt_cfg_get_string("smpi/allreduce");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_allreduce(name);
+
+ name = xbt_cfg_get_string("smpi/alltoall");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_alltoall(name);
+
+ name = xbt_cfg_get_string("smpi/alltoallv");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_alltoallv(name);
+
+ name = xbt_cfg_get_string("smpi/reduce");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_reduce(name);
-int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
- int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
+ name = xbt_cfg_get_string("smpi/reduce-scatter");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_reduce_scatter(name);
+
+ name = xbt_cfg_get_string("smpi/scatter");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_scatter(name);
+
+ name = xbt_cfg_get_string("smpi/bcast");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_bcast(name);
+
+ name = xbt_cfg_get_string("smpi/barrier");
+ if (name==nullptr || name[0] == '\0')
+ name = selector_name;
+
+ set_barrier(name);
+}
+
+
+/* MPI_Gatherv, flat-tree algorithm (the only gatherv implementation in SMPI).
+ * Non-root ranks send their buffer straight to root with one blocking send;
+ * root copies its own contribution locally, then posts one irecv per peer at
+ * recvbuf + displs[src] * extent(recvtype).  Always returns MPI_SUCCESS.
+ * NOTE(review): MPI_IN_PLACE on root is not handled here — presumably
+ * filtered/translated by the caller; confirm at the PMPI layer. */
+int Colls::gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs,
+                   MPI_Datatype recvtype, int root, MPI_Comm comm)
 {
+  int system_tag = COLL_TAG_GATHERV;   // internal tag, distinct per collective
+  MPI_Aint lb = 0;                     // lower bound of recvtype (unused after extent())
+  MPI_Aint recvext = 0;                // extent of recvtype, scales the displacements
+
+  int rank = comm->rank();
   int size = comm->size();
-  int sendsize = sendtype->size() * sendcount;
-  if (sendsize < 200 && size > 12) {
-    return smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
-  } else if (sendsize < 3000) {
-    return smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+  if (rank != root) {
+    // Send buffer to root
+    Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm);
   } else {
-    return smpi_coll_tuned_alltoall_ring(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+    recvtype->extent(&lb, &recvext);
+    // Local copy from root
+    Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char*>(recvbuf) + displs[root] * recvext,
+                       recvcounts[root], recvtype);
+    // Receive buffers from senders: size-1 persistent irecvs, one per peer
+    MPI_Request *requests = xbt_new(MPI_Request, size - 1);
+    int index = 0;
+    for (int src = 0; src < size; src++) {
+      if(src != root) {
+        requests[index] = Request::irecv_init(static_cast<char*>(recvbuf) + displs[src] * recvext,
+                          recvcounts[src], recvtype, src, system_tag, comm);
+        index++;
+      }
+    }
+    // Wait for completion of irecv's.
+    Request::startall(size - 1, requests);
+    Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
+    for (int src = 0; src < size-1; src++) {
+      // drop the extra reference taken by irecv_init
+      Request::unref(&requests[src]);
+    }
+    xbt_free(requests);
   }
+  return MPI_SUCCESS;
 }
-/**
- * Alltoall Bruck
- *
- * Openmpi calls this routine when the message size sent to each rank < 2000 bytes and size < 12
- * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not less...
- **/
-int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
+
+/* MPI_Scatterv, flat-tree algorithm (the only scatterv implementation in
+ * SMPI).  Root copies its own slice locally (skipped when recvbuf is
+ * MPI_IN_PLACE) and isends sendbuf + displs[dst] * extent(sendtype) to every
+ * other rank; non-root ranks do a single blocking recv.
+ * Always returns MPI_SUCCESS. */
+int Colls::scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount,
+                    MPI_Datatype recvtype, int root, MPI_Comm comm)
 {
-  int system_tag = 777;
-  int i;
-  int count;
-  MPI_Aint lb;
+  int system_tag = COLL_TAG_SCATTERV;  // internal tag, distinct per collective
+  MPI_Aint lb = 0;                     // lower bound of sendtype (unused after extent())
   MPI_Aint sendext = 0;
-  MPI_Aint recvext = 0;
-  MPI_Request *requests;
-  // FIXME: check implementation
   int rank = comm->rank();
   int size = comm->size();
-  XBT_DEBUG("<%d> algorithm alltoall_bruck() called.", rank);
-  sendtype->extent(&lb, &sendext);
-  recvtype->extent(&lb, &recvext);
-  /* Local copy from self */
-  int err = Datatype::copy(static_cast<char *>(sendbuf) + rank * sendcount * sendext, sendcount, sendtype,
-                           static_cast<char *>(recvbuf) + rank * recvcount * recvext, recvcount, recvtype);
-  if (err == MPI_SUCCESS && size > 1) {
-    /* Initiate all send/recv to/from others. */
-    requests = xbt_new(MPI_Request, 2 * (size - 1));
-    count = 0;
-    /* Create all receives that will be posted first */
-    for (i = 0; i < size; ++i) {
-      if (i != rank) {
-        requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + i * recvcount * recvext, recvcount,
-                                              recvtype, i, system_tag, comm);
-        count++;
-      }else{
-        XBT_DEBUG("<%d> skip request creation [src = %d, recvcount = %d]", rank, i, recvcount);
-      }
+  if(rank != root) {
+    // Recv buffer from root
+    Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE);
+  } else {
+    sendtype->extent(&lb, &sendext);
+    // Local copy from root
+    if(recvbuf!=MPI_IN_PLACE){
+      Datatype::copy(static_cast<char *>(sendbuf) + displs[root] * sendext, sendcounts[root],
+                       sendtype, recvbuf, recvcount, recvtype);
     }
-    /* Now create all sends */
-    for (i = 0; i < size; ++i) {
-      if (i != rank) {
-        requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + i * sendcount * sendext, sendcount,
-                                              sendtype, i, system_tag, comm);
-        count++;
-      }else{
-        XBT_DEBUG("<%d> skip request creation [dst = %d, sendcount = %d]", rank, i, sendcount);
+    // Send buffers to receivers: size-1 persistent isends, one per peer
+    MPI_Request *requests = xbt_new(MPI_Request, size - 1);
+    int index = 0;
+    for (int dst = 0; dst < size; dst++) {
+      if (dst != root) {
+        requests[index] = Request::isend_init(static_cast<char *>(sendbuf) + displs[dst] * sendext, sendcounts[dst],
+                                              sendtype, dst, system_tag, comm);
+        index++;
       }
     }
-    /* Wait for them all. */
-    Request::startall(count, requests);
-    XBT_DEBUG("<%d> wait for %d requests", rank, count);
-    Request::waitall(count, requests, MPI_STATUS_IGNORE);
-    for(i = 0; i < count; i++) {
-      if(requests[i]!=MPI_REQUEST_NULL)
-        Request::unref(&requests[i]);
+    // Wait for completion of isend's.
+    Request::startall(size - 1, requests);
+    Request::waitall(size - 1, requests, MPI_STATUS_IGNORE);
+    for (int dst = 0; dst < size-1; dst++) {
+      // drop the extra reference taken by isend_init
+      Request::unref(&requests[dst]);
+    }
     xbt_free(requests);
   }
   return MPI_SUCCESS;
 }
-/**
- * Alltoall basic_linear (STARMPI:alltoall-simple)
- **/
-int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype,
- void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
+
+/* MPI_Scan: inclusive prefix reduction — rank r ends up with
+ * op(sendbuf_0, ..., sendbuf_r) in recvbuf.
+ * Each rank copies its own contribution into recvbuf, posts one irecv per
+ * lower rank (into a temp buffer) and one isend to every higher rank, then
+ * folds the received contributions into recvbuf.  For commutative ops the
+ * folds happen in completion order (waitany); for non-commutative ops the
+ * waits run in increasing rank order so the fold order is well defined. */
+int Colls::scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
 {
-  int system_tag = 888;
-  int i;
-  int count;
-  MPI_Aint lb = 0, sendext = 0, recvext = 0;
-  MPI_Request *requests;
+  int system_tag = -888;   // negative tag: internal (non-user) communication
+  MPI_Aint lb = 0;
+  MPI_Aint dataext = 0;
-  /* Initialize. */
   int rank = comm->rank();
   int size = comm->size();
-  XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank);
-  sendtype->extent(&lb, &sendext);
-  recvtype->extent(&lb, &recvext);
-  /* simple optimization */
-  int err = Datatype::copy(static_cast<char *>(sendbuf) + rank * sendcount * sendext, sendcount, sendtype,
-                           static_cast<char *>(recvbuf) + rank * recvcount * recvext, recvcount, recvtype);
-  if (err == MPI_SUCCESS && size > 1) {
-    /* Initiate all send/recv to/from others. */
-    requests = xbt_new(MPI_Request, 2 * (size - 1));
-    /* Post all receives first -- a simple optimization */
-    count = 0;
-    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) {
-      requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + i * recvcount * recvext, recvcount,
-                                            recvtype, i, system_tag, comm);
-      count++;
-    }
-    /* Now post all sends in reverse order
-     *   - We would like to minimize the search time through message queue
-     *     when messages actually arrive in the order in which they were posted.
-     * TODO: check the previous assertion
-     */
-    for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) {
-      requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + i * sendcount * sendext, sendcount,
-                                            sendtype, i, system_tag, comm);
-      count++;
+
+  datatype->extent(&lb, &dataext);
+
+  // Local copy from self
+  Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype);
+
+  // Send/Recv buffers to/from others;
+  // requests[0..rank-1] are irecvs from lower ranks, the rest are isends.
+  MPI_Request *requests = xbt_new(MPI_Request, size - 1);
+  void **tmpbufs = xbt_new(void *, rank);
+  int index = 0;
+  for (int other = 0; other < rank; other++) {
+    tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
+    requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm);
+    index++;
+  }
+  for (int other = rank + 1; other < size; other++) {
+    requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm);
+    index++;
+  }
+  // Wait for completion of all comms.
+  Request::startall(size - 1, requests);
+
+  if(op != MPI_OP_NULL && op->is_commutative()){
+    for (int other = 0; other < size - 1; other++) {
+      index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
+      if(index == MPI_UNDEFINED) {
+        break;
+      }
+      if(index < rank) {
+        // #Request is below rank: it's a irecv
+        if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype);
+      }
     }
-    /* Wait for them all. */
-    Request::startall(count, requests);
-    XBT_DEBUG("<%d> wait for %d requests", rank, count);
-    Request::waitall(count, requests, MPI_STATUS_IGNORE);
-    for(i = 0; i < count; i++) {
-      if(requests[i]!=MPI_REQUEST_NULL)
-        Request::unref(&requests[i]);
+  }else{
+    //non commutative case, wait in order
+    for (int other = 0; other < size - 1; other++) {
+      Request::wait(&(requests[other]), MPI_STATUS_IGNORE);
+      // Bug fix: guard on the loop variable 'other', not on the stale 'index'
+      // left over from the setup loops (index == size-1 here, so the original
+      // 'if(index < rank)' was almost always false and the received
+      // contributions were silently never folded in).
+      if(other < rank) {
+        if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype);
+      }
     }
-    xbt_free(requests);
   }
-  return err;
+  for(index = 0; index < rank; index++) {
+    smpi_free_tmp_buffer(tmpbufs[index]);
+  }
+  for(index = 0; index < size-1; index++) {
+    Request::unref(&requests[index]);
+  }
+  xbt_free(tmpbufs);
+  xbt_free(requests);
+  return MPI_SUCCESS;
 }
-int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype,
- void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm)
+/* MPI_Exscan: exclusive prefix reduction — rank r (r > 0) ends up with
+ * op(sendbuf_0, ..., sendbuf_{r-1}) in recvbuf; recvbuf on rank 0 is left
+ * untouched (its content is undefined per the MPI standard).
+ * Same communication pattern as scan(), except the local contribution is not
+ * copied into recvbuf: the first received contribution seeds recvbuf
+ * (recvbuf_is_empty flag), subsequent ones are folded in with op. */
+int Colls::exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
 {
-  int system_tag = 889;
-  int i;
-  int count;
-  MPI_Aint lb = 0;
-  MPI_Aint sendext = 0;
-  MPI_Aint recvext = 0;
-  MPI_Request *requests;
-
-  /* Initialize. */
+  int system_tag = -888;   // negative tag: internal (non-user) communication
+  MPI_Aint lb = 0;
+  MPI_Aint dataext = 0;
+  int recvbuf_is_empty=1;  // recvbuf not seeded yet (no local copy in exscan)
   int rank = comm->rank();
   int size = comm->size();
-  XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank);
-  sendtype->extent(&lb, &sendext);
-  recvtype->extent(&lb, &recvext);
-  /* Local copy from self */
-  int err = Datatype::copy(static_cast<char *>(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype,
-                           static_cast<char *>(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype);
-  if (err == MPI_SUCCESS && size > 1) {
-    /* Initiate all send/recv to/from others. */
-    requests = xbt_new(MPI_Request, 2 * (size - 1));
-    count = 0;
-    /* Create all receives that will be posted first */
-    for (i = 0; i < size; ++i) {
-      if (i != rank && recvcounts[i] != 0) {
-        requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + recvdisps[i] * recvext,
-                                              recvcounts[i], recvtype, i, system_tag, comm);
-        count++;
-      }else{
-        XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]);
+
+  datatype->extent(&lb, &dataext);
+
+  // Send/Recv buffers to/from others;
+  // requests[0..rank-1] are irecvs from lower ranks, the rest are isends.
+  MPI_Request *requests = xbt_new(MPI_Request, size - 1);
+  void **tmpbufs = xbt_new(void *, rank);
+  int index = 0;
+  for (int other = 0; other < rank; other++) {
+    tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext);
+    requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm);
+    index++;
+  }
+  for (int other = rank + 1; other < size; other++) {
+    requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm);
+    index++;
+  }
+  // Wait for completion of all comms.
+  Request::startall(size - 1, requests);
+
+  if(op != MPI_OP_NULL && op->is_commutative()){
+    for (int other = 0; other < size - 1; other++) {
+      index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE);
+      if(index == MPI_UNDEFINED) {
+        break;
      }
-    }
-    /* Now create all sends */
-    for (i = 0; i < size; ++i) {
-      if (i != rank && sendcounts[i] != 0) {
-        requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + senddisps[i] * sendext,
-                                              sendcounts[i], sendtype, i, system_tag, comm);
-        count++;
-      }else{
-        XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]);
+      if(index < rank) {
+        if(recvbuf_is_empty){
+          Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype);
+          recvbuf_is_empty=0;
+        } else
+          // #Request is below rank: it's a irecv
+          if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype);
       }
     }
-    /* Wait for them all. */
-    Request::startall(count, requests);
-    XBT_DEBUG("<%d> wait for %d requests", rank, count);
-    Request::waitall(count, requests, MPI_STATUS_IGNORE);
-    for(i = 0; i < count; i++) {
-      if(requests[i]!=MPI_REQUEST_NULL)
-        Request::unref(&requests[i]);
+  }else{
+    //non commutative case, wait in order
+    for (int other = 0; other < size - 1; other++) {
+      Request::wait(&(requests[other]), MPI_STATUS_IGNORE);
+      // Bug fix: guard on the loop variable 'other', not on the stale 'index'
+      // left over from the setup loops (index == size-1 here, so the original
+      // 'if(index < rank)' was almost always false and exscan produced an
+      // unseeded/unreduced recvbuf for non-commutative ops).
+      if(other < rank) {
+        if (recvbuf_is_empty) {
+          Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype);
+          recvbuf_is_empty = 0;
+        } else
+          if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype);
+      }
     }
-    xbt_free(requests);
   }
-  return err;
+  for(index = 0; index < rank; index++) {
+    smpi_free_tmp_buffer(tmpbufs[index]);
+  }
+  for(index = 0; index < size-1; index++) {
+    Request::unref(&requests[index]);
+  }
+  xbt_free(tmpbufs);
+  xbt_free(requests);
+  return MPI_SUCCESS;
+}
+
+}
}
+
+
+
+
+
--- /dev/null
+/*High level handling of collective algorithms*/
+/* Copyright (c) 2009-2010, 2012-2014. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#ifndef SMPI_COLL_HPP
+#define SMPI_COLL_HPP
+
+#include <xbt/base.h>
+
+#include "private.h"
+
+namespace simgrid{
+namespace smpi{
+
+/* Static registry of the collective algorithms currently selected (via the
+ * smpi/<coll> configuration options).  The function pointers below are filled
+ * in by the matching set_*() calls, driven by set_collectives(); callers then
+ * invoke e.g. Colls::bcast(...) instead of a raw function pointer. */
+class Colls{
+  private:
+  public:
+    // Read the smpi/* config options and install every collective below.
+    static void set_collectives();
+    // One setter per collective: look up 'name' in the description table and
+    // bind the corresponding implementation.
+    static void set_gather(const char* name);
+    static void set_allgather(const char* name);
+    static void set_allgatherv(const char* name);
+    static void set_alltoall(const char* name);
+    static void set_alltoallv(const char* name);
+    static void set_allreduce(const char* name);
+    static void set_reduce(const char* name);
+    static void set_reduce_scatter(const char* name);
+    static void set_scatter(const char* name);
+    static void set_barrier(const char* name);
+    static void set_bcast(const char* name);
+
+    // List the available implementations of one collective category.
+    static void coll_help(const char *category, s_mpi_coll_description_t * table);
+
+    // Currently-selected implementation of each collective (set by set_*()).
+    static int (*gather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm);
+    static int (*allgather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+    static int (*allgatherv)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
+    static int (*allreduce)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm);
+    static int (*alltoall)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+    static int (*alltoallv)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
+    static int (*bcast)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com);
+    static int (*reduce)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
+    static int (*reduce_scatter)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm);
+    static int (*scatter)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm);
+    static int (*barrier)(MPI_Comm comm);
+
+//These fairly unused collectives only have one implementation in SMPI
+
+    static int gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm comm);
+    static int scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm);
+    static int scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
+    static int exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
+};
+
+/* Base class for one collective algorithm entry.
+ * NOTE(review): description_ has no visible setter here — presumably filled
+ * from the selection tables; confirm in the implementation file. */
+class Coll_algo{
+  private:
+    char* description_;
+  public:
+    char* description();
+};
+
+/* One subclass per collective kind, exposing the algorithm entry point as a
+ * static method with the corresponding MPI collective signature. */
+class Coll_gather : public Coll_algo {
+  private:
+  public:
+    static int gather (void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm);
+};
+
+class Coll_allgather : public Coll_algo {
+  private:
+  public:
+    static int allgather (void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+};
+
+class Coll_allgatherv : public Coll_algo {
+  private:
+  public:
+    static int allgatherv (void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
+};
+
+class Coll_allreduce : public Coll_algo {
+  private:
+  public:
+    static int allreduce (void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm);
+};
+
+class Coll_alltoall : public Coll_algo {
+  private:
+  public:
+    static int alltoall (void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
+};
+
+class Coll_alltoallv : public Coll_algo {
+  private:
+  public:
+    static int alltoallv (void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm);
+};
+
+class Coll_bcast : public Coll_algo {
+  private:
+  public:
+    static int bcast (void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com);
+};
+
+class Coll_reduce : public Coll_algo {
+  private:
+  public:
+    static int reduce (void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
+};
+
+class Coll_reduce_scatter : public Coll_algo {
+  private:
+  public:
+    static int reduce_scatter (void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm);
+};
+
+class Coll_scatter : public Coll_algo {
+  private:
+  public:
+    static int scatter (void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm);
+};
+
+class Coll_barrier : public Coll_algo {
+  private:
+  public:
+    static int barrier (MPI_Comm);
+};
+
+
+}
+}
+
+#endif
} else {
recvbuf = nullptr;
}
- smpi_mpi_gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, this);
+ Coll_gather_default::gather(sendbuf, 2, MPI_INT, recvbuf, 2, MPI_INT, 0, this);
xbt_free(sendbuf);
/* Do the actual job */
if(rank == 0) {
leader_list[i]=-1;
}
- smpi_coll_tuned_allgather_mpich(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this);
+ Coll_allgather_mpich::allgather(&leader, 1, MPI_INT , leaders_map, 1, MPI_INT, this);
if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables
smpi_switch_data_segment(smpi_process_index());
int my_local_size=comm_intra->size();
if(comm_intra->rank()==0) {
int* non_uniform_map = xbt_new0(int,leader_group_size);
- smpi_coll_tuned_allgather_mpich(&my_local_size, 1, MPI_INT,
+ Coll_allgather_mpich::allgather(&my_local_size, 1, MPI_INT,
non_uniform_map, 1, MPI_INT, leader_comm);
for(i=0; i < leader_group_size; i++) {
if(non_uniform_map[0] != non_uniform_map[i]) {
}
is_uniform_=is_uniform;
}
- smpi_coll_tuned_bcast_mpich(&(is_uniform_),1, MPI_INT, 0, comm_intra );
+ Coll_bcast_mpich::bcast(&(is_uniform_),1, MPI_INT, 0, comm_intra );
if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables
smpi_switch_data_segment(smpi_process_index());
}
int global_blocked;
- smpi_mpi_allreduce(&is_blocked, &(global_blocked), 1, MPI_INT, MPI_LAND, this);
+ Coll_allreduce_default::allreduce(&is_blocked, &(global_blocked), 1, MPI_INT, MPI_LAND, this);
if(MPI_COMM_WORLD==MPI_COMM_UNINITIALIZED || this==MPI_COMM_WORLD){
if(this->rank()==0){
}
static void smpi_init_options(){
- int gather_id = find_coll_description(mpi_coll_gather_description, xbt_cfg_get_string("smpi/gather"),"gather");
- mpi_coll_gather_fun = reinterpret_cast<int (*)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, int, MPI_Comm)>
- (mpi_coll_gather_description[gather_id].coll);
-
- int allgather_id = find_coll_description(mpi_coll_allgather_description,
- xbt_cfg_get_string("smpi/allgather"),"allgather");
- mpi_coll_allgather_fun = reinterpret_cast<int (*)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)>
- (mpi_coll_allgather_description[allgather_id].coll);
-
- int allgatherv_id = find_coll_description(mpi_coll_allgatherv_description,
- xbt_cfg_get_string("smpi/allgatherv"),"allgatherv");
- mpi_coll_allgatherv_fun = reinterpret_cast<int (*)(void *, int, MPI_Datatype, void *, int *, int *, MPI_Datatype, MPI_Comm)>
- (mpi_coll_allgatherv_description[allgatherv_id].coll);
-
- int allreduce_id = find_coll_description(mpi_coll_allreduce_description,
- xbt_cfg_get_string("smpi/allreduce"),"allreduce");
- mpi_coll_allreduce_fun = reinterpret_cast<int (*)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)>
- (mpi_coll_allreduce_description[allreduce_id].coll);
-
- int alltoall_id = find_coll_description(mpi_coll_alltoall_description,
- xbt_cfg_get_string("smpi/alltoall"),"alltoall");
- mpi_coll_alltoall_fun = reinterpret_cast<int (*)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)>
- (mpi_coll_alltoall_description[alltoall_id].coll);
-
- int alltoallv_id = find_coll_description(mpi_coll_alltoallv_description,
- xbt_cfg_get_string("smpi/alltoallv"),"alltoallv");
- mpi_coll_alltoallv_fun = reinterpret_cast<int (*)(void *, int *, int *, MPI_Datatype, void *, int *, int *, MPI_Datatype, MPI_Comm)>
- (mpi_coll_alltoallv_description[alltoallv_id].coll);
-
- int bcast_id = find_coll_description(mpi_coll_bcast_description, xbt_cfg_get_string("smpi/bcast"),"bcast");
- mpi_coll_bcast_fun = reinterpret_cast<int (*)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com)>
- (mpi_coll_bcast_description[bcast_id].coll);
-
- int reduce_id = find_coll_description(mpi_coll_reduce_description, xbt_cfg_get_string("smpi/reduce"),"reduce");
- mpi_coll_reduce_fun = reinterpret_cast<int (*)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)>
- (mpi_coll_reduce_description[reduce_id].coll);
-
- int reduce_scatter_id =
- find_coll_description(mpi_coll_reduce_scatter_description,
- xbt_cfg_get_string("smpi/reduce-scatter"),"reduce_scatter");
- mpi_coll_reduce_scatter_fun = reinterpret_cast<int (*)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)>
- (mpi_coll_reduce_scatter_description[reduce_scatter_id].coll);
-
- int scatter_id = find_coll_description(mpi_coll_scatter_description, xbt_cfg_get_string("smpi/scatter"),"scatter");
- mpi_coll_scatter_fun = reinterpret_cast<int (*)(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)>
- (mpi_coll_scatter_description[scatter_id].coll);
-
- int barrier_id = find_coll_description(mpi_coll_barrier_description, xbt_cfg_get_string("smpi/barrier"),"barrier");
- mpi_coll_barrier_fun = reinterpret_cast<int (*)(MPI_Comm comm)>
- (mpi_coll_barrier_description[barrier_id].coll);
+ Colls::set_collectives();
smpi_coll_cleanup_callback=nullptr;
smpi_cpu_threshold = xbt_cfg_get_double("smpi/cpu-threshold");
smpi_host_speed = xbt_cfg_get_double("smpi/host-speed");
extra->send_size = count * dt_size_send;
TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
if (comm->size() > 1)
- mpi_coll_bcast_fun(buf, count, datatype, root, comm);
+ Colls::bcast(buf, count, datatype, root, comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
extra->type = TRACING_BARRIER;
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- mpi_coll_barrier_fun(comm);
+ Colls::barrier(comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
- mpi_coll_gather_fun(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, root, comm);
+ Colls::gather(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, root, comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
}
TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
- smpi_mpi_gatherv(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcounts, displs, recvtype, root, comm);
- retval = MPI_SUCCESS;
+ retval = Colls::gatherv(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcounts, displs, recvtype, root, comm);
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
}
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- mpi_coll_allgather_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
+ Colls::allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
}
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- mpi_coll_allgatherv_fun(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
+ Colls::allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
}
extra->recv_size = recvcount * dt_size_recv;
TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
- mpi_coll_scatter_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
+ Colls::scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
}
extra->recv_size = recvcount * dt_size_recv;
TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
- smpi_mpi_scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
+ retval = Colls::scatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm);
- retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
}
TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
- mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, root, comm);
+ Colls::reduce(sendbuf, recvbuf, count, datatype, op, root, comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- mpi_coll_allreduce_fun(sendtmpbuf, recvbuf, count, datatype, op, comm);
+ Colls::allreduce(sendtmpbuf, recvbuf, count, datatype, op, comm);
if( sendbuf == MPI_IN_PLACE )
xbt_free(sendtmpbuf);
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- smpi_mpi_scan(sendbuf, recvbuf, count, datatype, op, comm);
+ retval = Colls::scan(sendbuf, recvbuf, count, datatype, op, comm);
- retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
}
}
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- smpi_mpi_exscan(sendtmpbuf, recvbuf, count, datatype, op, comm);
- retval = MPI_SUCCESS;
+ retval = Colls::exscan(sendtmpbuf, recvbuf, count, datatype, op, comm);
+
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
if (sendbuf == MPI_IN_PLACE)
xbt_free(sendtmpbuf);
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- mpi_coll_reduce_scatter_fun(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
+ Colls::reduce_scatter(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
retval = MPI_SUCCESS;
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
int* recvcounts = static_cast<int*>(xbt_malloc(count * sizeof(int)));
for (int i = 0; i < count; i++)
recvcounts[i] = recvcount;
- mpi_coll_reduce_scatter_fun(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
+ Colls::reduce_scatter(sendtmpbuf, recvbuf, recvcounts, datatype, op, comm);
xbt_free(recvcounts);
retval = MPI_SUCCESS;
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- retval = mpi_coll_alltoall_fun(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, comm);
+ retval = Colls::alltoall(sendtmpbuf, sendtmpcount, sendtmptype, recvbuf, recvcount, recvtype, comm);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
}
extra->num_processes = size;
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- retval = mpi_coll_alltoallv_fun(sendtmpbuf, sendtmpcounts, sendtmpdisps, sendtmptype, recvbuf, recvcounts,
+ retval = Colls::alltoallv(sendtmpbuf, sendtmpcounts, sendtmpdisps, sendtmptype, recvbuf, recvcounts,
recvdisps, recvtype, comm);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
#define KEY_SIZE (sizeof(int) * 2 + 1)
+using namespace simgrid::smpi;
+
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_replay,smpi,"Trace Replay with SMPI");
int communicator_size = 0;
extra->type = TRACING_BARRIER;
TRACE_smpi_collective_in(rank, -1, __FUNCTION__, extra);
- mpi_coll_barrier_fun(MPI_COMM_WORLD);
+ Colls::barrier(MPI_COMM_WORLD);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
log_timed_action (action, clock);
TRACE_smpi_collective_in(rank, root_traced, __FUNCTION__, extra);
void *sendbuf = smpi_get_tmp_sendbuffer(size* MPI_CURRENT_TYPE->size());
- mpi_coll_bcast_fun(sendbuf, size, MPI_CURRENT_TYPE, root, MPI_COMM_WORLD);
+ Colls::bcast(sendbuf, size, MPI_CURRENT_TYPE, root, MPI_COMM_WORLD);
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
log_timed_action (action, clock);
void *recvbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
void *sendbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
- mpi_coll_reduce_fun(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, root, MPI_COMM_WORLD);
+ Colls::reduce(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, root, MPI_COMM_WORLD);
smpi_execute_flops(comp_size);
TRACE_smpi_collective_out(rank, root_traced, __FUNCTION__);
void *recvbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
void *sendbuf = smpi_get_tmp_sendbuffer(comm_size* MPI_CURRENT_TYPE->size());
- mpi_coll_allreduce_fun(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
+ Colls::allreduce(sendbuf, recvbuf, comm_size, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
smpi_execute_flops(comp_size);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
- mpi_coll_alltoall_fun(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
+ Colls::alltoall(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
log_timed_action (action, clock);
TRACE_smpi_collective_in(smpi_process_index(), root, __FUNCTION__, extra);
- mpi_coll_gather_fun(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
+ Colls::gather(send, send_size, MPI_CURRENT_TYPE, recv, recv_size, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
TRACE_smpi_collective_out(smpi_process_index(), -1, __FUNCTION__);
log_timed_action (action, clock);
TRACE_smpi_collective_in(smpi_process_index(), root, __FUNCTION__, extra);
- smpi_mpi_gatherv(send, send_size, MPI_CURRENT_TYPE, recv, recvcounts, disps, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
+ Colls::gatherv(send, send_size, MPI_CURRENT_TYPE, recv, recvcounts, disps, MPI_CURRENT_TYPE2, root, MPI_COMM_WORLD);
TRACE_smpi_collective_out(smpi_process_index(), -1, __FUNCTION__);
log_timed_action (action, clock);
void *sendbuf = smpi_get_tmp_sendbuffer(size* MPI_CURRENT_TYPE->size());
void *recvbuf = smpi_get_tmp_recvbuffer(size* MPI_CURRENT_TYPE->size());
- mpi_coll_reduce_scatter_fun(sendbuf, recvbuf, recvcounts, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
+ Colls::reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_CURRENT_TYPE, MPI_OP_NULL, MPI_COMM_WORLD);
smpi_execute_flops(comp_size);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
- mpi_coll_allgather_fun(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcount, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
+ Colls::allgather(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcount, MPI_CURRENT_TYPE2, MPI_COMM_WORLD);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
log_timed_action (action, clock);
TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
- mpi_coll_allgatherv_fun(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcounts, disps, MPI_CURRENT_TYPE2,
+ Colls::allgatherv(sendbuf, sendcount, MPI_CURRENT_TYPE, recvbuf, recvcounts, disps, MPI_CURRENT_TYPE2,
MPI_COMM_WORLD);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
TRACE_smpi_collective_in(rank, -1, __FUNCTION__,extra);
- mpi_coll_alltoallv_fun(sendbuf, sendcounts, senddisps, MPI_CURRENT_TYPE,recvbuf, recvcounts, recvdisps,
+ Colls::alltoallv(sendbuf, sendcounts, senddisps, MPI_CURRENT_TYPE,recvbuf, recvcounts, recvdisps,
MPI_CURRENT_TYPE, MPI_COMM_WORLD);
TRACE_smpi_collective_out(rank, -1, __FUNCTION__);
if(rank==0){
bar_ = MSG_barrier_init(comm_size);
}
- mpi_coll_allgather_fun(&(connected_wins_[rank]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win),
+ Colls::allgather(&(connected_wins_[rank]), sizeof(MPI_Win), MPI_BYTE, connected_wins_, sizeof(MPI_Win),
MPI_BYTE, comm);
- mpi_coll_bcast_fun(&(bar_), sizeof(msg_bar_t), MPI_BYTE, 0, comm);
+ Colls::bcast(&(bar_), sizeof(msg_bar_t), MPI_BYTE, 0, comm);
- mpi_coll_barrier_fun(comm);
+ Colls::barrier(comm);
}
Win::~Win(){
MPI_Info_free(&info_);
}
- mpi_coll_barrier_fun(comm_);
+ Colls::barrier(comm_);
int rank=comm_->rank();
if(rank == 0)
MSG_barrier_destroy(bar_);
! setenv LD_LIBRARY_PATH=../../lib
! output sort
-p Test all to all
+p Test allgather
$ ${bindir:=.}/../../../bin/smpirun -map -hostfile ../hostfile_coll -platform ../../../examples/platforms/small_platform.xml -np 16 --log=xbt_cfg.thres:critical ${bindir:=.}/coll-allgather --log=smpi_kernel.thres:warning --log=smpi_coll.thres:error
> [rank 0] -> Tremblay
> [rank 1] -> Tremblay
src/smpi/colls/allreduce/allreduce-smp-rsag.cpp
src/smpi/colls/allreduce/allreduce-mvapich-rs.cpp
src/smpi/colls/allreduce/allreduce-mvapich-two-level.cpp
+ src/smpi/colls/alltoall/alltoall-basic-linear.cpp
src/smpi/colls/alltoall/alltoall-2dmesh.cpp
src/smpi/colls/alltoall/alltoall-3dmesh.cpp
-# src/smpi/colls/alltoall/alltoall-bruck.cpp
+ src/smpi/colls/alltoall/alltoall-bruck.cpp
src/smpi/colls/alltoall/alltoall-pair-light-barrier.cpp
src/smpi/colls/alltoall/alltoall-pair-mpi-barrier.cpp
src/smpi/colls/alltoall/alltoall-pair-one-barrier.cpp
src/smpi/colls/scatter/scatter-ompi.cpp
src/smpi/colls/scatter/scatter-mvapich-two-level.cpp
src/smpi/colls/smpi_automatic_selector.cpp
+ src/smpi/colls/smpi_default_selector.cpp
src/smpi/colls/smpi_mpich_selector.cpp
src/smpi/colls/smpi_intel_mpi_selector.cpp
src/smpi/colls/smpi_openmpi_selector.cpp