/* selector for collective algorithms based on mvapich decision logic */
-/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
+/* Copyright (c) 2009-2010, 2013-2017. The SimGrid Team.
* All rights reserved. */
/* This program is free software; you can redistribute it and/or modify it
#include "smpi_mvapich2_selector_stampede.h"
+namespace simgrid{
+namespace smpi{
-int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount,
+int Coll_alltoall_mvapich2::alltoall( void *sendbuf, int sendcount,
MPI_Datatype sendtype,
void* recvbuf, int recvcount,
MPI_Datatype recvtype,
return (mpi_errno);
}
-int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+int Coll_allgather_mvapich2::allgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype,
MPI_Comm comm)
{
if(mv2_allgather_table_ppn_conf==NULL)
init_mv2_allgather_tables_stampede();
-
+
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
if (partial_sub_ok != 1) {
conf_index = 0;
}
-
+
/* Search for the corresponding system size inside the tuning table */
while ((range < (mv2_size_allgather_tuning_table[conf_index] - 1)) &&
(comm_size >
}
/* Set inter-leader pt */
- MV2_Allgather_function =
+ MV2_Allgatherction =
mv2_allgather_thresholds_table[conf_index][range].inter_leader[range_threshold].
- MV2_pt_Allgather_function;
+ MV2_pt_Allgatherction;
is_two_level = mv2_allgather_thresholds_table[conf_index][range].two_level[range_threshold];
recvbuf, recvcount, recvtype,
comm);
}else{
- mpi_errno = smpi_coll_tuned_allgather_mpich(sendbuf, sendcount, sendtype,
+ mpi_errno = Coll_allgather_mpich::allgather(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
comm);
}
recvbuf, recvcount, recvtype,
comm);
}
- } else if(MV2_Allgather_function == &MPIR_Allgather_Bruck_MV2
- || MV2_Allgather_function == &MPIR_Allgather_RD_MV2
- || MV2_Allgather_function == &MPIR_Allgather_Ring_MV2) {
- mpi_errno = MV2_Allgather_function(sendbuf, sendcount, sendtype,
+ } else if(MV2_Allgatherction == &MPIR_Allgather_Bruck_MV2
+ || MV2_Allgatherction == &MPIR_Allgather_RD_MV2
+ || MV2_Allgatherction == &MPIR_Allgather_Ring_MV2) {
+ mpi_errno = MV2_Allgatherction(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
comm);
}else{
return mpi_errno;
}
-
-int smpi_coll_tuned_gather_mvapich2(void *sendbuf,
+int Coll_gather_mvapich2::gather(void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
void *recvbuf,
-1)) {
range_intra_threshold++;
}
-
+
if (comm->is_blocked() ) {
- // Set intra-node function pt for gather_two_level
- MV2_Gather_intra_node_function =
+ // Set intra-node function pt for gather_two_level
+ MV2_Gather_intra_node_function =
mv2_gather_thresholds_table[range].intra_node[range_intra_threshold].
MV2_pt_Gather_function;
- //Set inter-leader pt
+ //Set inter-leader pt
MV2_Gather_inter_leader_function =
mv2_gather_thresholds_table[range].inter_leader[range_threshold].
MV2_pt_Gather_function;
- // We call Gather function
+ // We call Gather function
mpi_errno =
MV2_Gather_inter_leader_function(sendbuf, sendcnt, sendtype, recvbuf, recvcnt,
recvtype, root, comm);
} else {
// Indeed, direct (non SMP-aware)gather is MPICH one
- mpi_errno = smpi_coll_tuned_gather_mpich(sendbuf, sendcnt, sendtype,
+ mpi_errno = Coll_gather_mpich::gather(sendbuf, sendcnt, sendtype,
recvbuf, recvcnt, recvtype,
root, comm);
}
return mpi_errno;
}
-
-int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+int Coll_allgatherv_mvapich2::allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int *recvcounts, int *displs,
MPI_Datatype recvtype, MPI_Comm comm )
{
if (MV2_Allgatherv_function == &MPIR_Allgatherv_Rec_Doubling_MV2)
{
- if(!(comm_size & (comm_size - 1)))
- {
- mpi_errno =
- MPIR_Allgatherv_Rec_Doubling_MV2(sendbuf, sendcount,
- sendtype, recvbuf,
- recvcounts, displs,
- recvtype, comm);
+ if (not(comm_size & (comm_size - 1))) {
+ mpi_errno =
+ MPIR_Allgatherv_Rec_Doubling_MV2(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm);
} else {
mpi_errno =
MPIR_Allgatherv_Bruck_MV2(sendbuf, sendcount,
-int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf,
+int Coll_allreduce_mvapich2::allreduce(void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
if(mv2_allreduce_thresholds_table[range].mcast_enabled != 1){
while ((range_threshold < (mv2_allreduce_thresholds_table[range].size_inter_table - 1))
&& ((mv2_allreduce_thresholds_table[range].
- inter_leader[range_threshold].MV2_pt_Allreduce_function
+ inter_leader[range_threshold].MV2_pt_Allreducection
== &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2) ||
(mv2_allreduce_thresholds_table[range].
- inter_leader[range_threshold].MV2_pt_Allreduce_function
+ inter_leader[range_threshold].MV2_pt_Allreducection
== &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)
)) {
range_threshold++;
range_threshold_intra++;
}
- MV2_Allreduce_function = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold]
- .MV2_pt_Allreduce_function;
+ MV2_Allreducection = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold]
+ .MV2_pt_Allreducection;
MV2_Allreduce_intra_function = mv2_allreduce_thresholds_table[range].intra_node[range_threshold_intra]
- .MV2_pt_Allreduce_function;
+ .MV2_pt_Allreducection;
/* check if mcast is ready, otherwise replace mcast with other algorithm */
- if((MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)||
- (MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){
+ if((MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)||
+ (MV2_Allreducection == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){
{
- MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2;
+ MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2;
}
if(is_two_level != 1) {
- MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2;
+ MV2_Allreducection = &MPIR_Allreduce_pt2pt_rd_MV2;
}
}
datatype, op, comm);
}
} else {
- mpi_errno = MV2_Allreduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Allreducection(sendbuf, recvbuf, count,
datatype, op, comm);
}
}
}
-int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps,
+int Coll_alltoallv_mvapich2::alltoallv(void *sbuf, int *scounts, int *sdisps,
MPI_Datatype sdtype,
void *rbuf, int *rcounts, int *rdisps,
MPI_Datatype rdtype,
{
if (sbuf == MPI_IN_PLACE) {
- return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_ompi_basic_linear::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
} else /* For starters, just keep the original algorithm. */
- return smpi_coll_tuned_alltoallv_ring(sbuf, scounts, sdisps, sdtype,
+ return Coll_alltoallv_ring::alltoallv(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm);
}
-int smpi_coll_tuned_barrier_mvapich2(MPI_Comm comm)
-{
- return smpi_coll_tuned_barrier_mvapich2_pair(comm);
+int Coll_barrier_mvapich2::barrier(MPI_Comm comm) // barrier entry of the mvapich2 selector; returns the delegate's result
+{
+ return Coll_barrier_mvapich2_pair::barrier(comm); // forwarded unconditionally: no tuning table is consulted for barrier
}
-int smpi_coll_tuned_bcast_mvapich2(void *buffer,
+int Coll_bcast_mvapich2::bcast(void *buffer,
int count,
MPI_Datatype datatype,
int root, MPI_Comm comm)
int mpi_errno = MPI_SUCCESS;
int comm_size/*, rank*/;
int two_level_bcast = 1;
- long nbytes = 0;
+ long nbytes = 0;
int range = 0;
int range_threshold = 0;
int range_threshold_intra = 0;
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
- if(!mv2_bcast_thresholds_table)
+ if (not mv2_bcast_thresholds_table)
init_mv2_bcast_tables_stampede();
comm_size = comm->size();
//rank = comm->rank();
if (mv2_bcast_thresholds_table[range].inter_leader[range_threshold].
zcpy_pipelined_knomial_factor != -1) {
- zcpy_knomial_factor =
+ zcpy_knomial_factor =
mv2_bcast_thresholds_table[range].inter_leader[range_threshold].
zcpy_pipelined_knomial_factor;
}
/* Set value of pipeline segment size */
bcast_segment_size = mv2_bcast_thresholds_table[range].bcast_segment_size;
-
+
/* Set value of inter node knomial factor */
mv2_inter_node_knomial_factor = mv2_bcast_thresholds_table[range].inter_node_knomial_factor;
/* Check if we will use a two level algorithm or not */
two_level_bcast =
#if defined(_MCST_SUPPORT_)
- mv2_bcast_thresholds_table[range].is_two_level_bcast[range_threshold]
+ mv2_bcast_thresholds_table[range].is_two_level_bcast[range_threshold]
|| comm->ch.is_mcast_ok;
#else
mv2_bcast_thresholds_table[range].is_two_level_bcast[range_threshold];
#endif
if (two_level_bcast == 1) {
- if (!is_contig || !is_homogeneous) {
- tmp_buf=(void *)smpi_get_tmp_sendbuffer(nbytes);
-
-/* position = 0;*/
-/* if (rank == root) {*/
-/* mpi_errno =*/
-/* MPIR_Pack_impl(buffer, count, datatype, tmp_buf, nbytes, &position);*/
-/* if (mpi_errno)*/
-/* MPIU_ERR_POP(mpi_errno);*/
-/* }*/
+ if (not is_contig || not is_homogeneous) {
+ tmp_buf = (void*)smpi_get_tmp_sendbuffer(nbytes);
+
+ /* position = 0;*/
+ /* if (rank == root) {*/
+ /* mpi_errno =*/
+ /* MPIR_Pack_impl(buffer, count, datatype, tmp_buf, nbytes, &position);*/
+ /* if (mpi_errno)*/
+ /* MPIU_ERR_POP(mpi_errno);*/
+ /* }*/
}
#ifdef CHANNEL_MRAIL_GEN2
if ((mv2_enable_zcpy_bcast == 1) &&
- (&MPIR_Pipelined_Bcast_Zcpy_MV2 == MV2_Bcast_function)) {
- if (!is_contig || !is_homogeneous) {
- mpi_errno = MPIR_Pipelined_Bcast_Zcpy_MV2(tmp_buf, nbytes, MPI_BYTE,
- root, comm);
- } else {
+ (&MPIR_Pipelined_Bcast_Zcpy_MV2 == MV2_Bcast_function)) {
+ if (not is_contig || not is_homogeneous) {
+ mpi_errno = MPIR_Pipelined_Bcast_Zcpy_MV2(tmp_buf, nbytes, MPI_BYTE, root, comm);
+ } else {
mpi_errno = MPIR_Pipelined_Bcast_Zcpy_MV2(buffer, count, datatype,
root, comm);
- }
- } else
+ }
+ } else
#endif /* defined(CHANNEL_MRAIL_GEN2) */
- {
+ {
shmem_comm = comm->get_intra_comm();
- if (!is_contig || !is_homogeneous) {
- mpi_errno =
- MPIR_Bcast_tune_inter_node_helper_MV2(tmp_buf, nbytes, MPI_BYTE,
- root, comm);
+ if (not is_contig || not is_homogeneous) {
+ mpi_errno = MPIR_Bcast_tune_inter_node_helper_MV2(tmp_buf, nbytes, MPI_BYTE, root, comm);
} else {
mpi_errno =
MPIR_Bcast_tune_inter_node_helper_MV2(buffer, count, datatype, root,
root = INTRA_NODE_ROOT;
-
- if (!is_contig || !is_homogeneous) {
- mpi_errno = MV2_Bcast_intra_node_function(tmp_buf, nbytes,
- MPI_BYTE, root, shmem_comm);
+ if (not is_contig || not is_homogeneous) {
+ mpi_errno = MV2_Bcast_intra_node_function(tmp_buf, nbytes, MPI_BYTE, root, shmem_comm);
} else {
mpi_errno = MV2_Bcast_intra_node_function(buffer, count,
datatype, root, shmem_comm);
}
- }
-/* if (!is_contig || !is_homogeneous) {*/
-/* if (rank != root) {*/
-/* position = 0;*/
-/* mpi_errno = MPIR_Unpack_impl(tmp_buf, nbytes, &position, buffer,*/
-/* count, datatype);*/
-/* }*/
-/* }*/
+ }
+ /* if (not is_contig || not is_homogeneous) {*/
+ /* if (rank != root) {*/
+ /* position = 0;*/
+ /* mpi_errno = MPIR_Unpack_impl(tmp_buf, nbytes, &position, buffer,*/
+ /* count, datatype);*/
+ /* }*/
+ /* }*/
} else {
/* We use Knomial for intra node */
MV2_Bcast_intra_node_function = &MPIR_Knomial_Bcast_intra_node_MV2;
-int smpi_coll_tuned_reduce_mvapich2( void *sendbuf,
+int Coll_reduce_mvapich2::reduce( void *sendbuf,
void *recvbuf,
int count,
MPI_Datatype datatype,
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
- mpi_errno = MPIR_Reduce_two_level_helper_MV2(sendbuf, recvbuf, count,
+ mpi_errno = MPIR_Reduce_two_level_helper_MV2(sendbuf, recvbuf, count,
datatype, op, root, comm);
} else {
mpi_errno = MPIR_Reduce_binomial_MV2(sendbuf, recvbuf, count,
} else if(MV2_Reduce_function == &MPIR_Reduce_inter_knomial_wrapper_MV2 ){
if(is_commutative ==1)
{
- mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
datatype, op, root, comm);
} else {
mpi_errno = MPIR_Reduce_binomial_MV2(sendbuf, recvbuf, count,
} else if(MV2_Reduce_function == &MPIR_Reduce_redscat_gather_MV2){
if (/*(HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) &&*/ (count >= pof2))
{
- mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
datatype, op, root, comm);
} else {
mpi_errno = MPIR_Reduce_binomial_MV2(sendbuf, recvbuf, count,
datatype, op, root, comm);
}
} else {
- mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
+ mpi_errno = MV2_Reduce_function(sendbuf, recvbuf, count,
datatype, op, root, comm);
}
}
-int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *recvcnts,
+int Coll_reduce_scatter_mvapich2::reduce_scatter(void *sendbuf, void *recvbuf, int *recvcnts,
MPI_Datatype datatype, MPI_Op op,
MPI_Comm comm)
{
recvcnts, datatype,
op, comm);
}
- mpi_errno = smpi_coll_tuned_reduce_scatter_mpich_rdb(sendbuf, recvbuf,
+ mpi_errno = Coll_reduce_scatter_mpich_rdb::reduce_scatter(sendbuf, recvbuf,
recvcnts, datatype,
op, comm);
}
-int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
+int Coll_scatter_mvapich2::scatter(void *sendbuf,
int sendcnt,
MPI_Datatype sendtype,
void *recvbuf,
if(comm->get_leaders_comm()==MPI_COMM_NULL){
comm->init_smp();
}
-
+
comm_size = comm->size();
rank = comm->rank();
recvtype_size=recvtype->size();
nbytes = recvcnt * recvtype_size;
}
-
- // check if safe to use partial subscription mode
+
+ // check if safe to use partial subscription mode
if (comm->is_uniform()) {
shmem_comm = comm->get_intra_comm();
local_size = shmem_comm->size();
i = 0;
if (mv2_scatter_table_ppn_conf[0] == -1) {
- // Indicating user defined tuning
+ // Indicating user defined tuning
conf_index = 0;
}else{
do {
} while(i < mv2_scatter_num_ppn_conf);
}
}
-
+
if (partial_sub_ok != 1) {
conf_index = 0;
}
/* Fallback! */
MV2_Scatter_function = &MPIR_Scatter_MV2_Binomial;
}
- }
+ }
}
if( (MV2_Scatter_function == &MPIR_Scatter_MV2_two_level_Direct) ||
return (mpi_errno);
}
-void smpi_coll_cleanup_mvapich2(void){
-int i=0;
-if(mv2_alltoall_thresholds_table)
- xbt_free(mv2_alltoall_thresholds_table[i]);
-xbt_free(mv2_alltoall_thresholds_table);
-xbt_free(mv2_size_alltoall_tuning_table);
-xbt_free(mv2_alltoall_table_ppn_conf);
-
-xbt_free(mv2_gather_thresholds_table);
-if(mv2_allgather_thresholds_table)
- xbt_free(mv2_allgather_thresholds_table[0]);
-xbt_free(mv2_size_allgather_tuning_table);
-xbt_free(mv2_allgather_table_ppn_conf);
-xbt_free(mv2_allgather_thresholds_table);
-
-xbt_free(mv2_allgatherv_thresholds_table);
-xbt_free(mv2_reduce_thresholds_table);
-xbt_free(mv2_red_scat_thresholds_table);
-xbt_free(mv2_allreduce_thresholds_table);
-xbt_free(mv2_bcast_thresholds_table);
-if(mv2_scatter_thresholds_table)
- xbt_free(mv2_scatter_thresholds_table[0]);
-xbt_free(mv2_scatter_thresholds_table);
-xbt_free(mv2_size_scatter_tuning_table);
-xbt_free(mv2_scatter_table_ppn_conf);
+} // namespace smpi
+} // namespace simgrid
+
+/* Release the tuning tables used by the mvapich2 collective selector.
+ *
+ * Takes no argument and returns nothing; it only calls xbt_free() on the
+ * selector's global tables.
+ *
+ * For the tables indexed twice by the selectors (alltoall, allgather,
+ * scatter), entry [0] is freed before the outer array. The outer pointer is
+ * tested first because it is dereferenced to reach entry [0].
+ * NOTE(review): only entry [0] is released, so the remaining entries
+ * presumably share that allocation -- confirm against the
+ * init_mv2_*_tables_stampede() helpers.
+ *
+ * The other tables are passed to xbt_free() without a NULL test.
+ * NOTE(review): this assumes xbt_free() accepts a NULL pointer -- confirm.
+ */
+void smpi_coll_cleanup_mvapich2()
+{
+  /* alltoall tables */
+  if (mv2_alltoall_thresholds_table)
+    xbt_free(mv2_alltoall_thresholds_table[0]);
+  xbt_free(mv2_alltoall_thresholds_table);
+  xbt_free(mv2_size_alltoall_tuning_table);
+  xbt_free(mv2_alltoall_table_ppn_conf);
+
+  /* gather and allgather tables */
+  xbt_free(mv2_gather_thresholds_table);
+  if (mv2_allgather_thresholds_table)
+    xbt_free(mv2_allgather_thresholds_table[0]);
+  xbt_free(mv2_size_allgather_tuning_table);
+  xbt_free(mv2_allgather_table_ppn_conf);
+  xbt_free(mv2_allgather_thresholds_table);
+
+  /* flat tables, then the scatter tables */
+  xbt_free(mv2_allgatherv_thresholds_table);
+  xbt_free(mv2_reduce_thresholds_table);
+  xbt_free(mv2_red_scat_thresholds_table);
+  xbt_free(mv2_allreduce_thresholds_table);
+  xbt_free(mv2_bcast_thresholds_table);
+  if (mv2_scatter_thresholds_table)
+    xbt_free(mv2_scatter_thresholds_table[0]);
+  xbt_free(mv2_scatter_thresholds_table);
+  xbt_free(mv2_size_scatter_tuning_table);
+  xbt_free(mv2_scatter_table_ppn_conf);
}