+ void *recvbuf,
+ int count,
+ MPI_Datatype datatype,
+ MPI_Op op, MPI_Comm comm)
+{
+ simgrid::smpi::Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
+ return MPI_SUCCESS;
+}
+
+#define MPIR_Allreduce_pt2pt_rd_MV2 simgrid::smpi::Coll_allreduce_rdb::allreduce // MVAPICH2-style name aliased to SimGrid's Coll_allreduce_rdb::allreduce
+#define MPIR_Allreduce_pt2pt_rs_MV2 simgrid::smpi::Coll_allreduce_mvapich2_rs::allreduce // MVAPICH2-style name aliased to Coll_allreduce_mvapich2_rs::allreduce
+#define MPIR_Allreduce_two_level_MV2 simgrid::smpi::Coll_allreduce_mvapich2_two_level::allreduce // alias for the two-level variant; not referenced by the Stampede table rows visible here
+
+
+/*
+ * Build the Stampede allreduce tuning table and publish it through the
+ * globals mv2_allreduce_thresholds_table / mv2_size_allreduce_tuning_table.
+ *
+ * The table is written as a local aggregate and then copied into a buffer
+ * obtained from xbt_malloc(). The MVAPICH2 cleanup callback is registered
+ * only when no cleanup callback has been installed yet.
+ *
+ * Each entry lists, in order: the process count, one integer (0 everywhere
+ * here), the two-level flags, the inter-leader row count and rows, and the
+ * intra-node row count and rows. Every row is {min, max, function}; a max
+ * of -1 marks the last, open-ended row.
+ * NOTE(review): the struct definition is outside this chunk; the field
+ * order above is read off the initializers -- confirm against the typedef.
+ */
+static void init_mv2_allreduce_tables_stampede()
+{
+  if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
+    simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+
+  // Eight process-count entries: 16, 32, 64, 128, 256, 512, 1024 and 2048.
+  mv2_size_allreduce_tuning_table = 8;
+  mv2_allreduce_thresholds_table  = static_cast<mv2_allreduce_tuning_table*>(
+      xbt_malloc(mv2_size_allreduce_tuning_table * sizeof(mv2_allreduce_tuning_table)));
+
+  mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
+    {
+      16,
+      0,
+      {1, 0},
+      2,
+      {
+        {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
+        {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+    {
+      32,
+      0,
+      {1, 1, 0},
+      3,
+      {
+        {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
+        {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+    {
+      64,
+      0,
+      {1, 1, 0},
+      3,
+      {
+        {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+        {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+    {
+      128,
+      0,
+      {1, 1, 0},
+      3,
+      {
+        {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+        {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+    {
+      256,
+      0,
+      {1, 1, 0},
+      3,
+      {
+        {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+        {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+    {
+      512,
+      0,
+      {1, 1, 0},
+      3,
+      {
+        {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+        {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+    {
+      1024,
+      0,
+      {1, 1, 1, 0},
+      4,
+      {
+        {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
+        {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+        {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+    {
+      2048,
+      0,
+      // NOTE(review): only four flags are listed for five rows; the fifth
+      // is implicitly 0. Confirm the intended flags against upstream.
+      {1, 1, 1, 0},
+      // The row count must match the five rows below so that the
+      // open-ended {16384, -1} row lies inside the declared table.
+      5,
+      {
+        {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
+        {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
+        {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
+        {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
+        {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+      },
+      2,
+      {
+        {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+        {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+      },
+    },
+
+  };
+  memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
+         mv2_size_allreduce_tuning_table * sizeof(mv2_allreduce_tuning_table));
+}
+
+
+
+
+typedef struct { // One row of a bcast tuning table: a [min, max] range plus the bcast routine listed for it.
+ int min; // lower bound of the range; in the tables each row's min equals the previous row's max
+ int max; // upper bound; the last row of each table uses -1 (presumably "no upper limit" -- confirm in the selector)
+ int (*MV2_pt_Bcast_function) (void *buf, int count, MPI_Datatype datatype, // bcast implementation for this range; one table row stores NULL here
+ int root, MPI_Comm comm_ptr);
+ int zcpy_pipelined_knomial_factor; // per-row factor; the tables store 2, 4, 8 or -1 (NOTE(review): meaning of -1 is not shown in this chunk)
+} mv2_bcast_tuning_element;
+
+typedef struct { // Bcast tuning data for one process-count entry; filled positionally by the table initializer, so field order matters.
+ int numproc; // process count the entry is keyed on (16 ... 2048 in the Stampede table)
+ int bcast_segment_size; // 8192 in every Stampede entry
+ int intra_node_knomial_factor; // 4 in every Stampede entry
+ int inter_node_knomial_factor; // 4 in every Stampede entry
+ int is_two_level_bcast[MV2_MAX_NB_THRESHOLDS]; // 0/1 flags listed next to the inter_leader rows (presumably one per row -- confirm)
+ int size_inter_table; // number of initialized rows in inter_leader
+ mv2_bcast_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS]; // rows for the inter-leader stage
+ int size_intra_table; // number of initialized rows in intra_node
+ mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS]; // rows for the intra-node stage
+} mv2_bcast_tuning_table;
+
+int mv2_size_bcast_tuning_table = 0; // entry count of mv2_bcast_thresholds_table; 0 until the table initializer runs
+mv2_bcast_tuning_table *mv2_bcast_thresholds_table = NULL; // heap copy of the tuning table; NULL until the table initializer runs
+
+
+int (*MV2_Bcast_function) (void *buffer, int count, MPI_Datatype datatype, // bcast routine pointer, initially NULL (presumably assigned by the selector -- confirm)
+ int root, MPI_Comm comm_ptr) = NULL;
+
+int (*MV2_Bcast_intra_node_function) (void *buffer, int count, MPI_Datatype datatype, // intra-node bcast routine pointer, initially NULL (presumably assigned by the selector -- confirm)
+ int root, MPI_Comm comm_ptr) = NULL;
+
+// Default values of the bcast tuning knobs; they match the segment size (8192)
+// and k-nomial factors (4, 4) stored in every Stampede bcast table entry.
+int zcpy_knomial_factor = 2;
+int mv2_pipelined_zcpy_knomial_factor = -1;
+int bcast_segment_size = 8192;
+int mv2_inter_node_knomial_factor = 4;
+int mv2_intra_node_knomial_factor = 4;
+// Bcast thresholds.
+// NOTE(review): units (process count / message bytes) are inferred from the names -- confirm.
+#define mv2_bcast_two_level_system_size 64
+#define mv2_bcast_short_msg 16384
+// Parenthesized so the product stays one value inside larger expressions
+// (e.g. `x / mv2_bcast_large_msg` or `x % mv2_bcast_large_msg`).
+#define mv2_bcast_large_msg (512*1024)
+
+#define INTRA_NODE_ROOT 0
+
+#define MPIR_Pipelined_Bcast_Zcpy_MV2 simgrid::smpi::Coll_bcast_mpich::bcast // MVAPICH2-style names aliased to SimGrid collectives; several names share one implementation
+#define MPIR_Pipelined_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast // same target as the Zcpy variant above
+#define MPIR_Bcast_binomial_MV2 simgrid::smpi::Coll_bcast_binomial_tree::bcast
+#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast // same target as the non-shm ring variant below
+#define MPIR_Bcast_scatter_doubling_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_rdb_allgather::bcast
+#define MPIR_Bcast_scatter_ring_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
+#define MPIR_Shmem_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast // also maps to Coll_bcast_mpich::bcast
+#define MPIR_Bcast_tune_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast // both inter-node helper names share one target
+#define MPIR_Bcast_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
+#define MPIR_Knomial_Bcast_intra_node_MV2 simgrid::smpi::Coll_bcast_mvapich2_knomial_intra_node::bcast
+#define MPIR_Bcast_intra_MV2 simgrid::smpi::Coll_bcast_mvapich2_intra_node::bcast
+
+/*
+ * Populate the global bcast tuning table with the Stampede values.
+ *
+ * Registers smpi_coll_cleanup_mvapich2 as the collective cleanup callback
+ * when none is set, then copies the eight per-process-count entries below
+ * into a buffer obtained from xbt_malloc(), reachable afterwards through
+ * mv2_bcast_thresholds_table (with mv2_size_bcast_tuning_table entries).
+ *
+ * Entry layout (see mv2_bcast_tuning_table): numproc, segment size,
+ * intra/inter k-nomial factors, two-level flags, inter-leader row count and
+ * rows, intra-node row count and rows. Rows are {min, max, function, factor}.
+ */
+static void init_mv2_bcast_tables_stampede()
+{
+  // Stampede
+  if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) {
+    simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+  }
+
+  mv2_bcast_tuning_table stampede_bcast_entries[] = {
+    { 16,
+      8192, 4, 4,
+      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+      11,
+      { {0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
+        {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+        {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
+        {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
+        {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1} },
+      11,
+      { {0, 8, &MPIR_Shmem_Bcast_MV2, 2},
+        {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
+        {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
+        {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
+        {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
+        {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
+        {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+        {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
+        {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+        {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+        {524288, -1, &MPIR_Shmem_Bcast_MV2, -1} } },
+    { 32,
+      8192, 4, 4,
+      {1, 1, 1, 1, 1, 1, 1, 1},
+      8,
+      { {0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8} },
+      8,
+      { {0, 128, &MPIR_Shmem_Bcast_MV2, 2},
+        {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
+        {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+        {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
+        {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+        {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
+        {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+        {524288, -1, &MPIR_Shmem_Bcast_MV2, 8} } },
+    { 64,
+      8192, 4, 4,
+      {1, 1, 1, 1, 1, 1, 1, 1, 1},
+      9,
+      { {0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2} },
+      9,
+      { {0, 2, &MPIR_Shmem_Bcast_MV2, 4},
+        {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
+        {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
+        {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
+        {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
+        {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
+        {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
+        {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
+        {32768, -1, &MPIR_Shmem_Bcast_MV2, 2} } },
+    { 128,
+      8192, 4, 4,
+      {1, 1, 1, 0},
+      4,
+      { {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1} },
+      4,
+      { {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+        {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+        {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+        // No intra-node routine is listed for the last range of this entry.
+        {524288, -1, NULL, -1} } },
+    { 256,
+      8192, 4, 4,
+      {1, 1, 1, 1, 1},
+      5,
+      { {0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+        {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1} },
+      5,
+      { {0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+        {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+        {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+        {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+        {524288, -1, &MPIR_Shmem_Bcast_MV2, -1} } },
+    { 512,
+      8192, 4, 4,
+      {1, 1, 1, 1, 1},
+      5,
+      { {0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
+        {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1} },
+      5,
+      { {0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+        {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+        {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+        {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+        {262144, -1, &MPIR_Shmem_Bcast_MV2, -1} } },
+    { 1024,
+      8192, 4, 4,
+      {1, 1, 1, 1, 1},
+      5,
+      { {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+        {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1} },
+      5,
+      { {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+        {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+        {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+        {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+        {524288, -1, &MPIR_Shmem_Bcast_MV2, -1} } },
+    { 2048,
+      8192, 4, 4,
+      {1, 1, 1, 1, 1, 1, 1},
+      7,
+      { {0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+        {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+        {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+        {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+        {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1} },
+      7,
+      { {0, 16, &MPIR_Shmem_Bcast_MV2, 8},
+        {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
+        {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+        {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+        {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+        {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+        {524288, -1, &MPIR_Shmem_Bcast_MV2, -1} } }
+  };
+
+  // Publish the entry count, then allocate the global table and copy the entries into it.
+  mv2_size_bcast_tuning_table = 8;
+  mv2_bcast_thresholds_table  = static_cast<mv2_bcast_tuning_table*>(
+      xbt_malloc(mv2_size_bcast_tuning_table * sizeof(mv2_bcast_tuning_table)));
+  memcpy(mv2_bcast_thresholds_table, stampede_bcast_entries,
+         mv2_size_bcast_tuning_table * sizeof(mv2_bcast_tuning_table));
+}
+
+
+/************ Reduce variables and initializers */
+
+typedef struct { // One row of a reduce tuning table: a [min, max] range plus the reduce routine listed for it.
+ int min; // lower bound of the range; in the tables each row's min equals the previous row's max
+ int max; // upper bound; the last row of each table uses -1 (presumably "no upper limit" -- confirm in the selector)
+ int (*MV2_pt_Reduce_function)(void *sendbuf, // reduce implementation for this range
+ void *recvbuf,
+ int count,
+ MPI_Datatype datatype,
+ MPI_Op op,
+ int root,
+ MPI_Comm comm_ptr);
+} mv2_reduce_tuning_element;
+
+typedef struct { // Reduce tuning data for one process-count entry; filled positionally by the table initializer, so field order matters.
+ int numproc; // process count the entry is keyed on (16 ... 2048 in the Stampede table)
+ int inter_k_degree; // 4 in every Stampede entry
+ int intra_k_degree; // 4 in every Stampede entry
+ int is_two_level_reduce[MV2_MAX_NB_THRESHOLDS]; // 0/1 flags listed next to the inter_leader rows (presumably one per row -- confirm)
+ int size_inter_table; // number of initialized rows in inter_leader
+ mv2_reduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS]; // rows for the inter-leader stage
+ int size_intra_table; // number of initialized rows in intra_node
+ mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS]; // rows for the intra-node stage
+} mv2_reduce_tuning_table;
+
+int mv2_size_reduce_tuning_table = 0; // entry count of mv2_reduce_thresholds_table; 0 until the table initializer runs
+mv2_reduce_tuning_table *mv2_reduce_thresholds_table = NULL; // heap copy of the tuning table; NULL until the table initializer runs
+
+
+int mv2_reduce_intra_knomial_factor = -1; // starts at -1 (NOTE(review): presumably "not set"; confirm where it is assigned)
+int mv2_reduce_inter_knomial_factor = -1; // starts at -1 (NOTE(review): presumably "not set"; confirm where it is assigned)
+
+int (*MV2_Reduce_function)( void *sendbuf, // reduce routine pointer, initially NULL (presumably assigned by the selector -- confirm)
+ void *recvbuf,
+ int count,
+ MPI_Datatype datatype,
+ MPI_Op op,
+ int root,
+ MPI_Comm comm_ptr)=NULL;
+
+int (*MV2_Reduce_intra_function)( void *sendbuf, // intra-node reduce routine pointer, initially NULL (presumably assigned by the selector -- confirm)
+ void *recvbuf,
+ int count,
+ MPI_Datatype datatype,
+ MPI_Op op,
+ int root,
+ MPI_Comm comm_ptr)=NULL;
+
+
+#define MPIR_Reduce_inter_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce // MVAPICH2-style names aliased to SimGrid reduce collectives
+#define MPIR_Reduce_intra_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce // same target as the inter-node k-nomial wrapper above
+#define MPIR_Reduce_binomial_MV2 simgrid::smpi::Coll_reduce_binomial::reduce
+#define MPIR_Reduce_redscat_gather_MV2 simgrid::smpi::Coll_reduce_scatter_gather::reduce
+#define MPIR_Reduce_shmem_MV2 simgrid::smpi::Coll_reduce_ompi_basic_linear::reduce // the "shmem" name maps to Coll_reduce_ompi_basic_linear::reduce
+#define MPIR_Reduce_two_level_helper_MV2 simgrid::smpi::Coll_reduce_mvapich2_two_level::reduce // not referenced by the Stampede table rows visible here
+
+
+/*
+ * Populate the global reduce tuning table with the Stampede values.
+ *
+ * Registers smpi_coll_cleanup_mvapich2 as the collective cleanup callback
+ * when none is set, then copies the eight per-process-count entries below
+ * into a buffer obtained from xbt_malloc(), reachable afterwards through
+ * mv2_reduce_thresholds_table (with mv2_size_reduce_tuning_table entries).
+ *
+ * Entry layout (see mv2_reduce_tuning_table): numproc, inter and intra k
+ * degrees, two-level flags, inter-leader row count and rows, intra-node row
+ * count and rows. Rows are {min, max, function}.
+ */
+static void init_mv2_reduce_tables_stampede()
+{
+  if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL) {
+    simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
+  }
+
+  /* Stampede */
+  mv2_reduce_tuning_table stampede_reduce_entries[] = {
+    { 16,
+      4, 4,
+      {1, 0, 0},
+      3,
+      { {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2}, },
+      2,
+      { {0, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, -1, &MPIR_Reduce_binomial_MV2}, }, },
+    { 32,
+      4, 4,
+      {1, 1, 1, 1, 0, 0, 0},
+      7,
+      { {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {32768, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2}, },
+      6,
+      { {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_shmem_MV2},
+        {32768, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_shmem_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2}, }, },
+    { 64,
+      4, 4,
+      {1, 1, 1, 1, 0},
+      5,
+      { {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_redscat_gather_MV2}, },
+      5,
+      { {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2}, }, },
+    { 128,
+      4, 4,
+      {1, 0, 1, 0, 1, 0},
+      6,
+      { {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2}, },
+      5,
+      { {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2}, }, },
+    { 256,
+      4, 4,
+      {1, 1, 1, 0, 1, 1, 0},
+      7,
+      { {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_binomial_MV2},
+        {32768, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_binomial_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2}, },
+      6,
+      { {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_shmem_MV2},
+        {32768, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2}, }, },
+    { 512,
+      4, 4,
+      {1, 0, 1, 1, 1, 0},
+      6,
+      { {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_binomial_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2}, },
+      5,
+      { {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2}, }, },
+    { 1024,
+      4, 4,
+      {1, 0, 1, 1, 1},
+      5,
+      { {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_binomial_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2}, },
+      5,
+      { {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2}, }, },
+    { 2048,
+      4, 4,
+      {1, 0, 1, 1, 1, 1},
+      6,
+      { {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {4096, 16384, &MPIR_Reduce_binomial_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 131072, &MPIR_Reduce_binomial_MV2},
+        {131072, -1, &MPIR_Reduce_binomial_MV2}, },
+      6,
+      { {0, 2048, &MPIR_Reduce_shmem_MV2},
+        {2048, 4096, &MPIR_Reduce_shmem_MV2},
+        {4096, 16384, &MPIR_Reduce_shmem_MV2},
+        {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {65536, 131072, &MPIR_Reduce_binomial_MV2},
+        {131072, -1, &MPIR_Reduce_shmem_MV2}, }, },
+  };
+
+  // Publish the entry count, then allocate the global table and copy the entries into it.
+  mv2_size_reduce_tuning_table = 8;
+  mv2_reduce_thresholds_table  = static_cast<mv2_reduce_tuning_table*>(
+      xbt_malloc(mv2_size_reduce_tuning_table * sizeof(mv2_reduce_tuning_table)));
+  memcpy(mv2_reduce_thresholds_table, stampede_reduce_entries,
+         mv2_size_reduce_tuning_table * sizeof(mv2_reduce_tuning_table));
+}
+
+/************ Reduce scatter variables and initializers */
+
+typedef struct { // One row of a reduce-scatter tuning table: a [min, max] range plus the routine listed for it.
+ int min; // lower bound of the range (NOTE(review): presumably used like the bcast/reduce rows; no red_scat table is visible in this chunk)
+ int max; // upper bound of the range (NOTE(review): the sibling tables use -1 for an open-ended last row -- confirm here)
+ int (*MV2_pt_Red_scat_function)(void *sendbuf, // reduce-scatter implementation for this range
+ void *recvbuf,
+ int *recvcnts,
+ MPI_Datatype datatype,
+ MPI_Op op,
+ MPI_Comm comm_ptr);
+} mv2_red_scat_tuning_element;
+
+typedef struct { // Reduce-scatter tuning data for one process-count entry; unlike the bcast/reduce tables it has only an inter_leader list.
+ int numproc; // process count the entry is keyed on
+ int size_inter_table; // presumably the number of initialized rows in inter_leader, as in the sibling tables -- confirm
+ mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS]; // range rows, at most MV2_MAX_NB_THRESHOLDS of them
+} mv2_red_scat_tuning_table;
+
+int mv2_size_red_scat_tuning_table = 0; // entry count of mv2_red_scat_thresholds_table; starts at 0
+mv2_red_scat_tuning_table *mv2_red_scat_thresholds_table = NULL; // reduce-scatter tuning table pointer; starts NULL
+
+
+int (*MV2_Red_scat_function)(void *sendbuf, // reduce-scatter routine pointer; declared without the explicit "= NULL" its bcast/reduce counterparts carry
+ void *recvbuf,
+ int *recvcnts,
+ MPI_Datatype datatype,
+ MPI_Op op,
+ MPI_Comm comm_ptr);
+
+
+
+static int MPIR_Reduce_Scatter_Basic_MV2(void *sendbuf,
+ void *recvbuf,
+ int *recvcnts,
+ MPI_Datatype datatype,
+ MPI_Op op,
+ MPI_Comm comm)