+
+// Default values for the bcast tuning knobs. How each knob is consumed is
+// decided by code outside this section.
+int zcpy_knomial_factor = 2;
+int mv2_pipelined_zcpy_knomial_factor = -1;
+int bcast_segment_size = 8192;
+int mv2_inter_node_knomial_factor = 4;
+int mv2_intra_node_knomial_factor = 4;
+
+// Named thresholds.
+// NOTE(review): presumably a process count and two message sizes in bytes --
+// confirm at the use sites.
+#define mv2_bcast_two_level_system_size 64
+#define mv2_bcast_short_msg 16384
+// Parenthesized so the product stays a single operand wherever the macro is
+// expanded (e.g. `x / mv2_bcast_large_msg` or `x % mv2_bcast_large_msg`).
+#define mv2_bcast_large_msg (512*1024)
+
+// NOTE(review): presumably the rank that acts as root inside a node -- confirm.
+#define INTRA_NODE_ROOT 0
+
+// Aliases from the MPIR_*_MV2 names used in the tuning tables to the
+// smpi_coll_tuned_bcast_* symbols (defined elsewhere) that stand in for them.
+// Several aliases deliberately expand to the same symbol.
+
+// -> smpi_coll_tuned_bcast_mpich
+#define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich
+#define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich
+#define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich
+
+// -> smpi_coll_tuned_bcast_binomial_tree
+#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree
+
+// -> smpi_coll_tuned_bcast_scatter_LR_allgather
+#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather
+#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather
+
+// -> smpi_coll_tuned_bcast_scatter_rdb_allgather
+#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather
+
+// -> smpi_coll_tuned_bcast_mvapich2_* helpers
+#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node
+#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node
+#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node
+#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node
+
+static void init_mv2_bcast_tables_stampede(){
+ /* Stampede bcast tuning profile.
+  *
+  * Registers smpi_coll_cleanup_mvapich2 as the cleanup callback when none is
+  * set yet, sets mv2_size_bcast_tuning_table to 8, allocates
+  * mv2_bcast_thresholds_table for that many entries, and copies the literal
+  * table below into it.
+  *
+  * NOTE(review): the field meanings are not visible in this section; judging
+  * by the values, each entry looks like
+  *   { process count, segment size, two knomial factors,
+  *     flags (as many as range rows),
+  *     N, N x {min, max, bcast function, knomial factor},
+  *     N, N x {min, max, bcast function, knomial factor} }
+  * -- confirm against the mv2_bcast_tuning_table definition.
+  *
+  * In every range list the rows are contiguous (each row starts at the value
+  * where the previous one ends) and the last row ends at -1, presumably
+  * meaning "no upper bound".
+  */
+ if(smpi_coll_cleanup_callback==NULL) // keep any callback that is already registered
+ smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
+ mv2_size_bcast_tuning_table=8; // must equal the number of entries in the literal table below
+ mv2_bcast_thresholds_table = static_cast<mv2_bcast_tuning_table*>(xbt_malloc(mv2_size_bcast_tuning_table *
+ sizeof (mv2_bcast_tuning_table))); // heap storage for the final table
+
+ mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[]={ // local staging array, copied out at the end
+ {
+ 16, // NOTE(review): presumably the process count this entry applies to -- confirm
+ 8192, 4, 4, // the same three values appear in every entry
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, // as many flags as range rows below
+ 11, // number of rows in the first range list
+ {
+ {0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
+ {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+ {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
+ {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
+ {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
+ },
+ 11, // number of rows in the second range list
+ {
+ {0, 8, &MPIR_Shmem_Bcast_MV2, 2},
+ {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
+ {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
+ {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
+ {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
+ {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
+ {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+ {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
+ {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+ {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+ {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+ }
+ },
+ {
+ 32,
+ 8192, 4, 4,
+ {1, 1, 1, 1, 1, 1, 1, 1},
+ 8,
+ {
+ {0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}
+ },
+ 8,
+ {
+ {0, 128, &MPIR_Shmem_Bcast_MV2, 2},
+ {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
+ {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+ {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
+ {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+ {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
+ {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+ {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}
+ }
+ },
+ {
+ 64,
+ 8192, 4, 4,
+ {1, 1, 1, 1, 1, 1, 1, 1, 1},
+ 9,
+ {
+ {0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}
+ },
+ 9,
+ {
+ {0, 2, &MPIR_Shmem_Bcast_MV2, 4},
+ {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
+ {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
+ {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
+ {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
+ {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
+ {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
+ {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
+ {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}
+ }
+ },
+ {
+ 128,
+ 8192, 4, 4,
+ {1, 1, 1, 0}, // only entry with a 0 flag; it lines up with the NULL row in the second list
+ 4,
+ {
+ {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
+ },
+ 4,
+ {
+ {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+ {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+ {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+ {524288, -1, NULL, -1} // NOTE(review): only row without a function; presumably unused because the matching flag is 0 -- confirm
+ }
+ },
+ {
+ 256,
+ 8192, 4, 4,
+ {1, 1, 1, 1, 1},
+ 5,
+ {
+ {0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+ {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+ },
+ 5,
+ {
+ {0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+ {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+ {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+ {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+ {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+ }
+ },
+ {
+ 512,
+ 8192, 4, 4,
+ {1, 1, 1, 1, 1},
+ 5,
+ {
+ {0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
+ {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+ },
+ 5,
+ {
+ {0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+ {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+ {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+ {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+ {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}
+ }
+ },
+ {
+ 1024,
+ 8192, 4, 4,
+ {1, 1, 1, 1, 1},
+ 5,
+ {
+ {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+ {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+ },
+ 5,
+ {
+ {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+ {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+ {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+ {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+ {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+ }
+ },
+ {
+ 2048,
+ 8192, 4, 4,
+ {1, 1, 1, 1, 1, 1, 1},
+ 7,
+ {
+ {0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+ {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+ {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+ {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+ {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+ },
+ 7,
+ {
+ {0, 16, &MPIR_Shmem_Bcast_MV2, 8},
+ {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
+ {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+ {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+ {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+ {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+ {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+ }
+ }
+ };
+
+ memcpy(mv2_bcast_thresholds_table, mv2_tmp_bcast_thresholds_table,
+ mv2_size_bcast_tuning_table * sizeof (mv2_bcast_tuning_table)); // copies all 8 staged entries into the heap table
+}
+
+
+/************ Reduce variables and initializers */