X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/a141412c1f2459ae5275d6b03e03c654fab69dbf..d53d00d608a60a6f05e77ea7b7cd5c4e544d7ab1:/src/smpi/colls/smpi_mvapich2_selector_stampede.h diff --git a/src/smpi/colls/smpi_mvapich2_selector_stampede.h b/src/smpi/colls/smpi_mvapich2_selector_stampede.h index bf44fb7515..7245090080 100644 --- a/src/smpi/colls/smpi_mvapich2_selector_stampede.h +++ b/src/smpi/colls/smpi_mvapich2_selector_stampede.h @@ -49,13 +49,15 @@ static void init_mv2_alltoall_tables_stampede(){ int agg_table_sum = 0; mv2_alltoall_tuning_table **table_ptrs = NULL; mv2_alltoall_num_ppn_conf = 3; - mv2_alltoall_thresholds_table = xbt_malloc(sizeof(mv2_alltoall_tuning_table *) - * mv2_alltoall_num_ppn_conf); - table_ptrs = xbt_malloc(sizeof(mv2_alltoall_tuning_table *) - * mv2_alltoall_num_ppn_conf); - mv2_size_alltoall_tuning_table = xbt_malloc(sizeof(int) * - mv2_alltoall_num_ppn_conf); - mv2_alltoall_table_ppn_conf = xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int)); + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; + mv2_alltoall_thresholds_table = static_cast(xbt_malloc(sizeof(mv2_alltoall_tuning_table *) + * mv2_alltoall_num_ppn_conf)); + table_ptrs = static_cast(xbt_malloc(sizeof(mv2_alltoall_tuning_table *) + * mv2_alltoall_num_ppn_conf)); + mv2_size_alltoall_tuning_table = static_cast(xbt_malloc(sizeof(int) * + mv2_alltoall_num_ppn_conf)); + mv2_alltoall_table_ppn_conf = static_cast(xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int))); mv2_alltoall_table_ppn_conf[0] = 1; mv2_size_alltoall_tuning_table[0] = 6; mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = { @@ -270,7 +272,7 @@ static void init_mv2_alltoall_tables_stampede(){ agg_table_sum += mv2_size_alltoall_tuning_table[i]; } mv2_alltoall_thresholds_table[0] = - xbt_malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table)); + static_cast(xbt_malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table))); memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0], (sizeof(mv2_alltoall_tuning_table) * mv2_size_alltoall_tuning_table[0])); @@ -338,17 +340,20 @@ static int MPIR_Allgather_RD_Allgather_Comm_MV2( void *sendbuf, static void init_mv2_allgather_tables_stampede(){ int i; int agg_table_sum = 0; + + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; mv2_allgather_tuning_table **table_ptrs = NULL; mv2_allgather_num_ppn_conf = 3; mv2_allgather_thresholds_table - = xbt_malloc(sizeof(mv2_allgather_tuning_table *) - * mv2_allgather_num_ppn_conf); - table_ptrs = xbt_malloc(sizeof(mv2_allgather_tuning_table *) - * mv2_allgather_num_ppn_conf); - mv2_size_allgather_tuning_table = xbt_malloc(sizeof(int) * - mv2_allgather_num_ppn_conf); + = static_cast(xbt_malloc(sizeof(mv2_allgather_tuning_table *) + * mv2_allgather_num_ppn_conf)); + table_ptrs = static_cast(xbt_malloc(sizeof(mv2_allgather_tuning_table *) + * mv2_allgather_num_ppn_conf)); + mv2_size_allgather_tuning_table = static_cast(xbt_malloc(sizeof(int) * + mv2_allgather_num_ppn_conf)); mv2_allgather_table_ppn_conf - = xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int)); + = static_cast(xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int))); mv2_allgather_table_ppn_conf[0] = 1; mv2_size_allgather_tuning_table[0] = 6; mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = { @@ -536,7 +541,7 @@ static void init_mv2_allgather_tables_stampede(){ agg_table_sum += mv2_size_allgather_tuning_table[i]; } mv2_allgather_thresholds_table[0] = - xbt_malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table)); + static_cast(xbt_malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table))); memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0], (sizeof(mv2_allgather_tuning_table) * mv2_size_allgather_tuning_table[0])); @@ -593,9 +598,11 @@ MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL; static void init_mv2_gather_tables_stampede(){ + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; mv2_size_gather_tuning_table=7; - mv2_gather_thresholds_table = xbt_malloc(mv2_size_gather_tuning_table* - sizeof (mv2_gather_tuning_table)); + mv2_gather_thresholds_table = static_cast(xbt_malloc(mv2_size_gather_tuning_table* + sizeof (mv2_gather_tuning_table))); mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[]={ {16, 2,{{0, 524288, &MPIR_Gather_MV2_Direct}, @@ -678,9 +685,11 @@ mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL; static void init_mv2_allgatherv_tables_stampede(){ + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; mv2_size_allgatherv_tuning_table = 6; - mv2_allgatherv_thresholds_table = xbt_malloc(mv2_size_allgatherv_tuning_table * - sizeof (mv2_allgatherv_tuning_table)); + mv2_allgatherv_thresholds_table = static_cast(xbt_malloc(mv2_size_allgatherv_tuning_table * + sizeof (mv2_allgatherv_tuning_table))); mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = { { 16, @@ -824,9 +833,11 @@ static int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf, static void init_mv2_allreduce_tables_stampede(){ + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; mv2_size_allreduce_tuning_table = 8; - mv2_allreduce_thresholds_table = xbt_malloc(mv2_size_allreduce_tuning_table * - sizeof (mv2_allreduce_tuning_table)); + mv2_allreduce_thresholds_table = static_cast(xbt_malloc(mv2_size_allreduce_tuning_table * + sizeof (mv2_allreduce_tuning_table))); mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = { { 16, @@ -1002,24 +1013,31 @@ int mv2_pipelined_zcpy_knomial_factor = -1; int bcast_segment_size = 8192; int mv2_inter_node_knomial_factor = 4; int mv2_intra_node_knomial_factor = 4; +#define mv2_bcast_two_level_system_size 64 +#define mv2_bcast_short_msg 16384 +#define mv2_bcast_large_msg 512*1024 + #define INTRA_NODE_ROOT 0 #define MPIR_Pipelined_Bcast_Zcpy_MV2 smpi_coll_tuned_bcast_mpich #define MPIR_Pipelined_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_mpich +#define MPIR_Bcast_binomial_MV2 smpi_coll_tuned_bcast_binomial_tree +#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather +#define MPIR_Bcast_scatter_doubling_allgather_MV2 smpi_coll_tuned_bcast_scatter_rdb_allgather +#define MPIR_Bcast_scatter_ring_allgather_MV2 smpi_coll_tuned_bcast_scatter_LR_allgather #define MPIR_Shmem_Bcast_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mpich -#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mpich +#define MPIR_Bcast_tune_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node +#define MPIR_Bcast_inter_node_helper_MV2 smpi_coll_tuned_bcast_mvapich2_inter_node +#define MPIR_Knomial_Bcast_intra_node_MV2 smpi_coll_tuned_bcast_mvapich2_knomial_intra_node +#define MPIR_Bcast_intra_MV2 smpi_coll_tuned_bcast_mvapich2_intra_node static void init_mv2_bcast_tables_stampede(){ //Stampede, - mv2_size_bcast_tuning_table=8; - mv2_bcast_thresholds_table = xbt_malloc(mv2_size_bcast_tuning_table * - sizeof (mv2_bcast_tuning_table)); + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; + mv2_size_bcast_tuning_table=8; + mv2_bcast_thresholds_table = static_cast(xbt_malloc(mv2_size_bcast_tuning_table * + sizeof (mv2_bcast_tuning_table))); mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[]={ { @@ -1254,8 +1272,8 @@ int mv2_size_reduce_tuning_table = 0; mv2_reduce_tuning_table *mv2_reduce_thresholds_table = NULL; -int mv2_reduce_intra_knomial_factor = 2; -int mv2_reduce_inter_knomial_factor = 2; +int mv2_reduce_intra_knomial_factor = -1; +int mv2_reduce_inter_knomial_factor = -1; int (*MV2_Reduce_function)( void *sendbuf, void *recvbuf, @@ -1283,10 +1301,12 @@ int (*MV2_Reduce_intra_function)( void *sendbuf, static void init_mv2_reduce_tables_stampede(){ + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; /*Stampede*/ mv2_size_reduce_tuning_table = 8; - mv2_reduce_thresholds_table = xbt_malloc(mv2_size_reduce_tuning_table * - sizeof (mv2_reduce_tuning_table)); + mv2_reduce_thresholds_table = static_cast(xbt_malloc(mv2_size_reduce_tuning_table * + sizeof (mv2_reduce_tuning_table))); mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = { { 16, @@ -1525,9 +1545,11 @@ static int MPIR_Reduce_Scatter_Basic_MV2(void *sendbuf, static void init_mv2_reduce_scatter_tables_stampede(){ + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; mv2_size_red_scat_tuning_table = 6; - mv2_red_scat_thresholds_table = xbt_malloc(mv2_size_red_scat_tuning_table * - sizeof (mv2_red_scat_tuning_table)); + mv2_red_scat_thresholds_table = static_cast(xbt_malloc(mv2_size_red_scat_tuning_table * + sizeof (mv2_red_scat_tuning_table))); mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = { { 16, @@ -1649,20 +1671,22 @@ int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf, static void init_mv2_scatter_tables_stampede(){ - { + if(smpi_coll_cleanup_callback==NULL) + smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2; + int agg_table_sum = 0; int i; mv2_scatter_tuning_table **table_ptrs = NULL; mv2_scatter_num_ppn_conf = 3; mv2_scatter_thresholds_table - = xbt_malloc(sizeof(mv2_scatter_tuning_table *) - * mv2_scatter_num_ppn_conf); - table_ptrs = xbt_malloc(sizeof(mv2_scatter_tuning_table *) - * mv2_scatter_num_ppn_conf); - mv2_size_scatter_tuning_table = xbt_malloc(sizeof(int) * - mv2_scatter_num_ppn_conf); + = static_cast(xbt_malloc(sizeof(mv2_scatter_tuning_table *) + * mv2_scatter_num_ppn_conf)); + table_ptrs = static_cast(xbt_malloc(sizeof(mv2_scatter_tuning_table *) + * mv2_scatter_num_ppn_conf)); + mv2_size_scatter_tuning_table = static_cast(xbt_malloc(sizeof(int) * + mv2_scatter_num_ppn_conf)); mv2_scatter_table_ppn_conf - = xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int)); + = static_cast(xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int))); mv2_scatter_table_ppn_conf[0] = 1; mv2_size_scatter_tuning_table[0] = 6; mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = { @@ -1943,7 +1967,7 @@ static void init_mv2_scatter_tables_stampede(){ agg_table_sum += mv2_size_scatter_tuning_table[i]; } mv2_scatter_thresholds_table[0] = - xbt_malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table)); + static_cast(xbt_malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table))); memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0], (sizeof(mv2_scatter_tuning_table) * mv2_size_scatter_tuning_table[0])); @@ -1956,6 +1980,6 @@ static void init_mv2_scatter_tables_stampede(){ * mv2_size_scatter_tuning_table[i])); } xbt_free(table_ptrs); - } + }