1 /* Selector for collective algorithms based on the MVAPICH decision logic, calibrated on the Stampede cluster at TACC. */
2 /* This is the tuning used by MVAPICH for the Stampede platform, based on the (MV2_ARCH_INTEL_XEON_E5_2680_16,
3 * MV2_HCA_MLX_CX_FDR) configuration. */
5 /* Copyright (c) 2009-2017. The SimGrid Team. All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
10 /************ Alltoall variables and initializers */
/* Fixed length of the per-threshold arrays embedded in the tuning-table structs of this file
 * (e.g. algo_table, in_place_algo_table, inter_leader, intra_node). */
12 #define MV2_MAX_NB_THRESHOLDS 32
/* Cleanup routine; every init_mv2_*_tables_stampede() function in this file installs it as
 * simgrid::smpi::Colls::smpi_coll_cleanup_callback when no callback is registered yet.
 * Its definition is not in this part of the file. */
14 XBT_PUBLIC(void) smpi_coll_cleanup_mvapich2(void);
19 int (*MV2_pt_Alltoall_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
20 MPI_Datatype recvtype, MPI_Comm comm_ptr);
21 } mv2_alltoall_tuning_element;
26 mv2_alltoall_tuning_element algo_table[MV2_MAX_NB_THRESHOLDS];
27 mv2_alltoall_tuning_element in_place_algo_table[MV2_MAX_NB_THRESHOLDS];
28 } mv2_alltoall_tuning_table;
/* Function pointer with the alltoall collective signature, initially NULL.
 * NOTE(review): presumably set by the selector to the algorithm picked from the tables below;
 * the assignment is not in this part of the file - confirm. */
30 int (*MV2_Alltoall_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
31 MPI_Datatype recvtype, MPI_Comm comm_ptr) = NULL;
33 /* Processes-per-node value of each configuration; init_mv2_alltoall_tables_stampede() allocates it and stores 1, 2 and 16 */
34 int* mv2_alltoall_table_ppn_conf = NULL;
35 /* Total number of processes-per-node configurations; set to 3 by init_mv2_alltoall_tables_stampede() */
36 int mv2_alltoall_num_ppn_conf = 1;
/* Number of tuning-table entries per configuration (6, 6 and 7 once initialized). */
37 int* mv2_size_alltoall_tuning_table = NULL;
/* One pointer per configuration. After initialization they all point into a single contiguous
 * allocation whose start is element [0]; element [i] begins right after the entries of [i - 1]. */
38 mv2_alltoall_tuning_table** mv2_alltoall_thresholds_table = NULL;
/* Aliases binding the MPIR_*_MV2 algorithm names used in the alltoall threshold tables to the
 * SimGrid SMPI alltoall implementations. Note that the "inplace" name resolves to the ring algorithm. */
40 #define MPIR_Alltoall_bruck_MV2 simgrid::smpi::Coll_alltoall_bruck::alltoall
41 #define MPIR_Alltoall_RD_MV2 simgrid::smpi::Coll_alltoall_rdb::alltoall
42 #define MPIR_Alltoall_Scatter_dest_MV2 simgrid::smpi::Coll_alltoall_mvapich2_scatter_dest::alltoall
43 #define MPIR_Alltoall_pairwise_MV2 simgrid::smpi::Coll_alltoall_pair::alltoall
44 #define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall
46 static void init_mv2_alltoall_tables_stampede()
49 int agg_table_sum = 0;
50 mv2_alltoall_tuning_table** table_ptrs = NULL;
51 mv2_alltoall_num_ppn_conf = 3;
52 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
53 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
54 mv2_alltoall_thresholds_table = static_cast<mv2_alltoall_tuning_table**>(
55 xbt_malloc(sizeof(mv2_alltoall_tuning_table*) * mv2_alltoall_num_ppn_conf));
56 table_ptrs = static_cast<mv2_alltoall_tuning_table**>(
57 xbt_malloc(sizeof(mv2_alltoall_tuning_table*) * mv2_alltoall_num_ppn_conf));
58 mv2_size_alltoall_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) * mv2_alltoall_num_ppn_conf));
59 mv2_alltoall_table_ppn_conf = static_cast<int*>(xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int)));
60 mv2_alltoall_table_ppn_conf[0] = 1;
61 mv2_size_alltoall_tuning_table[0] = 6;
62 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
67 {0, -1, &MPIR_Alltoall_pairwise_MV2},
71 {0, -1, &MPIR_Alltoall_inplace_MV2},
79 {0, 262144, &MPIR_Alltoall_Scatter_dest_MV2}, {262144, -1, &MPIR_Alltoall_pairwise_MV2},
83 {0, -1, &MPIR_Alltoall_inplace_MV2},
91 {0, 8, &MPIR_Alltoall_RD_MV2}, {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
95 {0, -1, &MPIR_Alltoall_inplace_MV2},
103 {0, 64, &MPIR_Alltoall_RD_MV2},
104 {64, 512, &MPIR_Alltoall_bruck_MV2},
105 {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
109 {0, -1, &MPIR_Alltoall_inplace_MV2},
117 {0, 32, &MPIR_Alltoall_RD_MV2},
118 {32, 2048, &MPIR_Alltoall_bruck_MV2},
119 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
123 {0, -1, &MPIR_Alltoall_inplace_MV2},
131 {0, 8, &MPIR_Alltoall_RD_MV2},
132 {8, 1024, &MPIR_Alltoall_bruck_MV2},
133 {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
137 {0, -1, &MPIR_Alltoall_inplace_MV2},
141 table_ptrs[0] = mv2_tmp_alltoall_thresholds_table_1ppn;
142 mv2_alltoall_table_ppn_conf[1] = 2;
143 mv2_size_alltoall_tuning_table[1] = 6;
144 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
149 {0, 32, &MPIR_Alltoall_RD_MV2}, {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
153 {0, -1, &MPIR_Alltoall_inplace_MV2},
161 {0, 64, &MPIR_Alltoall_RD_MV2}, {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
165 {0, -1, &MPIR_Alltoall_inplace_MV2},
173 {0, 64, &MPIR_Alltoall_RD_MV2},
174 {64, 2048, &MPIR_Alltoall_bruck_MV2},
175 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
179 {0, -1, &MPIR_Alltoall_inplace_MV2},
187 {0, 16, &MPIR_Alltoall_RD_MV2},
188 {16, 2048, &MPIR_Alltoall_bruck_MV2},
189 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
193 {0, -1, &MPIR_Alltoall_inplace_MV2},
201 {0, 8, &MPIR_Alltoall_RD_MV2},
202 {8, 1024, &MPIR_Alltoall_bruck_MV2},
203 {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
207 {0, -1, &MPIR_Alltoall_inplace_MV2},
215 {0, 4, &MPIR_Alltoall_RD_MV2},
216 {4, 2048, &MPIR_Alltoall_bruck_MV2},
217 {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
221 {0, -1, &MPIR_Alltoall_inplace_MV2},
225 table_ptrs[1] = mv2_tmp_alltoall_thresholds_table_2ppn;
226 mv2_alltoall_table_ppn_conf[2] = 16;
227 mv2_size_alltoall_tuning_table[2] = 7;
228 mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
233 {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
237 {32768, -1, &MPIR_Alltoall_inplace_MV2},
245 {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
249 {16384, -1, &MPIR_Alltoall_inplace_MV2},
257 {0, 2048, &MPIR_Alltoall_bruck_MV2},
258 {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
259 {16384, -1, &MPIR_Alltoall_pairwise_MV2},
263 {32768, 131072, &MPIR_Alltoall_inplace_MV2},
271 {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_pairwise_MV2},
275 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
283 {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
287 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
295 {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
299 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
306 {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
310 {16384, 65536, &MPIR_Alltoall_inplace_MV2},
315 table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
317 for (i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
318 agg_table_sum += mv2_size_alltoall_tuning_table[i];
320 mv2_alltoall_thresholds_table[0] =
321 static_cast<mv2_alltoall_tuning_table*>(xbt_malloc(agg_table_sum * sizeof(mv2_alltoall_tuning_table)));
322 memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0],
323 (sizeof(mv2_alltoall_tuning_table) * mv2_size_alltoall_tuning_table[0]));
324 for (i = 1; i < mv2_alltoall_num_ppn_conf; i++) {
325 mv2_alltoall_thresholds_table[i] = mv2_alltoall_thresholds_table[i - 1] + mv2_size_alltoall_tuning_table[i - 1];
326 memcpy(mv2_alltoall_thresholds_table[i], table_ptrs[i],
327 (sizeof(mv2_alltoall_tuning_table) * mv2_size_alltoall_tuning_table[i]));
329 xbt_free(table_ptrs);
332 /************ Allgather variables and initializers */
337 int (*MV2_pt_Allgatherction)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
338 MPI_Datatype recvtype, MPI_Comm comm_ptr);
339 } mv2_allgather_tuning_element;
343 int two_level[MV2_MAX_NB_THRESHOLDS];
344 int size_inter_table;
345 mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
346 } mv2_allgather_tuning_table;
/* Function pointer with the allgather collective signature. Unlike its alltoall counterpart it has
 * no explicit initializer.
 * NOTE(review): presumably assigned by the selector before use; that code is not in this part of the file. */
348 int (*MV2_Allgatherction)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
349 MPI_Datatype recvtype, MPI_Comm comm);
/* Processes-per-node value of each configuration; init_mv2_allgather_tables_stampede() stores 1, 2 and 16. */
351 int* mv2_allgather_table_ppn_conf = NULL;
/* Number of processes-per-node configurations; set to 3 by init_mv2_allgather_tables_stampede(). */
352 int mv2_allgather_num_ppn_conf = 1;
/* Number of tuning-table entries per configuration (6 for each of the three once initialized). */
353 int* mv2_size_allgather_tuning_table = NULL;
/* One pointer per configuration; after initialization they all point into one contiguous allocation
 * that starts at element [0]. */
354 mv2_allgather_tuning_table** mv2_allgather_thresholds_table = NULL;
356 static int MPIR_Allgather_RD_Allgather_Comm_MV2(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
357 int recvcount, MPI_Datatype recvtype, MPI_Comm comm_ptr)
/* Aliases binding the MPIR_*_MV2 algorithm names to the SimGrid SMPI allgather implementations.
 * The table rows visible in this file reference the RD and Ring aliases, plus the static function
 * MPIR_Allgather_RD_Allgather_Comm_MV2 declared above. */
362 #define MPIR_Allgather_Bruck_MV2 simgrid::smpi::Coll_allgather_bruck::allgather
363 #define MPIR_Allgather_RD_MV2 simgrid::smpi::Coll_allgather_rdb::allgather
364 #define MPIR_Allgather_Ring_MV2 simgrid::smpi::Coll_allgather_ring::allgather
365 #define MPIR_2lvl_Allgather_MV2 simgrid::smpi::Coll_allgather_mvapich2_smp::allgather
367 static void init_mv2_allgather_tables_stampede()
370 int agg_table_sum = 0;
372 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
373 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
374 mv2_allgather_tuning_table** table_ptrs = NULL;
375 mv2_allgather_num_ppn_conf = 3;
376 mv2_allgather_thresholds_table = static_cast<mv2_allgather_tuning_table**>(
377 xbt_malloc(sizeof(mv2_allgather_tuning_table*) * mv2_allgather_num_ppn_conf));
378 table_ptrs = static_cast<mv2_allgather_tuning_table**>(
379 xbt_malloc(sizeof(mv2_allgather_tuning_table*) * mv2_allgather_num_ppn_conf));
380 mv2_size_allgather_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) * mv2_allgather_num_ppn_conf));
381 mv2_allgather_table_ppn_conf = static_cast<int*>(xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int)));
382 mv2_allgather_table_ppn_conf[0] = 1;
383 mv2_size_allgather_tuning_table[0] = 6;
384 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
390 {0, -1, &MPIR_Allgather_Ring_MV2},
398 {0, 262144, &MPIR_Allgather_RD_MV2}, {262144, -1, &MPIR_Allgather_Ring_MV2},
406 {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
414 {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
422 {0, 65536, &MPIR_Allgather_RD_MV2}, {65536, -1, &MPIR_Allgather_Ring_MV2},
430 {0, 32768, &MPIR_Allgather_RD_MV2}, {32768, -1, &MPIR_Allgather_Ring_MV2},
434 table_ptrs[0] = mv2_tmp_allgather_thresholds_table_1ppn;
435 mv2_allgather_table_ppn_conf[1] = 2;
436 mv2_size_allgather_tuning_table[1] = 6;
437 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
443 {0, 524288, &MPIR_Allgather_RD_MV2}, {524288, -1, &MPIR_Allgather_Ring_MV2},
451 {0, 32768, &MPIR_Allgather_RD_MV2},
452 {32768, 524288, &MPIR_Allgather_Ring_MV2},
453 {524288, -1, &MPIR_Allgather_Ring_MV2},
461 {0, 16384, &MPIR_Allgather_RD_MV2},
462 {16384, 524288, &MPIR_Allgather_Ring_MV2},
463 {524288, -1, &MPIR_Allgather_Ring_MV2},
471 {0, 65536, &MPIR_Allgather_RD_MV2},
472 {65536, 524288, &MPIR_Allgather_Ring_MV2},
473 {524288, -1, &MPIR_Allgather_Ring_MV2},
481 {0, 32768, &MPIR_Allgather_RD_MV2},
482 {32768, 524288, &MPIR_Allgather_Ring_MV2},
483 {524288, -1, &MPIR_Allgather_Ring_MV2},
491 {0, 65536, &MPIR_Allgather_RD_MV2},
492 {65536, 524288, &MPIR_Allgather_Ring_MV2},
493 {524288, -1, &MPIR_Allgather_Ring_MV2},
497 table_ptrs[1] = mv2_tmp_allgather_thresholds_table_2ppn;
498 mv2_allgather_table_ppn_conf[2] = 16;
499 mv2_size_allgather_tuning_table[2] = 6;
500 mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
506 {0, 1024, &MPIR_Allgather_RD_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
514 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
522 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
530 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
538 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
546 {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
551 table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
553 for (i = 0; i < mv2_allgather_num_ppn_conf; i++) {
554 agg_table_sum += mv2_size_allgather_tuning_table[i];
556 mv2_allgather_thresholds_table[0] =
557 static_cast<mv2_allgather_tuning_table*>(xbt_malloc(agg_table_sum * sizeof(mv2_allgather_tuning_table)));
558 memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0],
559 (sizeof(mv2_allgather_tuning_table) * mv2_size_allgather_tuning_table[0]));
560 for (i = 1; i < mv2_allgather_num_ppn_conf; i++) {
561 mv2_allgather_thresholds_table[i] = mv2_allgather_thresholds_table[i - 1] + mv2_size_allgather_tuning_table[i - 1];
562 memcpy(mv2_allgather_thresholds_table[i], table_ptrs[i],
563 (sizeof(mv2_allgather_tuning_table) * mv2_size_allgather_tuning_table[i]));
565 xbt_free(table_ptrs);
568 /************ Gather variables and initializers */
573 int (*MV2_pt_Gather_function)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
574 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
575 } mv2_gather_tuning_element;
579 int size_inter_table;
580 mv2_gather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
581 int size_intra_table;
582 mv2_gather_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
583 } mv2_gather_tuning_table;
/* Number of entries in the gather tuning table; init_mv2_gather_tables_stampede() assigns 7 again. */
585 int mv2_size_gather_tuning_table = 7;
/* Gather tuning table; allocated with xbt_malloc and filled by memcpy in init_mv2_gather_tables_stampede(). */
586 mv2_gather_tuning_table* mv2_gather_thresholds_table = NULL;
/* Signature shared by every gather implementation referenced from the table. */
588 typedef int (*MV2_Gather_function_ptr)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
589 MPI_Datatype recvtype, int root, MPI_Comm comm);
/* Selected inter-leader and intra-node gather functions, both initially NULL.
 * NOTE(review): the code that assigns them is not in this part of the file. */
591 MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL;
592 MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL;
/* Aliases binding the gather algorithm names used in the table to SimGrid SMPI implementations:
 * "Direct" resolves to the OMPI basic linear gather and "intra" to the MPICH gather selector. */
594 #define MPIR_Gather_MV2_Direct simgrid::smpi::Coll_gather_ompi_basic_linear::gather
595 #define MPIR_Gather_MV2_two_level_Direct simgrid::smpi::Coll_gather_mvapich2_two_level::gather
596 #define MPIR_Gather_intra simgrid::smpi::Coll_gather_mpich::gather
598 static void init_mv2_gather_tables_stampede()
601 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
602 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
603 mv2_size_gather_tuning_table = 7;
604 mv2_gather_thresholds_table =
605 static_cast<mv2_gather_tuning_table*>(xbt_malloc(mv2_size_gather_tuning_table * sizeof(mv2_gather_tuning_table)));
606 mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[] = {
609 {{0, 524288, &MPIR_Gather_MV2_Direct}, {524288, -1, &MPIR_Gather_intra}},
611 {{0, -1, &MPIR_Gather_MV2_Direct}}},
614 {{0, 16384, &MPIR_Gather_MV2_Direct},
615 {16384, 131072, &MPIR_Gather_intra},
616 {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
618 {{0, -1, &MPIR_Gather_intra}}},
621 {{0, 256, &MPIR_Gather_MV2_two_level_Direct},
622 {256, 16384, &MPIR_Gather_MV2_Direct},
623 {256, -1, &MPIR_Gather_MV2_two_level_Direct}},
625 {{0, -1, &MPIR_Gather_intra}}},
628 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
629 {512, 16384, &MPIR_Gather_MV2_Direct},
630 {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
632 {{0, -1, &MPIR_Gather_intra}}},
635 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
636 {512, 16384, &MPIR_Gather_MV2_Direct},
637 {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
639 {{0, -1, &MPIR_Gather_intra}}},
642 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
643 {512, 16384, &MPIR_Gather_MV2_Direct},
644 {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
646 {{0, -1, &MPIR_Gather_intra}}},
649 {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
650 {512, 16384, &MPIR_Gather_MV2_Direct},
651 {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
653 {{0, -1, &MPIR_Gather_intra}}},
656 memcpy(mv2_gather_thresholds_table, mv2_tmp_gather_thresholds_table,
657 mv2_size_gather_tuning_table * sizeof(mv2_gather_tuning_table));
660 /************ Allgatherv variables and initializers */
665 int (*MV2_pt_Allgatherv_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts,
666 int* displs, MPI_Datatype recvtype, MPI_Comm commg);
667 } mv2_allgatherv_tuning_element;
671 int size_inter_table;
672 mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
673 } mv2_allgatherv_tuning_table;
/* Function pointer with the allgatherv collective signature; it has no explicit initializer.
 * NOTE(review): presumably assigned by the selector before use; that code is not in this part of the file. */
675 int (*MV2_Allgatherv_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts,
676 int* displs, MPI_Datatype recvtype, MPI_Comm comm);
/* Number of entries in the allgatherv tuning table; set to 6 by init_mv2_allgatherv_tables_stampede(). */
678 int mv2_size_allgatherv_tuning_table = 0;
/* Allgatherv tuning table; allocated and filled by init_mv2_allgatherv_tables_stampede(). */
679 mv2_allgatherv_tuning_table* mv2_allgatherv_thresholds_table = NULL;
/* Aliases binding the MPIR_*_MV2 algorithm names to SimGrid SMPI allgatherv implementations.
 * The table rows visible in this file reference only the Rec_Doubling and Ring aliases. */
681 #define MPIR_Allgatherv_Rec_Doubling_MV2 simgrid::smpi::Coll_allgatherv_mpich_rdb::allgatherv
682 #define MPIR_Allgatherv_Bruck_MV2 simgrid::smpi::Coll_allgatherv_ompi_bruck::allgatherv
683 #define MPIR_Allgatherv_Ring_MV2 simgrid::smpi::Coll_allgatherv_mpich_ring::allgatherv
685 static void init_mv2_allgatherv_tables_stampede()
687 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
688 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
689 mv2_size_allgatherv_tuning_table = 6;
690 mv2_allgatherv_thresholds_table = static_cast<mv2_allgatherv_tuning_table*>(
691 xbt_malloc(mv2_size_allgatherv_tuning_table * sizeof(mv2_allgatherv_tuning_table)));
692 mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
697 {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
704 {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
711 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
718 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
725 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
732 {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
737 memcpy(mv2_allgatherv_thresholds_table, mv2_tmp_allgatherv_thresholds_table,
738 mv2_size_allgatherv_tuning_table * sizeof(mv2_allgatherv_tuning_table));
741 /************ Allreduce variables and initializers */
746 int (*MV2_pt_Allreducection)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
748 } mv2_allreduce_tuning_element;
753 int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS];
754 int size_inter_table;
755 mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
756 int size_intra_table;
757 mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
758 } mv2_allreduce_tuning_table;
/* Function pointers with the allreduce collective signature, both initially NULL. The table struct
 * above keeps separate inter_leader and intra_node arrays.
 * NOTE(review): presumably one pointer per array, set by the selector; the assignments are not in
 * this part of the file - confirm. */
760 int (*MV2_Allreducection)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
761 MPI_Comm comm) = NULL;
763 int (*MV2_Allreduce_intra_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
764 MPI_Comm comm) = NULL;
/* Number of entries in the allreduce tuning table; set to 8 by init_mv2_allreduce_tables_stampede(). */
766 int mv2_size_allreduce_tuning_table = 0;
/* Allreduce tuning table; allocated and filled by init_mv2_allreduce_tables_stampede(). */
767 mv2_allreduce_tuning_table* mv2_allreduce_thresholds_table = NULL;
769 static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2(void* sendbuf, void* recvbuf, int count,
770 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
775 static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
776 MPI_Op op, MPI_Comm comm)
781 static int MPIR_Allreduce_reduce_p2p_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
784 simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
788 static int MPIR_Allreduce_reduce_shmem_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
791 simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
/* Aliases binding the MPIR_*_MV2 allreduce algorithm names to SimGrid SMPI implementations.
 * The remaining algorithm names used in the table (reduce_shmem, reduce_p2p, mcst_*) are the
 * static functions declared above rather than macros. */
795 #define MPIR_Allreduce_pt2pt_rd_MV2 simgrid::smpi::Coll_allreduce_rdb::allreduce
796 #define MPIR_Allreduce_pt2pt_rs_MV2 simgrid::smpi::Coll_allreduce_mvapich2_rs::allreduce
797 #define MPIR_Allreduce_two_level_MV2 simgrid::smpi::Coll_allreduce_mvapich2_two_level::allreduce
799 static void init_mv2_allreduce_tables_stampede()
801 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
802 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
803 mv2_size_allreduce_tuning_table = 8;
804 mv2_allreduce_thresholds_table = static_cast<mv2_allreduce_tuning_table*>(
805 xbt_malloc(mv2_size_allreduce_tuning_table * sizeof(mv2_allreduce_tuning_table)));
806 mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
813 {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2}, {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
817 {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
826 {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
827 {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
828 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
832 {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
841 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
842 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
843 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
847 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
856 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
857 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
858 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
862 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
871 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
872 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
873 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
877 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
886 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
887 {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
888 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
892 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
901 {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
902 {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
903 {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
904 {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
908 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
917 {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
918 {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
919 {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
920 {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
921 {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
925 {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
930 memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
931 mv2_size_allreduce_tuning_table * sizeof(mv2_allreduce_tuning_table));
937 int (*MV2_pt_Bcast_function)(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr);
938 int zcpy_pipelined_knomial_factor;
939 } mv2_bcast_tuning_element;
943 int bcast_segment_size;
944 int intra_node_knomial_factor;
945 int inter_node_knomial_factor;
946 int is_two_level_bcast[MV2_MAX_NB_THRESHOLDS];
947 int size_inter_table;
948 mv2_bcast_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
949 int size_intra_table;
950 mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
951 } mv2_bcast_tuning_table;
/* Number of entries in the bcast tuning table; set to 8 by init_mv2_bcast_tables_stampede(). */
953 int mv2_size_bcast_tuning_table = 0;
/* Bcast tuning table; allocated and filled by init_mv2_bcast_tables_stampede(). */
954 mv2_bcast_tuning_table* mv2_bcast_thresholds_table = NULL;
/* Function pointers with the bcast collective signature, both initially NULL.
 * NOTE(review): presumably set by the selector from the inter_leader / intra_node table entries;
 * the assignments are not in this part of the file - confirm. */
956 int (*MV2_Bcast_function)(void* buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr) = NULL;
958 int (*MV2_Bcast_intra_node_function)(void* buffer, int count, MPI_Datatype datatype, int root,
959 MPI_Comm comm_ptr) = NULL;
/* Default bcast parameters (knomial factors and segment size). Their consumers are not in this
 * part of the file; the table structs above carry fields with matching names. */
961 int zcpy_knomial_factor = 2;
962 int mv2_pipelined_zcpy_knomial_factor = -1;
963 int bcast_segment_size = 8192;
964 int mv2_inter_node_knomial_factor = 4;
965 int mv2_intra_node_knomial_factor = 4;
/* Bcast constants. NOTE(review): judging by the names, a system-size cutoff and a short-message
 * size limit; units and usage are not visible here - confirm against the selector. */
966 #define mv2_bcast_two_level_system_size 64
967 #define mv2_bcast_short_msg 16384
/* Large-message threshold for broadcast: 512 * 1024 (524288).
 * The replacement list is parenthesized so the macro keeps its value when it is expanded inside a
 * larger expression, e.g. `n / mv2_bcast_large_msg` or `n % mv2_bcast_large_msg`. */
#define mv2_bcast_large_msg (512 * 1024)
/* NOTE(review): presumably the rank used as root inside a node; its use is not visible in this part of the file. */
970 #define INTRA_NODE_ROOT 0
/* Aliases binding the MPIR_*_MV2 bcast algorithm names used in the tables to SimGrid SMPI
 * implementations. Several names share one implementation:
 *  - Pipelined_Bcast_Zcpy, Pipelined_Bcast and Shmem_Bcast all resolve to Coll_bcast_mpich;
 *  - scatter_ring_allgather and its _shm variant both resolve to Coll_bcast_scatter_LR_allgather;
 *  - tune_inter_node_helper and inter_node_helper both resolve to Coll_bcast_mvapich2_inter_node. */
972 #define MPIR_Pipelined_Bcast_Zcpy_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
973 #define MPIR_Pipelined_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
974 #define MPIR_Bcast_binomial_MV2 simgrid::smpi::Coll_bcast_binomial_tree::bcast
975 #define MPIR_Bcast_scatter_ring_allgather_shm_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
976 #define MPIR_Bcast_scatter_doubling_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_rdb_allgather::bcast
977 #define MPIR_Bcast_scatter_ring_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
978 #define MPIR_Shmem_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
979 #define MPIR_Bcast_tune_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
980 #define MPIR_Bcast_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
981 #define MPIR_Knomial_Bcast_intra_node_MV2 simgrid::smpi::Coll_bcast_mvapich2_knomial_intra_node::bcast
982 #define MPIR_Bcast_intra_MV2 simgrid::smpi::Coll_bcast_mvapich2_intra_node::bcast
984 static void init_mv2_bcast_tables_stampede()
987 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
988 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
989 mv2_size_bcast_tuning_table = 8;
990 mv2_bcast_thresholds_table =
991 static_cast<mv2_bcast_tuning_table*>(xbt_malloc(mv2_size_bcast_tuning_table * sizeof(mv2_bcast_tuning_table)));
993 mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[] = {
998 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
1000 {{0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1001 {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1002 {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1003 {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1004 {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
1005 {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1006 {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1007 {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1008 {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
1009 {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
1010 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
1012 {{0, 8, &MPIR_Shmem_Bcast_MV2, 2},
1013 {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
1014 {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
1015 {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
1016 {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
1017 {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
1018 {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1019 {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
1020 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1021 {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1022 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1027 {1, 1, 1, 1, 1, 1, 1, 1},
1029 {{0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1030 {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1031 {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1032 {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1033 {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1034 {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1035 {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1036 {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}},
1038 {{0, 128, &MPIR_Shmem_Bcast_MV2, 2},
1039 {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
1040 {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1041 {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
1042 {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1043 {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
1044 {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1045 {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}}},
1050 {1, 1, 1, 1, 1, 1, 1, 1, 1},
1052 {{0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1053 {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1054 {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1055 {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1056 {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1057 {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1058 {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1059 {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1060 {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}},
1062 {{0, 2, &MPIR_Shmem_Bcast_MV2, 4},
1063 {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
1064 {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
1065 {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
1066 {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
1067 {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
1068 {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
1069 {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
1070 {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}}},
1077 {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1078 {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1079 {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1080 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
1082 {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1083 {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1084 {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1085 {524288, -1, NULL, -1}}},
1092 {{0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1093 {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1094 {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1095 {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1096 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1098 {{0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1099 {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1100 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1101 {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1102 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1109 {{0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1110 {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1111 {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1112 {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
1113 {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1115 {{0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1116 {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1117 {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1118 {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1119 {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1126 {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1127 {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1128 {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1129 {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1130 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1132 {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1133 {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1134 {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1135 {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1136 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1141 {1, 1, 1, 1, 1, 1, 1},
1143 {{0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1144 {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1145 {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1146 {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1147 {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1148 {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1149 {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1151 {{0, 16, &MPIR_Shmem_Bcast_MV2, 8},
1152 {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
1153 {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1154 {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1155 {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1156 {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1157 {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}}};
1159 memcpy(mv2_bcast_thresholds_table, mv2_tmp_bcast_thresholds_table,
1160 mv2_size_bcast_tuning_table * sizeof(mv2_bcast_tuning_table));
1163 /************ Reduce variables and initializers */
1168 int (*MV2_pt_Reduce_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1170 } mv2_reduce_tuning_element;
1176 int is_two_level_reduce[MV2_MAX_NB_THRESHOLDS];
1177 int size_inter_table;
1178 mv2_reduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1179 int size_intra_table;
1180 mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1181 } mv2_reduce_tuning_table;
/* Number of entries in the reduce tuning table; set to 8 by init_mv2_reduce_tables_stampede(). */
1183 int mv2_size_reduce_tuning_table = 0;
/* Reduce tuning table; allocated with xbt_malloc by init_mv2_reduce_tables_stampede(). */
1184 mv2_reduce_tuning_table* mv2_reduce_thresholds_table = NULL;
/* Knomial factors for the intra-node and inter-node reduce, both defaulting to -1.
 * NOTE(review): the meaning of -1 and the code reading these values are not visible here. */
1186 int mv2_reduce_intra_knomial_factor = -1;
1187 int mv2_reduce_inter_knomial_factor = -1;
/* Function pointers with the reduce collective signature, both initially NULL.
 * NOTE(review): presumably set by the selector from the inter_leader / intra_node table entries;
 * the assignments are not in this part of the file - confirm. */
1189 int (*MV2_Reduce_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1190 MPI_Comm comm_ptr) = NULL;
1192 int (*MV2_Reduce_intra_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1193 MPI_Comm comm_ptr) = NULL;
/* Aliases binding the MPIR_*_MV2 reduce algorithm names used in the tables to SimGrid SMPI
 * implementations. The inter- and intra-knomial wrappers share Coll_reduce_mvapich2_knomial, and
 * the "shmem" name resolves to the OMPI basic linear reduce. */
1195 #define MPIR_Reduce_inter_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1196 #define MPIR_Reduce_intra_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1197 #define MPIR_Reduce_binomial_MV2 simgrid::smpi::Coll_reduce_binomial::reduce
1198 #define MPIR_Reduce_redscat_gather_MV2 simgrid::smpi::Coll_reduce_scatter_gather::reduce
1199 #define MPIR_Reduce_shmem_MV2 simgrid::smpi::Coll_reduce_ompi_basic_linear::reduce
1200 #define MPIR_Reduce_two_level_helper_MV2 simgrid::smpi::Coll_reduce_mvapich2_two_level::reduce
/* Builds the reduce tuning table for the Stampede calibration.
 *
 * Steps visible here:
 *  - install smpi_coll_cleanup_mvapich2 as the collective cleanup callback if no
 *    callback has been registered yet;
 *  - set mv2_size_reduce_tuning_table to 8 and allocate that many
 *    mv2_reduce_tuning_table records with xbt_malloc;
 *  - describe the records in a local array and memcpy them into the heap table.
 *
 * Within each record the row groups follow the struct member order: inter_leader rows
 * first, then intra_node rows.
 * NOTE(review): the per-record header values are not visible in this view. Rows look
 * like {lower bound, upper bound, &algorithm}, with -1 presumably meaning "no upper
 * bound" - confirm against the selector that walks these tables. */
1202 static void init_mv2_reduce_tables_stampede()
/* Register the cleanup hook only once: keep any callback that is already set. */
1204 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1205 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
/* Must match the number of records listed in the local array below. */
1207 mv2_size_reduce_tuning_table = 8;
1208 mv2_reduce_thresholds_table =
1209 static_cast<mv2_reduce_tuning_table*>(xbt_malloc(mv2_size_reduce_tuning_table * sizeof(mv2_reduce_tuning_table)));
1210 mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
/* record 1: inter_leader rows */
1218 {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1219 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1220 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
/* record 1: intra_node rows */
1224 {0, 65536, &MPIR_Reduce_shmem_MV2}, {65536, -1, &MPIR_Reduce_binomial_MV2},
/* record 2: seven flags followed by seven inter_leader rows (presumably the
 * is_two_level_reduce array - confirm) */
1231 {1, 1, 1, 1, 0, 0, 0},
1234 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1235 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1236 {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1237 {32768, 65536, &MPIR_Reduce_binomial_MV2},
1238 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1239 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1240 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
/* record 2: intra_node rows */
1244 {0, 8192, &MPIR_Reduce_shmem_MV2},
1245 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1246 {16384, 32768, &MPIR_Reduce_shmem_MV2},
1247 {32768, 65536, &MPIR_Reduce_shmem_MV2},
1248 {65536, 262144, &MPIR_Reduce_shmem_MV2},
1249 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* record 3: inter_leader rows */
1259 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1260 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1261 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1262 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1263 {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
/* record 3: intra_node rows */
1267 {0, 8192, &MPIR_Reduce_shmem_MV2},
1268 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1269 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1270 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1271 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* record 4: inter_leader rows */
1281 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1282 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1283 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1284 {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1285 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1286 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
/* record 4: intra_node rows */
1290 {0, 8192, &MPIR_Reduce_shmem_MV2},
1291 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1292 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1293 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1294 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* record 5: seven flags followed by seven inter_leader rows (presumably the
 * is_two_level_reduce array - confirm) */
1301 {1, 1, 1, 0, 1, 1, 0},
1304 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1305 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1306 {16384, 32768, &MPIR_Reduce_binomial_MV2},
1307 {32768, 65536, &MPIR_Reduce_binomial_MV2},
1308 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1309 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1310 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
/* record 5: intra_node rows */
1314 {0, 8192, &MPIR_Reduce_shmem_MV2},
1315 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1316 {16384, 32768, &MPIR_Reduce_shmem_MV2},
1317 {32768, 65536, &MPIR_Reduce_shmem_MV2},
1318 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1319 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* record 6: inter_leader rows */
1329 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1330 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1331 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1332 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1333 {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1334 {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
/* record 6: intra_node rows */
1338 {0, 8192, &MPIR_Reduce_shmem_MV2},
1339 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1340 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1341 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1342 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* record 7: inter_leader rows */
1352 {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1353 {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1354 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1355 {65536, 262144, &MPIR_Reduce_binomial_MV2},
1356 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* record 7: intra_node rows */
1360 {0, 8192, &MPIR_Reduce_shmem_MV2},
1361 {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1362 {16384, 65536, &MPIR_Reduce_shmem_MV2},
1363 {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1364 {262144, -1, &MPIR_Reduce_binomial_MV2},
/* record 8: inter_leader rows */
1374 {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1375 {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1376 {4096, 16384, &MPIR_Reduce_binomial_MV2},
1377 {16384, 65536, &MPIR_Reduce_binomial_MV2},
1378 {65536, 131072, &MPIR_Reduce_binomial_MV2},
1379 {131072, -1, &MPIR_Reduce_binomial_MV2},
/* record 8: intra_node rows */
1383 {0, 2048, &MPIR_Reduce_shmem_MV2},
1384 {2048, 4096, &MPIR_Reduce_shmem_MV2},
1385 {4096, 16384, &MPIR_Reduce_shmem_MV2},
1386 {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1387 {65536, 131072, &MPIR_Reduce_binomial_MV2},
1388 {131072, -1, &MPIR_Reduce_shmem_MV2},
/* Publish the records: copy the local array into the heap table allocated above. */
1393 memcpy(mv2_reduce_thresholds_table, mv2_tmp_reduce_thresholds_table,
1394 mv2_size_reduce_tuning_table * sizeof(mv2_reduce_tuning_table));
1397 /************ Reduce scatter variables and initializers */
/* One row of a reduce-scatter tuning table. The member visible here is the pointer to
 * the reduce-scatter implementation selected by that row.
 * NOTE(review): the struct header and its other members are not visible in this view;
 * the tables below initialise each row as {number, number, &function}, presumably a
 * message-size range followed by the algorithm - confirm. */
1402 int (*MV2_pt_Red_scat_function)(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1404 } mv2_red_scat_tuning_element;
/* Tuning record for one reduce-scatter configuration: a row count and up to
 * MV2_MAX_NB_THRESHOLDS inter_leader rows.
 * NOTE(review): members declared before size_inter_table are not visible in this view. */
1408 int size_inter_table;
1409 mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1410 } mv2_red_scat_tuning_table;
/* Number of records in, and pointer to, the reduce-scatter tuning table. Both start
 * empty and are filled in by init_mv2_reduce_scatter_tables_stampede(). */
1412 int mv2_size_red_scat_tuning_table = 0;
1413 mv2_red_scat_tuning_table* mv2_red_scat_thresholds_table = NULL;
/* Pointer to a reduce-scatter implementation. NOTE(review): the end of this
 * declaration (remaining parameters and any initializer) is not visible in this view. */
1415 int (*MV2_Red_scat_function)(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
/* File-local adapter for the "basic" reduce-scatter table entry: forwards its
 * arguments unchanged to Coll_reduce_scatter_default::reduce_scatter.
 * The result of that call is not captured on the visible line.
 * NOTE(review): the end of the signature and the return statement are not visible in
 * this view. */
1418 static int MPIR_Reduce_Scatter_Basic_MV2(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1421 simgrid::smpi::Coll_reduce_scatter_default::reduce_scatter(sendbuf, recvbuf, recvcnts, datatype, op, comm);
/* Aliases from the MVAPICH2 reduce-scatter algorithm names used in the tables to the
 * SimGrid implementations (non-commutative, recursive halving, pairwise). */
1424 #define MPIR_Reduce_scatter_non_comm_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_noncomm::reduce_scatter
1425 #define MPIR_Reduce_scatter_Rec_Halving_MV2 \
1426 simgrid::smpi::Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter
1427 #define MPIR_Reduce_scatter_Pair_Wise_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_pair::reduce_scatter
/* Builds the reduce-scatter tuning table for the Stampede calibration.
 *
 * Steps visible here:
 *  - install smpi_coll_cleanup_mvapich2 as the collective cleanup callback if no
 *    callback has been registered yet;
 *  - set mv2_size_red_scat_tuning_table to 6 and allocate that many
 *    mv2_red_scat_tuning_table records with xbt_malloc;
 *  - describe the records in a local array and memcpy them into the heap table.
 *
 * NOTE(review): the per-record header values are not visible in this view. Rows look
 * like {lower bound, upper bound, &algorithm}, with -1 presumably meaning "no upper
 * bound" - confirm against the selector that walks these tables. */
1429 static void init_mv2_reduce_scatter_tables_stampede()
/* Register the cleanup hook only once: keep any callback that is already set. */
1431 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1432 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
/* Must match the number of records listed in the local array below. */
1433 mv2_size_red_scat_tuning_table = 6;
1434 mv2_red_scat_thresholds_table = static_cast<mv2_red_scat_tuning_table*>(
1435 xbt_malloc(mv2_size_red_scat_tuning_table * sizeof(mv2_red_scat_tuning_table)));
1436 mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
/* records 1-3: basic, then recursive halving, then pairwise */
1441 {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1442 {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1443 {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1450 {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1451 {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1452 {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1459 {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
1460 {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1461 {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
/* records 4-6: basic, then recursive halving for everything above the threshold */
1468 {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1475 {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1482 {0, 256, &MPIR_Reduce_Scatter_Basic_MV2}, {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
/* Publish the records: copy the local array into the heap table allocated above. */
1487 memcpy(mv2_red_scat_thresholds_table, mv2_tmp_red_scat_thresholds_table,
1488 mv2_size_red_scat_tuning_table * sizeof(mv2_red_scat_tuning_table));
1491 /************ Scatter variables and initializers */
/* One row of a scatter tuning table. The member visible here is the pointer to the
 * scatter implementation selected by that row.
 * NOTE(review): the struct header and its other members are not visible in this view;
 * the tables below initialise each row as {number, number, &function}, presumably a
 * message-size range followed by the algorithm - confirm. */
1496 int (*MV2_pt_Scatter_function)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1497 MPI_Datatype recvtype, int root, MPI_Comm comm);
1498 } mv2_scatter_tuning_element;
/* Tuning record for one scatter configuration: two row lists (inter_leader and
 * intra_node), each with its own row count and at most MV2_MAX_NB_THRESHOLDS rows.
 * NOTE(review): members declared before size_inter_table are not visible in this view. */
1502 int size_inter_table;
1503 mv2_scatter_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1504 int size_intra_table;
1505 mv2_scatter_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1506 } mv2_scatter_tuning_table;
/* Scatter tuning state, filled in by init_mv2_scatter_tables_stampede():
 *  - mv2_scatter_table_ppn_conf[i]: processes-per-node value of configuration i;
 *  - mv2_scatter_num_ppn_conf: number of configurations (defaults to 1);
 *  - mv2_size_scatter_tuning_table[i]: number of records of configuration i;
 *  - mv2_scatter_thresholds_table[i]: first record of configuration i. */
1508 int* mv2_scatter_table_ppn_conf = NULL;
1509 int mv2_scatter_num_ppn_conf = 1;
1510 int* mv2_size_scatter_tuning_table = NULL;
1511 mv2_scatter_tuning_table** mv2_scatter_thresholds_table = NULL;
/* Currently selected scatter implementation; NULL until something assigns it
 * (the assignment is not visible in this view). */
1513 int (*MV2_Scatter_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1514 MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
/* Same as above for the implementation named "intra"; NULL until assigned. */
1516 int (*MV2_Scatter_intra_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1517 MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
/* Forward declaration of the multicast scatter entry referenced by the 16-ppn tables. */
1518 int MPIR_Scatter_mcst_wrap_MV2(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1519 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
/* Multicast scatter entry point; it appears as the first inter_leader row of several
 * 16-ppn records in the scatter tuning tables.
 * NOTE(review): the function body is not visible in this view, so its behaviour and
 * return value must be checked against the full file. */
1521 int MPIR_Scatter_mcst_wrap_MV2(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1522 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr)
/* Aliases from the MVAPICH2 scatter algorithm names used in the tables to the SimGrid
 * implementations: flat binomial and direct (basic linear) scatters, plus their
 * two-level MVAPICH2 variants. */
1527 #define MPIR_Scatter_MV2_Binomial simgrid::smpi::Coll_scatter_ompi_binomial::scatter
1528 #define MPIR_Scatter_MV2_Direct simgrid::smpi::Coll_scatter_ompi_basic_linear::scatter
1529 #define MPIR_Scatter_MV2_two_level_Binomial simgrid::smpi::Coll_scatter_mvapich2_two_level_binomial::scatter
1530 #define MPIR_Scatter_MV2_two_level_Direct simgrid::smpi::Coll_scatter_mvapich2_two_level_direct::scatter
/* Builds the scatter tuning tables for the Stampede calibration.
 *
 * Three processes-per-node configurations are described (1, 2 and 16 ppn) with 6, 6
 * and 8 records respectively. Steps visible here:
 *  - install smpi_coll_cleanup_mvapich2 as the collective cleanup callback if no
 *    callback has been registered yet;
 *  - allocate the per-configuration arrays (record pointers, record counts, ppn
 *    values) plus the temporary table_ptrs array;
 *  - describe each configuration in a local array and remember it in table_ptrs;
 *  - allocate ONE heap buffer large enough for all records, copy the configurations
 *    into it back to back, and make mv2_scatter_thresholds_table[i] point at the
 *    start of configuration i inside that buffer;
 *  - release table_ptrs, which is only needed during construction.
 *
 * Within each record the row groups follow the struct member order: inter_leader rows
 * first, then intra_node rows.
 * NOTE(review): the per-record header values are not visible in this view. Rows look
 * like {lower bound, upper bound, &algorithm}, with -1 presumably meaning "no upper
 * bound" - confirm against the selector that walks these tables. */
1532 static void init_mv2_scatter_tables_stampede()
/* Register the cleanup hook only once: keep any callback that is already set. */
1534 if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1535 simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
/* Total number of records over all ppn configurations. */
1537 int agg_table_sum = 0;
1539 mv2_scatter_tuning_table** table_ptrs = NULL;
1540 mv2_scatter_num_ppn_conf = 3;
1541 mv2_scatter_thresholds_table =
1542 static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table*) * mv2_scatter_num_ppn_conf));
1544 static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table*) * mv2_scatter_num_ppn_conf));
1545 mv2_size_scatter_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) * mv2_scatter_num_ppn_conf));
1546 mv2_scatter_table_ppn_conf = static_cast<int*>(xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int)));
/* ---- configuration 0: 1 process per node, 6 records ---- */
1547 mv2_scatter_table_ppn_conf[0] = 1;
1548 mv2_size_scatter_tuning_table[0] = 6;
1549 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
1554 {0, -1, &MPIR_Scatter_MV2_Binomial},
1558 {0, -1, &MPIR_Scatter_MV2_Binomial},
1566 {0, -1, &MPIR_Scatter_MV2_Direct},
1570 {0, -1, &MPIR_Scatter_MV2_Direct},
1578 {0, -1, &MPIR_Scatter_MV2_Direct},
1582 {0, -1, &MPIR_Scatter_MV2_Direct},
1590 {0, -1, &MPIR_Scatter_MV2_Direct},
1594 {0, -1, &MPIR_Scatter_MV2_Direct},
1602 {0, -1, &MPIR_Scatter_MV2_Direct},
1606 {0, -1, &MPIR_Scatter_MV2_Direct},
1614 {0, 32, &MPIR_Scatter_MV2_Binomial}, {32, -1, &MPIR_Scatter_MV2_Direct},
1618 {0, -1, &MPIR_Scatter_MV2_Binomial},
1622 table_ptrs[0] = mv2_tmp_scatter_thresholds_table_1ppn;
/* ---- configuration 1: 2 processes per node, 6 records ---- */
1623 mv2_scatter_table_ppn_conf[1] = 2;
1624 mv2_size_scatter_tuning_table[1] = 6;
1625 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
1630 {0, 4096, &MPIR_Scatter_MV2_Binomial}, {4096, -1, &MPIR_Scatter_MV2_Direct},
1634 {0, -1, &MPIR_Scatter_MV2_Direct},
1642 {0, 512, &MPIR_Scatter_MV2_two_level_Direct}, {512, -1, &MPIR_Scatter_MV2_Direct},
1646 {0, -1, &MPIR_Scatter_MV2_Binomial},
1654 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1658 {0, -1, &MPIR_Scatter_MV2_Binomial},
1666 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1670 {0, -1, &MPIR_Scatter_MV2_Binomial},
1678 {0, 8192, &MPIR_Scatter_MV2_two_level_Direct}, {8192, -1, &MPIR_Scatter_MV2_Direct},
1682 {0, -1, &MPIR_Scatter_MV2_Binomial},
1690 {0, 16, &MPIR_Scatter_MV2_Binomial},
1691 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1692 {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1693 {16384, -1, &MPIR_Scatter_MV2_Direct},
1697 {0, 128, &MPIR_Scatter_MV2_Direct}, {128, -1, &MPIR_Scatter_MV2_Binomial},
1701 table_ptrs[1] = mv2_tmp_scatter_thresholds_table_2ppn;
/* ---- configuration 2: 16 processes per node, 8 records ---- */
1702 mv2_scatter_table_ppn_conf[2] = 16;
1703 mv2_size_scatter_tuning_table[2] = 8;
1704 mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
1709 {0, 256, &MPIR_Scatter_MV2_Binomial}, {256, -1, &MPIR_Scatter_MV2_Direct},
1713 {0, -1, &MPIR_Scatter_MV2_Direct},
1721 {0, 512, &MPIR_Scatter_MV2_Binomial}, {512, -1, &MPIR_Scatter_MV2_Direct},
1725 {0, -1, &MPIR_Scatter_MV2_Direct},
1733 {0, 1024, &MPIR_Scatter_MV2_two_level_Direct}, {1024, -1, &MPIR_Scatter_MV2_Direct},
1737 {0, -1, &MPIR_Scatter_MV2_Direct},
/* From here on, records start with a multicast row followed by a second row covering
 * the same 0..16 range with a non-multicast algorithm. */
1745 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1746 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1747 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1748 {2048, -1, &MPIR_Scatter_MV2_Direct},
1752 {0, -1, &MPIR_Scatter_MV2_Direct},
1760 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1761 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1762 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1763 {2048, -1, &MPIR_Scatter_MV2_Direct},
1767 {0, -1, &MPIR_Scatter_MV2_Direct},
1775 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
/* Same 0..16 range as the multicast row above, matching the sibling records; a
 * lower bound of 16 here would describe an empty range. */
1776 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1777 {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1778 {4096, -1, &MPIR_Scatter_MV2_Direct},
1782 {0, -1, &MPIR_Scatter_MV2_Binomial},
1789 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1790 {0, 16, &MPIR_Scatter_MV2_Binomial},
1791 {16, 32, &MPIR_Scatter_MV2_Binomial},
1792 {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1793 {4096, -1, &MPIR_Scatter_MV2_Direct},
1797 {0, -1, &MPIR_Scatter_MV2_Binomial},
1804 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1805 {0, 16, &MPIR_Scatter_MV2_two_level_Binomial},
1806 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1807 {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1808 {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1809 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1810 {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
1814 {0, 16, &MPIR_Scatter_MV2_Binomial},
1815 {16, 128, &MPIR_Scatter_MV2_Binomial},
1816 {128, 1024, &MPIR_Scatter_MV2_Binomial},
1817 {1024, 16384, &MPIR_Scatter_MV2_Direct},
1818 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1819 {65536, -1, &MPIR_Scatter_MV2_Direct},
1823 table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
/* Size the single shared buffer: sum of the record counts of every configuration. */
1825 for (i = 0; i < mv2_scatter_num_ppn_conf; i++) {
1826 agg_table_sum += mv2_size_scatter_tuning_table[i];
/* Configuration 0 owns the start of the buffer (and therefore the allocation). */
1828 mv2_scatter_thresholds_table[0] =
1829 static_cast<mv2_scatter_tuning_table*>(xbt_malloc(agg_table_sum * sizeof(mv2_scatter_tuning_table)));
1830 memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0],
1831 (sizeof(mv2_scatter_tuning_table) * mv2_size_scatter_tuning_table[0]));
/* Each further configuration starts right after the records of the previous one. */
1832 for (i = 1; i < mv2_scatter_num_ppn_conf; i++) {
1833 mv2_scatter_thresholds_table[i] = mv2_scatter_thresholds_table[i - 1] + mv2_size_scatter_tuning_table[i - 1];
1834 memcpy(mv2_scatter_thresholds_table[i], table_ptrs[i],
1835 (sizeof(mv2_scatter_tuning_table) * mv2_size_scatter_tuning_table[i]));
/* table_ptrs only referenced the local arrays; the array of pointers itself is freed. */
1837 xbt_free(table_ptrs);