Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge pull request #259 from simgrid/configfix
[simgrid.git] / src / smpi / colls / smpi_mvapich2_selector_stampede.hpp
1 /* selector for collective algorithms based on mvapich decision logic, with calibration from Stampede cluster at TACC*/
2 /* This is the tuning used by MVAPICH for Stampede platform based on (MV2_ARCH_INTEL_XEON_E5_2680_16,
3  * MV2_HCA_MLX_CX_FDR) */
4
5 /* Copyright (c) 2009-2018. The SimGrid Team. All rights reserved.          */
6
7 /* This program is free software; you can redistribute it and/or modify it
8  * under the terms of the license (GNU LGPL) which comes with this package. */
9
10 /************ Alltoall variables and initializers                        */
11
12 #ifndef SMPI_MVAPICH2_SELECTOR_STAMPEDE_HPP
13 #define SMPI_MVAPICH2_SELECTOR_STAMPEDE_HPP
14
15 #include <algorithm>
16
17 #define MV2_MAX_NB_THRESHOLDS 32
18
19 XBT_PUBLIC void smpi_coll_cleanup_mvapich2(void);
20
21 struct mv2_alltoall_tuning_element {
22   int min;
23   int max;
24   int (*MV2_pt_Alltoall_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
25                                   MPI_Datatype recvtype, MPI_Comm comm_ptr);
26 };
27
28 struct mv2_alltoall_tuning_table {
29   int numproc;
30   int size_table;
31   mv2_alltoall_tuning_element algo_table[MV2_MAX_NB_THRESHOLDS];
32   mv2_alltoall_tuning_element in_place_algo_table[MV2_MAX_NB_THRESHOLDS];
33 };
34
35 int (*MV2_Alltoall_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
36                              MPI_Datatype recvtype, MPI_Comm comm_ptr) = NULL;
37
38 /* Indicates number of processes per node */
39 int* mv2_alltoall_table_ppn_conf = NULL;
40 /* Indicates total number of configurations */
41 int mv2_alltoall_num_ppn_conf                             = 1;
42 int* mv2_size_alltoall_tuning_table                       = NULL;
43 mv2_alltoall_tuning_table** mv2_alltoall_thresholds_table = NULL;
44
45 #define MPIR_Alltoall_bruck_MV2 simgrid::smpi::Coll_alltoall_bruck::alltoall
46 #define MPIR_Alltoall_RD_MV2 simgrid::smpi::Coll_alltoall_rdb::alltoall
47 #define MPIR_Alltoall_Scatter_dest_MV2 simgrid::smpi::Coll_alltoall_mvapich2_scatter_dest::alltoall
48 #define MPIR_Alltoall_pairwise_MV2 simgrid::smpi::Coll_alltoall_pair::alltoall
49 #define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall
50
51 static void init_mv2_alltoall_tables_stampede()
52 {
53   int agg_table_sum                      = 0;
54   mv2_alltoall_tuning_table** table_ptrs = NULL;
55   mv2_alltoall_num_ppn_conf              = 3;
56   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
57     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
58   mv2_alltoall_thresholds_table                      = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf];
59   table_ptrs                                         = new mv2_alltoall_tuning_table*[mv2_alltoall_num_ppn_conf];
60   mv2_size_alltoall_tuning_table                     = new int[mv2_alltoall_num_ppn_conf];
61   mv2_alltoall_table_ppn_conf                        = new int[mv2_alltoall_num_ppn_conf];
62   mv2_alltoall_table_ppn_conf[0]    = 1;
63   mv2_size_alltoall_tuning_table[0] = 6;
64   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
65       {
66           2,
67           1,
68           {
69               {0, -1, &MPIR_Alltoall_pairwise_MV2},
70           },
71
72           {
73               {0, -1, &MPIR_Alltoall_inplace_MV2},
74           },
75       },
76
77       {
78           4,
79           2,
80           {
81               {0, 262144, &MPIR_Alltoall_Scatter_dest_MV2}, {262144, -1, &MPIR_Alltoall_pairwise_MV2},
82           },
83
84           {
85               {0, -1, &MPIR_Alltoall_inplace_MV2},
86           },
87       },
88
89       {
90           8,
91           2,
92           {
93               {0, 8, &MPIR_Alltoall_RD_MV2}, {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
94           },
95
96           {
97               {0, -1, &MPIR_Alltoall_inplace_MV2},
98           },
99       },
100
101       {
102           16,
103           3,
104           {
105               {0, 64, &MPIR_Alltoall_RD_MV2},
106               {64, 512, &MPIR_Alltoall_bruck_MV2},
107               {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
108           },
109
110           {
111               {0, -1, &MPIR_Alltoall_inplace_MV2},
112           },
113       },
114
115       {
116           32,
117           3,
118           {
119               {0, 32, &MPIR_Alltoall_RD_MV2},
120               {32, 2048, &MPIR_Alltoall_bruck_MV2},
121               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
122           },
123
124           {
125               {0, -1, &MPIR_Alltoall_inplace_MV2},
126           },
127       },
128
129       {
130           64,
131           3,
132           {
133               {0, 8, &MPIR_Alltoall_RD_MV2},
134               {8, 1024, &MPIR_Alltoall_bruck_MV2},
135               {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
136           },
137
138           {
139               {0, -1, &MPIR_Alltoall_inplace_MV2},
140           },
141       },
142   };
143   table_ptrs[0]                                                      = mv2_tmp_alltoall_thresholds_table_1ppn;
144   mv2_alltoall_table_ppn_conf[1]                                     = 2;
145   mv2_size_alltoall_tuning_table[1]                                  = 6;
146   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
147       {
148           4,
149           2,
150           {
151               {0, 32, &MPIR_Alltoall_RD_MV2}, {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
152           },
153
154           {
155               {0, -1, &MPIR_Alltoall_inplace_MV2},
156           },
157       },
158
159       {
160           8,
161           2,
162           {
163               {0, 64, &MPIR_Alltoall_RD_MV2}, {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
164           },
165
166           {
167               {0, -1, &MPIR_Alltoall_inplace_MV2},
168           },
169       },
170
171       {
172           16,
173           3,
174           {
175               {0, 64, &MPIR_Alltoall_RD_MV2},
176               {64, 2048, &MPIR_Alltoall_bruck_MV2},
177               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
178           },
179
180           {
181               {0, -1, &MPIR_Alltoall_inplace_MV2},
182           },
183       },
184
185       {
186           32,
187           3,
188           {
189               {0, 16, &MPIR_Alltoall_RD_MV2},
190               {16, 2048, &MPIR_Alltoall_bruck_MV2},
191               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
192           },
193
194           {
195               {0, -1, &MPIR_Alltoall_inplace_MV2},
196           },
197       },
198
199       {
200           64,
201           3,
202           {
203               {0, 8, &MPIR_Alltoall_RD_MV2},
204               {8, 1024, &MPIR_Alltoall_bruck_MV2},
205               {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
206           },
207
208           {
209               {0, -1, &MPIR_Alltoall_inplace_MV2},
210           },
211       },
212
213       {
214           128,
215           3,
216           {
217               {0, 4, &MPIR_Alltoall_RD_MV2},
218               {4, 2048, &MPIR_Alltoall_bruck_MV2},
219               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
220           },
221
222           {
223               {0, -1, &MPIR_Alltoall_inplace_MV2},
224           },
225       },
226   };
227   table_ptrs[1]                                                       = mv2_tmp_alltoall_thresholds_table_2ppn;
228   mv2_alltoall_table_ppn_conf[2]                                      = 16;
229   mv2_size_alltoall_tuning_table[2]                                   = 7;
230   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
231       {
232           16,
233           2,
234           {
235               {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
236           },
237
238           {
239               {32768, -1, &MPIR_Alltoall_inplace_MV2},
240           },
241       },
242
243       {
244           32,
245           2,
246           {
247               {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
248           },
249
250           {
251               {16384, -1, &MPIR_Alltoall_inplace_MV2},
252           },
253       },
254
255       {
256           64,
257           3,
258           {
259               {0, 2048, &MPIR_Alltoall_bruck_MV2},
260               {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
261               {16384, -1, &MPIR_Alltoall_pairwise_MV2},
262           },
263
264           {
265               {32768, 131072, &MPIR_Alltoall_inplace_MV2},
266           },
267       },
268
269       {
270           128,
271           2,
272           {
273               {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_pairwise_MV2},
274           },
275
276           {
277               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
278           },
279       },
280
281       {
282           256,
283           2,
284           {
285               {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
286           },
287
288           {
289               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
290           },
291       },
292
293       {
294           512,
295           2,
296           {
297               {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
298           },
299
300           {
301               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
302           },
303       },
304       {
305           1024,
306           2,
307           {
308               {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
309           },
310
311           {
312               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
313           },
314       },
315
316   };
317   table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
318   agg_table_sum = 0;
319   for (int i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
320     agg_table_sum += mv2_size_alltoall_tuning_table[i];
321   }
322   mv2_alltoall_thresholds_table[0] = new mv2_alltoall_tuning_table[agg_table_sum];
323   std::copy_n(table_ptrs[0], mv2_size_alltoall_tuning_table[0], mv2_alltoall_thresholds_table[0]);
324   for (int i = 1; i < mv2_alltoall_num_ppn_conf; i++) {
325     mv2_alltoall_thresholds_table[i] = mv2_alltoall_thresholds_table[i - 1] + mv2_size_alltoall_tuning_table[i - 1];
326     std::copy_n(table_ptrs[i], mv2_size_alltoall_tuning_table[i], mv2_alltoall_thresholds_table[i]);
327   }
328   delete[] table_ptrs;
329 }
330
331 /************ Allgather variables and initializers                        */
332
333 struct mv2_allgather_tuning_element {
334   int min;
335   int max;
336   int (*MV2_pt_Allgatherction)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
337                                MPI_Datatype recvtype, MPI_Comm comm_ptr);
338 };
339
340 struct mv2_allgather_tuning_table {
341   int numproc;
342   int two_level[MV2_MAX_NB_THRESHOLDS];
343   int size_inter_table;
344   mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
345 };
346
347 int (*MV2_Allgatherction)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
348                           MPI_Datatype recvtype, MPI_Comm comm);
349
350 int* mv2_allgather_table_ppn_conf                           = NULL;
351 int mv2_allgather_num_ppn_conf                              = 1;
352 int* mv2_size_allgather_tuning_table                        = NULL;
353 mv2_allgather_tuning_table** mv2_allgather_thresholds_table = NULL;
354
355 static int MPIR_Allgather_RD_Allgather_Comm_MV2(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
356                                                 int recvcount, MPI_Datatype recvtype, MPI_Comm comm_ptr)
357 {
358   return 0;
359 }
360
/* MVAPICH2 algorithm names mapped onto the SimGrid allgather implementations. */
#define MPIR_Allgather_Bruck_MV2 simgrid::smpi::Coll_allgather_bruck::allgather
#define MPIR_Allgather_RD_MV2 simgrid::smpi::Coll_allgather_rdb::allgather
#define MPIR_Allgather_Ring_MV2 simgrid::smpi::Coll_allgather_ring::allgather
#define MPIR_2lvl_Allgather_MV2 simgrid::smpi::Coll_allgather_mvapich2_smp::allgather
365
366 static void init_mv2_allgather_tables_stampede()
367 {
368   int agg_table_sum = 0;
369
370   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
371     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
372   mv2_allgather_tuning_table** table_ptrs            = NULL;
373   mv2_allgather_num_ppn_conf                         = 3;
374   mv2_allgather_thresholds_table                     = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf];
375   table_ptrs                                         = new mv2_allgather_tuning_table*[mv2_allgather_num_ppn_conf];
376   mv2_size_allgather_tuning_table                    = new int[mv2_allgather_num_ppn_conf];
377   mv2_allgather_table_ppn_conf                       = new int[mv2_allgather_num_ppn_conf];
378   mv2_allgather_table_ppn_conf[0]    = 1;
379   mv2_size_allgather_tuning_table[0] = 6;
380   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
381       {
382           2,
383           {0},
384           1,
385           {
386               {0, -1, &MPIR_Allgather_Ring_MV2},
387           },
388       },
389       {
390           4,
391           {0, 0},
392           2,
393           {
394               {0, 262144, &MPIR_Allgather_RD_MV2}, {262144, -1, &MPIR_Allgather_Ring_MV2},
395           },
396       },
397       {
398           8,
399           {0, 0},
400           2,
401           {
402               {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
403           },
404       },
405       {
406           16,
407           {0, 0},
408           2,
409           {
410               {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
411           },
412       },
413       {
414           32,
415           {0, 0},
416           2,
417           {
418               {0, 65536, &MPIR_Allgather_RD_MV2}, {65536, -1, &MPIR_Allgather_Ring_MV2},
419           },
420       },
421       {
422           64,
423           {0, 0},
424           2,
425           {
426               {0, 32768, &MPIR_Allgather_RD_MV2}, {32768, -1, &MPIR_Allgather_Ring_MV2},
427           },
428       },
429   };
430   table_ptrs[0]                                                        = mv2_tmp_allgather_thresholds_table_1ppn;
431   mv2_allgather_table_ppn_conf[1]                                      = 2;
432   mv2_size_allgather_tuning_table[1]                                   = 6;
433   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
434       {
435           4,
436           {0, 0},
437           2,
438           {
439               {0, 524288, &MPIR_Allgather_RD_MV2}, {524288, -1, &MPIR_Allgather_Ring_MV2},
440           },
441       },
442       {
443           8,
444           {0, 1, 0},
445           2,
446           {
447               {0, 32768, &MPIR_Allgather_RD_MV2},
448               {32768, 524288, &MPIR_Allgather_Ring_MV2},
449               {524288, -1, &MPIR_Allgather_Ring_MV2},
450           },
451       },
452       {
453           16,
454           {0, 1, 0},
455           2,
456           {
457               {0, 16384, &MPIR_Allgather_RD_MV2},
458               {16384, 524288, &MPIR_Allgather_Ring_MV2},
459               {524288, -1, &MPIR_Allgather_Ring_MV2},
460           },
461       },
462       {
463           32,
464           {1, 1, 0},
465           2,
466           {
467               {0, 65536, &MPIR_Allgather_RD_MV2},
468               {65536, 524288, &MPIR_Allgather_Ring_MV2},
469               {524288, -1, &MPIR_Allgather_Ring_MV2},
470           },
471       },
472       {
473           64,
474           {1, 1, 0},
475           2,
476           {
477               {0, 32768, &MPIR_Allgather_RD_MV2},
478               {32768, 524288, &MPIR_Allgather_Ring_MV2},
479               {524288, -1, &MPIR_Allgather_Ring_MV2},
480           },
481       },
482       {
483           128,
484           {1, 1, 0},
485           2,
486           {
487               {0, 65536, &MPIR_Allgather_RD_MV2},
488               {65536, 524288, &MPIR_Allgather_Ring_MV2},
489               {524288, -1, &MPIR_Allgather_Ring_MV2},
490           },
491       },
492   };
493   table_ptrs[1]                                                         = mv2_tmp_allgather_thresholds_table_2ppn;
494   mv2_allgather_table_ppn_conf[2]                                       = 16;
495   mv2_size_allgather_tuning_table[2]                                    = 6;
496   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
497       {
498           16,
499           {0, 0},
500           2,
501           {
502               {0, 1024, &MPIR_Allgather_RD_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
503           },
504       },
505       {
506           32,
507           {0, 0},
508           2,
509           {
510               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
511           },
512       },
513       {
514           64,
515           {0, 0},
516           2,
517           {
518               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
519           },
520       },
521       {
522           128,
523           {0, 0},
524           2,
525           {
526               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
527           },
528       },
529       {
530           256,
531           {0, 0},
532           2,
533           {
534               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
535           },
536       },
537       {
538           512,
539           {0, 0},
540           2,
541           {
542               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
543           },
544       },
545
546   };
547   table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
548   agg_table_sum = 0;
549   for (int i = 0; i < mv2_allgather_num_ppn_conf; i++) {
550     agg_table_sum += mv2_size_allgather_tuning_table[i];
551   }
552   mv2_allgather_thresholds_table[0] = new mv2_allgather_tuning_table[agg_table_sum];
553   std::copy_n(table_ptrs[0], mv2_size_allgather_tuning_table[0], mv2_allgather_thresholds_table[0]);
554   for (int i = 1; i < mv2_allgather_num_ppn_conf; i++) {
555     mv2_allgather_thresholds_table[i] = mv2_allgather_thresholds_table[i - 1] + mv2_size_allgather_tuning_table[i - 1];
556     std::copy_n(table_ptrs[i], mv2_size_allgather_tuning_table[i], mv2_allgather_thresholds_table[i]);
557   }
558   delete[] table_ptrs;
559 }
560
561 /************ Gather variables and initializers                        */
562
563 struct mv2_gather_tuning_element {
564   int min;
565   int max;
566   int (*MV2_pt_Gather_function)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
567                                 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
568 };
569
570 struct mv2_gather_tuning_table {
571   int numproc;
572   int size_inter_table;
573   mv2_gather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
574   int size_intra_table;
575   mv2_gather_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
576 };
577
578 int mv2_size_gather_tuning_table                     = 7;
579 mv2_gather_tuning_table* mv2_gather_thresholds_table = NULL;
580
581 typedef int (*MV2_Gather_function_ptr)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
582                                        MPI_Datatype recvtype, int root, MPI_Comm comm);
583
584 MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL;
585 MV2_Gather_function_ptr MV2_Gather_intra_node_function   = NULL;
586
587 #define MPIR_Gather_MV2_Direct simgrid::smpi::Coll_gather_ompi_basic_linear::gather
588 #define MPIR_Gather_MV2_two_level_Direct simgrid::smpi::Coll_gather_mvapich2_two_level::gather
589 #define MPIR_Gather_intra simgrid::smpi::Coll_gather_mpich::gather
590
591 static void init_mv2_gather_tables_stampede()
592 {
593
594   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
595     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
596   mv2_size_gather_tuning_table                       = 7;
597   mv2_gather_thresholds_table                               = new mv2_gather_tuning_table[mv2_size_gather_tuning_table];
598   mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[] = {
599       {16,
600        2,
601        {{0, 524288, &MPIR_Gather_MV2_Direct}, {524288, -1, &MPIR_Gather_intra}},
602        1,
603        {{0, -1, &MPIR_Gather_MV2_Direct}}},
604       {32,
605        3,
606        {{0, 16384, &MPIR_Gather_MV2_Direct},
607         {16384, 131072, &MPIR_Gather_intra},
608         {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
609        1,
610        {{0, -1, &MPIR_Gather_intra}}},
611       {64,
612        3,
613        {{0, 256, &MPIR_Gather_MV2_two_level_Direct},
614         {256, 16384, &MPIR_Gather_MV2_Direct},
615         {256, -1, &MPIR_Gather_MV2_two_level_Direct}},
616        1,
617        {{0, -1, &MPIR_Gather_intra}}},
618       {128,
619        3,
620        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
621         {512, 16384, &MPIR_Gather_MV2_Direct},
622         {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
623        1,
624        {{0, -1, &MPIR_Gather_intra}}},
625       {256,
626        3,
627        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
628         {512, 16384, &MPIR_Gather_MV2_Direct},
629         {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
630        1,
631        {{0, -1, &MPIR_Gather_intra}}},
632       {512,
633        3,
634        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
635         {512, 16384, &MPIR_Gather_MV2_Direct},
636         {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
637        1,
638        {{0, -1, &MPIR_Gather_intra}}},
639       {1024,
640        3,
641        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
642         {512, 16384, &MPIR_Gather_MV2_Direct},
643         {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
644        1,
645        {{0, -1, &MPIR_Gather_intra}}},
646   };
647
648   std::copy_n(mv2_tmp_gather_thresholds_table, mv2_size_gather_tuning_table, mv2_gather_thresholds_table);
649 }
650
651 /************ Allgatherv variables and initializers                        */
652
653 struct mv2_allgatherv_tuning_element {
654   int min;
655   int max;
656   int (*MV2_pt_Allgatherv_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts,
657                                     int* displs, MPI_Datatype recvtype, MPI_Comm commg);
658 };
659
660 struct mv2_allgatherv_tuning_table {
661   int numproc;
662   int size_inter_table;
663   mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
664 };
665
666 int (*MV2_Allgatherv_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts,
667                                int* displs, MPI_Datatype recvtype, MPI_Comm comm);
668
669 int mv2_size_allgatherv_tuning_table                         = 0;
670 mv2_allgatherv_tuning_table* mv2_allgatherv_thresholds_table = NULL;
671
672 #define MPIR_Allgatherv_Rec_Doubling_MV2 simgrid::smpi::Coll_allgatherv_mpich_rdb::allgatherv
673 #define MPIR_Allgatherv_Bruck_MV2 simgrid::smpi::Coll_allgatherv_ompi_bruck::allgatherv
674 #define MPIR_Allgatherv_Ring_MV2 simgrid::smpi::Coll_allgatherv_mpich_ring::allgatherv
675
676 static void init_mv2_allgatherv_tables_stampede()
677 {
678   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
679     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
680   mv2_size_allgatherv_tuning_table                   = 6;
681   mv2_allgatherv_thresholds_table = new mv2_allgatherv_tuning_table[mv2_size_allgatherv_tuning_table];
682   mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
683       {
684           16,
685           2,
686           {
687               {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
688           },
689       },
690       {
691           32,
692           2,
693           {
694               {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
695           },
696       },
697       {
698           64,
699           2,
700           {
701               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
702           },
703       },
704       {
705           128,
706           2,
707           {
708               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
709           },
710       },
711       {
712           256,
713           2,
714           {
715               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
716           },
717       },
718       {
719           512,
720           2,
721           {
722               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
723           },
724       },
725
726   };
727   std::copy_n(mv2_tmp_allgatherv_thresholds_table, mv2_size_allgatherv_tuning_table, mv2_allgatherv_thresholds_table);
728 }
729
730 /************ Allreduce variables and initializers                        */
731
732 struct mv2_allreduce_tuning_element {
733   int min;
734   int max;
735   int (*MV2_pt_Allreducection)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
736                                MPI_Comm comm);
737 };
738
739 struct mv2_allreduce_tuning_table {
740   int numproc;
741   int mcast_enabled;
742   int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS];
743   int size_inter_table;
744   mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
745   int size_intra_table;
746   mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
747 };
748
749 int (*MV2_Allreducection)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
750                           MPI_Comm comm) = NULL;
751
752 int (*MV2_Allreduce_intra_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
753                                     MPI_Comm comm) = NULL;
754
755 int mv2_size_allreduce_tuning_table                        = 0;
756 mv2_allreduce_tuning_table* mv2_allreduce_thresholds_table = NULL;
757
758 static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2(void* sendbuf, void* recvbuf, int count,
759                                                            MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
760 {
761   return 0;
762 }
763
764 static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
765                                                          MPI_Op op, MPI_Comm comm)
766 {
767   return 0;
768 }
769
770 static int MPIR_Allreduce_reduce_p2p_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
771                                          MPI_Comm comm)
772 {
773   simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
774   return MPI_SUCCESS;
775 }
776
777 static int MPIR_Allreduce_reduce_shmem_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
778                                            MPI_Comm comm)
779 {
780   simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
781   return MPI_SUCCESS;
782 }
783
/* MVAPICH2 algorithm names mapped onto the SimGrid allreduce implementations. */
#define MPIR_Allreduce_pt2pt_rd_MV2 simgrid::smpi::Coll_allreduce_rdb::allreduce
#define MPIR_Allreduce_pt2pt_rs_MV2 simgrid::smpi::Coll_allreduce_mvapich2_rs::allreduce
#define MPIR_Allreduce_two_level_MV2 simgrid::smpi::Coll_allreduce_mvapich2_two_level::allreduce
787
788 static void init_mv2_allreduce_tables_stampede()
789 {
790   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
791     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
792   mv2_size_allreduce_tuning_table                    = 8;
793   mv2_allreduce_thresholds_table                     = new mv2_allreduce_tuning_table[mv2_size_allreduce_tuning_table];
794   mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
795       {
796           16,
797           0,
798           {1, 0},
799           2,
800           {
801               {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2}, {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
802           },
803           2,
804           {
805               {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
806           },
807       },
808       {
809           32,
810           0,
811           {1, 1, 0},
812           3,
813           {
814               {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
815               {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
816               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
817           },
818           2,
819           {
820               {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
821           },
822       },
823       {
824           64,
825           0,
826           {1, 1, 0},
827           3,
828           {
829               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
830               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
831               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
832           },
833           2,
834           {
835               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
836           },
837       },
838       {
839           128,
840           0,
841           {1, 1, 0},
842           3,
843           {
844               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
845               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
846               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
847           },
848           2,
849           {
850               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
851           },
852       },
853       {
854           256,
855           0,
856           {1, 1, 0},
857           3,
858           {
859               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
860               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
861               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
862           },
863           2,
864           {
865               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
866           },
867       },
868       {
869           512,
870           0,
871           {1, 1, 0},
872           3,
873           {
874               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
875               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
876               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
877           },
878           2,
879           {
880               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
881           },
882       },
883       {
884           1024,
885           0,
886           {1, 1, 1, 0},
887           4,
888           {
889               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
890               {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
891               {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
892               {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
893           },
894           2,
895           {
896               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
897           },
898       },
899       {
900           2048,
901           0,
902           {1, 1, 1, 0},
903           4,
904           {
905               {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
906               {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
907               {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
908               {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
909               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
910           },
911           2,
912           {
913               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
914           },
915       },
916
917   };
918   std::copy_n(mv2_tmp_allreduce_thresholds_table, mv2_size_allreduce_tuning_table, mv2_allreduce_thresholds_table);
919 }
920
921 struct mv2_bcast_tuning_element {
922   int min;
923   int max;
924   int (*MV2_pt_Bcast_function)(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr);
925   int zcpy_pipelined_knomial_factor;
926 };
927
928 struct mv2_bcast_tuning_table {
929   int numproc;
930   int bcast_segment_size;
931   int intra_node_knomial_factor;
932   int inter_node_knomial_factor;
933   int is_two_level_bcast[MV2_MAX_NB_THRESHOLDS];
934   int size_inter_table;
935   mv2_bcast_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
936   int size_intra_table;
937   mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
938 };
939
940 int mv2_size_bcast_tuning_table                    = 0;
941 mv2_bcast_tuning_table* mv2_bcast_thresholds_table = NULL;
942
943 int (*MV2_Bcast_function)(void* buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr) = NULL;
944
945 int (*MV2_Bcast_intra_node_function)(void* buffer, int count, MPI_Datatype datatype, int root,
946                                      MPI_Comm comm_ptr) = NULL;
947
/* Broadcast tunables with their default values. The Stampede table rows in this file carry the same
 * segment size (8192) and knomial factors (4). */
int zcpy_knomial_factor               = 2;
int mv2_pipelined_zcpy_knomial_factor = -1;
int bcast_segment_size                = 8192;
int mv2_inter_node_knomial_factor     = 4;
int mv2_intra_node_knomial_factor     = 4;

/* Broadcast size constants.
 * mv2_bcast_large_msg is parenthesized so that it expands as a single value in any expression:
 * without the parentheses `x / mv2_bcast_large_msg` would expand to `x / 512 * 1024`. */
#define mv2_bcast_two_level_system_size 64
#define mv2_bcast_short_msg 16384
#define mv2_bcast_large_msg (512 * 1024)

#define INTRA_NODE_ROOT 0
958
/* MVAPICH2 broadcast routine names mapped onto the SimGrid collectives that stand in for them.
 * Several MVAPICH2 names share one SimGrid implementation. */

/* Routines served by the mpich broadcast. */
#define MPIR_Pipelined_Bcast_Zcpy_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
#define MPIR_Pipelined_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
#define MPIR_Shmem_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast

/* Binomial tree. */
#define MPIR_Bcast_binomial_MV2 simgrid::smpi::Coll_bcast_binomial_tree::bcast

/* Scatter followed by allgather (LR and rdb variants). */
#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
#define MPIR_Bcast_scatter_ring_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
#define MPIR_Bcast_scatter_doubling_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_rdb_allgather::bcast

/* mvapich2-specific inter-node / intra-node helpers. */
#define MPIR_Bcast_tune_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Bcast_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Knomial_Bcast_intra_node_MV2 simgrid::smpi::Coll_bcast_mvapich2_knomial_intra_node::bcast
#define MPIR_Bcast_intra_MV2 simgrid::smpi::Coll_bcast_mvapich2_intra_node::bcast
970
971 static void init_mv2_bcast_tables_stampede()
972 {
973   // Stampede,
974   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
975     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
976   mv2_size_bcast_tuning_table                        = 8;
977   mv2_bcast_thresholds_table                         = new mv2_bcast_tuning_table[mv2_size_bcast_tuning_table];
978
979   mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[] = {
980       {16,
981        8192,
982        4,
983        4,
984        {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
985        11,
986        {{0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
987         {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
988         {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
989         {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
990         {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
991         {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
992         {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
993         {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
994         {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
995         {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
996         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
997        11,
998        {{0, 8, &MPIR_Shmem_Bcast_MV2, 2},
999         {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
1000         {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
1001         {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
1002         {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
1003         {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
1004         {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1005         {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
1006         {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1007         {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1008         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1009       {32,
1010        8192,
1011        4,
1012        4,
1013        {1, 1, 1, 1, 1, 1, 1, 1},
1014        8,
1015        {{0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1016         {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1017         {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1018         {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1019         {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1020         {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1021         {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1022         {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}},
1023        8,
1024        {{0, 128, &MPIR_Shmem_Bcast_MV2, 2},
1025         {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
1026         {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1027         {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
1028         {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1029         {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
1030         {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1031         {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}}},
1032       {64,
1033        8192,
1034        4,
1035        4,
1036        {1, 1, 1, 1, 1, 1, 1, 1, 1},
1037        9,
1038        {{0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1039         {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1040         {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1041         {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1042         {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1043         {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1044         {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1045         {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1046         {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}},
1047        9,
1048        {{0, 2, &MPIR_Shmem_Bcast_MV2, 4},
1049         {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
1050         {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
1051         {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
1052         {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
1053         {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
1054         {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
1055         {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
1056         {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}}},
1057       {128,
1058        8192,
1059        4,
1060        4,
1061        {1, 1, 1, 0},
1062        4,
1063        {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1064         {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1065         {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1066         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
1067        4,
1068        {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1069         {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1070         {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1071         {524288, -1, NULL, -1}}},
1072       {256,
1073        8192,
1074        4,
1075        4,
1076        {1, 1, 1, 1, 1},
1077        5,
1078        {{0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1079         {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1080         {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1081         {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1082         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1083        5,
1084        {{0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1085         {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1086         {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1087         {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1088         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1089       {512,
1090        8192,
1091        4,
1092        4,
1093        {1, 1, 1, 1, 1},
1094        5,
1095        {{0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1096         {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1097         {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1098         {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
1099         {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1100        5,
1101        {{0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1102         {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1103         {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1104         {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1105         {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1106       {1024,
1107        8192,
1108        4,
1109        4,
1110        {1, 1, 1, 1, 1},
1111        5,
1112        {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1113         {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1114         {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1115         {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1116         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1117        5,
1118        {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1119         {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1120         {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1121         {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1122         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1123       {2048,
1124        8192,
1125        4,
1126        4,
1127        {1, 1, 1, 1, 1, 1, 1},
1128        7,
1129        {{0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1130         {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1131         {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1132         {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1133         {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1134         {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1135         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1136        7,
1137        {{0, 16, &MPIR_Shmem_Bcast_MV2, 8},
1138         {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
1139         {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1140         {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1141         {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1142         {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1143         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}}};
1144
1145   std::copy_n(mv2_tmp_bcast_thresholds_table, mv2_size_bcast_tuning_table, mv2_bcast_thresholds_table);
1146 }
1147
1148 /************ Reduce variables and initializers                        */
1149
1150 struct mv2_reduce_tuning_element {
1151   int min;
1152   int max;
1153   int (*MV2_pt_Reduce_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1154                                 MPI_Comm comm_ptr);
1155 };
1156
1157 struct mv2_reduce_tuning_table {
1158   int numproc;
1159   int inter_k_degree;
1160   int intra_k_degree;
1161   int is_two_level_reduce[MV2_MAX_NB_THRESHOLDS];
1162   int size_inter_table;
1163   mv2_reduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1164   int size_intra_table;
1165   mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1166 };
1167
1168 int mv2_size_reduce_tuning_table                     = 0;
1169 mv2_reduce_tuning_table* mv2_reduce_thresholds_table = NULL;
1170
1171 int mv2_reduce_intra_knomial_factor = -1;
1172 int mv2_reduce_inter_knomial_factor = -1;
1173
1174 int (*MV2_Reduce_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1175                            MPI_Comm comm_ptr) = NULL;
1176
1177 int (*MV2_Reduce_intra_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1178                                  MPI_Comm comm_ptr) = NULL;
1179
/* MVAPICH2 reduce routine names mapped onto the SimGrid collectives that stand in for them. */

/* Both knomial wrappers resolve to the same mvapich2 knomial reduce. */
#define MPIR_Reduce_inter_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
#define MPIR_Reduce_intra_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce

/* Generic algorithms borrowed from other SimGrid collectives. */
#define MPIR_Reduce_binomial_MV2 simgrid::smpi::Coll_reduce_binomial::reduce
#define MPIR_Reduce_redscat_gather_MV2 simgrid::smpi::Coll_reduce_scatter_gather::reduce
#define MPIR_Reduce_shmem_MV2 simgrid::smpi::Coll_reduce_ompi_basic_linear::reduce

/* mvapich2 two-level reduce. */
#define MPIR_Reduce_two_level_helper_MV2 simgrid::smpi::Coll_reduce_mvapich2_two_level::reduce
1186
1187 static void init_mv2_reduce_tables_stampede()
1188 {
1189   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1190     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
1191   /*Stampede*/
1192   mv2_size_reduce_tuning_table = 8;
1193   mv2_reduce_thresholds_table                               = new mv2_reduce_tuning_table[mv2_size_reduce_tuning_table];
1194   mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
1195       {
1196           16,
1197           4,
1198           4,
1199           {1, 0, 0},
1200           3,
1201           {
1202               {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1203               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1204               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1205           },
1206           2,
1207           {
1208               {0, 65536, &MPIR_Reduce_shmem_MV2}, {65536, -1, &MPIR_Reduce_binomial_MV2},
1209           },
1210       },
1211       {
1212           32,
1213           4,
1214           4,
1215           {1, 1, 1, 1, 0, 0, 0},
1216           7,
1217           {
1218               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1219               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1220               {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1221               {32768, 65536, &MPIR_Reduce_binomial_MV2},
1222               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1223               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1224               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1225           },
1226           6,
1227           {
1228               {0, 8192, &MPIR_Reduce_shmem_MV2},
1229               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1230               {16384, 32768, &MPIR_Reduce_shmem_MV2},
1231               {32768, 65536, &MPIR_Reduce_shmem_MV2},
1232               {65536, 262144, &MPIR_Reduce_shmem_MV2},
1233               {262144, -1, &MPIR_Reduce_binomial_MV2},
1234           },
1235       },
1236       {
1237           64,
1238           4,
1239           4,
1240           {1, 1, 1, 1, 0},
1241           5,
1242           {
1243               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1244               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1245               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1246               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1247               {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
1248           },
1249           5,
1250           {
1251               {0, 8192, &MPIR_Reduce_shmem_MV2},
1252               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1253               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1254               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1255               {262144, -1, &MPIR_Reduce_binomial_MV2},
1256           },
1257       },
1258       {
1259           128,
1260           4,
1261           4,
1262           {1, 0, 1, 0, 1, 0},
1263           6,
1264           {
1265               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1266               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1267               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1268               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1269               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1270               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1271           },
1272           5,
1273           {
1274               {0, 8192, &MPIR_Reduce_shmem_MV2},
1275               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1276               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1277               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1278               {262144, -1, &MPIR_Reduce_binomial_MV2},
1279           },
1280       },
1281       {
1282           256,
1283           4,
1284           4,
1285           {1, 1, 1, 0, 1, 1, 0},
1286           7,
1287           {
1288               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1289               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1290               {16384, 32768, &MPIR_Reduce_binomial_MV2},
1291               {32768, 65536, &MPIR_Reduce_binomial_MV2},
1292               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1293               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1294               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1295           },
1296           6,
1297           {
1298               {0, 8192, &MPIR_Reduce_shmem_MV2},
1299               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1300               {16384, 32768, &MPIR_Reduce_shmem_MV2},
1301               {32768, 65536, &MPIR_Reduce_shmem_MV2},
1302               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1303               {262144, -1, &MPIR_Reduce_binomial_MV2},
1304           },
1305       },
1306       {
1307           512,
1308           4,
1309           4,
1310           {1, 0, 1, 1, 1, 0},
1311           6,
1312           {
1313               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1314               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1315               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1316               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1317               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1318               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1319           },
1320           5,
1321           {
1322               {0, 8192, &MPIR_Reduce_shmem_MV2},
1323               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1324               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1325               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1326               {262144, -1, &MPIR_Reduce_binomial_MV2},
1327           },
1328       },
1329       {
1330           1024,
1331           4,
1332           4,
1333           {1, 0, 1, 1, 1},
1334           5,
1335           {
1336               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1337               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1338               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1339               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1340               {262144, -1, &MPIR_Reduce_binomial_MV2},
1341           },
1342           5,
1343           {
1344               {0, 8192, &MPIR_Reduce_shmem_MV2},
1345               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1346               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1347               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1348               {262144, -1, &MPIR_Reduce_binomial_MV2},
1349           },
1350       },
1351       {
1352           2048,
1353           4,
1354           4,
1355           {1, 0, 1, 1, 1, 1},
1356           6,
1357           {
1358               {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1359               {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1360               {4096, 16384, &MPIR_Reduce_binomial_MV2},
1361               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1362               {65536, 131072, &MPIR_Reduce_binomial_MV2},
1363               {131072, -1, &MPIR_Reduce_binomial_MV2},
1364           },
1365           6,
1366           {
1367               {0, 2048, &MPIR_Reduce_shmem_MV2},
1368               {2048, 4096, &MPIR_Reduce_shmem_MV2},
1369               {4096, 16384, &MPIR_Reduce_shmem_MV2},
1370               {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1371               {65536, 131072, &MPIR_Reduce_binomial_MV2},
1372               {131072, -1, &MPIR_Reduce_shmem_MV2},
1373           },
1374       },
1375
1376   };
1377   std::copy_n(mv2_tmp_reduce_thresholds_table, mv2_size_reduce_tuning_table, mv2_reduce_thresholds_table);
1378 }
1379
1380 /************ Reduce scatter variables and initializers                        */
1381
1382 struct mv2_red_scat_tuning_element {
1383   int min;
1384   int max;
1385   int (*MV2_pt_Red_scat_function)(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1386                                   MPI_Comm comm_ptr);
1387 };
1388
1389 struct mv2_red_scat_tuning_table {
1390   int numproc;
1391   int size_inter_table;
1392   mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1393 };
1394
1395 int mv2_size_red_scat_tuning_table                       = 0;
1396 mv2_red_scat_tuning_table* mv2_red_scat_thresholds_table = NULL;
1397
1398 int (*MV2_Red_scat_function)(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1399                              MPI_Comm comm_ptr);
1400
1401 static int MPIR_Reduce_Scatter_Basic_MV2(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1402                                          MPI_Comm comm)
1403 {
1404   simgrid::smpi::Coll_reduce_scatter_default::reduce_scatter(sendbuf, recvbuf, recvcnts, datatype, op, comm);
1405   return MPI_SUCCESS;
1406 }
/* MVAPICH2 reduce-scatter routine names mapped onto the SimGrid collectives that stand in for them. */
#define MPIR_Reduce_scatter_Pair_Wise_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_pair::reduce_scatter
#define MPIR_Reduce_scatter_non_comm_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_noncomm::reduce_scatter
#define MPIR_Reduce_scatter_Rec_Halving_MV2 \
  simgrid::smpi::Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter
1411
1412 static void init_mv2_reduce_scatter_tables_stampede()
1413 {
1414   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1415     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
1416   mv2_size_red_scat_tuning_table                     = 6;
1417   mv2_red_scat_thresholds_table                      = new mv2_red_scat_tuning_table[mv2_size_red_scat_tuning_table];
1418   mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
1419       {
1420           16,
1421           3,
1422           {
1423               {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1424               {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1425               {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1426           },
1427       },
1428       {
1429           32,
1430           3,
1431           {
1432               {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1433               {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1434               {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1435           },
1436       },
1437       {
1438           64,
1439           3,
1440           {
1441               {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
1442               {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1443               {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1444           },
1445       },
1446       {
1447           128,
1448           2,
1449           {
1450               {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1451           },
1452       },
1453       {
1454           256,
1455           2,
1456           {
1457               {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1458           },
1459       },
1460       {
1461           512,
1462           2,
1463           {
1464               {0, 256, &MPIR_Reduce_Scatter_Basic_MV2}, {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1465           },
1466       },
1467
1468   };
1469   std::copy_n(mv2_tmp_red_scat_thresholds_table, mv2_size_red_scat_tuning_table, mv2_red_scat_thresholds_table);
1470 }
1471
1472 /************ Scatter variables and initializers                        */
1473
1474 struct mv2_scatter_tuning_element {
1475   int min;
1476   int max;
1477   int (*MV2_pt_Scatter_function)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1478                                  MPI_Datatype recvtype, int root, MPI_Comm comm);
1479 };
1480
1481 struct mv2_scatter_tuning_table {
1482   int numproc;
1483   int size_inter_table;
1484   mv2_scatter_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1485   int size_intra_table;
1486   mv2_scatter_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1487 };
1488
1489 int* mv2_scatter_table_ppn_conf                         = NULL;
1490 int mv2_scatter_num_ppn_conf                            = 1;
1491 int* mv2_size_scatter_tuning_table                      = NULL;
1492 mv2_scatter_tuning_table** mv2_scatter_thresholds_table = NULL;
1493
1494 int (*MV2_Scatter_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1495                             MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
1496
1497 int (*MV2_Scatter_intra_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1498                                   MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
1499 int MPIR_Scatter_mcst_wrap_MV2(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1500                                MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
1501
1502 int MPIR_Scatter_mcst_wrap_MV2(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1503                                MPI_Datatype recvtype, int root, MPI_Comm comm_ptr)
1504 {
1505   return 0;
1506 }
1507
/* MVAPICH2 scatter routine names mapped onto the SimGrid collectives that stand in for them. */

/* Single-level algorithms. */
#define MPIR_Scatter_MV2_Direct simgrid::smpi::Coll_scatter_ompi_basic_linear::scatter
#define MPIR_Scatter_MV2_Binomial simgrid::smpi::Coll_scatter_ompi_binomial::scatter

/* mvapich2 two-level algorithms. */
#define MPIR_Scatter_MV2_two_level_Direct simgrid::smpi::Coll_scatter_mvapich2_two_level_direct::scatter
#define MPIR_Scatter_MV2_two_level_Binomial simgrid::smpi::Coll_scatter_mvapich2_two_level_binomial::scatter
1512
1513 static void init_mv2_scatter_tables_stampede()
1514 {
1515   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1516     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
1517
1518   int agg_table_sum = 0;
1519   mv2_scatter_tuning_table** table_ptrs = NULL;
1520   mv2_scatter_num_ppn_conf              = 3;
1521   mv2_scatter_thresholds_table          = new mv2_scatter_tuning_table*[mv2_scatter_num_ppn_conf];
1522   table_ptrs                            = new mv2_scatter_tuning_table*[mv2_scatter_num_ppn_conf];
1523   mv2_size_scatter_tuning_table         = new int[mv2_scatter_num_ppn_conf];
1524   mv2_scatter_table_ppn_conf            = new int[mv2_scatter_num_ppn_conf];
1525   mv2_scatter_table_ppn_conf[0]    = 1;
1526   mv2_size_scatter_tuning_table[0] = 6;
1527   mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
1528       {
1529           2,
1530           1,
1531           {
1532               {0, -1, &MPIR_Scatter_MV2_Binomial},
1533           },
1534           1,
1535           {
1536               {0, -1, &MPIR_Scatter_MV2_Binomial},
1537           },
1538       },
1539
1540       {
1541           4,
1542           1,
1543           {
1544               {0, -1, &MPIR_Scatter_MV2_Direct},
1545           },
1546           1,
1547           {
1548               {0, -1, &MPIR_Scatter_MV2_Direct},
1549           },
1550       },
1551
1552       {
1553           8,
1554           1,
1555           {
1556               {0, -1, &MPIR_Scatter_MV2_Direct},
1557           },
1558           1,
1559           {
1560               {0, -1, &MPIR_Scatter_MV2_Direct},
1561           },
1562       },
1563
1564       {
1565           16,
1566           1,
1567           {
1568               {0, -1, &MPIR_Scatter_MV2_Direct},
1569           },
1570           1,
1571           {
1572               {0, -1, &MPIR_Scatter_MV2_Direct},
1573           },
1574       },
1575
1576       {
1577           32,
1578           1,
1579           {
1580               {0, -1, &MPIR_Scatter_MV2_Direct},
1581           },
1582           1,
1583           {
1584               {0, -1, &MPIR_Scatter_MV2_Direct},
1585           },
1586       },
1587
1588       {
1589           64,
1590           2,
1591           {
1592               {0, 32, &MPIR_Scatter_MV2_Binomial}, {32, -1, &MPIR_Scatter_MV2_Direct},
1593           },
1594           1,
1595           {
1596               {0, -1, &MPIR_Scatter_MV2_Binomial},
1597           },
1598       },
1599   };
1600   table_ptrs[0]                                                    = mv2_tmp_scatter_thresholds_table_1ppn;
1601   mv2_scatter_table_ppn_conf[1]                                    = 2;
1602   mv2_size_scatter_tuning_table[1]                                 = 6;
1603   mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
1604       {
1605           4,
1606           2,
1607           {
1608               {0, 4096, &MPIR_Scatter_MV2_Binomial}, {4096, -1, &MPIR_Scatter_MV2_Direct},
1609           },
1610           1,
1611           {
1612               {0, -1, &MPIR_Scatter_MV2_Direct},
1613           },
1614       },
1615
1616       {
1617           8,
1618           2,
1619           {
1620               {0, 512, &MPIR_Scatter_MV2_two_level_Direct}, {512, -1, &MPIR_Scatter_MV2_Direct},
1621           },
1622           1,
1623           {
1624               {0, -1, &MPIR_Scatter_MV2_Binomial},
1625           },
1626       },
1627
1628       {
1629           16,
1630           2,
1631           {
1632               {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1633           },
1634           1,
1635           {
1636               {0, -1, &MPIR_Scatter_MV2_Binomial},
1637           },
1638       },
1639
1640       {
1641           32,
1642           2,
1643           {
1644               {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1645           },
1646           1,
1647           {
1648               {0, -1, &MPIR_Scatter_MV2_Binomial},
1649           },
1650       },
1651
1652       {
1653           64,
1654           2,
1655           {
1656               {0, 8192, &MPIR_Scatter_MV2_two_level_Direct}, {8192, -1, &MPIR_Scatter_MV2_Direct},
1657           },
1658           1,
1659           {
1660               {0, -1, &MPIR_Scatter_MV2_Binomial},
1661           },
1662       },
1663
1664       {
1665           128,
1666           4,
1667           {
1668               {0, 16, &MPIR_Scatter_MV2_Binomial},
1669               {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1670               {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1671               {16384, -1, &MPIR_Scatter_MV2_Direct},
1672           },
1673           1,
1674           {
1675               {0, 128, &MPIR_Scatter_MV2_Direct}, {128, -1, &MPIR_Scatter_MV2_Binomial},
1676           },
1677       },
1678   };
1679   table_ptrs[1]                                                     = mv2_tmp_scatter_thresholds_table_2ppn;
1680   mv2_scatter_table_ppn_conf[2]                                     = 16;
1681   mv2_size_scatter_tuning_table[2]                                  = 8;
1682   mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
1683       {
1684           16,
1685           2,
1686           {
1687               {0, 256, &MPIR_Scatter_MV2_Binomial}, {256, -1, &MPIR_Scatter_MV2_Direct},
1688           },
1689           1,
1690           {
1691               {0, -1, &MPIR_Scatter_MV2_Direct},
1692           },
1693       },
1694
1695       {
1696           32,
1697           2,
1698           {
1699               {0, 512, &MPIR_Scatter_MV2_Binomial}, {512, -1, &MPIR_Scatter_MV2_Direct},
1700           },
1701           1,
1702           {
1703               {0, -1, &MPIR_Scatter_MV2_Direct},
1704           },
1705       },
1706
1707       {
1708           64,
1709           2,
1710           {
1711               {0, 1024, &MPIR_Scatter_MV2_two_level_Direct}, {1024, -1, &MPIR_Scatter_MV2_Direct},
1712           },
1713           1,
1714           {
1715               {0, -1, &MPIR_Scatter_MV2_Direct},
1716           },
1717       },
1718
1719       {
1720           128,
1721           4,
1722           {
1723               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1724               {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1725               {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1726               {2048, -1, &MPIR_Scatter_MV2_Direct},
1727           },
1728           1,
1729           {
1730               {0, -1, &MPIR_Scatter_MV2_Direct},
1731           },
1732       },
1733
1734       {
1735           256,
1736           4,
1737           {
1738               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1739               {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1740               {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1741               {2048, -1, &MPIR_Scatter_MV2_Direct},
1742           },
1743           1,
1744           {
1745               {0, -1, &MPIR_Scatter_MV2_Direct},
1746           },
1747       },
1748
1749       {
1750           512,
1751           4,
1752           {
1753               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1754               {16, 16, &MPIR_Scatter_MV2_two_level_Direct},
1755               {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1756               {4096, -1, &MPIR_Scatter_MV2_Direct},
1757           },
1758           1,
1759           {
1760               {0, -1, &MPIR_Scatter_MV2_Binomial},
1761           },
1762       },
1763       {
1764           1024,
1765           5,
1766           {
1767               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1768               {0, 16, &MPIR_Scatter_MV2_Binomial},
1769               {16, 32, &MPIR_Scatter_MV2_Binomial},
1770               {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1771               {4096, -1, &MPIR_Scatter_MV2_Direct},
1772           },
1773           1,
1774           {
1775               {0, -1, &MPIR_Scatter_MV2_Binomial},
1776           },
1777       },
1778       {
1779           2048,
1780           7,
1781           {
1782               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1783               {0, 16, &MPIR_Scatter_MV2_two_level_Binomial},
1784               {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1785               {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1786               {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1787               {16384, 65536, &MPIR_Scatter_MV2_Direct},
1788               {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
1789           },
1790           6,
1791           {
1792               {0, 16, &MPIR_Scatter_MV2_Binomial},
1793               {16, 128, &MPIR_Scatter_MV2_Binomial},
1794               {128, 1024, &MPIR_Scatter_MV2_Binomial},
1795               {1024, 16384, &MPIR_Scatter_MV2_Direct},
1796               {16384, 65536, &MPIR_Scatter_MV2_Direct},
1797               {65536, -1, &MPIR_Scatter_MV2_Direct},
1798           },
1799       },
1800   };
1801   table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
1802   agg_table_sum = 0;
1803   for (int i = 0; i < mv2_scatter_num_ppn_conf; i++) {
1804     agg_table_sum += mv2_size_scatter_tuning_table[i];
1805   }
1806   mv2_scatter_thresholds_table[0] = new mv2_scatter_tuning_table[agg_table_sum];
1807   std::copy_n(table_ptrs[0], mv2_size_scatter_tuning_table[0], mv2_scatter_thresholds_table[0]);
1808   for (int i = 1; i < mv2_scatter_num_ppn_conf; i++) {
1809     mv2_scatter_thresholds_table[i] = mv2_scatter_thresholds_table[i - 1] + mv2_size_scatter_tuning_table[i - 1];
1810     std::copy_n(table_ptrs[i], mv2_size_scatter_tuning_table[i], mv2_scatter_thresholds_table[i]);
1811   }
1812   delete[] table_ptrs;
1813 }
1814
1815 #endif