Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/simgrid/simgrid
[simgrid.git] / src / smpi / colls / smpi_mvapich2_selector_stampede.hpp
1 /* selector for collective algorithms based on mvapich decision logic, with calibration from Stampede cluster at TACC*/
2 /* This is the tuning used by MVAPICH for Stampede platform based on (MV2_ARCH_INTEL_XEON_E5_2680_16,
3  * MV2_HCA_MLX_CX_FDR) */
4
5 /* Copyright (c) 2009-2017. The SimGrid Team. All rights reserved.          */
6
7 /* This program is free software; you can redistribute it and/or modify it
8  * under the terms of the license (GNU LGPL) which comes with this package. */
9
10 /************ Alltoall variables and initializers                        */
11
/* Capacity of every fixed-size threshold array embedded in the tuning-table structs of this header. */
12 #define MV2_MAX_NB_THRESHOLDS 32
13
/* Cleanup routine that each init_mv2_*_tables_stampede() function installs as
 * simgrid::smpi::Colls::smpi_coll_cleanup_callback when no callback is set yet.
 * Only declared here; the definition is not part of this header. */
14 XBT_PUBLIC(void) smpi_coll_cleanup_mvapich2(void);
15
/* One threshold row of an alltoall decision table: a [min, max] pair and the algorithm attached to it.
 * The initializers in this file write -1 as max where no upper bound is given.
 * NOTE(review): min/max are presumably message sizes in bytes -- confirm against the selector code. */
16 typedef struct {
17   int min;
18   int max;
19   int (*MV2_pt_Alltoall_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
20                                   MPI_Datatype recvtype, MPI_Comm comm_ptr);
21 } mv2_alltoall_tuning_element;
22
/* One row of the alltoall tuning data, keyed by numproc. Field order matters: the tables in
 * init_mv2_alltoall_tables_stampede() are written as positional aggregate initializers. */
23 typedef struct {
24   int numproc;
25   int size_table; /* matches the number of rows listed in algo_table in this file's initializers */
26   mv2_alltoall_tuning_element algo_table[MV2_MAX_NB_THRESHOLDS];
27   mv2_alltoall_tuning_element in_place_algo_table[MV2_MAX_NB_THRESHOLDS]; /* always given a single row in this file */
28 } mv2_alltoall_tuning_table;
29
/* Alltoall state shared with code outside this header.
 * NOTE(review): these are non-static definitions in a header; including it from more than one
 * translation unit would produce duplicate symbols -- confirm it is included exactly once. */

/* Function pointer slot for an alltoall implementation; nothing in this header assigns it. */
30 int (*MV2_Alltoall_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
31                              MPI_Datatype recvtype, MPI_Comm comm_ptr) = NULL;
32
33 /* Indicates number of processes per node */
34 int* mv2_alltoall_table_ppn_conf = NULL;
35 /* Indicates total number of configurations */
36 int mv2_alltoall_num_ppn_conf                             = 1;
/* Per-configuration row counts; allocated and filled by init_mv2_alltoall_tables_stampede(). */
37 int* mv2_size_alltoall_tuning_table                       = NULL;
/* Per-configuration pointers into one contiguous block of rows (see init_mv2_alltoall_tables_stampede()). */
38 mv2_alltoall_tuning_table** mv2_alltoall_thresholds_table = NULL;
39
/* MVAPICH2 algorithm names mapped onto the SimGrid collective implementations used in their place. */
40 #define MPIR_Alltoall_bruck_MV2 simgrid::smpi::Coll_alltoall_bruck::alltoall
41 #define MPIR_Alltoall_RD_MV2 simgrid::smpi::Coll_alltoall_rdb::alltoall
42 #define MPIR_Alltoall_Scatter_dest_MV2 simgrid::smpi::Coll_alltoall_mvapich2_scatter_dest::alltoall
43 #define MPIR_Alltoall_pairwise_MV2 simgrid::smpi::Coll_alltoall_pair::alltoall
44 #define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall
45
46 static void init_mv2_alltoall_tables_stampede()
47 {
48   int i;
49   int agg_table_sum                      = 0;
50   mv2_alltoall_tuning_table** table_ptrs = NULL;
51   mv2_alltoall_num_ppn_conf              = 3;
52   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
53     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
54   mv2_alltoall_thresholds_table                      = static_cast<mv2_alltoall_tuning_table**>(
55       xbt_malloc(sizeof(mv2_alltoall_tuning_table*) * mv2_alltoall_num_ppn_conf));
56   table_ptrs = static_cast<mv2_alltoall_tuning_table**>(
57       xbt_malloc(sizeof(mv2_alltoall_tuning_table*) * mv2_alltoall_num_ppn_conf));
58   mv2_size_alltoall_tuning_table    = static_cast<int*>(xbt_malloc(sizeof(int) * mv2_alltoall_num_ppn_conf));
59   mv2_alltoall_table_ppn_conf       = static_cast<int*>(xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int)));
60   mv2_alltoall_table_ppn_conf[0]    = 1;
61   mv2_size_alltoall_tuning_table[0] = 6;
62   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
63       {
64           2,
65           1,
66           {
67               {0, -1, &MPIR_Alltoall_pairwise_MV2},
68           },
69
70           {
71               {0, -1, &MPIR_Alltoall_inplace_MV2},
72           },
73       },
74
75       {
76           4,
77           2,
78           {
79               {0, 262144, &MPIR_Alltoall_Scatter_dest_MV2}, {262144, -1, &MPIR_Alltoall_pairwise_MV2},
80           },
81
82           {
83               {0, -1, &MPIR_Alltoall_inplace_MV2},
84           },
85       },
86
87       {
88           8,
89           2,
90           {
91               {0, 8, &MPIR_Alltoall_RD_MV2}, {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
92           },
93
94           {
95               {0, -1, &MPIR_Alltoall_inplace_MV2},
96           },
97       },
98
99       {
100           16,
101           3,
102           {
103               {0, 64, &MPIR_Alltoall_RD_MV2},
104               {64, 512, &MPIR_Alltoall_bruck_MV2},
105               {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
106           },
107
108           {
109               {0, -1, &MPIR_Alltoall_inplace_MV2},
110           },
111       },
112
113       {
114           32,
115           3,
116           {
117               {0, 32, &MPIR_Alltoall_RD_MV2},
118               {32, 2048, &MPIR_Alltoall_bruck_MV2},
119               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
120           },
121
122           {
123               {0, -1, &MPIR_Alltoall_inplace_MV2},
124           },
125       },
126
127       {
128           64,
129           3,
130           {
131               {0, 8, &MPIR_Alltoall_RD_MV2},
132               {8, 1024, &MPIR_Alltoall_bruck_MV2},
133               {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
134           },
135
136           {
137               {0, -1, &MPIR_Alltoall_inplace_MV2},
138           },
139       },
140   };
141   table_ptrs[0]                                                      = mv2_tmp_alltoall_thresholds_table_1ppn;
142   mv2_alltoall_table_ppn_conf[1]                                     = 2;
143   mv2_size_alltoall_tuning_table[1]                                  = 6;
144   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
145       {
146           4,
147           2,
148           {
149               {0, 32, &MPIR_Alltoall_RD_MV2}, {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
150           },
151
152           {
153               {0, -1, &MPIR_Alltoall_inplace_MV2},
154           },
155       },
156
157       {
158           8,
159           2,
160           {
161               {0, 64, &MPIR_Alltoall_RD_MV2}, {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
162           },
163
164           {
165               {0, -1, &MPIR_Alltoall_inplace_MV2},
166           },
167       },
168
169       {
170           16,
171           3,
172           {
173               {0, 64, &MPIR_Alltoall_RD_MV2},
174               {64, 2048, &MPIR_Alltoall_bruck_MV2},
175               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
176           },
177
178           {
179               {0, -1, &MPIR_Alltoall_inplace_MV2},
180           },
181       },
182
183       {
184           32,
185           3,
186           {
187               {0, 16, &MPIR_Alltoall_RD_MV2},
188               {16, 2048, &MPIR_Alltoall_bruck_MV2},
189               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
190           },
191
192           {
193               {0, -1, &MPIR_Alltoall_inplace_MV2},
194           },
195       },
196
197       {
198           64,
199           3,
200           {
201               {0, 8, &MPIR_Alltoall_RD_MV2},
202               {8, 1024, &MPIR_Alltoall_bruck_MV2},
203               {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
204           },
205
206           {
207               {0, -1, &MPIR_Alltoall_inplace_MV2},
208           },
209       },
210
211       {
212           128,
213           3,
214           {
215               {0, 4, &MPIR_Alltoall_RD_MV2},
216               {4, 2048, &MPIR_Alltoall_bruck_MV2},
217               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
218           },
219
220           {
221               {0, -1, &MPIR_Alltoall_inplace_MV2},
222           },
223       },
224   };
225   table_ptrs[1]                                                       = mv2_tmp_alltoall_thresholds_table_2ppn;
226   mv2_alltoall_table_ppn_conf[2]                                      = 16;
227   mv2_size_alltoall_tuning_table[2]                                   = 7;
228   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
229       {
230           16,
231           2,
232           {
233               {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
234           },
235
236           {
237               {32768, -1, &MPIR_Alltoall_inplace_MV2},
238           },
239       },
240
241       {
242           32,
243           2,
244           {
245               {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
246           },
247
248           {
249               {16384, -1, &MPIR_Alltoall_inplace_MV2},
250           },
251       },
252
253       {
254           64,
255           3,
256           {
257               {0, 2048, &MPIR_Alltoall_bruck_MV2},
258               {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
259               {16384, -1, &MPIR_Alltoall_pairwise_MV2},
260           },
261
262           {
263               {32768, 131072, &MPIR_Alltoall_inplace_MV2},
264           },
265       },
266
267       {
268           128,
269           2,
270           {
271               {0, 2048, &MPIR_Alltoall_bruck_MV2}, {2048, -1, &MPIR_Alltoall_pairwise_MV2},
272           },
273
274           {
275               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
276           },
277       },
278
279       {
280           256,
281           2,
282           {
283               {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
284           },
285
286           {
287               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
288           },
289       },
290
291       {
292           512,
293           2,
294           {
295               {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
296           },
297
298           {
299               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
300           },
301       },
302       {
303           1024,
304           2,
305           {
306               {0, 1024, &MPIR_Alltoall_bruck_MV2}, {1024, -1, &MPIR_Alltoall_pairwise_MV2},
307           },
308
309           {
310               {16384, 65536, &MPIR_Alltoall_inplace_MV2},
311           },
312       },
313
314   };
315   table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
316   agg_table_sum = 0;
317   for (i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
318     agg_table_sum += mv2_size_alltoall_tuning_table[i];
319   }
320   mv2_alltoall_thresholds_table[0] =
321       static_cast<mv2_alltoall_tuning_table*>(xbt_malloc(agg_table_sum * sizeof(mv2_alltoall_tuning_table)));
322   memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0],
323          (sizeof(mv2_alltoall_tuning_table) * mv2_size_alltoall_tuning_table[0]));
324   for (i = 1; i < mv2_alltoall_num_ppn_conf; i++) {
325     mv2_alltoall_thresholds_table[i] = mv2_alltoall_thresholds_table[i - 1] + mv2_size_alltoall_tuning_table[i - 1];
326     memcpy(mv2_alltoall_thresholds_table[i], table_ptrs[i],
327            (sizeof(mv2_alltoall_tuning_table) * mv2_size_alltoall_tuning_table[i]));
328   }
329   xbt_free(table_ptrs);
330 }
331
332 /************ Allgather variables and initializers                        */
333
/* One threshold row of an allgather decision table: a [min, max] pair and the algorithm attached to it.
 * The initializers in this file write -1 as max where no upper bound is given.
 * NOTE(review): min/max are presumably message sizes in bytes -- confirm against the selector code. */
334 typedef struct {
335   int min;
336   int max;
337   int (*MV2_pt_Allgatherction)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
338                                MPI_Datatype recvtype, MPI_Comm comm_ptr);
339 } mv2_allgather_tuning_element;
340
/* One row of the allgather tuning data, keyed by numproc. Field order matters: the tables in
 * init_mv2_allgather_tables_stampede() are written as positional aggregate initializers. */
341 typedef struct {
342   int numproc;
/* Per-threshold 0/1 flags. NOTE(review): presumably request the two-level algorithm for the
 * threshold with the same index -- confirm against the selector code. */
343   int two_level[MV2_MAX_NB_THRESHOLDS];
344   int size_inter_table;
345   mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
346 } mv2_allgather_tuning_table;
347
/* Allgather state shared with code outside this header.
 * NOTE(review): these are non-static definitions in a header; including it from more than one
 * translation unit would produce duplicate symbols -- confirm it is included exactly once. */

/* Function pointer slot for an allgather implementation; nothing in this header assigns it. */
348 int (*MV2_Allgatherction)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
349                           MPI_Datatype recvtype, MPI_Comm comm);
350
/* The four variables below are allocated/filled by init_mv2_allgather_tables_stampede():
 * ppn value of each configuration, number of configurations, row count of each configuration,
 * and per-configuration pointers into one contiguous block of rows. */
351 int* mv2_allgather_table_ppn_conf                           = NULL;
352 int mv2_allgather_num_ppn_conf                              = 1;
353 int* mv2_size_allgather_tuning_table                        = NULL;
354 mv2_allgather_tuning_table** mv2_allgather_thresholds_table = NULL;
355
/* Placeholder allgather entry: ignores all its arguments, performs no communication and returns 0.
 * Its address is stored in the 16ppn allgather table for the rows below 1024.
 * NOTE(review): presumably the selector recognises this pointer and substitutes a real algorithm;
 * if it were ever called directly the receive buffer would be left untouched -- confirm. */
356 static int MPIR_Allgather_RD_Allgather_Comm_MV2(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf,
357                                                 int recvcount, MPI_Datatype recvtype, MPI_Comm comm_ptr)
358 {
359   return 0;
360 }
361
/* MVAPICH2 allgather algorithm names mapped onto the SimGrid collective implementations used in their place. */
362 #define MPIR_Allgather_Bruck_MV2 simgrid::smpi::Coll_allgather_bruck::allgather
363 #define MPIR_Allgather_RD_MV2 simgrid::smpi::Coll_allgather_rdb::allgather
364 #define MPIR_Allgather_Ring_MV2 simgrid::smpi::Coll_allgather_ring::allgather
365 #define MPIR_2lvl_Allgather_MV2 simgrid::smpi::Coll_allgather_mvapich2_smp::allgather
366
367 static void init_mv2_allgather_tables_stampede()
368 {
369   int i;
370   int agg_table_sum = 0;
371
372   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
373     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
374   mv2_allgather_tuning_table** table_ptrs            = NULL;
375   mv2_allgather_num_ppn_conf                         = 3;
376   mv2_allgather_thresholds_table                     = static_cast<mv2_allgather_tuning_table**>(
377       xbt_malloc(sizeof(mv2_allgather_tuning_table*) * mv2_allgather_num_ppn_conf));
378   table_ptrs = static_cast<mv2_allgather_tuning_table**>(
379       xbt_malloc(sizeof(mv2_allgather_tuning_table*) * mv2_allgather_num_ppn_conf));
380   mv2_size_allgather_tuning_table    = static_cast<int*>(xbt_malloc(sizeof(int) * mv2_allgather_num_ppn_conf));
381   mv2_allgather_table_ppn_conf       = static_cast<int*>(xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int)));
382   mv2_allgather_table_ppn_conf[0]    = 1;
383   mv2_size_allgather_tuning_table[0] = 6;
384   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
385       {
386           2,
387           {0},
388           1,
389           {
390               {0, -1, &MPIR_Allgather_Ring_MV2},
391           },
392       },
393       {
394           4,
395           {0, 0},
396           2,
397           {
398               {0, 262144, &MPIR_Allgather_RD_MV2}, {262144, -1, &MPIR_Allgather_Ring_MV2},
399           },
400       },
401       {
402           8,
403           {0, 0},
404           2,
405           {
406               {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
407           },
408       },
409       {
410           16,
411           {0, 0},
412           2,
413           {
414               {0, 131072, &MPIR_Allgather_RD_MV2}, {131072, -1, &MPIR_Allgather_Ring_MV2},
415           },
416       },
417       {
418           32,
419           {0, 0},
420           2,
421           {
422               {0, 65536, &MPIR_Allgather_RD_MV2}, {65536, -1, &MPIR_Allgather_Ring_MV2},
423           },
424       },
425       {
426           64,
427           {0, 0},
428           2,
429           {
430               {0, 32768, &MPIR_Allgather_RD_MV2}, {32768, -1, &MPIR_Allgather_Ring_MV2},
431           },
432       },
433   };
434   table_ptrs[0]                                                        = mv2_tmp_allgather_thresholds_table_1ppn;
435   mv2_allgather_table_ppn_conf[1]                                      = 2;
436   mv2_size_allgather_tuning_table[1]                                   = 6;
437   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
438       {
439           4,
440           {0, 0},
441           2,
442           {
443               {0, 524288, &MPIR_Allgather_RD_MV2}, {524288, -1, &MPIR_Allgather_Ring_MV2},
444           },
445       },
446       {
447           8,
448           {0, 1, 0},
449           2,
450           {
451               {0, 32768, &MPIR_Allgather_RD_MV2},
452               {32768, 524288, &MPIR_Allgather_Ring_MV2},
453               {524288, -1, &MPIR_Allgather_Ring_MV2},
454           },
455       },
456       {
457           16,
458           {0, 1, 0},
459           2,
460           {
461               {0, 16384, &MPIR_Allgather_RD_MV2},
462               {16384, 524288, &MPIR_Allgather_Ring_MV2},
463               {524288, -1, &MPIR_Allgather_Ring_MV2},
464           },
465       },
466       {
467           32,
468           {1, 1, 0},
469           2,
470           {
471               {0, 65536, &MPIR_Allgather_RD_MV2},
472               {65536, 524288, &MPIR_Allgather_Ring_MV2},
473               {524288, -1, &MPIR_Allgather_Ring_MV2},
474           },
475       },
476       {
477           64,
478           {1, 1, 0},
479           2,
480           {
481               {0, 32768, &MPIR_Allgather_RD_MV2},
482               {32768, 524288, &MPIR_Allgather_Ring_MV2},
483               {524288, -1, &MPIR_Allgather_Ring_MV2},
484           },
485       },
486       {
487           128,
488           {1, 1, 0},
489           2,
490           {
491               {0, 65536, &MPIR_Allgather_RD_MV2},
492               {65536, 524288, &MPIR_Allgather_Ring_MV2},
493               {524288, -1, &MPIR_Allgather_Ring_MV2},
494           },
495       },
496   };
497   table_ptrs[1]                                                         = mv2_tmp_allgather_thresholds_table_2ppn;
498   mv2_allgather_table_ppn_conf[2]                                       = 16;
499   mv2_size_allgather_tuning_table[2]                                    = 6;
500   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
501       {
502           16,
503           {0, 0},
504           2,
505           {
506               {0, 1024, &MPIR_Allgather_RD_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
507           },
508       },
509       {
510           32,
511           {0, 0},
512           2,
513           {
514               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
515           },
516       },
517       {
518           64,
519           {0, 0},
520           2,
521           {
522               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
523           },
524       },
525       {
526           128,
527           {0, 0},
528           2,
529           {
530               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
531           },
532       },
533       {
534           256,
535           {0, 0},
536           2,
537           {
538               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
539           },
540       },
541       {
542           512,
543           {0, 0},
544           2,
545           {
546               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2}, {1024, -1, &MPIR_Allgather_Ring_MV2},
547           },
548       },
549
550   };
551   table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
552   agg_table_sum = 0;
553   for (i = 0; i < mv2_allgather_num_ppn_conf; i++) {
554     agg_table_sum += mv2_size_allgather_tuning_table[i];
555   }
556   mv2_allgather_thresholds_table[0] =
557       static_cast<mv2_allgather_tuning_table*>(xbt_malloc(agg_table_sum * sizeof(mv2_allgather_tuning_table)));
558   memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0],
559          (sizeof(mv2_allgather_tuning_table) * mv2_size_allgather_tuning_table[0]));
560   for (i = 1; i < mv2_allgather_num_ppn_conf; i++) {
561     mv2_allgather_thresholds_table[i] = mv2_allgather_thresholds_table[i - 1] + mv2_size_allgather_tuning_table[i - 1];
562     memcpy(mv2_allgather_thresholds_table[i], table_ptrs[i],
563            (sizeof(mv2_allgather_tuning_table) * mv2_size_allgather_tuning_table[i]));
564   }
565   xbt_free(table_ptrs);
566 }
567
568 /************ Gather variables and initializers                        */
569
/* One threshold row of a gather decision table: a [min, max] pair and the algorithm attached to it.
 * The initializers in this file write -1 as max where no upper bound is given.
 * NOTE(review): min/max are presumably message sizes in bytes -- confirm against the selector code. */
570 typedef struct {
571   int min;
572   int max;
573   int (*MV2_pt_Gather_function)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
574                                 MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
575 } mv2_gather_tuning_element;
576
/* One row of the gather tuning data, keyed by numproc, with two independent threshold lists
 * (inter_leader and intra_node), each preceded by its row count. Field order matters: the table in
 * init_mv2_gather_tables_stampede() is written as a positional aggregate initializer. */
577 typedef struct {
578   int numproc;
579   int size_inter_table;
580   mv2_gather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
581   int size_intra_table;
582   mv2_gather_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
583 } mv2_gather_tuning_table;
584
/* Gather state shared with code outside this header.
 * NOTE(review): these are non-static definitions in a header; including it from more than one
 * translation unit would produce duplicate symbols -- confirm it is included exactly once. */

/* Row count and storage of the gather table; init_mv2_gather_tables_stampede() sets the count
 * to 7 again and allocates the storage. */
585 int mv2_size_gather_tuning_table                     = 7;
586 mv2_gather_tuning_table* mv2_gather_thresholds_table = NULL;
587
/* Signature shared by every gather implementation referenced from the table. */
588 typedef int (*MV2_Gather_function_ptr)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
589                                        MPI_Datatype recvtype, int root, MPI_Comm comm);
590
/* Function pointer slots for gather implementations; nothing in this header assigns them. */
591 MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL;
592 MV2_Gather_function_ptr MV2_Gather_intra_node_function   = NULL;
593
/* MVAPICH2 gather algorithm names mapped onto the SimGrid collective implementations used in their place. */
594 #define MPIR_Gather_MV2_Direct simgrid::smpi::Coll_gather_ompi_basic_linear::gather
595 #define MPIR_Gather_MV2_two_level_Direct simgrid::smpi::Coll_gather_mvapich2_two_level::gather
596 #define MPIR_Gather_intra simgrid::smpi::Coll_gather_mpich::gather
597
598 static void init_mv2_gather_tables_stampede()
599 {
600
601   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
602     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
603   mv2_size_gather_tuning_table                       = 7;
604   mv2_gather_thresholds_table =
605       static_cast<mv2_gather_tuning_table*>(xbt_malloc(mv2_size_gather_tuning_table * sizeof(mv2_gather_tuning_table)));
606   mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[] = {
607       {16,
608        2,
609        {{0, 524288, &MPIR_Gather_MV2_Direct}, {524288, -1, &MPIR_Gather_intra}},
610        1,
611        {{0, -1, &MPIR_Gather_MV2_Direct}}},
612       {32,
613        3,
614        {{0, 16384, &MPIR_Gather_MV2_Direct},
615         {16384, 131072, &MPIR_Gather_intra},
616         {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
617        1,
618        {{0, -1, &MPIR_Gather_intra}}},
619       {64,
620        3,
621        {{0, 256, &MPIR_Gather_MV2_two_level_Direct},
622         {256, 16384, &MPIR_Gather_MV2_Direct},
623         {256, -1, &MPIR_Gather_MV2_two_level_Direct}},
624        1,
625        {{0, -1, &MPIR_Gather_intra}}},
626       {128,
627        3,
628        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
629         {512, 16384, &MPIR_Gather_MV2_Direct},
630         {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
631        1,
632        {{0, -1, &MPIR_Gather_intra}}},
633       {256,
634        3,
635        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
636         {512, 16384, &MPIR_Gather_MV2_Direct},
637         {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
638        1,
639        {{0, -1, &MPIR_Gather_intra}}},
640       {512,
641        3,
642        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
643         {512, 16384, &MPIR_Gather_MV2_Direct},
644         {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
645        1,
646        {{0, -1, &MPIR_Gather_intra}}},
647       {1024,
648        3,
649        {{0, 512, &MPIR_Gather_MV2_two_level_Direct},
650         {512, 16384, &MPIR_Gather_MV2_Direct},
651         {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
652        1,
653        {{0, -1, &MPIR_Gather_intra}}},
654   };
655
656   memcpy(mv2_gather_thresholds_table, mv2_tmp_gather_thresholds_table,
657          mv2_size_gather_tuning_table * sizeof(mv2_gather_tuning_table));
658 }
659
660 /************ Allgatherv variables and initializers                        */
661
/* One threshold row of an allgatherv decision table: a [min, max] pair and the algorithm attached to it.
 * The initializers in this file write -1 as max where no upper bound is given.
 * NOTE(review): min/max are presumably message sizes in bytes -- confirm against the selector code. */
662 typedef struct {
663   int min;
664   int max;
665   int (*MV2_pt_Allgatherv_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts,
666                                     int* displs, MPI_Datatype recvtype, MPI_Comm commg);
667 } mv2_allgatherv_tuning_element;
668
/* One row of the allgatherv tuning data, keyed by numproc. Field order matters: the table in
 * init_mv2_allgatherv_tables_stampede() is written as a positional aggregate initializer. */
669 typedef struct {
670   int numproc;
671   int size_inter_table; /* equals the number of inter_leader rows listed in this file's initializers */
672   mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
673 } mv2_allgatherv_tuning_table;
674
/* Allgatherv state shared with code outside this header.
 * NOTE(review): these are non-static definitions in a header; including it from more than one
 * translation unit would produce duplicate symbols -- confirm it is included exactly once. */

/* Function pointer slot for an allgatherv implementation; nothing in this header assigns it. */
675 int (*MV2_Allgatherv_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int* recvcounts,
676                                int* displs, MPI_Datatype recvtype, MPI_Comm comm);
677
/* Row count and storage of the allgatherv table; both are set by init_mv2_allgatherv_tables_stampede(). */
678 int mv2_size_allgatherv_tuning_table                         = 0;
679 mv2_allgatherv_tuning_table* mv2_allgatherv_thresholds_table = NULL;
680
/* MVAPICH2 allgatherv algorithm names mapped onto the SimGrid collective implementations used in their place. */
681 #define MPIR_Allgatherv_Rec_Doubling_MV2 simgrid::smpi::Coll_allgatherv_mpich_rdb::allgatherv
682 #define MPIR_Allgatherv_Bruck_MV2 simgrid::smpi::Coll_allgatherv_ompi_bruck::allgatherv
683 #define MPIR_Allgatherv_Ring_MV2 simgrid::smpi::Coll_allgatherv_mpich_ring::allgatherv
684
685 static void init_mv2_allgatherv_tables_stampede()
686 {
687   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
688     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
689   mv2_size_allgatherv_tuning_table                   = 6;
690   mv2_allgatherv_thresholds_table                    = static_cast<mv2_allgatherv_tuning_table*>(
691       xbt_malloc(mv2_size_allgatherv_tuning_table * sizeof(mv2_allgatherv_tuning_table)));
692   mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
693       {
694           16,
695           2,
696           {
697               {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
698           },
699       },
700       {
701           32,
702           2,
703           {
704               {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, {512, -1, &MPIR_Allgatherv_Ring_MV2},
705           },
706       },
707       {
708           64,
709           2,
710           {
711               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
712           },
713       },
714       {
715           128,
716           2,
717           {
718               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
719           },
720       },
721       {
722           256,
723           2,
724           {
725               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
726           },
727       },
728       {
729           512,
730           2,
731           {
732               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, {256, -1, &MPIR_Allgatherv_Ring_MV2},
733           },
734       },
735
736   };
737   memcpy(mv2_allgatherv_thresholds_table, mv2_tmp_allgatherv_thresholds_table,
738          mv2_size_allgatherv_tuning_table * sizeof(mv2_allgatherv_tuning_table));
739 }
740
741 /************ Allreduce variables and initializers                        */
742
/* One threshold row of an allreduce decision table: a [min, max] pair and the algorithm attached to it.
 * The initializers in this file write -1 as max where no upper bound is given.
 * NOTE(review): min/max are presumably message sizes in bytes -- confirm against the selector code. */
743 typedef struct {
744   int min;
745   int max;
746   int (*MV2_pt_Allreducection)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
747                                MPI_Comm comm);
748 } mv2_allreduce_tuning_element;
749
/* One row of the allreduce tuning data, keyed by numproc, with two independent threshold lists
 * (inter_leader and intra_node), each preceded by its row count. Field order matters: the table in
 * init_mv2_allreduce_tables_stampede() is written as a positional aggregate initializer. */
750 typedef struct {
751   int numproc;
752   int mcast_enabled; /* 0 in every row initialised in this file */
/* Per-threshold 0/1 flags. NOTE(review): presumably request the two-level algorithm for the
 * inter_leader threshold with the same index -- confirm against the selector code. */
753   int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS];
754   int size_inter_table;
755   mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
756   int size_intra_table;
757   mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
758 } mv2_allreduce_tuning_table;
759
/* Allreduce state shared with code outside this header.
 * NOTE(review): these are non-static definitions in a header; including it from more than one
 * translation unit would produce duplicate symbols -- confirm it is included exactly once. */

/* Function pointer slots for allreduce implementations; nothing in this header assigns them. */
760 int (*MV2_Allreducection)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
761                           MPI_Comm comm) = NULL;
762
763 int (*MV2_Allreduce_intra_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
764                                     MPI_Comm comm) = NULL;
765
/* Row count and storage of the allreduce table; both are set by init_mv2_allreduce_tables_stampede(). */
766 int mv2_size_allreduce_tuning_table                        = 0;
767 mv2_allreduce_tuning_table* mv2_allreduce_thresholds_table = NULL;
768
/* Placeholder allreduce entry: ignores all its arguments, performs no reduction and returns 0.
 * Its address is stored in the inter_leader list of the 2048 row of the allreduce table.
 * NOTE(review): presumably a stand-in for a multicast-based algorithm that is not implemented
 * here; if it were ever called recvbuf would be left untouched -- confirm how the selector treats it. */
769 static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2(void* sendbuf, void* recvbuf, int count,
770                                                            MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
771 {
772   return 0;
773 }
774
/* Placeholder allreduce entry: ignores all its arguments, performs no reduction and returns 0.
 * No table in the visible part of this header refers to it.
 * NOTE(review): presumably kept so that other code can compare against its address -- confirm. */
775 static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
776                                                          MPI_Op op, MPI_Comm comm)
777 {
778   return 0;
779 }
780
781 static int MPIR_Allreduce_reduce_p2p_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
782                                          MPI_Comm comm)
783 {
784   simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
785   return MPI_SUCCESS;
786 }
787
788 static int MPIR_Allreduce_reduce_shmem_MV2(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
789                                            MPI_Comm comm)
790 {
791   simgrid::smpi::Colls::reduce(sendbuf, recvbuf, count, datatype, op, 0, comm);
792   return MPI_SUCCESS;
793 }
794
/* MVAPICH2 allreduce algorithm names mapped onto the SimGrid collective implementations used in their place. */
795 #define MPIR_Allreduce_pt2pt_rd_MV2 simgrid::smpi::Coll_allreduce_rdb::allreduce
796 #define MPIR_Allreduce_pt2pt_rs_MV2 simgrid::smpi::Coll_allreduce_mvapich2_rs::allreduce
797 #define MPIR_Allreduce_two_level_MV2 simgrid::smpi::Coll_allreduce_mvapich2_two_level::allreduce
798
799 static void init_mv2_allreduce_tables_stampede()
800 {
801   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
802     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
803   mv2_size_allreduce_tuning_table                    = 8;
804   mv2_allreduce_thresholds_table                     = static_cast<mv2_allreduce_tuning_table*>(
805       xbt_malloc(mv2_size_allreduce_tuning_table * sizeof(mv2_allreduce_tuning_table)));
806   mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
807       {
808           16,
809           0,
810           {1, 0},
811           2,
812           {
813               {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2}, {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
814           },
815           2,
816           {
817               {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
818           },
819       },
820       {
821           32,
822           0,
823           {1, 1, 0},
824           3,
825           {
826               {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
827               {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
828               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
829           },
830           2,
831           {
832               {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
833           },
834       },
835       {
836           64,
837           0,
838           {1, 1, 0},
839           3,
840           {
841               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
842               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
843               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
844           },
845           2,
846           {
847               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
848           },
849       },
850       {
851           128,
852           0,
853           {1, 1, 0},
854           3,
855           {
856               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
857               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
858               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
859           },
860           2,
861           {
862               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
863           },
864       },
865       {
866           256,
867           0,
868           {1, 1, 0},
869           3,
870           {
871               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
872               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
873               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
874           },
875           2,
876           {
877               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
878           },
879       },
880       {
881           512,
882           0,
883           {1, 1, 0},
884           3,
885           {
886               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
887               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
888               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
889           },
890           2,
891           {
892               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
893           },
894       },
895       {
896           1024,
897           0,
898           {1, 1, 1, 0},
899           4,
900           {
901               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
902               {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
903               {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
904               {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
905           },
906           2,
907           {
908               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
909           },
910       },
911       {
912           2048,
913           0,
914           {1, 1, 1, 0},
915           4,
916           {
917               {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
918               {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
919               {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
920               {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
921               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
922           },
923           2,
924           {
925               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
926           },
927       },
928
929   };
930   memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
931          mv2_size_allreduce_tuning_table * sizeof(mv2_allreduce_tuning_table));
932 }
933
934 typedef struct {
935   int min;
936   int max;
937   int (*MV2_pt_Bcast_function)(void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr);
938   int zcpy_pipelined_knomial_factor;
939 } mv2_bcast_tuning_element;
940
941 typedef struct {
942   int numproc;
943   int bcast_segment_size;
944   int intra_node_knomial_factor;
945   int inter_node_knomial_factor;
946   int is_two_level_bcast[MV2_MAX_NB_THRESHOLDS];
947   int size_inter_table;
948   mv2_bcast_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
949   int size_intra_table;
950   mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
951 } mv2_bcast_tuning_table;
952
953 int mv2_size_bcast_tuning_table                    = 0;
954 mv2_bcast_tuning_table* mv2_bcast_thresholds_table = NULL;
955
956 int (*MV2_Bcast_function)(void* buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm_ptr) = NULL;
957
958 int (*MV2_Bcast_intra_node_function)(void* buffer, int count, MPI_Datatype datatype, int root,
959                                      MPI_Comm comm_ptr) = NULL;
960
/* Default values of the bcast tuning knobs. */
int zcpy_knomial_factor               = 2;
int mv2_pipelined_zcpy_knomial_factor = -1;
int bcast_segment_size                = 8192;
int mv2_inter_node_knomial_factor     = 4;
int mv2_intra_node_knomial_factor     = 4;

/* Bcast size constants. */
#define mv2_bcast_two_level_system_size 64
#define mv2_bcast_short_msg 16384
/* The product is parenthesized so that it stays a single value wherever the macro is expanded: without the
 * parentheses, `x / mv2_bcast_large_msg` would be evaluated as `(x / 512) * 1024`. */
#define mv2_bcast_large_msg (512 * 1024)

#define INTRA_NODE_ROOT 0
971
/* MVAPICH2 bcast names used by the tuning tables, mapped onto SimGrid collectives. Grouped by target. */

/* -> Coll_bcast_mpich */
#define MPIR_Pipelined_Bcast_Zcpy_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
#define MPIR_Pipelined_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast
#define MPIR_Shmem_Bcast_MV2 simgrid::smpi::Coll_bcast_mpich::bcast

/* -> Coll_bcast_binomial_tree */
#define MPIR_Bcast_binomial_MV2 simgrid::smpi::Coll_bcast_binomial_tree::bcast

/* -> Coll_bcast_scatter_LR_allgather (the shm and non-shm names share one implementation) */
#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
#define MPIR_Bcast_scatter_ring_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast

/* -> Coll_bcast_scatter_rdb_allgather */
#define MPIR_Bcast_scatter_doubling_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_rdb_allgather::bcast

/* -> mvapich2-specific inter-node / intra-node collectives */
#define MPIR_Bcast_tune_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Bcast_inter_node_helper_MV2 simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Knomial_Bcast_intra_node_MV2 simgrid::smpi::Coll_bcast_mvapich2_knomial_intra_node::bcast
#define MPIR_Bcast_intra_MV2 simgrid::smpi::Coll_bcast_mvapich2_intra_node::bcast
983
984 static void init_mv2_bcast_tables_stampede()
985 {
986   // Stampede,
987   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
988     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
989   mv2_size_bcast_tuning_table                        = 8;
990   mv2_bcast_thresholds_table =
991       static_cast<mv2_bcast_tuning_table*>(xbt_malloc(mv2_size_bcast_tuning_table * sizeof(mv2_bcast_tuning_table)));
992
993   mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[] = {
994       {16,
995        8192,
996        4,
997        4,
998        {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
999        11,
1000        {{0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1001         {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1002         {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1003         {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1004         {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
1005         {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1006         {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1007         {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1008         {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
1009         {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
1010         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
1011        11,
1012        {{0, 8, &MPIR_Shmem_Bcast_MV2, 2},
1013         {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
1014         {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
1015         {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
1016         {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
1017         {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
1018         {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1019         {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
1020         {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1021         {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1022         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1023       {32,
1024        8192,
1025        4,
1026        4,
1027        {1, 1, 1, 1, 1, 1, 1, 1},
1028        8,
1029        {{0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1030         {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1031         {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1032         {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1033         {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1034         {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1035         {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1036         {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}},
1037        8,
1038        {{0, 128, &MPIR_Shmem_Bcast_MV2, 2},
1039         {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
1040         {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1041         {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
1042         {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1043         {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
1044         {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1045         {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}}},
1046       {64,
1047        8192,
1048        4,
1049        4,
1050        {1, 1, 1, 1, 1, 1, 1, 1, 1},
1051        9,
1052        {{0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1053         {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1054         {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1055         {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1056         {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1057         {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1058         {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1059         {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1060         {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}},
1061        9,
1062        {{0, 2, &MPIR_Shmem_Bcast_MV2, 4},
1063         {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
1064         {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
1065         {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
1066         {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
1067         {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
1068         {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
1069         {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
1070         {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}}},
1071       {128,
1072        8192,
1073        4,
1074        4,
1075        {1, 1, 1, 0},
1076        4,
1077        {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1078         {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1079         {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1080         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}},
1081        4,
1082        {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1083         {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1084         {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1085         {524288, -1, NULL, -1}}},
1086       {256,
1087        8192,
1088        4,
1089        4,
1090        {1, 1, 1, 1, 1},
1091        5,
1092        {{0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1093         {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1094         {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1095         {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1096         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1097        5,
1098        {{0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1099         {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1100         {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1101         {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1102         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1103       {512,
1104        8192,
1105        4,
1106        4,
1107        {1, 1, 1, 1, 1},
1108        5,
1109        {{0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1110         {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1111         {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1112         {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
1113         {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1114        5,
1115        {{0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1116         {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1117         {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1118         {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1119         {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1120       {1024,
1121        8192,
1122        4,
1123        4,
1124        {1, 1, 1, 1, 1},
1125        5,
1126        {{0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1127         {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1128         {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1129         {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1130         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1131        5,
1132        {{0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1133         {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1134         {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1135         {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1136         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}},
1137       {2048,
1138        8192,
1139        4,
1140        4,
1141        {1, 1, 1, 1, 1, 1, 1},
1142        7,
1143        {{0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1144         {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1145         {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1146         {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1147         {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1148         {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1149         {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}},
1150        7,
1151        {{0, 16, &MPIR_Shmem_Bcast_MV2, 8},
1152         {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
1153         {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1154         {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1155         {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1156         {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1157         {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}}}};
1158
1159   memcpy(mv2_bcast_thresholds_table, mv2_tmp_bcast_thresholds_table,
1160          mv2_size_bcast_tuning_table * sizeof(mv2_bcast_tuning_table));
1161 }
1162
1163 /************ Reduce variables and initializers                        */
1164
1165 typedef struct {
1166   int min;
1167   int max;
1168   int (*MV2_pt_Reduce_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1169                                 MPI_Comm comm_ptr);
1170 } mv2_reduce_tuning_element;
1171
1172 typedef struct {
1173   int numproc;
1174   int inter_k_degree;
1175   int intra_k_degree;
1176   int is_two_level_reduce[MV2_MAX_NB_THRESHOLDS];
1177   int size_inter_table;
1178   mv2_reduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1179   int size_intra_table;
1180   mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1181 } mv2_reduce_tuning_table;
1182
1183 int mv2_size_reduce_tuning_table                     = 0;
1184 mv2_reduce_tuning_table* mv2_reduce_thresholds_table = NULL;
1185
1186 int mv2_reduce_intra_knomial_factor = -1;
1187 int mv2_reduce_inter_knomial_factor = -1;
1188
1189 int (*MV2_Reduce_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1190                            MPI_Comm comm_ptr) = NULL;
1191
1192 int (*MV2_Reduce_intra_function)(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
1193                                  MPI_Comm comm_ptr) = NULL;
1194
1195 #define MPIR_Reduce_inter_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1196 #define MPIR_Reduce_intra_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
1197 #define MPIR_Reduce_binomial_MV2 simgrid::smpi::Coll_reduce_binomial::reduce
1198 #define MPIR_Reduce_redscat_gather_MV2 simgrid::smpi::Coll_reduce_scatter_gather::reduce
1199 #define MPIR_Reduce_shmem_MV2 simgrid::smpi::Coll_reduce_ompi_basic_linear::reduce
1200 #define MPIR_Reduce_two_level_helper_MV2 simgrid::smpi::Coll_reduce_mvapich2_two_level::reduce
1201
1202 static void init_mv2_reduce_tables_stampede()
1203 {
1204   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1205     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
1206   /*Stampede*/
1207   mv2_size_reduce_tuning_table = 8;
1208   mv2_reduce_thresholds_table =
1209       static_cast<mv2_reduce_tuning_table*>(xbt_malloc(mv2_size_reduce_tuning_table * sizeof(mv2_reduce_tuning_table)));
1210   mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
1211       {
1212           16,
1213           4,
1214           4,
1215           {1, 0, 0},
1216           3,
1217           {
1218               {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1219               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1220               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1221           },
1222           2,
1223           {
1224               {0, 65536, &MPIR_Reduce_shmem_MV2}, {65536, -1, &MPIR_Reduce_binomial_MV2},
1225           },
1226       },
1227       {
1228           32,
1229           4,
1230           4,
1231           {1, 1, 1, 1, 0, 0, 0},
1232           7,
1233           {
1234               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1235               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1236               {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1237               {32768, 65536, &MPIR_Reduce_binomial_MV2},
1238               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1239               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1240               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1241           },
1242           6,
1243           {
1244               {0, 8192, &MPIR_Reduce_shmem_MV2},
1245               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1246               {16384, 32768, &MPIR_Reduce_shmem_MV2},
1247               {32768, 65536, &MPIR_Reduce_shmem_MV2},
1248               {65536, 262144, &MPIR_Reduce_shmem_MV2},
1249               {262144, -1, &MPIR_Reduce_binomial_MV2},
1250           },
1251       },
1252       {
1253           64,
1254           4,
1255           4,
1256           {1, 1, 1, 1, 0},
1257           5,
1258           {
1259               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1260               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1261               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1262               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1263               {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
1264           },
1265           5,
1266           {
1267               {0, 8192, &MPIR_Reduce_shmem_MV2},
1268               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1269               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1270               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1271               {262144, -1, &MPIR_Reduce_binomial_MV2},
1272           },
1273       },
1274       {
1275           128,
1276           4,
1277           4,
1278           {1, 0, 1, 0, 1, 0},
1279           6,
1280           {
1281               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1282               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1283               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1284               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1285               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1286               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1287           },
1288           5,
1289           {
1290               {0, 8192, &MPIR_Reduce_shmem_MV2},
1291               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1292               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1293               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1294               {262144, -1, &MPIR_Reduce_binomial_MV2},
1295           },
1296       },
1297       {
1298           256,
1299           4,
1300           4,
1301           {1, 1, 1, 0, 1, 1, 0},
1302           7,
1303           {
1304               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1305               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1306               {16384, 32768, &MPIR_Reduce_binomial_MV2},
1307               {32768, 65536, &MPIR_Reduce_binomial_MV2},
1308               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1309               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1310               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1311           },
1312           6,
1313           {
1314               {0, 8192, &MPIR_Reduce_shmem_MV2},
1315               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1316               {16384, 32768, &MPIR_Reduce_shmem_MV2},
1317               {32768, 65536, &MPIR_Reduce_shmem_MV2},
1318               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1319               {262144, -1, &MPIR_Reduce_binomial_MV2},
1320           },
1321       },
1322       {
1323           512,
1324           4,
1325           4,
1326           {1, 0, 1, 1, 1, 0},
1327           6,
1328           {
1329               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1330               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1331               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1332               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1333               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1334               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1335           },
1336           5,
1337           {
1338               {0, 8192, &MPIR_Reduce_shmem_MV2},
1339               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1340               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1341               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1342               {262144, -1, &MPIR_Reduce_binomial_MV2},
1343           },
1344       },
1345       {
1346           1024,
1347           4,
1348           4,
1349           {1, 0, 1, 1, 1},
1350           5,
1351           {
1352               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1353               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1354               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1355               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1356               {262144, -1, &MPIR_Reduce_binomial_MV2},
1357           },
1358           5,
1359           {
1360               {0, 8192, &MPIR_Reduce_shmem_MV2},
1361               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1362               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1363               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1364               {262144, -1, &MPIR_Reduce_binomial_MV2},
1365           },
1366       },
1367       {
1368           2048,
1369           4,
1370           4,
1371           {1, 0, 1, 1, 1, 1},
1372           6,
1373           {
1374               {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1375               {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1376               {4096, 16384, &MPIR_Reduce_binomial_MV2},
1377               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1378               {65536, 131072, &MPIR_Reduce_binomial_MV2},
1379               {131072, -1, &MPIR_Reduce_binomial_MV2},
1380           },
1381           6,
1382           {
1383               {0, 2048, &MPIR_Reduce_shmem_MV2},
1384               {2048, 4096, &MPIR_Reduce_shmem_MV2},
1385               {4096, 16384, &MPIR_Reduce_shmem_MV2},
1386               {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1387               {65536, 131072, &MPIR_Reduce_binomial_MV2},
1388               {131072, -1, &MPIR_Reduce_shmem_MV2},
1389           },
1390       },
1391
1392   };
1393   memcpy(mv2_reduce_thresholds_table, mv2_tmp_reduce_thresholds_table,
1394          mv2_size_reduce_tuning_table * sizeof(mv2_reduce_tuning_table));
1395 }
1396
1397 /************ Reduce scatter variables and initializers                        */
1398
1399 typedef struct {
1400   int min;
1401   int max;
1402   int (*MV2_pt_Red_scat_function)(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1403                                   MPI_Comm comm_ptr);
1404 } mv2_red_scat_tuning_element;
1405
1406 typedef struct {
1407   int numproc;
1408   int size_inter_table;
1409   mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1410 } mv2_red_scat_tuning_table;
1411
1412 int mv2_size_red_scat_tuning_table                       = 0;
1413 mv2_red_scat_tuning_table* mv2_red_scat_thresholds_table = NULL;
1414
1415 int (*MV2_Red_scat_function)(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1416                              MPI_Comm comm_ptr);
1417
1418 static int MPIR_Reduce_Scatter_Basic_MV2(void* sendbuf, void* recvbuf, int* recvcnts, MPI_Datatype datatype, MPI_Op op,
1419                                          MPI_Comm comm)
1420 {
1421   simgrid::smpi::Coll_reduce_scatter_default::reduce_scatter(sendbuf, recvbuf, recvcnts, datatype, op, comm);
1422   return MPI_SUCCESS;
1423 }
/* MVAPICH2 reduce_scatter names used by the tuning tables, mapped onto SimGrid collectives. */
#define MPIR_Reduce_scatter_Pair_Wise_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_pair::reduce_scatter
#define MPIR_Reduce_scatter_non_comm_MV2 simgrid::smpi::Coll_reduce_scatter_mpich_noncomm::reduce_scatter
/* Split over two lines only to stay within the line-length limit. */
#define MPIR_Reduce_scatter_Rec_Halving_MV2                                                                            \
  simgrid::smpi::Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter
1428
1429 static void init_mv2_reduce_scatter_tables_stampede()
1430 {
1431   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1432     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
1433   mv2_size_red_scat_tuning_table                     = 6;
1434   mv2_red_scat_thresholds_table                      = static_cast<mv2_red_scat_tuning_table*>(
1435       xbt_malloc(mv2_size_red_scat_tuning_table * sizeof(mv2_red_scat_tuning_table)));
1436   mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
1437       {
1438           16,
1439           3,
1440           {
1441               {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1442               {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1443               {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1444           },
1445       },
1446       {
1447           32,
1448           3,
1449           {
1450               {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1451               {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1452               {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1453           },
1454       },
1455       {
1456           64,
1457           3,
1458           {
1459               {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
1460               {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1461               {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1462           },
1463       },
1464       {
1465           128,
1466           2,
1467           {
1468               {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1469           },
1470       },
1471       {
1472           256,
1473           2,
1474           {
1475               {0, 128, &MPIR_Reduce_Scatter_Basic_MV2}, {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1476           },
1477       },
1478       {
1479           512,
1480           2,
1481           {
1482               {0, 256, &MPIR_Reduce_Scatter_Basic_MV2}, {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1483           },
1484       },
1485
1486   };
1487   memcpy(mv2_red_scat_thresholds_table, mv2_tmp_red_scat_thresholds_table,
1488          mv2_size_red_scat_tuning_table * sizeof(mv2_red_scat_tuning_table));
1489 }
1490
1491 /************ Scatter variables and initializers                        */
1492
1493 typedef struct {
1494   int min;
1495   int max;
1496   int (*MV2_pt_Scatter_function)(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1497                                  MPI_Datatype recvtype, int root, MPI_Comm comm);
1498 } mv2_scatter_tuning_element;
1499
1500 typedef struct {
1501   int numproc;
1502   int size_inter_table;
1503   mv2_scatter_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1504   int size_intra_table;
1505   mv2_scatter_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1506 } mv2_scatter_tuning_table;
1507
1508 int* mv2_scatter_table_ppn_conf                         = NULL;
1509 int mv2_scatter_num_ppn_conf                            = 1;
1510 int* mv2_size_scatter_tuning_table                      = NULL;
1511 mv2_scatter_tuning_table** mv2_scatter_thresholds_table = NULL;
1512
1513 int (*MV2_Scatter_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1514                             MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
1515
1516 int (*MV2_Scatter_intra_function)(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
1517                                   MPI_Datatype recvtype, int root, MPI_Comm comm) = NULL;
1518 int MPIR_Scatter_mcst_wrap_MV2(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1519                                MPI_Datatype recvtype, int root, MPI_Comm comm_ptr);
1520
1521 int MPIR_Scatter_mcst_wrap_MV2(void* sendbuf, int sendcnt, MPI_Datatype sendtype, void* recvbuf, int recvcnt,
1522                                MPI_Datatype recvtype, int root, MPI_Comm comm_ptr)
1523 {
1524   return 0;
1525 }
1526
/* MVAPICH2 scatter names used by the tuning tables, mapped onto SimGrid collectives. */
/* Flat algorithms */
#define MPIR_Scatter_MV2_Direct simgrid::smpi::Coll_scatter_ompi_basic_linear::scatter
#define MPIR_Scatter_MV2_Binomial simgrid::smpi::Coll_scatter_ompi_binomial::scatter
/* Two-level algorithms */
#define MPIR_Scatter_MV2_two_level_Direct simgrid::smpi::Coll_scatter_mvapich2_two_level_direct::scatter
#define MPIR_Scatter_MV2_two_level_Binomial simgrid::smpi::Coll_scatter_mvapich2_two_level_binomial::scatter
1531
1532 static void init_mv2_scatter_tables_stampede()
1533 {
1534   if (simgrid::smpi::Colls::smpi_coll_cleanup_callback == NULL)
1535     simgrid::smpi::Colls::smpi_coll_cleanup_callback = &smpi_coll_cleanup_mvapich2;
1536
1537   int agg_table_sum = 0;
1538   int i;
1539   mv2_scatter_tuning_table** table_ptrs = NULL;
1540   mv2_scatter_num_ppn_conf              = 3;
1541   mv2_scatter_thresholds_table =
1542       static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table*) * mv2_scatter_num_ppn_conf));
1543   table_ptrs =
1544       static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table*) * mv2_scatter_num_ppn_conf));
1545   mv2_size_scatter_tuning_table    = static_cast<int*>(xbt_malloc(sizeof(int) * mv2_scatter_num_ppn_conf));
1546   mv2_scatter_table_ppn_conf       = static_cast<int*>(xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int)));
1547   mv2_scatter_table_ppn_conf[0]    = 1;
1548   mv2_size_scatter_tuning_table[0] = 6;
1549   mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
1550       {
1551           2,
1552           1,
1553           {
1554               {0, -1, &MPIR_Scatter_MV2_Binomial},
1555           },
1556           1,
1557           {
1558               {0, -1, &MPIR_Scatter_MV2_Binomial},
1559           },
1560       },
1561
1562       {
1563           4,
1564           1,
1565           {
1566               {0, -1, &MPIR_Scatter_MV2_Direct},
1567           },
1568           1,
1569           {
1570               {0, -1, &MPIR_Scatter_MV2_Direct},
1571           },
1572       },
1573
1574       {
1575           8,
1576           1,
1577           {
1578               {0, -1, &MPIR_Scatter_MV2_Direct},
1579           },
1580           1,
1581           {
1582               {0, -1, &MPIR_Scatter_MV2_Direct},
1583           },
1584       },
1585
1586       {
1587           16,
1588           1,
1589           {
1590               {0, -1, &MPIR_Scatter_MV2_Direct},
1591           },
1592           1,
1593           {
1594               {0, -1, &MPIR_Scatter_MV2_Direct},
1595           },
1596       },
1597
1598       {
1599           32,
1600           1,
1601           {
1602               {0, -1, &MPIR_Scatter_MV2_Direct},
1603           },
1604           1,
1605           {
1606               {0, -1, &MPIR_Scatter_MV2_Direct},
1607           },
1608       },
1609
1610       {
1611           64,
1612           2,
1613           {
1614               {0, 32, &MPIR_Scatter_MV2_Binomial}, {32, -1, &MPIR_Scatter_MV2_Direct},
1615           },
1616           1,
1617           {
1618               {0, -1, &MPIR_Scatter_MV2_Binomial},
1619           },
1620       },
1621   };
1622   table_ptrs[0]                                                    = mv2_tmp_scatter_thresholds_table_1ppn;
1623   mv2_scatter_table_ppn_conf[1]                                    = 2;
1624   mv2_size_scatter_tuning_table[1]                                 = 6;
1625   mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
1626       {
1627           4,
1628           2,
1629           {
1630               {0, 4096, &MPIR_Scatter_MV2_Binomial}, {4096, -1, &MPIR_Scatter_MV2_Direct},
1631           },
1632           1,
1633           {
1634               {0, -1, &MPIR_Scatter_MV2_Direct},
1635           },
1636       },
1637
1638       {
1639           8,
1640           2,
1641           {
1642               {0, 512, &MPIR_Scatter_MV2_two_level_Direct}, {512, -1, &MPIR_Scatter_MV2_Direct},
1643           },
1644           1,
1645           {
1646               {0, -1, &MPIR_Scatter_MV2_Binomial},
1647           },
1648       },
1649
1650       {
1651           16,
1652           2,
1653           {
1654               {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1655           },
1656           1,
1657           {
1658               {0, -1, &MPIR_Scatter_MV2_Binomial},
1659           },
1660       },
1661
1662       {
1663           32,
1664           2,
1665           {
1666               {0, 2048, &MPIR_Scatter_MV2_two_level_Direct}, {2048, -1, &MPIR_Scatter_MV2_Direct},
1667           },
1668           1,
1669           {
1670               {0, -1, &MPIR_Scatter_MV2_Binomial},
1671           },
1672       },
1673
1674       {
1675           64,
1676           2,
1677           {
1678               {0, 8192, &MPIR_Scatter_MV2_two_level_Direct}, {8192, -1, &MPIR_Scatter_MV2_Direct},
1679           },
1680           1,
1681           {
1682               {0, -1, &MPIR_Scatter_MV2_Binomial},
1683           },
1684       },
1685
1686       {
1687           128,
1688           4,
1689           {
1690               {0, 16, &MPIR_Scatter_MV2_Binomial},
1691               {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1692               {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1693               {16384, -1, &MPIR_Scatter_MV2_Direct},
1694           },
1695           1,
1696           {
1697               {0, 128, &MPIR_Scatter_MV2_Direct}, {128, -1, &MPIR_Scatter_MV2_Binomial},
1698           },
1699       },
1700   };
1701   table_ptrs[1]                                                     = mv2_tmp_scatter_thresholds_table_2ppn;
1702   mv2_scatter_table_ppn_conf[2]                                     = 16;
1703   mv2_size_scatter_tuning_table[2]                                  = 8;
1704   mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
1705       {
1706           16,
1707           2,
1708           {
1709               {0, 256, &MPIR_Scatter_MV2_Binomial}, {256, -1, &MPIR_Scatter_MV2_Direct},
1710           },
1711           1,
1712           {
1713               {0, -1, &MPIR_Scatter_MV2_Direct},
1714           },
1715       },
1716
1717       {
1718           32,
1719           2,
1720           {
1721               {0, 512, &MPIR_Scatter_MV2_Binomial}, {512, -1, &MPIR_Scatter_MV2_Direct},
1722           },
1723           1,
1724           {
1725               {0, -1, &MPIR_Scatter_MV2_Direct},
1726           },
1727       },
1728
1729       {
1730           64,
1731           2,
1732           {
1733               {0, 1024, &MPIR_Scatter_MV2_two_level_Direct}, {1024, -1, &MPIR_Scatter_MV2_Direct},
1734           },
1735           1,
1736           {
1737               {0, -1, &MPIR_Scatter_MV2_Direct},
1738           },
1739       },
1740
1741       {
1742           128,
1743           4,
1744           {
1745               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1746               {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1747               {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1748               {2048, -1, &MPIR_Scatter_MV2_Direct},
1749           },
1750           1,
1751           {
1752               {0, -1, &MPIR_Scatter_MV2_Direct},
1753           },
1754       },
1755
1756       {
1757           256,
1758           4,
1759           {
1760               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1761               {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1762               {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1763               {2048, -1, &MPIR_Scatter_MV2_Direct},
1764           },
1765           1,
1766           {
1767               {0, -1, &MPIR_Scatter_MV2_Direct},
1768           },
1769       },
1770
1771       {
1772           512,
1773           4,
1774           {
1775               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1776               {16, 16, &MPIR_Scatter_MV2_two_level_Direct},
1777               {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1778               {4096, -1, &MPIR_Scatter_MV2_Direct},
1779           },
1780           1,
1781           {
1782               {0, -1, &MPIR_Scatter_MV2_Binomial},
1783           },
1784       },
1785       {
1786           1024,
1787           5,
1788           {
1789               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1790               {0, 16, &MPIR_Scatter_MV2_Binomial},
1791               {16, 32, &MPIR_Scatter_MV2_Binomial},
1792               {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1793               {4096, -1, &MPIR_Scatter_MV2_Direct},
1794           },
1795           1,
1796           {
1797               {0, -1, &MPIR_Scatter_MV2_Binomial},
1798           },
1799       },
1800       {
1801           2048,
1802           7,
1803           {
1804               {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1805               {0, 16, &MPIR_Scatter_MV2_two_level_Binomial},
1806               {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1807               {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1808               {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1809               {16384, 65536, &MPIR_Scatter_MV2_Direct},
1810               {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
1811           },
1812           6,
1813           {
1814               {0, 16, &MPIR_Scatter_MV2_Binomial},
1815               {16, 128, &MPIR_Scatter_MV2_Binomial},
1816               {128, 1024, &MPIR_Scatter_MV2_Binomial},
1817               {1024, 16384, &MPIR_Scatter_MV2_Direct},
1818               {16384, 65536, &MPIR_Scatter_MV2_Direct},
1819               {65536, -1, &MPIR_Scatter_MV2_Direct},
1820           },
1821       },
1822   };
1823   table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
1824   agg_table_sum = 0;
1825   for (i = 0; i < mv2_scatter_num_ppn_conf; i++) {
1826     agg_table_sum += mv2_size_scatter_tuning_table[i];
1827   }
1828   mv2_scatter_thresholds_table[0] =
1829       static_cast<mv2_scatter_tuning_table*>(xbt_malloc(agg_table_sum * sizeof(mv2_scatter_tuning_table)));
1830   memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0],
1831          (sizeof(mv2_scatter_tuning_table) * mv2_size_scatter_tuning_table[0]));
1832   for (i = 1; i < mv2_scatter_num_ppn_conf; i++) {
1833     mv2_scatter_thresholds_table[i] = mv2_scatter_thresholds_table[i - 1] + mv2_size_scatter_tuning_table[i - 1];
1834     memcpy(mv2_scatter_thresholds_table[i], table_ptrs[i],
1835            (sizeof(mv2_scatter_tuning_table) * mv2_size_scatter_tuning_table[i]));
1836   }
1837   xbt_free(table_ptrs);
1838 }