Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'toufic' of github.com:Takishipp/simgrid
[simgrid.git] / src / smpi / colls / smpi_mvapich2_selector_stampede.h
1 /* selector for collective algorithms based on mvapich decision logic, with calibration from Stampede cluster at TACC*/
2 /* This is the tuning used by MVAPICH for Stampede platform based on (MV2_ARCH_INTEL_XEON_E5_2680_16, MV2_HCA_MLX_CX_FDR) */
3
4 /* Copyright (c) 2009-2017. The SimGrid Team. All rights reserved.          */
5
6 /* This program is free software; you can redistribute it and/or modify it
7  * under the terms of the license (GNU LGPL) which comes with this package. */
8
9 /************ Alltoall variables and initializers                        */
10
11 #define MV2_MAX_NB_THRESHOLDS  32
12
13
14 XBT_PUBLIC(void) smpi_coll_cleanup_mvapich2(void);
15
16 typedef struct {
17   int min;
18   int max;
19   int (*MV2_pt_Alltoall_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
20       void *recvbuf, int recvcount, MPI_Datatype recvtype,
21       MPI_Comm comm_ptr );
22 } mv2_alltoall_tuning_element;
23
24 typedef struct {
25   int numproc;
26   int size_table;
27   mv2_alltoall_tuning_element algo_table[MV2_MAX_NB_THRESHOLDS];
28   mv2_alltoall_tuning_element in_place_algo_table[MV2_MAX_NB_THRESHOLDS];
29 } mv2_alltoall_tuning_table;
30
31 int (*MV2_Alltoall_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm_ptr)=NULL;
32
33 /* Indicates number of processes per node */
34 int *mv2_alltoall_table_ppn_conf = NULL;
35 /* Indicates total number of configurations */
36 int mv2_alltoall_num_ppn_conf = 1;
37 int *mv2_size_alltoall_tuning_table = NULL;
38 mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL;
39
40
41 #define MPIR_Alltoall_bruck_MV2 simgrid::smpi::Coll_alltoall_bruck::alltoall
42 #define MPIR_Alltoall_RD_MV2 simgrid::smpi::Coll_alltoall_rdb::alltoall
43 #define MPIR_Alltoall_Scatter_dest_MV2 simgrid::smpi::Coll_alltoall_mvapich2_scatter_dest::alltoall
44 #define MPIR_Alltoall_pairwise_MV2 simgrid::smpi::Coll_alltoall_pair::alltoall
45 #define MPIR_Alltoall_inplace_MV2 simgrid::smpi::Coll_alltoall_ring::alltoall 
46
47
48 static void init_mv2_alltoall_tables_stampede(){
49   int i;
50   int agg_table_sum = 0;
51   mv2_alltoall_tuning_table **table_ptrs = NULL;
52   mv2_alltoall_num_ppn_conf = 3;
53   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
54     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
55   mv2_alltoall_thresholds_table = static_cast<mv2_alltoall_tuning_table**>(xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
56       * mv2_alltoall_num_ppn_conf));
57   table_ptrs = static_cast<mv2_alltoall_tuning_table**>(xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
58       * mv2_alltoall_num_ppn_conf));
59   mv2_size_alltoall_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
60       mv2_alltoall_num_ppn_conf));
61   mv2_alltoall_table_ppn_conf = static_cast<int*>(xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int)));
62   mv2_alltoall_table_ppn_conf[0] = 1;
63   mv2_size_alltoall_tuning_table[0] = 6;
64   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
65       {2,
66           1,
67           {{0, -1, &MPIR_Alltoall_pairwise_MV2},
68           },
69
70           {{0, -1, &MPIR_Alltoall_inplace_MV2},
71           },
72       },
73
74       {4,
75           2,
76           {{0, 262144, &MPIR_Alltoall_Scatter_dest_MV2},
77               {262144, -1, &MPIR_Alltoall_pairwise_MV2},
78           },
79
80           {{0, -1, &MPIR_Alltoall_inplace_MV2},
81           },
82       },
83
84       {8,
85           2,
86           {{0, 8, &MPIR_Alltoall_RD_MV2},
87               {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
88           },
89
90           {{0, -1, &MPIR_Alltoall_inplace_MV2},
91           },
92       },
93
94       {16,
95           3,
96           {{0, 64, &MPIR_Alltoall_RD_MV2},
97               {64, 512, &MPIR_Alltoall_bruck_MV2},
98               {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
99           },
100
101           {{0,-1, &MPIR_Alltoall_inplace_MV2},
102           },
103       },
104
105       {32,
106           3,
107           {{0, 32, &MPIR_Alltoall_RD_MV2},
108               {32, 2048, &MPIR_Alltoall_bruck_MV2},
109               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
110           },
111
112           {{0, -1, &MPIR_Alltoall_inplace_MV2},
113           },
114       },
115
116       {64,
117           3,
118           {{0, 8, &MPIR_Alltoall_RD_MV2},
119               {8, 1024, &MPIR_Alltoall_bruck_MV2},
120               {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
121           },
122
123           {{0, -1, &MPIR_Alltoall_inplace_MV2},
124           },
125       },
126   };
127   table_ptrs[0] = mv2_tmp_alltoall_thresholds_table_1ppn;
128   mv2_alltoall_table_ppn_conf[1] = 2;
129   mv2_size_alltoall_tuning_table[1] = 6;
130   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
131       {4,
132           2,
133           {{0, 32, &MPIR_Alltoall_RD_MV2},
134               {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
135           },
136
137           {{0, -1, &MPIR_Alltoall_inplace_MV2},
138           },
139       },
140
141       {8,
142           2,
143           {{0, 64, &MPIR_Alltoall_RD_MV2},
144               {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
145           },
146
147           {{0, -1, &MPIR_Alltoall_inplace_MV2},
148           },
149       },
150
151       {16,
152           3,
153           {{0, 64, &MPIR_Alltoall_RD_MV2},
154               {64, 2048, &MPIR_Alltoall_bruck_MV2},
155               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
156           },
157
158           {{0,-1, &MPIR_Alltoall_inplace_MV2},
159           },
160       },
161
162       {32,
163           3,
164           {{0, 16, &MPIR_Alltoall_RD_MV2},
165               {16, 2048, &MPIR_Alltoall_bruck_MV2},
166               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
167           },
168
169           {{0, -1, &MPIR_Alltoall_inplace_MV2},
170           },
171       },
172
173       {64,
174           3,
175           {{0, 8, &MPIR_Alltoall_RD_MV2},
176               {8, 1024, &MPIR_Alltoall_bruck_MV2},
177               {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
178           },
179
180           {{0, -1, &MPIR_Alltoall_inplace_MV2},
181           },
182       },
183
184       {128,
185           3,
186           {{0, 4, &MPIR_Alltoall_RD_MV2},
187               {4, 2048, &MPIR_Alltoall_bruck_MV2},
188               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
189           },
190
191           {{0, -1, &MPIR_Alltoall_inplace_MV2},
192           },
193       },
194   };
195   table_ptrs[1] = mv2_tmp_alltoall_thresholds_table_2ppn;
196   mv2_alltoall_table_ppn_conf[2] = 16;
197   mv2_size_alltoall_tuning_table[2] = 7;
198   mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
199       {16,
200           2,
201           {{0, 2048, &MPIR_Alltoall_bruck_MV2},
202               {2048, -1,  &MPIR_Alltoall_Scatter_dest_MV2},
203           },
204
205           {{32768, -1, &MPIR_Alltoall_inplace_MV2},
206           },
207       },
208
209       {32,
210           2,
211           {{0, 2048, &MPIR_Alltoall_bruck_MV2},
212               {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
213           },
214
215           {{16384, -1, &MPIR_Alltoall_inplace_MV2},
216           },
217       },
218
219       {64,
220           3,
221           {{0, 2048, &MPIR_Alltoall_bruck_MV2},
222               {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
223               {16384, -1, &MPIR_Alltoall_pairwise_MV2},
224           },
225
226           {{32768, 131072, &MPIR_Alltoall_inplace_MV2},
227           },
228       },
229
230       {128,
231           2,
232           {{0, 2048, &MPIR_Alltoall_bruck_MV2},
233               {2048, -1, &MPIR_Alltoall_pairwise_MV2},
234           },
235
236           {{16384,65536, &MPIR_Alltoall_inplace_MV2},
237           },
238       },
239
240       {256,
241           2,
242           {{0, 1024, &MPIR_Alltoall_bruck_MV2},
243               {1024, -1, &MPIR_Alltoall_pairwise_MV2},
244           },
245
246           {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
247           },
248       },
249
250       {512,
251           2,
252           {{0, 1024, &MPIR_Alltoall_bruck_MV2},
253               {1024, -1, &MPIR_Alltoall_pairwise_MV2},
254           },
255
256           {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
257           },
258       },
259       {1024,
260           2,
261           {{0, 1024, &MPIR_Alltoall_bruck_MV2},
262               {1024, -1, &MPIR_Alltoall_pairwise_MV2},
263           },
264
265           {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
266           },
267       },
268
269   };
270   table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
271   agg_table_sum = 0;
272   for (i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
273       agg_table_sum += mv2_size_alltoall_tuning_table[i];
274   }
275   mv2_alltoall_thresholds_table[0] =
276       static_cast<mv2_alltoall_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table)));
277   memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0],
278       (sizeof(mv2_alltoall_tuning_table)
279           * mv2_size_alltoall_tuning_table[0]));
280   for (i = 1; i < mv2_alltoall_num_ppn_conf; i++) {
281       mv2_alltoall_thresholds_table[i] =
282           mv2_alltoall_thresholds_table[i - 1]
283                                         + mv2_size_alltoall_tuning_table[i - 1];
284       memcpy(mv2_alltoall_thresholds_table[i], table_ptrs[i],
285           (sizeof(mv2_alltoall_tuning_table)
286               * mv2_size_alltoall_tuning_table[i]));
287   }
288   xbt_free(table_ptrs);
289
290
291 }
292
293
294 /************ Allgather variables and initializers                        */
295
296 typedef struct {
297   int min;
298   int max;
299   int (*MV2_pt_Allgatherction)(void *sendbuf,
300       int sendcount,
301       MPI_Datatype sendtype,
302       void *recvbuf,
303       int recvcount,
304       MPI_Datatype recvtype, MPI_Comm comm_ptr);
305 } mv2_allgather_tuning_element;
306
307 typedef struct {
308   int numproc;
309   int two_level[MV2_MAX_NB_THRESHOLDS];
310   int size_inter_table;
311   mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
312 } mv2_allgather_tuning_table;
313
314 int (*MV2_Allgatherction)(void *sendbuf,
315     int sendcount,
316     MPI_Datatype sendtype,
317     void *recvbuf,
318     int recvcount,
319     MPI_Datatype recvtype, MPI_Comm comm);
320
321 int *mv2_allgather_table_ppn_conf = NULL;
322 int mv2_allgather_num_ppn_conf = 1;
323 int *mv2_size_allgather_tuning_table = NULL;
324 mv2_allgather_tuning_table **mv2_allgather_thresholds_table = NULL;
325
326 static int MPIR_Allgather_RD_Allgather_Comm_MV2( void *sendbuf,
327                                  int sendcount,
328                                  MPI_Datatype sendtype,
329                                  void *recvbuf,
330                                  int recvcount,
331                                  MPI_Datatype recvtype, MPI_Comm comm_ptr)
332 {
333     return 0;
334 }
335
/* MVAPICH2 algorithm names mapped onto the SimGrid allgather implementations used by the tables. */
#define MPIR_Allgather_Bruck_MV2 simgrid::smpi::Coll_allgather_bruck::allgather
#define MPIR_Allgather_RD_MV2    simgrid::smpi::Coll_allgather_rdb::allgather
#define MPIR_Allgather_Ring_MV2  simgrid::smpi::Coll_allgather_ring::allgather
#define MPIR_2lvl_Allgather_MV2  simgrid::smpi::Coll_allgather_mvapich2_smp::allgather
340
341 static void init_mv2_allgather_tables_stampede(){
342   int i;
343   int agg_table_sum = 0;
344
345   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
346     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
347   mv2_allgather_tuning_table **table_ptrs = NULL;
348   mv2_allgather_num_ppn_conf = 3;
349   mv2_allgather_thresholds_table
350   = static_cast<mv2_allgather_tuning_table**>(xbt_malloc(sizeof(mv2_allgather_tuning_table *)
351       * mv2_allgather_num_ppn_conf));
352   table_ptrs = static_cast<mv2_allgather_tuning_table**>(xbt_malloc(sizeof(mv2_allgather_tuning_table *)
353       * mv2_allgather_num_ppn_conf));
354   mv2_size_allgather_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
355       mv2_allgather_num_ppn_conf));
356   mv2_allgather_table_ppn_conf
357   = static_cast<int*>(xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int)));
358   mv2_allgather_table_ppn_conf[0] = 1;
359   mv2_size_allgather_tuning_table[0] = 6;
360   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
361       {
362           2,
363           {0},
364           1,
365           {
366               {0, -1, &MPIR_Allgather_Ring_MV2},
367           },
368       },
369       {
370           4,
371           {0,0},
372           2,
373           {
374               {0, 262144, &MPIR_Allgather_RD_MV2},
375               {262144, -1, &MPIR_Allgather_Ring_MV2},
376           },
377       },
378       {
379           8,
380           {0,0},
381           2,
382           {
383               {0, 131072, &MPIR_Allgather_RD_MV2},
384               {131072, -1, &MPIR_Allgather_Ring_MV2},
385           },
386       },
387       {
388           16,
389           {0,0},
390           2,
391           {
392               {0, 131072, &MPIR_Allgather_RD_MV2},
393               {131072, -1, &MPIR_Allgather_Ring_MV2},
394           },
395       },
396       {
397           32,
398           {0,0},
399           2,
400           {
401               {0, 65536, &MPIR_Allgather_RD_MV2},
402               {65536, -1, &MPIR_Allgather_Ring_MV2},
403           },
404       },
405       {
406           64,
407           {0,0},
408           2,
409           {
410               {0, 32768, &MPIR_Allgather_RD_MV2},
411               {32768, -1, &MPIR_Allgather_Ring_MV2},
412           },
413       },
414   };
415   table_ptrs[0] = mv2_tmp_allgather_thresholds_table_1ppn;
416   mv2_allgather_table_ppn_conf[1] = 2;
417   mv2_size_allgather_tuning_table[1] = 6;
418   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
419       {
420           4,
421           {0,0},
422           2,
423           {
424               {0, 524288, &MPIR_Allgather_RD_MV2},
425               {524288, -1, &MPIR_Allgather_Ring_MV2},
426           },
427       },
428       {
429           8,
430           {0,1,0},
431           2,
432           {
433               {0, 32768, &MPIR_Allgather_RD_MV2},
434               {32768, 524288, &MPIR_Allgather_Ring_MV2},
435               {524288, -1, &MPIR_Allgather_Ring_MV2},
436           },
437       },
438       {
439           16,
440           {0,1,0},
441           2,
442           {
443               {0, 16384, &MPIR_Allgather_RD_MV2},
444               {16384, 524288, &MPIR_Allgather_Ring_MV2},
445               {524288, -1, &MPIR_Allgather_Ring_MV2},
446           },
447       },
448       {
449           32,
450           {1,1,0},
451           2,
452           {
453               {0, 65536, &MPIR_Allgather_RD_MV2},
454               {65536, 524288, &MPIR_Allgather_Ring_MV2},
455               {524288, -1, &MPIR_Allgather_Ring_MV2},
456           },
457       },
458       {
459           64,
460           {1,1,0},
461           2,
462           {
463               {0, 32768, &MPIR_Allgather_RD_MV2},
464               {32768, 524288, &MPIR_Allgather_Ring_MV2},
465               {524288, -1, &MPIR_Allgather_Ring_MV2},
466           },
467       },
468       {
469           128,
470           {1,1,0},
471           2,
472           {
473               {0, 65536, &MPIR_Allgather_RD_MV2},
474               {65536, 524288, &MPIR_Allgather_Ring_MV2},
475               {524288, -1, &MPIR_Allgather_Ring_MV2},
476           },
477       },
478   };
479   table_ptrs[1] = mv2_tmp_allgather_thresholds_table_2ppn;
480   mv2_allgather_table_ppn_conf[2] = 16;
481   mv2_size_allgather_tuning_table[2] = 6;
482   mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
483       {
484           16,
485           {0,0},
486           2,
487           {
488               {0, 1024, &MPIR_Allgather_RD_MV2},
489               {1024, -1, &MPIR_Allgather_Ring_MV2},
490           },
491       },
492       {
493           32,
494           {0,0},
495           2,
496           {
497               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
498               {1024, -1, &MPIR_Allgather_Ring_MV2},
499           },
500       },
501       {
502           64,
503           {0,0},
504           2,
505           {
506               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
507               {1024, -1, &MPIR_Allgather_Ring_MV2},
508           },
509       },
510       {
511           128,
512           {0,0},
513           2,
514           {
515               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
516               {1024, -1, &MPIR_Allgather_Ring_MV2},
517           },
518       },
519       {
520           256,
521           {0,0},
522           2,
523           {
524               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
525               {1024, -1, &MPIR_Allgather_Ring_MV2},
526           },
527       },
528       {
529           512,
530           {0,0},
531           2,
532           {
533               {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
534               {1024, -1, &MPIR_Allgather_Ring_MV2},
535           },
536       },
537
538   };
539   table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
540   agg_table_sum = 0;
541   for (i = 0; i < mv2_allgather_num_ppn_conf; i++) {
542       agg_table_sum += mv2_size_allgather_tuning_table[i];
543   }
544   mv2_allgather_thresholds_table[0] =
545       static_cast<mv2_allgather_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table)));
546   memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0],
547       (sizeof(mv2_allgather_tuning_table)
548           * mv2_size_allgather_tuning_table[0]));
549   for (i = 1; i < mv2_allgather_num_ppn_conf; i++) {
550       mv2_allgather_thresholds_table[i] =
551           mv2_allgather_thresholds_table[i - 1]
552                                          + mv2_size_allgather_tuning_table[i - 1];
553       memcpy(mv2_allgather_thresholds_table[i], table_ptrs[i],
554           (sizeof(mv2_allgather_tuning_table)
555               * mv2_size_allgather_tuning_table[i]));
556   }
557   xbt_free(table_ptrs);
558 }
559
560
561 /************ Gather variables and initializers                        */
562
563 typedef struct {
564   int min;
565   int max;
566   int (*MV2_pt_Gather_function)(void *sendbuf, int sendcnt,
567       MPI_Datatype sendtype, void *recvbuf, int recvcnt,
568       MPI_Datatype recvtype, int root, MPI_Comm  comm_ptr);
569 } mv2_gather_tuning_element;
570
571
572 typedef struct {
573   int numproc;
574   int size_inter_table;
575   mv2_gather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
576   int size_intra_table;
577   mv2_gather_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
578 } mv2_gather_tuning_table;
579
580 int mv2_size_gather_tuning_table=7;
581 mv2_gather_tuning_table * mv2_gather_thresholds_table=NULL; 
582
583 typedef int (*MV2_Gather_function_ptr) (void *sendbuf,
584     int sendcnt,
585     MPI_Datatype sendtype,
586     void *recvbuf,
587     int recvcnt,
588     MPI_Datatype recvtype,
589     int root, MPI_Comm comm);
590
591 MV2_Gather_function_ptr MV2_Gather_inter_leader_function = NULL;
592 MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL;
593
594
595
596 #define MPIR_Gather_MV2_Direct simgrid::smpi::Coll_gather_ompi_basic_linear::gather
597 #define MPIR_Gather_MV2_two_level_Direct simgrid::smpi::Coll_gather_mvapich2_two_level::gather
598 #define MPIR_Gather_intra simgrid::smpi::Coll_gather_mpich::gather
599
600
601 static void init_mv2_gather_tables_stampede(){
602
603   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
604     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
605   mv2_size_gather_tuning_table=7;
606   mv2_gather_thresholds_table = static_cast<mv2_gather_tuning_table*>(xbt_malloc(mv2_size_gather_tuning_table*
607       sizeof (mv2_gather_tuning_table)));
608   mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[]={
609       {16,
610           2,{{0, 524288, &MPIR_Gather_MV2_Direct},
611               {524288, -1, &MPIR_Gather_intra}},
612               1,{{0, -1, &MPIR_Gather_MV2_Direct}}},
613               {32,
614                   3,{{0, 16384, &MPIR_Gather_MV2_Direct},
615                       {16384, 131072, &MPIR_Gather_intra},
616                       {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
617                       1,{{0, -1, &MPIR_Gather_intra}}},
618                       {64,
619                           3,{{0, 256, &MPIR_Gather_MV2_two_level_Direct},
620                               {256, 16384, &MPIR_Gather_MV2_Direct},
621                               {256, -1, &MPIR_Gather_MV2_two_level_Direct}},
622                               1,{{0, -1, &MPIR_Gather_intra}}},
623                               {128,
624                                   3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
625                                       {512, 16384, &MPIR_Gather_MV2_Direct},
626                                       {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
627                                       1,{{0, -1, &MPIR_Gather_intra}}},
628                                       {256,
629                                           3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
630                                               {512, 16384, &MPIR_Gather_MV2_Direct},
631                                               {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
632                                               1,{{0, -1, &MPIR_Gather_intra}}},
633                                               {512,
634                                                   3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
635                                                       {512, 16384, &MPIR_Gather_MV2_Direct},
636                                                       {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
637                                                       1,{{0, -1, &MPIR_Gather_intra}}},
638                                                       {1024,
639                                                           3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct},
640                                                               {512, 16384, &MPIR_Gather_MV2_Direct},
641                                                               {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
642                                                               1,{{0, -1, &MPIR_Gather_intra}}},
643   };
644
645   memcpy(mv2_gather_thresholds_table, mv2_tmp_gather_thresholds_table,
646       mv2_size_gather_tuning_table * sizeof (mv2_gather_tuning_table));
647
648 }
649
650
651 /************ Allgatherv variables and initializers                        */
652
653 typedef struct {
654   int min;
655   int max;
656   int (*MV2_pt_Allgatherv_function)(void *sendbuf,
657       int sendcount,
658       MPI_Datatype sendtype,
659       void *recvbuf,
660       int *recvcounts,
661       int *displs,
662       MPI_Datatype recvtype,
663       MPI_Comm commg);
664 } mv2_allgatherv_tuning_element;
665
666 typedef struct {
667   int numproc;
668   int size_inter_table;
669   mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
670 } mv2_allgatherv_tuning_table;
671
672 int (*MV2_Allgatherv_function)(void *sendbuf,
673     int sendcount,
674     MPI_Datatype sendtype,
675     void *recvbuf,
676     int *recvcounts,
677     int *displs,
678     MPI_Datatype recvtype,
679     MPI_Comm comm);
680
681 int mv2_size_allgatherv_tuning_table = 0;
682 mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL;
683
684 #define MPIR_Allgatherv_Rec_Doubling_MV2 simgrid::smpi::Coll_allgatherv_mpich_rdb::allgatherv
685 #define MPIR_Allgatherv_Bruck_MV2 simgrid::smpi::Coll_allgatherv_ompi_bruck::allgatherv
686 #define MPIR_Allgatherv_Ring_MV2 simgrid::smpi::Coll_allgatherv_mpich_ring::allgatherv
687
688
689 static void init_mv2_allgatherv_tables_stampede(){
690   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
691     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
692   mv2_size_allgatherv_tuning_table = 6;
693   mv2_allgatherv_thresholds_table = static_cast<mv2_allgatherv_tuning_table*>(xbt_malloc(mv2_size_allgatherv_tuning_table *
694       sizeof (mv2_allgatherv_tuning_table)));
695   mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
696       {
697           16,
698           2,
699           {
700               {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
701               {512, -1, &MPIR_Allgatherv_Ring_MV2},
702           },
703       },
704       {
705           32,
706           2,
707           {
708               {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
709               {512, -1, &MPIR_Allgatherv_Ring_MV2},
710           },
711       },
712       {
713           64,
714           2,
715           {
716               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
717               {256, -1, &MPIR_Allgatherv_Ring_MV2},
718           },
719       },
720       {
721           128,
722           2,
723           {
724               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
725               {256, -1, &MPIR_Allgatherv_Ring_MV2},
726           },
727       },
728       {
729           256,
730           2,
731           {
732               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
733               {256, -1, &MPIR_Allgatherv_Ring_MV2},
734           },
735       },
736       {
737           512,
738           2,
739           {
740               {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
741               {256, -1, &MPIR_Allgatherv_Ring_MV2},
742           },
743       },
744
745   };
746   memcpy(mv2_allgatherv_thresholds_table, mv2_tmp_allgatherv_thresholds_table,
747       mv2_size_allgatherv_tuning_table * sizeof (mv2_allgatherv_tuning_table));
748 }
749
750
751 /************ Allreduce variables and initializers                        */
752
753 typedef struct {
754   int min;
755   int max;
756   int (*MV2_pt_Allreducection)(void *sendbuf,
757       void *recvbuf,
758       int count,
759       MPI_Datatype datatype,
760       MPI_Op op, MPI_Comm comm);
761 } mv2_allreduce_tuning_element;
762
763 typedef struct {
764   int numproc;
765   int mcast_enabled;
766   int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS];
767   int size_inter_table;
768   mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
769   int size_intra_table;
770   mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
771 } mv2_allreduce_tuning_table;
772
773
774 int (*MV2_Allreducection)(void *sendbuf,
775     void *recvbuf,
776     int count,
777     MPI_Datatype datatype,
778     MPI_Op op, MPI_Comm comm)=NULL;
779
780
781 int (*MV2_Allreduce_intra_function)( void *sendbuf,
782     void *recvbuf,
783     int count,
784     MPI_Datatype datatype,
785     MPI_Op op, MPI_Comm comm)=NULL;
786
787 int mv2_size_allreduce_tuning_table = 0;
788 mv2_allreduce_tuning_table *mv2_allreduce_thresholds_table = NULL;
789
790
791
792
793
794 static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2( void *sendbuf,
795     void *recvbuf,
796     int count,
797     MPI_Datatype datatype,
798     MPI_Op op, MPI_Comm comm)
799
800   return 0;
801 }
802
803 static  int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2( void *sendbuf,
804     void *recvbuf,
805     int count,
806     MPI_Datatype datatype,
807     MPI_Op op, MPI_Comm  comm)
808 {
809   return 0;
810 }
811
812 static  int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf,
813     void *recvbuf,
814     int count,
815     MPI_Datatype datatype,
816     MPI_Op op, MPI_Comm  comm)
817 {
818   simgrid::smpi::Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
819   return MPI_SUCCESS;
820 }
821
822 static  int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf,
823     void *recvbuf,
824     int count,
825     MPI_Datatype datatype,
826     MPI_Op op, MPI_Comm  comm)
827 {
828   simgrid::smpi::Colls::reduce(sendbuf,recvbuf,count,datatype,op,0,comm);
829   return MPI_SUCCESS;
830 }
831
/* MVAPICH2 algorithm names mapped onto the SimGrid allreduce implementations used by the table. */
#define MPIR_Allreduce_pt2pt_rd_MV2  simgrid::smpi::Coll_allreduce_rdb::allreduce
#define MPIR_Allreduce_pt2pt_rs_MV2  simgrid::smpi::Coll_allreduce_mvapich2_rs::allreduce
#define MPIR_Allreduce_two_level_MV2 simgrid::smpi::Coll_allreduce_mvapich2_two_level::allreduce
835
836
837 static void init_mv2_allreduce_tables_stampede(){
838   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
839     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
840   mv2_size_allreduce_tuning_table = 8;
841   mv2_allreduce_thresholds_table = static_cast<mv2_allreduce_tuning_table*>(xbt_malloc(mv2_size_allreduce_tuning_table *
842       sizeof (mv2_allreduce_tuning_table)));
843   mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
844       {
845           16,
846           0,
847           {1, 0},
848           2,
849           {
850               {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
851               {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
852           },
853           2,
854           {
855               {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
856               {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
857           },
858       },
859       {
860           32,
861           0,
862           {1, 1, 0},
863           3,
864           {
865               {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
866               {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
867               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
868           },
869           2,
870           {
871               {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
872               {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
873           },
874       },
875       {
876           64,
877           0,
878           {1, 1, 0},
879           3,
880           {
881               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
882               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
883               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
884           },
885           2,
886           {
887               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
888               {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
889           },
890       },
891       {
892           128,
893           0,
894           {1, 1, 0},
895           3,
896           {
897               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
898               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
899               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
900           },
901           2,
902           {
903               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
904               {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
905           },
906       },
907       {
908           256,
909           0,
910           {1, 1, 0},
911           3,
912           {
913               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
914               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
915               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
916           },
917           2,
918           {
919               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
920               {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
921           },
922       },
923       {
924           512,
925           0,
926           {1, 1, 0},
927           3,
928           {
929               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
930               {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
931               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
932           },
933           2,
934           {
935               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
936               {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
937           },
938       },
939       {
940           1024,
941           0,
942           {1, 1, 1, 0},
943           4,
944           {
945               {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
946               {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
947               {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
948               {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
949           },
950           2,
951           {
952               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
953               {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
954           },
955       },
956       {
957           2048,
958           0,
959           {1, 1, 1, 0},
960           4,
961           {
962               {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
963               {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
964               {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
965               {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
966               {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
967           },
968           2,
969           {
970               {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
971               {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
972           },
973       },
974
975   };
976   memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
977       mv2_size_allreduce_tuning_table * sizeof (mv2_allreduce_tuning_table));
978 }
979
980
981
982
983 typedef struct {
984     int min;
985     int max;
986     int (*MV2_pt_Bcast_function) (void *buf, int count, MPI_Datatype datatype,
987                                   int root, MPI_Comm comm_ptr);
988     int zcpy_pipelined_knomial_factor;
989 } mv2_bcast_tuning_element;
990
991 typedef struct {
992     int numproc;
993     int bcast_segment_size;
994     int intra_node_knomial_factor;
995     int inter_node_knomial_factor;
996     int is_two_level_bcast[MV2_MAX_NB_THRESHOLDS];
997     int size_inter_table;
998     mv2_bcast_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
999     int size_intra_table;
1000     mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1001 } mv2_bcast_tuning_table;
1002
1003 int mv2_size_bcast_tuning_table = 0;
1004 mv2_bcast_tuning_table *mv2_bcast_thresholds_table = NULL;
1005
1006
1007 int (*MV2_Bcast_function) (void *buffer, int count, MPI_Datatype datatype,
1008                            int root, MPI_Comm comm_ptr) = NULL;
1009
1010 int (*MV2_Bcast_intra_node_function) (void *buffer, int count, MPI_Datatype datatype,
1011                                       int root, MPI_Comm comm_ptr) = NULL;
1012
/* Default bcast tuning parameters. */
int zcpy_knomial_factor = 2;
int mv2_pipelined_zcpy_knomial_factor = -1;
int bcast_segment_size = 8192;
int mv2_inter_node_knomial_factor = 4;
int mv2_intra_node_knomial_factor = 4;

/* Bcast thresholds. */
#define mv2_bcast_two_level_system_size  64
#define mv2_bcast_short_msg             16384
/* Parenthesized so the product is a single operand wherever the macro is
 * expanded: without the parentheses, `x / mv2_bcast_large_msg` would expand to
 * `x / 512 * 1024`, i.e. `(x / 512) * 1024`. */
#define mv2_bcast_large_msg            (512*1024)

#define INTRA_NODE_ROOT 0
1023
/* MVAPICH2 bcast algorithm names mapped onto SimGrid SMPI implementations.
 * Several names share one implementation: the two pipelined variants and the
 * shmem bcast all resolve to Coll_bcast_mpich::bcast, both scatter/ring/allgather
 * variants to Coll_bcast_scatter_LR_allgather::bcast, and both inter-node helpers
 * to Coll_bcast_mvapich2_inter_node::bcast. */
#define MPIR_Pipelined_Bcast_Zcpy_MV2             simgrid::smpi::Coll_bcast_mpich::bcast
#define MPIR_Pipelined_Bcast_MV2                  simgrid::smpi::Coll_bcast_mpich::bcast
#define MPIR_Bcast_binomial_MV2                   simgrid::smpi::Coll_bcast_binomial_tree::bcast
#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
#define MPIR_Bcast_scatter_doubling_allgather_MV2 simgrid::smpi::Coll_bcast_scatter_rdb_allgather::bcast
#define MPIR_Bcast_scatter_ring_allgather_MV2     simgrid::smpi::Coll_bcast_scatter_LR_allgather::bcast
#define MPIR_Shmem_Bcast_MV2                      simgrid::smpi::Coll_bcast_mpich::bcast
#define MPIR_Bcast_tune_inter_node_helper_MV2     simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Bcast_inter_node_helper_MV2          simgrid::smpi::Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Knomial_Bcast_intra_node_MV2         simgrid::smpi::Coll_bcast_mvapich2_knomial_intra_node::bcast
#define MPIR_Bcast_intra_MV2                      simgrid::smpi::Coll_bcast_mvapich2_intra_node::bcast
1035
/* Fills mv2_bcast_thresholds_table with the Stampede bcast tuning data.
 *
 * Steps, in order:
 *  - installs smpi_coll_cleanup_mvapich2 as the collective cleanup callback when none is set yet;
 *  - sets mv2_size_bcast_tuning_table to 8 and allocates that many entries with xbt_malloc;
 *  - builds the 8 entries (numproc 16, 32, ..., 2048) in a local array and memcpy()s them
 *    into the allocated table.
 *
 * Takes no argument and returns nothing; its only effects are the writes to the
 * file-scope variables named above.
 * NOTE(review): every call allocates a fresh table without releasing a previous one --
 * presumably this runs once and the cleanup callback frees the table; confirm.
 */
1036 static void init_mv2_bcast_tables_stampede(){
1037  //Stampede,
1038   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1039     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
1040   mv2_size_bcast_tuning_table=8;
1041   mv2_bcast_thresholds_table = static_cast<mv2_bcast_tuning_table*>(xbt_malloc(mv2_size_bcast_tuning_table *
1042   sizeof (mv2_bcast_tuning_table)));
1043
  /* Each entry is initialized positionally, following mv2_bcast_tuning_table:
   *   numproc,
   *   bcast_segment_size, intra_node_knomial_factor, inter_node_knomial_factor,
   *   is_two_level_bcast flags,
   *   size_inter_table, inter_leader entries,
   *   size_intra_table, intra_node entries.
   * Inner entries are {min, max, function, zcpy_pipelined_knomial_factor}. */
1044   mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[]={
1045     {
1046             16,
1047             8192, 4, 4,
1048             {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
1049             11,
1050             {
1051               {0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1052               {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1053               {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1054               {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1055               {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
1056               {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1057               {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1058               {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1059               {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
1060               {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
1061               {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
1062             },
1063             11,
1064             {
1065               {0, 8, &MPIR_Shmem_Bcast_MV2, 2},
1066               {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
1067               {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
1068               {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
1069               {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
1070               {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
1071               {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1072               {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
1073               {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1074               {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1075               {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1076             }
1077     },
1078     {
1079             32,
1080             8192, 4, 4,
1081             {1, 1, 1, 1, 1, 1, 1, 1},
1082             8,
1083             {
1084               {0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1085               {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1086               {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1087               {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1088               {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1089               {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1090               {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1091               {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}
1092             },
1093             8,
1094             {
1095               {0, 128, &MPIR_Shmem_Bcast_MV2, 2},
1096               {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
1097               {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1098               {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
1099               {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1100               {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
1101               {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1102               {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}
1103             }
1104     },
1105     {
1106             64,
1107             8192, 4, 4,
1108             {1, 1, 1, 1, 1, 1, 1, 1, 1},
1109             9,
1110             {
1111               {0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1112               {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1113               {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1114               {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1115               {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1116               {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1117               {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1118               {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1119               {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}
1120             },
1121             9,
1122             {
1123               {0, 2, &MPIR_Shmem_Bcast_MV2, 4},
1124               {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
1125               {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
1126               {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
1127               {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
1128               {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
1129               {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
1130               {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
1131               {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}
1132             }
1133     },
1134     {
1135             128,
1136             8192, 4, 4,
1137             {1, 1, 1, 0},
1138             4,
1139             {
1140               {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1141               {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1142               {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1143               {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
1144             },
1145             4,
1146             {
1147               {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1148               {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1149               {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
              /* No intra-node function for this last range; the matching
               * is_two_level_bcast flag of this entry (4th value) is 0. */
1150               {524288, -1, NULL, -1}
1151             }
1152     },
1153     {
1154             256,
1155             8192, 4, 4,
1156             {1, 1, 1, 1, 1},
1157             5,
1158             {
1159               {0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1160               {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1161               {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
1162               {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1163               {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1164             },
1165             5,
1166             {
1167               {0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1168               {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1169               {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1170               {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
1171               {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1172             }
1173     },
1174     {
1175             512,
1176             8192, 4, 4,
1177             {1, 1, 1, 1, 1},
1178             5,
1179             {
1180               {0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1181               {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1182               {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1183               {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
1184               {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1185             },
1186             5,
1187             {
1188               {0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1189               {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1190               {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
1191               {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
1192               {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}
1193             }
1194     },
1195     {
1196             1024,
1197             8192, 4, 4,
1198             {1, 1, 1, 1, 1},
1199             5,
1200             {
1201               {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1202               {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1203               {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1204               {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1205               {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1206             },
1207             5,
1208             {
1209               {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
1210               {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1211               {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
1212               {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1213               {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1214             }
1215     },
1216     {
1217             2048,
1218             8192, 4, 4,
1219             {1, 1, 1, 1, 1, 1, 1},
1220             7,
1221             {
1222               {0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1223               {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1224               {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
1225               {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
1226               {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
1227               {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
1228               {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
1229             },
1230             7,
1231             {
1232               {0, 16, &MPIR_Shmem_Bcast_MV2, 8},
1233               {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
1234               {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
1235               {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
1236               {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
1237               {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
1238               {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
1239             }
1240     }
1241   };
1242
        /* Copy the locally built entries into the table allocated above. */
1243         memcpy(mv2_bcast_thresholds_table, mv2_tmp_bcast_thresholds_table,
1244                     mv2_size_bcast_tuning_table * sizeof (mv2_bcast_tuning_table));
1245 }
1246
1247
1248 /************ Reduce variables and initializers                        */
1249
1250 typedef struct {
1251   int min;
1252   int max;
1253   int (*MV2_pt_Reduce_function)(void *sendbuf,
1254       void *recvbuf,
1255       int count,
1256       MPI_Datatype datatype,
1257       MPI_Op op,
1258       int root,
1259       MPI_Comm  comm_ptr);
1260 } mv2_reduce_tuning_element;
1261
1262 typedef struct {
1263   int numproc;
1264   int inter_k_degree;
1265   int intra_k_degree;
1266   int is_two_level_reduce[MV2_MAX_NB_THRESHOLDS];
1267   int size_inter_table;
1268   mv2_reduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1269   int size_intra_table;
1270   mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1271 } mv2_reduce_tuning_table;
1272
1273 int mv2_size_reduce_tuning_table = 0;
1274 mv2_reduce_tuning_table *mv2_reduce_thresholds_table = NULL;
1275
1276
1277 int mv2_reduce_intra_knomial_factor = -1;
1278 int mv2_reduce_inter_knomial_factor = -1;
1279
1280 int (*MV2_Reduce_function)( void *sendbuf,
1281     void *recvbuf,
1282     int count,
1283     MPI_Datatype datatype,
1284     MPI_Op op,
1285     int root,
1286     MPI_Comm  comm_ptr)=NULL;
1287
1288 int (*MV2_Reduce_intra_function)( void *sendbuf,
1289     void *recvbuf,
1290     int count,
1291     MPI_Datatype datatype,
1292     MPI_Op op,
1293     int root,
1294     MPI_Comm  comm_ptr)=NULL;
1295
1296
/* MVAPICH2 reduce algorithm names mapped onto SimGrid SMPI implementations.
 * The inter- and intra-node knomial wrappers share
 * Coll_reduce_mvapich2_knomial::reduce. */
#define MPIR_Reduce_inter_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
#define MPIR_Reduce_intra_knomial_wrapper_MV2 simgrid::smpi::Coll_reduce_mvapich2_knomial::reduce
#define MPIR_Reduce_binomial_MV2              simgrid::smpi::Coll_reduce_binomial::reduce
#define MPIR_Reduce_redscat_gather_MV2        simgrid::smpi::Coll_reduce_scatter_gather::reduce
#define MPIR_Reduce_shmem_MV2                 simgrid::smpi::Coll_reduce_ompi_basic_linear::reduce
#define MPIR_Reduce_two_level_helper_MV2      simgrid::smpi::Coll_reduce_mvapich2_two_level::reduce
1303
1304
/* Fills mv2_reduce_thresholds_table with the Stampede reduce tuning data.
 *
 * Steps, in order:
 *  - installs smpi_coll_cleanup_mvapich2 as the collective cleanup callback when none is set yet;
 *  - sets mv2_size_reduce_tuning_table to 8 and allocates that many entries with xbt_malloc;
 *  - builds the 8 entries (numproc 16, 32, ..., 2048) in a local array and memcpy()s them
 *    into the allocated table.
 *
 * Takes no argument and returns nothing; its only effects are the writes to the
 * file-scope variables named above.
 * NOTE(review): every call allocates a fresh table without releasing a previous one --
 * presumably this runs once and the cleanup callback frees the table; confirm.
 */
1305 static void init_mv2_reduce_tables_stampede(){
1306   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1307     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
1308   /*Stampede*/
1309   mv2_size_reduce_tuning_table = 8;
1310   mv2_reduce_thresholds_table = static_cast<mv2_reduce_tuning_table*>(xbt_malloc(mv2_size_reduce_tuning_table *
1311       sizeof (mv2_reduce_tuning_table)));
  /* Each entry is initialized positionally, following mv2_reduce_tuning_table:
   *   numproc, inter_k_degree, intra_k_degree,
   *   is_two_level_reduce flags,
   *   size_inter_table, inter_leader entries,
   *   size_intra_table, intra_node entries.
   * Inner entries are {min, max, function}. */
1312   mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
1313       {
1314           16,
1315           4,
1316           4,
1317           {1, 0, 0},
1318           3,
1319           {
1320               {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1321               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1322               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1323           },
1324           2,
1325           {
1326               {0, 65536, &MPIR_Reduce_shmem_MV2},
1327               {65536,-1,  &MPIR_Reduce_binomial_MV2},
1328           },
1329       },
1330       {
1331           32,
1332           4,
1333           4,
1334           {1, 1, 1, 1, 0, 0, 0},
1335           7,
1336           {
1337               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1338               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1339               {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1340               {32768, 65536, &MPIR_Reduce_binomial_MV2},
1341               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1342               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1343               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1344           },
1345           6,
1346           {
1347               {0, 8192, &MPIR_Reduce_shmem_MV2},
1348               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1349               {16384, 32768, &MPIR_Reduce_shmem_MV2},
1350               {32768, 65536, &MPIR_Reduce_shmem_MV2},
1351               {65536, 262144, &MPIR_Reduce_shmem_MV2},
1352               {262144,-1,  &MPIR_Reduce_binomial_MV2},
1353           },
1354       },
1355       {
1356           64,
1357           4,
1358           4,
1359           {1, 1, 1, 1, 0},
1360           5,
1361           {
1362               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1363               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1364               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1365               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1366               {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
1367           },
1368           5,
1369           {
1370               {0, 8192, &MPIR_Reduce_shmem_MV2},
1371               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1372               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1373               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1374               {262144, -1, &MPIR_Reduce_binomial_MV2},
1375           },
1376       },
1377       {
1378           128,
1379           4,
1380           4,
1381           {1, 0, 1, 0, 1, 0},
1382           6,
1383           {
1384               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1385               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1386               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1387               {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1388               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1389               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1390           },
1391           5,
1392           {
1393               {0, 8192, &MPIR_Reduce_shmem_MV2},
1394               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1395               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1396               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1397               {262144, -1, &MPIR_Reduce_binomial_MV2},
1398           },
1399       },
1400       {
1401           256,
1402           4,
1403           4,
1404           {1, 1, 1, 0, 1, 1, 0},
1405           7,
1406           {
1407               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1408               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1409               {16384, 32768, &MPIR_Reduce_binomial_MV2},
1410               {32768, 65536, &MPIR_Reduce_binomial_MV2},
1411               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1412               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1413               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1414           },
1415           6,
1416           {
1417               {0, 8192, &MPIR_Reduce_shmem_MV2},
1418               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1419               {16384, 32768, &MPIR_Reduce_shmem_MV2},
1420               {32768, 65536, &MPIR_Reduce_shmem_MV2},
1421               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1422               {262144, -1, &MPIR_Reduce_binomial_MV2},
1423           },
1424       },
1425       {
1426           512,
1427           4,
1428           4,
1429           {1, 0, 1, 1, 1, 0},
1430           6,
1431           {
1432               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1433               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1434               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1435               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1436               {262144, 1048576, &MPIR_Reduce_binomial_MV2},
1437               {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
1438           },
1439           5,
1440           {
1441               {0, 8192, &MPIR_Reduce_shmem_MV2},
1442               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1443               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1444               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1445               {262144, -1, &MPIR_Reduce_binomial_MV2},
1446           },
1447       },
1448       {
1449           1024,
1450           4,
1451           4,
1452           {1, 0, 1, 1, 1},
1453           5,
1454           {
1455               {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1456               {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1457               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1458               {65536, 262144, &MPIR_Reduce_binomial_MV2},
1459               {262144, -1, &MPIR_Reduce_binomial_MV2},
1460           },
1461           5,
1462           {
1463               {0, 8192, &MPIR_Reduce_shmem_MV2},
1464               {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1465               {16384, 65536, &MPIR_Reduce_shmem_MV2},
1466               {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1467               {262144, -1, &MPIR_Reduce_binomial_MV2},
1468           },
1469       },
1470       {
1471           2048,
1472           4,
1473           4,
1474           {1, 0, 1, 1, 1,1},
1475           6,
1476           {
1477               {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1478               {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
1479               {4096, 16384, &MPIR_Reduce_binomial_MV2},
1480               {16384, 65536, &MPIR_Reduce_binomial_MV2},
1481               {65536, 131072, &MPIR_Reduce_binomial_MV2},
1482               {131072, -1, &MPIR_Reduce_binomial_MV2},
1483           },
1484           6,
1485           {
1486               {0, 2048, &MPIR_Reduce_shmem_MV2},
1487               {2048, 4096, &MPIR_Reduce_shmem_MV2},
1488               {4096, 16384, &MPIR_Reduce_shmem_MV2},
1489               {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
1490               {65536, 131072, &MPIR_Reduce_binomial_MV2},
1491               {131072, -1, &MPIR_Reduce_shmem_MV2},
1492           },
1493       },
1494
1495   };
  /* Copy the locally built entries into the table allocated above. */
1496   memcpy(mv2_reduce_thresholds_table, mv2_tmp_reduce_thresholds_table,
1497       mv2_size_reduce_tuning_table * sizeof (mv2_reduce_tuning_table));
1498 }
1499
1500 /************ Reduce scatter variables and initializers                        */
1501
1502 typedef struct {
1503   int min;
1504   int max;
1505   int (*MV2_pt_Red_scat_function)(void *sendbuf,
1506       void *recvbuf,
1507       int *recvcnts,
1508       MPI_Datatype datatype,
1509       MPI_Op op,
1510       MPI_Comm comm_ptr);
1511 } mv2_red_scat_tuning_element;
1512
1513 typedef struct {
1514   int numproc;
1515   int size_inter_table;
1516   mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1517 } mv2_red_scat_tuning_table;
1518
1519 int mv2_size_red_scat_tuning_table = 0;
1520 mv2_red_scat_tuning_table *mv2_red_scat_thresholds_table = NULL;
1521
1522
1523 int (*MV2_Red_scat_function)(void *sendbuf,
1524     void *recvbuf,
1525     int *recvcnts,
1526     MPI_Datatype datatype,
1527     MPI_Op op,
1528     MPI_Comm comm_ptr);
1529
1530
1531
1532 static  int MPIR_Reduce_Scatter_Basic_MV2(void *sendbuf,
1533     void *recvbuf,
1534     int *recvcnts,
1535     MPI_Datatype datatype,
1536     MPI_Op op,
1537     MPI_Comm comm)
1538 {
1539   simgrid::smpi::Coll_reduce_scatter_default::reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm);
1540   return MPI_SUCCESS;
1541 }
/* MVAPICH2 reduce_scatter algorithm names mapped onto SimGrid SMPI implementations. */
#define MPIR_Reduce_scatter_non_comm_MV2    simgrid::smpi::Coll_reduce_scatter_mpich_noncomm::reduce_scatter
#define MPIR_Reduce_scatter_Rec_Halving_MV2 simgrid::smpi::Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter
#define MPIR_Reduce_scatter_Pair_Wise_MV2   simgrid::smpi::Coll_reduce_scatter_mpich_pair::reduce_scatter
1545
1546
1547
1548
/* Fills mv2_red_scat_thresholds_table with the Stampede reduce_scatter tuning data.
 *
 * Steps, in order:
 *  - installs smpi_coll_cleanup_mvapich2 as the collective cleanup callback when none is set yet;
 *  - sets mv2_size_red_scat_tuning_table to 6 and allocates that many entries with xbt_malloc;
 *  - builds the 6 entries (numproc 16, 32, ..., 512) in a local array and memcpy()s them
 *    into the allocated table.
 *
 * Takes no argument and returns nothing; its only effects are the writes to the
 * file-scope variables named above.
 * NOTE(review): every call allocates a fresh table without releasing a previous one --
 * presumably this runs once and the cleanup callback frees the table; confirm.
 */
1549 static void init_mv2_reduce_scatter_tables_stampede(){
1550   if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1551     simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
1552   mv2_size_red_scat_tuning_table = 6;
1553   mv2_red_scat_thresholds_table = static_cast<mv2_red_scat_tuning_table*>(xbt_malloc(mv2_size_red_scat_tuning_table *
1554       sizeof (mv2_red_scat_tuning_table)));
  /* Each entry is initialized positionally, following mv2_red_scat_tuning_table:
   *   numproc, size_inter_table, inter_leader entries.
   * Inner entries are {min, max, function}. */
1555   mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
1556       {
1557           16,
1558           3,
1559           {
1560               {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1561               {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1562               {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1563           },
1564       },
1565       {
1566           32,
1567           3,
1568           {
1569               {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
1570               {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1571               {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1572           },
1573       },
1574       {
1575           64,
1576           3,
1577           {
1578               {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
1579               {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1580               {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
1581           },
1582       },
1583       {
1584           128,
1585           2,
1586           {
1587               {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
1588               {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1589           },
1590       },
1591       {
1592           256,
1593           2,
1594           {
1595               {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
1596               {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1597           },
1598       },
1599       {
1600           512,
1601           2,
1602           {
1603               {0, 256, &MPIR_Reduce_Scatter_Basic_MV2},
1604               {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
1605           },
1606       },
1607
1608   };
  /* Copy the locally built entries into the table allocated above. */
1609   memcpy(mv2_red_scat_thresholds_table, mv2_tmp_red_scat_thresholds_table,
1610       mv2_size_red_scat_tuning_table * sizeof (mv2_red_scat_tuning_table));
1611 }
1612
1613 /************ Scatter variables and initializers                        */
1614
1615 typedef struct {
1616   int min;
1617   int max;
1618   int (*MV2_pt_Scatter_function)(void *sendbuf,
1619       int sendcnt,
1620       MPI_Datatype sendtype,
1621       void *recvbuf,
1622       int recvcnt,
1623       MPI_Datatype recvtype,
1624       int root, MPI_Comm comm);
1625 } mv2_scatter_tuning_element;
1626
1627 typedef struct {
1628   int numproc;
1629   int size_inter_table;
1630   mv2_scatter_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
1631   int size_intra_table;
1632   mv2_scatter_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
1633 } mv2_scatter_tuning_table;
1634
1635
1636 int *mv2_scatter_table_ppn_conf = NULL;
1637 int mv2_scatter_num_ppn_conf = 1;
1638 int *mv2_size_scatter_tuning_table = NULL;
1639 mv2_scatter_tuning_table **mv2_scatter_thresholds_table = NULL;
1640
1641 int (*MV2_Scatter_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
1642     void *recvbuf, int recvcount, MPI_Datatype recvtype,
1643     int root, MPI_Comm comm)=NULL;
1644
1645 int (*MV2_Scatter_intra_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
1646     void *recvbuf, int recvcount, MPI_Datatype recvtype,
1647     int root, MPI_Comm comm)=NULL;
1648 int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf,
1649     int sendcnt,
1650     MPI_Datatype sendtype,
1651     void *recvbuf,
1652     int recvcnt,
1653     MPI_Datatype recvtype,
1654     int root, MPI_Comm comm_ptr);
1655
1656 int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf,
1657     int sendcnt,
1658     MPI_Datatype sendtype,
1659     void *recvbuf,
1660     int recvcnt,
1661     MPI_Datatype recvtype,
1662     int root, MPI_Comm comm_ptr)
1663 {
1664   return 0;
1665 }
1666
/* MVAPICH2 scatter algorithm names mapped onto SimGrid SMPI implementations. */
#define MPIR_Scatter_MV2_Binomial           simgrid::smpi::Coll_scatter_ompi_binomial::scatter
#define MPIR_Scatter_MV2_Direct             simgrid::smpi::Coll_scatter_ompi_basic_linear::scatter
#define MPIR_Scatter_MV2_two_level_Binomial simgrid::smpi::Coll_scatter_mvapich2_two_level_binomial::scatter
#define MPIR_Scatter_MV2_two_level_Direct   simgrid::smpi::Coll_scatter_mvapich2_two_level_direct::scatter
1671
1672
1673
1674
1675 static void init_mv2_scatter_tables_stampede(){
1676     if(simgrid::smpi::Colls::smpi_coll_cleanup_callback==NULL)
1677       simgrid::smpi::Colls::smpi_coll_cleanup_callback=&smpi_coll_cleanup_mvapich2;
1678
1679     int agg_table_sum = 0;
1680     int i;
1681     mv2_scatter_tuning_table **table_ptrs = NULL;
1682     mv2_scatter_num_ppn_conf = 3;
1683     mv2_scatter_thresholds_table
1684     = static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table *)
1685         * mv2_scatter_num_ppn_conf));
1686     table_ptrs = static_cast<mv2_scatter_tuning_table**>(xbt_malloc(sizeof(mv2_scatter_tuning_table *)
1687         * mv2_scatter_num_ppn_conf));
1688     mv2_size_scatter_tuning_table = static_cast<int*>(xbt_malloc(sizeof(int) *
1689         mv2_scatter_num_ppn_conf));
1690     mv2_scatter_table_ppn_conf
1691     = static_cast<int*>(xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int)));
1692     mv2_scatter_table_ppn_conf[0] = 1;
1693     mv2_size_scatter_tuning_table[0] = 6;
1694     mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
1695         {2,
1696             1,
1697             {
1698                 {0, -1, &MPIR_Scatter_MV2_Binomial},
1699             },
1700             1,
1701             {
1702                 {0, -1, &MPIR_Scatter_MV2_Binomial},
1703             },
1704         },
1705
1706         {4,
1707             1,
1708             {
1709                 {0, -1, &MPIR_Scatter_MV2_Direct},
1710             },
1711             1,
1712             {
1713                 {0, -1, &MPIR_Scatter_MV2_Direct},
1714             },
1715         },
1716
1717         {8,
1718             1,
1719             {
1720                 {0, -1, &MPIR_Scatter_MV2_Direct},
1721             },
1722             1,
1723             {
1724                 {0, -1, &MPIR_Scatter_MV2_Direct},
1725             },
1726         },
1727
1728         {16,
1729             1,
1730             {
1731                 {0, -1, &MPIR_Scatter_MV2_Direct},
1732             },
1733             1,
1734             {
1735                 {0, -1, &MPIR_Scatter_MV2_Direct},
1736             },
1737         },
1738
1739         {32,
1740             1,
1741             {
1742                 {0, -1, &MPIR_Scatter_MV2_Direct},
1743             },
1744             1,
1745             {
1746                 {0, -1, &MPIR_Scatter_MV2_Direct},
1747             },
1748         },
1749
1750         {64,
1751             2,
1752             {
1753                 {0, 32, &MPIR_Scatter_MV2_Binomial},
1754                 {32, -1, &MPIR_Scatter_MV2_Direct},
1755             },
1756             1,
1757             {
1758                 {0, -1, &MPIR_Scatter_MV2_Binomial},
1759             },
1760         },
1761     };
1762     table_ptrs[0] = mv2_tmp_scatter_thresholds_table_1ppn;
1763     mv2_scatter_table_ppn_conf[1] = 2;
1764     mv2_size_scatter_tuning_table[1] = 6;
1765     mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
1766         {4,
1767             2,
1768             {
1769                 {0, 4096, &MPIR_Scatter_MV2_Binomial},
1770                 {4096, -1, &MPIR_Scatter_MV2_Direct},
1771             },
1772             1,
1773             {
1774                 {0, -1, &MPIR_Scatter_MV2_Direct},
1775             },
1776         },
1777
1778         {8,
1779             2,
1780             {
1781                 {0, 512, &MPIR_Scatter_MV2_two_level_Direct},
1782                 {512, -1, &MPIR_Scatter_MV2_Direct},
1783             },
1784             1,
1785             {
1786                 {0, -1, &MPIR_Scatter_MV2_Binomial},
1787             },
1788         },
1789
1790         {16,
1791             2,
1792             {
1793                 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1794                 {2048, -1, &MPIR_Scatter_MV2_Direct},
1795             },
1796             1,
1797             {
1798                 {0, -1, &MPIR_Scatter_MV2_Binomial},
1799             },
1800         },
1801
1802         {32,
1803             2,
1804             {
1805                 {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1806                 {2048, -1, &MPIR_Scatter_MV2_Direct},
1807             },
1808             1,
1809             {
1810                 {0, -1, &MPIR_Scatter_MV2_Binomial},
1811             },
1812         },
1813
1814         {64,
1815             2,
1816             {
1817                 {0, 8192, &MPIR_Scatter_MV2_two_level_Direct},
1818                 {8192, -1, &MPIR_Scatter_MV2_Direct},
1819             },
1820             1,
1821             {
1822                 {0, -1, &MPIR_Scatter_MV2_Binomial},
1823             },
1824         },
1825
1826         {128,
1827             4,
1828             {
1829                 {0, 16, &MPIR_Scatter_MV2_Binomial},
1830                 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1831                 {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1832                 {16384, -1, &MPIR_Scatter_MV2_Direct},
1833             },
1834             1,
1835             {
1836                 {0, 128, &MPIR_Scatter_MV2_Direct},
1837                 {128, -1, &MPIR_Scatter_MV2_Binomial},
1838             },
1839         },
1840     };
1841     table_ptrs[1] = mv2_tmp_scatter_thresholds_table_2ppn;
1842     mv2_scatter_table_ppn_conf[2] = 16;
1843     mv2_size_scatter_tuning_table[2] = 8;
1844     mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
1845         {
1846             16,
1847             2,
1848             {
1849                 {0, 256, &MPIR_Scatter_MV2_Binomial},
1850                 {256, -1, &MPIR_Scatter_MV2_Direct},
1851             },
1852             1,
1853             {
1854                 { 0, -1, &MPIR_Scatter_MV2_Direct},
1855             },
1856         },
1857
1858         {
1859             32,
1860             2,
1861             {
1862                 {0, 512, &MPIR_Scatter_MV2_Binomial},
1863                 {512, -1, &MPIR_Scatter_MV2_Direct},
1864             },
1865             1,
1866             {
1867                 { 0, -1, &MPIR_Scatter_MV2_Direct},
1868             },
1869         },
1870
1871         {
1872             64,
1873             2,
1874             {
1875                 {0, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1876                 {1024, -1, &MPIR_Scatter_MV2_Direct},
1877             },
1878             1,
1879             {
1880                 { 0, -1, &MPIR_Scatter_MV2_Direct},
1881             },
1882         },
1883
1884         {
1885             128,
1886             4,
1887             {
1888                 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1889                 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1890                 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1891                 {2048, -1, &MPIR_Scatter_MV2_Direct},
1892             },
1893             1,
1894             {
1895                 { 0, -1, &MPIR_Scatter_MV2_Direct},
1896             },
1897         },
1898
1899         {
1900             256,
1901             4,
1902             {
1903                 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1904                 {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
1905                 {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
1906                 {2048, -1,  &MPIR_Scatter_MV2_Direct},
1907             },
1908             1,
1909             {
1910                 { 0, -1, &MPIR_Scatter_MV2_Direct},
1911             },
1912         },
1913
1914         {
1915             512,
1916             4,
1917             {
1918                 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1919                 {16, 16, &MPIR_Scatter_MV2_two_level_Direct},
1920                 {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1921                 {4096, -1, &MPIR_Scatter_MV2_Direct},
1922             },
1923             1,
1924             {
1925                 { 0, -1, &MPIR_Scatter_MV2_Binomial},
1926             },
1927         },
1928         {
1929             1024,
1930             5,
1931             {
1932                 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1933                 {0, 16,  &MPIR_Scatter_MV2_Binomial},
1934                 {16, 32, &MPIR_Scatter_MV2_Binomial},
1935                 {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
1936                 {4096, -1, &MPIR_Scatter_MV2_Direct},
1937             },
1938             1,
1939             {
1940                 { 0, -1, &MPIR_Scatter_MV2_Binomial},
1941             },
1942         },
1943         {
1944             2048,
1945             7,
1946             {
1947                 {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
1948                 {0, 16,  &MPIR_Scatter_MV2_two_level_Binomial},
1949                 {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
1950                 {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
1951                 {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
1952                 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1953                 {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
1954             },
1955             6,
1956             {
1957                 {0, 16, &MPIR_Scatter_MV2_Binomial},
1958                 {16, 128, &MPIR_Scatter_MV2_Binomial},
1959                 {128, 1024, &MPIR_Scatter_MV2_Binomial},
1960                 {1024, 16384, &MPIR_Scatter_MV2_Direct},
1961                 {16384, 65536, &MPIR_Scatter_MV2_Direct},
1962                 {65536, -1, &MPIR_Scatter_MV2_Direct},
1963             },
1964         },
1965     };
1966     table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
1967     agg_table_sum = 0;
1968     for (i = 0; i < mv2_scatter_num_ppn_conf; i++) {
1969         agg_table_sum += mv2_size_scatter_tuning_table[i];
1970     }
1971     mv2_scatter_thresholds_table[0] =
1972         static_cast<mv2_scatter_tuning_table*>(xbt_malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table)));
1973     memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0],
1974         (sizeof(mv2_scatter_tuning_table)
1975             * mv2_size_scatter_tuning_table[0]));
1976     for (i = 1; i < mv2_scatter_num_ppn_conf; i++) {
1977         mv2_scatter_thresholds_table[i] =
1978             mv2_scatter_thresholds_table[i - 1]
1979                                          + mv2_size_scatter_tuning_table[i - 1];
1980         memcpy(mv2_scatter_thresholds_table[i], table_ptrs[i],
1981             (sizeof(mv2_scatter_tuning_table)
1982                 * mv2_size_scatter_tuning_table[i]));
1983     }
1984     xbt_free(table_ptrs);
1985   
1986 }
1987