Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
cleanup a bit the code, ensure tests do pass
author Augustin Degomme <augustin.degomme@imag.fr>
Thu, 24 Jul 2014 13:20:25 +0000 (15:20 +0200)
committer Augustin Degomme <augustin.degomme@imag.fr>
Thu, 24 Jul 2014 13:23:03 +0000 (15:23 +0200)
src/smpi/colls/smpi_mvapich2_selector.c
src/smpi/colls/smpi_mvapich2_selector_stampede.h

index 49906af..32408ed 100644 (file)
@@ -3,7 +3,7 @@
 /* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
  * All rights reserved.                                                     */
 
-/* This program is free software; you can redistribute it and/or modify it
+/* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
 #include "colls_private.h"
 #include "smpi_mvapich2_selector_stampede.h"
 
 
-static void init_mv2_alltoall_tables_stampede(){
-int i;
-  int agg_table_sum = 0;
-mv2_alltoall_tuning_table **table_ptrs = NULL;
-   mv2_alltoall_num_ppn_conf = 3;
-        mv2_alltoall_thresholds_table
-         = malloc(sizeof(mv2_alltoall_tuning_table *)
-                       * mv2_alltoall_num_ppn_conf);
-        table_ptrs = malloc(sizeof(mv2_alltoall_tuning_table *)
-                                * mv2_alltoall_num_ppn_conf);
-        mv2_size_alltoall_tuning_table = malloc(sizeof(int) *
-                                                    mv2_alltoall_num_ppn_conf);
-        mv2_alltoall_table_ppn_conf =malloc(mv2_alltoall_num_ppn_conf * sizeof(int));
-        mv2_alltoall_table_ppn_conf[0] = 1;
-        mv2_size_alltoall_tuning_table[0] = 6;
-        mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = {
-         {2,
-          1, 
-          {{0, -1, &MPIR_Alltoall_pairwise_MV2},
-          },
-  
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {4,
-          2,
-          {{0, 262144, &MPIR_Alltoall_Scatter_dest_MV2},
-           {262144, -1, &MPIR_Alltoall_pairwise_MV2},
-          },
-                
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {8,
-          2,
-          {{0, 8, &MPIR_Alltoall_RD_MV2},
-           {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {16,
-          3,
-          {{0, 64, &MPIR_Alltoall_RD_MV2},
-           {64, 512, &MPIR_Alltoall_bruck_MV2},
-           {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0,-1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {32,
-          3,
-          {{0, 32, &MPIR_Alltoall_RD_MV2},
-           {32, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {64,
-          3,
-          {{0, 8, &MPIR_Alltoall_RD_MV2},
-           {8, 1024, &MPIR_Alltoall_bruck_MV2},
-           {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-        };
-        table_ptrs[0] = mv2_tmp_alltoall_thresholds_table_1ppn;
-        mv2_alltoall_table_ppn_conf[1] = 2;
-        mv2_size_alltoall_tuning_table[1] = 6;
-        mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
-         {4,
-          2,
-          {{0, 32, &MPIR_Alltoall_RD_MV2},
-           {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-                
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {8,
-          2,
-          {{0, 64, &MPIR_Alltoall_RD_MV2},
-           {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-                
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {16,
-          3,
-          {{0, 64, &MPIR_Alltoall_RD_MV2},
-           {64, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0,-1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {32,
-          3,
-          {{0, 16, &MPIR_Alltoall_RD_MV2},
-           {16, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {64,
-          3,
-          {{0, 8, &MPIR_Alltoall_RD_MV2},
-           {8, 1024, &MPIR_Alltoall_bruck_MV2},
-           {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-
-         {128,
-          3,
-          {{0, 4, &MPIR_Alltoall_RD_MV2},
-           {4, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{0, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-        };
-        table_ptrs[1] = mv2_tmp_alltoall_thresholds_table_2ppn;
-        mv2_alltoall_table_ppn_conf[2] = 16;
-        mv2_size_alltoall_tuning_table[2] = 7;
-        mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
-         {16,
-          2, 
-          {{0, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, -1,  &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-  
-          {{32768, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {32,
-          2,
-          {{0, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
-          },
-                
-          {{16384, -1, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {64,
-          3,
-          {{0, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
-           {16384, -1, &MPIR_Alltoall_pairwise_MV2},
-          },
-  
-          {{32768, 131072, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {128,
-          2,
-          {{0, 2048, &MPIR_Alltoall_bruck_MV2},
-           {2048, -1, &MPIR_Alltoall_pairwise_MV2},
-          },
-  
-          {{16384,65536, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {256,
-          2,
-          {{0, 1024, &MPIR_Alltoall_bruck_MV2},
-           {1024, -1, &MPIR_Alltoall_pairwise_MV2},
-          },
-  
-          {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-         {512,
-          2,
-          {{0, 1024, &MPIR_Alltoall_bruck_MV2},
-           {1024, -1, &MPIR_Alltoall_pairwise_MV2},
-          },
-  
-          {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-         {1024,
-          2,
-          {{0, 1024, &MPIR_Alltoall_bruck_MV2},
-           {1024, -1, &MPIR_Alltoall_pairwise_MV2},
-          },
-  
-          {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
-          },
-         },
-  
-        };
-        table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
-        agg_table_sum = 0;
-        for (i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
-         agg_table_sum += mv2_size_alltoall_tuning_table[i];
-        }
-        mv2_alltoall_thresholds_table[0] =
-         malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table));
-        memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0],
-                    (sizeof(mv2_alltoall_tuning_table)
-                     * mv2_size_alltoall_tuning_table[0]));
-        for (i = 1; i < mv2_alltoall_num_ppn_conf; i++) {
-         mv2_alltoall_thresholds_table[i] =
-            mv2_alltoall_thresholds_table[i - 1]
-            + mv2_size_alltoall_tuning_table[i - 1];
-         memcpy(mv2_alltoall_thresholds_table[i], table_ptrs[i],
-                      (sizeof(mv2_alltoall_tuning_table)
-                       * mv2_size_alltoall_tuning_table[i]));
-        }
-        free(table_ptrs);
-        
-        
-}
                             
 int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount, 
                                              MPI_Datatype sendtype,
@@ -304,7 +61,7 @@ int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount,
           mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].min
           ||nbytes > mv2_alltoall_thresholds_table[conf_index][range].in_place_algo_table[range_threshold].max
           ) {
-            tmp_buf = (char *)malloc( comm_size * recvcount * recvtype_size );
+            tmp_buf = (char *)xbt_malloc( comm_size * recvcount * recvtype_size );
             mpi_errno = smpi_datatype_copy((char *)recvbuf,
                                        comm_size*recvcount, recvtype,
                                        (char *)tmp_buf,
@@ -313,7 +70,7 @@ int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount,
             mpi_errno = MV2_Alltoall_function(tmp_buf, recvcount, recvtype,
                                                recvbuf, recvcount, recvtype,
                                                 comm );        
-            free(tmp_buf);
+            xbt_free(tmp_buf);
         } else { 
             mpi_errno = MPIR_Alltoall_inplace_MV2(sendbuf, sendcount, sendtype,
                                               recvbuf, recvcount, recvtype,
@@ -326,221 +83,6 @@ int smpi_coll_tuned_alltoall_mvapich2( void *sendbuf, int sendcount,
 }
 
 
-static void init_mv2_allgather_tables_stampede(){
-int i;
-  int agg_table_sum = 0;
-mv2_allgather_tuning_table **table_ptrs = NULL;
- mv2_allgather_num_ppn_conf = 3;
-        mv2_allgather_thresholds_table
-            = malloc(sizeof(mv2_allgather_tuning_table *)
-                  * mv2_allgather_num_ppn_conf);
-        table_ptrs = malloc(sizeof(mv2_allgather_tuning_table *)
-                                 * mv2_allgather_num_ppn_conf);
-        mv2_size_allgather_tuning_table = malloc(sizeof(int) *
-                                                      mv2_allgather_num_ppn_conf);
-        mv2_allgather_table_ppn_conf 
-            = malloc(mv2_allgather_num_ppn_conf * sizeof(int));
-        mv2_allgather_table_ppn_conf[0] = 1;
-        mv2_size_allgather_tuning_table[0] = 6;
-        mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
-            {
-                2,
-                {0},
-                1,
-                {
-                    {0, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                4,
-                {0,0},
-                2,
-                {
-                    {0, 262144, &MPIR_Allgather_RD_MV2},
-                    {262144, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                8,
-                {0,0},
-                2,
-                {
-                    {0, 131072, &MPIR_Allgather_RD_MV2},
-                    {131072, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                16,
-                {0,0},
-                2,
-                {
-                    {0, 131072, &MPIR_Allgather_RD_MV2},
-                    {131072, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                32,
-                {0,0},
-                2,
-                {
-                    {0, 65536, &MPIR_Allgather_RD_MV2},
-                    {65536, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                64,
-                {0,0},
-                2,
-                {
-                    {0, 32768, &MPIR_Allgather_RD_MV2},
-                    {32768, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-        };
-        table_ptrs[0] = mv2_tmp_allgather_thresholds_table_1ppn;
-        mv2_allgather_table_ppn_conf[1] = 2;
-        mv2_size_allgather_tuning_table[1] = 6;
-        mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
-            {
-                4,
-                {0,0},
-                2,
-                {
-                    {0, 524288, &MPIR_Allgather_RD_MV2},
-                    {524288, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                8,
-                {0,1,0},
-                2,
-                {
-                    {0, 32768, &MPIR_Allgather_RD_MV2},
-                    {32768, 524288, &MPIR_Allgather_Ring_MV2},
-                    {524288, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                16,
-                {0,1,0},
-                2,
-                {
-                    {0, 16384, &MPIR_Allgather_RD_MV2},
-                    {16384, 524288, &MPIR_Allgather_Ring_MV2},
-                    {524288, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                32,
-                {1,1,0},
-                2,
-                {
-                    {0, 65536, &MPIR_Allgather_RD_MV2},
-                    {65536, 524288, &MPIR_Allgather_Ring_MV2},
-                    {524288, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                64,
-                {1,1,0},
-                2,
-                {
-                    {0, 32768, &MPIR_Allgather_RD_MV2},
-                    {32768, 524288, &MPIR_Allgather_Ring_MV2},
-                    {524288, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                128,
-                {1,1,0},
-                2,
-                {
-                    {0, 65536, &MPIR_Allgather_RD_MV2},
-                    {65536, 524288, &MPIR_Allgather_Ring_MV2},
-                    {524288, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-        };
-        table_ptrs[1] = mv2_tmp_allgather_thresholds_table_2ppn;
-        mv2_allgather_table_ppn_conf[2] = 16;
-        mv2_size_allgather_tuning_table[2] = 6;
-        mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
-            {
-                16,
-                {0,0},
-                2,
-                {
-                    {0, 1024, &MPIR_Allgather_RD_MV2},
-                    {1024, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                32,
-                {0,0},
-                2,
-                {
-                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
-                    {1024, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                64,
-                {0,0},
-                2,
-                {
-                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
-                    {1024, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                128,
-                {0,0},
-                2,
-                {
-                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
-                    {1024, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                256,
-                {0,0},
-                2,
-                {
-                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
-                    {1024, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-            {
-                512,
-                {0,0},
-                2,
-                {
-                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
-                    {1024, -1, &MPIR_Allgather_Ring_MV2},
-                },
-            },
-
-        };
-        table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
-        agg_table_sum = 0;
-        for (i = 0; i < mv2_allgather_num_ppn_conf; i++) {
-            agg_table_sum += mv2_size_allgather_tuning_table[i];
-        }
-        mv2_allgather_thresholds_table[0] =
-            malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table));
-        memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0],
-            (sizeof(mv2_allgather_tuning_table)
-                     * mv2_size_allgather_tuning_table[0]));
-        for (i = 1; i < mv2_allgather_num_ppn_conf; i++) {
-            mv2_allgather_thresholds_table[i] =
-            mv2_allgather_thresholds_table[i - 1]
-            + mv2_size_allgather_tuning_table[i - 1];
-            memcpy(mv2_allgather_thresholds_table[i], table_ptrs[i],
-                      (sizeof(mv2_allgather_tuning_table)
-                       * mv2_size_allgather_tuning_table[i]));
-        }
-        free(table_ptrs);
-}
 
 int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                        void *recvbuf, int recvcount, MPI_Datatype recvtype,
@@ -647,53 +189,6 @@ int smpi_coll_tuned_allgather_mvapich2(void *sendbuf, int sendcount, MPI_Datatyp
     return mpi_errno;
 }
 
-static void init_mv2_gather_tables_stampede(){
-
- mv2_size_gather_tuning_table=7;
-      mv2_gather_thresholds_table = malloc(mv2_size_gather_tuning_table*
-                                               sizeof (mv2_gather_tuning_table)); 
-      mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[]={
-       {16,
-        2,{{0, 524288, &MPIR_Gather_MV2_Direct},
-           {524288, -1, &MPIR_Gather_intra}},
-        1,{{0, -1, &MPIR_Gather_MV2_Direct}}},
-       {32,
-        3,{{0, 16384, &MPIR_Gather_MV2_Direct}, 
-           {16384, 131072, &MPIR_Gather_intra},
-           {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
-        1,{{0, -1, &MPIR_Gather_intra}}},
-       {64,
-        3,{{0, 256, &MPIR_Gather_MV2_two_level_Direct}, 
-           {256, 16384, &MPIR_Gather_MV2_Direct},
-           {256, -1, &MPIR_Gather_MV2_two_level_Direct}},
-        1,{{0, -1, &MPIR_Gather_intra}}},
-       {128,
-        3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
-           {512, 16384, &MPIR_Gather_MV2_Direct},
-           {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
-        1,{{0, -1, &MPIR_Gather_intra}}},
-       {256,
-        3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
-           {512, 16384, &MPIR_Gather_MV2_Direct},
-           {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
-        1,{{0, -1, &MPIR_Gather_intra}}},
-       {512,
-        3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
-           {512, 16384, &MPIR_Gather_MV2_Direct},
-           {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
-        1,{{0, -1, &MPIR_Gather_intra}}},
-       {1024,
-        3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
-           {512, 16384, &MPIR_Gather_MV2_Direct},
-           {8196, -1, &MPIR_Gather_MV2_two_level_Direct}},
-        1,{{0, -1, &MPIR_Gather_intra}}},
-      };
-
-      memcpy(mv2_gather_thresholds_table, mv2_tmp_gather_thresholds_table,
-                 mv2_size_gather_tuning_table * sizeof (mv2_gather_tuning_table));
-
-}
-
 
 int smpi_coll_tuned_gather_mvapich2(void *sendbuf,
                     int sendcnt,
@@ -774,72 +269,6 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf,
 }
 
 
-
-static void init_mv2_allgatherv_tables_stampede(){
- mv2_size_allgatherv_tuning_table = 6;
- mv2_allgatherv_thresholds_table = malloc(mv2_size_allgatherv_tuning_table *
-                                                  sizeof (mv2_allgatherv_tuning_table));
-        mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
-            {
-                16,
-                2,
-                {
-                    {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
-                    {512, -1, &MPIR_Allgatherv_Ring_MV2},
-                },
-            },
-            {
-                32,
-                2,
-                {
-                    {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
-                    {512, -1, &MPIR_Allgatherv_Ring_MV2},
-                },
-            },
-            {
-                64,
-                2,
-                {
-                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
-                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
-                },
-            },
-            {
-                128,
-                2,
-                {
-                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
-                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
-                },
-            },
-            {
-                256,
-                2,
-                {
-                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
-                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
-                },
-            },
-            {
-                512,
-                2,
-                {
-                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
-                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
-                },
-            },
-
-        }; 
-        memcpy(mv2_allgatherv_thresholds_table, mv2_tmp_allgatherv_thresholds_table,
-                  mv2_size_allgatherv_tuning_table * sizeof (mv2_allgatherv_tuning_table));
-}
-
-
-
-
-
-
-
 int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         void *recvbuf, int *recvcounts, int *displs,
                         MPI_Datatype recvtype, MPI_Comm  comm )
@@ -905,147 +334,6 @@ int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Dataty
 }
 
 
-static void init_mv2_allreduce_tables_stampede(){
-mv2_size_allreduce_tuning_table = 8;
-      mv2_allreduce_thresholds_table = malloc(mv2_size_allreduce_tuning_table *
-                                                  sizeof (mv2_allreduce_tuning_table));
-      mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
-       {
-         16,
-         0,
-         {1, 0},
-         2,
-         {
-           {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
-           {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-       {
-         32,
-         0,
-         {1, 1, 0},
-         3,
-         {
-           {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
-           {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-       {
-         64,
-         0,
-         {1, 1, 0},
-         3,
-         {
-           {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
-           {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-       {
-         128,
-         0,
-         {1, 1, 0},
-         3,
-         {
-           {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
-           {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-       {
-         256,
-         0,
-         {1, 1, 0},
-         3,
-         {
-           {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
-           {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-       {
-         512,
-         0,
-         {1, 1, 0},
-         3,
-         {
-           {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
-           {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-       {
-         1024,
-         0,
-         {1, 1, 1, 0},
-         4,
-         {
-           {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
-           {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
-           {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-       {
-         2048,
-         0,
-         {1, 1, 1, 0},
-         4,
-         {
-           {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
-           {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
-           {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
-           {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
-           {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
-         },
-         2,
-         {
-           {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
-           {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
-         },
-       },
-      }; 
-      memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
-                 mv2_size_allreduce_tuning_table * sizeof (mv2_allreduce_tuning_table));
-}
-
 
 int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf,
                        void *recvbuf,
@@ -1179,7 +467,7 @@ if (sbuf == MPI_IN_PLACE) {
                                                         rbuf, rcounts, rdisps,rdtype,
                                                         comm);
  } else     /* For starters, just keep the original algorithm. */
-    return smpi_coll_tuned_alltoallv_pair(sbuf, scounts, sdisps, sdtype, 
+    return smpi_coll_tuned_alltoallv_ring(sbuf, scounts, sdisps, sdtype, 
                                                         rbuf, rcounts, rdisps,rdtype,
                                                         comm);
 }
@@ -1191,215 +479,6 @@ int smpi_coll_tuned_barrier_mvapich2(MPI_Comm  comm)
 }
 
 
-/*
-static void init_mv2_bcast_tables_stampede(){
- //Stampede,
-        mv2_size_bcast_tuning_table=8;
-        mv2_bcast_thresholds_table = malloc(mv2_size_bcast_tuning_table *
-                                                 sizeof (mv2_bcast_tuning_table));
-
-       mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[]={
-         {
-            16,
-            8192, 4, 4,
-            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-            11,
-            {
-              {0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
-              {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
-              {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
-              {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
-              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
-            },
-            11,
-            {
-              {0, 8, &MPIR_Shmem_Bcast_MV2, 2},
-              {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
-              {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
-              {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
-              {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
-              {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
-              {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
-              {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
-              {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
-              {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
-              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
-            }
-         },
-         {
-            32,
-            8192, 4, 4,
-            {1, 1, 1, 1, 1, 1, 1, 1},
-            8,
-            {
-              {0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}
-            },
-            8,
-            {
-              {0, 128, &MPIR_Shmem_Bcast_MV2, 2},
-              {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
-              {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
-              {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
-              {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
-              {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
-              {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
-              {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}
-            }
-         },
-         {
-            64,
-            8192, 4, 4,
-            {1, 1, 1, 1, 1, 1, 1, 1, 1},
-            9,
-            {
-              {0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}
-            },
-            9,
-            {
-              {0, 2, &MPIR_Shmem_Bcast_MV2, 4},
-              {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
-              {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
-              {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
-              {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
-              {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
-              {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
-              {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
-              {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}
-            }
-         },
-         {
-            128,
-            8192, 4, 4,
-            {1, 1, 1, 0},
-            4,
-            {
-              {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
-            },
-            4,
-            {
-              {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
-              {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
-              {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
-              {524288, -1, NULL, -1}
-            }
-         },
-         {
-            256,
-            8192, 4, 4,
-            {1, 1, 1, 1, 1},
-            5,
-            {
-              {0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
-              {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
-            },
-            5,
-            {
-              {0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
-              {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
-              {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
-              {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
-              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
-            }
-         },
-         {
-            512,
-            8192, 4, 4,
-            {1, 1, 1, 1, 1},
-            5,
-            {
-              {0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
-              {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
-            },
-            5,
-            {
-              {0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
-              {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
-              {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
-              {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
-              {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}
-            }
-         },
-         {
-            1024,
-            8192, 4, 4,
-            {1, 1, 1, 1, 1},
-            5,
-            {
-              {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
-              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
-            },
-            5,
-            {
-              {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
-              {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
-              {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
-              {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
-              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
-            }
-         },
-         {
-            2048,
-            8192, 4, 4,
-            {1, 1, 1, 1, 1, 1, 1},
-            7,
-            {
-              {0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
-              {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
-              {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
-              {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
-              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
-            },
-            7,
-            {
-              {0, 16, &MPIR_Shmem_Bcast_MV2, 8},
-              {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
-              {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
-              {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
-              {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
-              {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
-              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
-            }
-         }
-       };
-
-        memcpy(mv2_bcast_thresholds_table, mv2_tmp_bcast_thresholds_table,
-                    mv2_size_bcast_tuning_table * sizeof (mv2_bcast_tuning_table));
-}*/
 
 
 int smpi_coll_tuned_bcast_mvapich2(void *buffer,
@@ -1413,199 +492,6 @@ int smpi_coll_tuned_bcast_mvapich2(void *buffer,
 
 }
 
-static void init_mv2_reduce_tables_stampede(){
- /*Stampede*/
-        mv2_size_reduce_tuning_table = 8;
-        mv2_reduce_thresholds_table = malloc(mv2_size_reduce_tuning_table *
-                                                  sizeof (mv2_reduce_tuning_table));
-        mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
-         {
-           16,
-           4,
-           4,
-           {1, 0, 0},
-           3,
-           {
-             {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {262144, 1048576, &MPIR_Reduce_binomial_MV2},
-             {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
-           },
-           2,
-           {
-             {0, 65536, &MPIR_Reduce_shmem_MV2},
-             {65536,-1,  &MPIR_Reduce_binomial_MV2},
-           },
-         },
-         {
-           32,
-           4,
-           4,
-           {1, 1, 1, 1, 0, 0, 0},
-           7,
-           {
-             {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {32768, 65536, &MPIR_Reduce_binomial_MV2},
-             {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {262144, 1048576, &MPIR_Reduce_binomial_MV2},
-             {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
-           },
-           6,
-           {
-             {0, 8192, &MPIR_Reduce_shmem_MV2},
-             {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {16384, 32768, &MPIR_Reduce_shmem_MV2},
-             {32768, 65536, &MPIR_Reduce_shmem_MV2},
-             {65536, 262144, &MPIR_Reduce_shmem_MV2},
-             {262144,-1,  &MPIR_Reduce_binomial_MV2},
-           },
-         },
-         {
-           64,
-           4,
-           4,
-           {1, 1, 1, 1, 0},
-           5,
-           {
-             {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_binomial_MV2},
-             {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
-           },
-           5,
-           {
-             {0, 8192, &MPIR_Reduce_shmem_MV2},
-             {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_shmem_MV2},
-             {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {262144, -1, &MPIR_Reduce_binomial_MV2},
-           },
-         },
-         {
-           128,
-           4,
-           4,
-           {1, 0, 1, 0, 1, 0},
-           6,
-           {
-             {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_binomial_MV2},
-             {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {262144, 1048576, &MPIR_Reduce_binomial_MV2},
-             {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
-           },
-           5,
-           {
-             {0, 8192, &MPIR_Reduce_shmem_MV2},
-             {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_shmem_MV2},
-             {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {262144, -1, &MPIR_Reduce_binomial_MV2},
-           },
-         },
-         {
-           256,
-           4,
-           4,
-           {1, 1, 1, 0, 1, 1, 0},
-           7,
-           {
-             {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {16384, 32768, &MPIR_Reduce_binomial_MV2},
-             {32768, 65536, &MPIR_Reduce_binomial_MV2},
-             {65536, 262144, &MPIR_Reduce_binomial_MV2},
-             {262144, 1048576, &MPIR_Reduce_binomial_MV2},
-             {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
-           },
-           6,
-           {
-             {0, 8192, &MPIR_Reduce_shmem_MV2},
-             {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {16384, 32768, &MPIR_Reduce_shmem_MV2},
-             {32768, 65536, &MPIR_Reduce_shmem_MV2},
-             {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {262144, -1, &MPIR_Reduce_binomial_MV2},
-           },
-         },
-         {
-           512,
-           4,
-           4,
-           {1, 0, 1, 1, 1, 0},
-           6,
-           {
-             {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_binomial_MV2},
-             {65536, 262144, &MPIR_Reduce_binomial_MV2},
-             {262144, 1048576, &MPIR_Reduce_binomial_MV2},
-             {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
-           },
-           5,
-           {
-             {0, 8192, &MPIR_Reduce_shmem_MV2},
-             {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_shmem_MV2},
-             {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {262144, -1, &MPIR_Reduce_binomial_MV2},
-           },
-         },
-         {
-           1024,
-           4,
-           4,
-           {1, 0, 1, 1, 1},
-           5,
-           {
-             {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_binomial_MV2},
-             {65536, 262144, &MPIR_Reduce_binomial_MV2},
-             {262144, -1, &MPIR_Reduce_binomial_MV2},
-           },
-           5,
-           {
-             {0, 8192, &MPIR_Reduce_shmem_MV2},
-             {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {16384, 65536, &MPIR_Reduce_shmem_MV2},
-             {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {262144, -1, &MPIR_Reduce_binomial_MV2},
-           },
-         },
-         {
-           2048,
-           4,
-           4,
-           {1, 0, 1, 1, 1,1},
-           6,
-           {
-             {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
-             {4096, 16384, &MPIR_Reduce_binomial_MV2},
-             {16384, 65536, &MPIR_Reduce_binomial_MV2},
-             {65536, 131072, &MPIR_Reduce_binomial_MV2},
-             {131072, -1, &MPIR_Reduce_binomial_MV2},
-           },
-           6,
-           {
-             {0, 2048, &MPIR_Reduce_shmem_MV2},
-             {2048, 4096, &MPIR_Reduce_shmem_MV2},
-             {4096, 16384, &MPIR_Reduce_shmem_MV2},
-             {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
-             {65536, 131072, &MPIR_Reduce_binomial_MV2},
-             {131072, -1, &MPIR_Reduce_shmem_MV2},
-           },
-         },
-
-        }; 
-        memcpy(mv2_reduce_thresholds_table, mv2_tmp_reduce_thresholds_table,
-                   mv2_size_reduce_tuning_table * sizeof (mv2_reduce_tuning_table));
-}
-
 
 
 int smpi_coll_tuned_reduce_mvapich2( void *sendbuf,
@@ -1724,69 +610,6 @@ int smpi_coll_tuned_reduce_mvapich2( void *sendbuf,
 }
 
 
-
-static void init_mv2_reduce_scatter_tables_stampede(){
-        mv2_size_red_scat_tuning_table = 6;
-        mv2_red_scat_thresholds_table = malloc(mv2_size_red_scat_tuning_table *
-                                                  sizeof (mv2_red_scat_tuning_table));
-        mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = {
-            {
-                16,
-                3,
-                {
-                    {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
-                    {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
-                    {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
-                },
-            },
-            {
-                32,
-                3,
-                {
-                    {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
-                    {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
-                    {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
-                },
-            },
-            {
-                64,
-                3,
-                {
-                    {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
-                    {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
-                    {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
-                },
-            },
-            {
-                128,
-                2,
-                {
-                    {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
-                    {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
-                },
-            },
-            {
-                256,
-                2,
-                {
-                    {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
-                    {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
-                },
-            },
-            {
-                512,
-                2,
-                {
-                    {0, 256, &MPIR_Reduce_Scatter_Basic_MV2},
-                    {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
-                },
-            },
-
-        }; 
-        memcpy(mv2_red_scat_thresholds_table, mv2_tmp_red_scat_thresholds_table,
-                  mv2_size_red_scat_tuning_table * sizeof (mv2_red_scat_tuning_table));
-}
-
 int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *recvcnts,
                                                        MPI_Datatype datatype, MPI_Op op,
                                                        MPI_Comm comm)
@@ -1797,7 +620,7 @@ int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *r
     int range = 0;
     int range_threshold = 0;
        int is_commutative = 0;
-       int *disps = malloc(comm_size * sizeof (int));
+       int *disps = xbt_malloc(comm_size * sizeof (int));
 
     if(mv2_red_scat_thresholds_table==NULL)
       init_mv2_reduce_scatter_tables_stampede();
@@ -1836,9 +659,24 @@ int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *r
                                           recvcnts, datatype,
                                           op, comm);
        } else {
-        mpi_errno = MPIR_Reduce_scatter_non_comm_MV2(sendbuf, recvbuf,
-                                                     recvcnts, datatype,
-                                                     op, comm);
+        int is_block_regular = 1; /* stays 1 only if all recvcnts[] entries are equal */
+        for (i = 0; i < (comm_size - 1); ++i) {
+            if (recvcnts[i] != recvcnts[i+1]) {
+                is_block_regular = 0;
+                break;
+            }
+        }
+        int pof2 = 1; /* after the loop: smallest power of two >= comm_size */
+        while (pof2 < comm_size) pof2 <<= 1;
+        if (pof2 == comm_size && is_block_regular) {
+          /* noncommutative, pof2 size, and block regular */
+          mpi_errno = MPIR_Reduce_scatter_non_comm_MV2(sendbuf, recvbuf,
+                                                       recvcnts, datatype, op, comm);
+        } else {
+          /* noncommutative and (non-pof2 size or block irregular): run only the rdb variant, never both */
+          mpi_errno = smpi_coll_tuned_reduce_scatter_mpich_rdb(sendbuf, recvbuf,
+                                                               recvcnts, datatype, op, comm);
+        }
        }
 
     return mpi_errno;
@@ -1847,317 +685,6 @@ int smpi_coll_tuned_reduce_scatter_mvapich2(void *sendbuf, void *recvbuf, int *r
 
 
 
-static void init_mv2_scatter_tables_stampede(){
-{
-    int agg_table_sum = 0;
-    int i;
-    mv2_scatter_tuning_table **table_ptrs = NULL;
-     mv2_scatter_num_ppn_conf = 3;
-        mv2_scatter_thresholds_table
-         = malloc(sizeof(mv2_scatter_tuning_table *)
-                       * mv2_scatter_num_ppn_conf);
-        table_ptrs = malloc(sizeof(mv2_scatter_tuning_table *)
-                                 * mv2_scatter_num_ppn_conf);
-        mv2_size_scatter_tuning_table = malloc(sizeof(int) *
-                                                   mv2_scatter_num_ppn_conf);
-        mv2_scatter_table_ppn_conf 
-         = malloc(mv2_scatter_num_ppn_conf * sizeof(int));
-        mv2_scatter_table_ppn_conf[0] = 1;
-        mv2_size_scatter_tuning_table[0] = 6;
-        mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
-         {2,
-          1, 
-          {
-            {0, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-         },
-
-         {4,
-          1, 
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-         },
-  
-         {8,
-          1, 
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-         },
-  
-         {16,
-          1, 
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-         },
-  
-         {32,
-          1, 
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-         },
-  
-         {64,
-          2, 
-          {
-            {0, 32, &MPIR_Scatter_MV2_Binomial},
-            {32, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-         },
-        };
-        table_ptrs[0] = mv2_tmp_scatter_thresholds_table_1ppn;
-        mv2_scatter_table_ppn_conf[1] = 2;
-        mv2_size_scatter_tuning_table[1] = 6;
-        mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
-         {4,
-          2, 
-          {
-            {0, 4096, &MPIR_Scatter_MV2_Binomial},
-            {4096, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Direct},
-          },
-         },
-  
-         {8,
-          2, 
-          {
-            {0, 512, &MPIR_Scatter_MV2_two_level_Direct},
-            {512, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-         },
-  
-         {16,
-          2, 
-          {
-            {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
-            {2048, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-         },
-  
-         {32,
-          2, 
-          {
-            {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
-            {2048, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-         },
-  
-         {64,
-          2, 
-          {
-            {0, 8192, &MPIR_Scatter_MV2_two_level_Direct},
-            {8192, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-         },
-  
-         {128,
-          4, 
-          {
-            {0, 16, &MPIR_Scatter_MV2_Binomial},
-            {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
-            {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
-            {16384, -1, &MPIR_Scatter_MV2_Direct},
-          },
-          1,
-          {
-            {0, 128, &MPIR_Scatter_MV2_Direct},
-            {128, -1, &MPIR_Scatter_MV2_Binomial},
-          },
-         },
-        };
-        table_ptrs[1] = mv2_tmp_scatter_thresholds_table_2ppn;
-        mv2_scatter_table_ppn_conf[2] = 16;
-        mv2_size_scatter_tuning_table[2] = 8;
-        mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
-         {
-           16,
-           2,
-           { 
-             {0, 256, &MPIR_Scatter_MV2_Binomial}, 
-             {256, -1, &MPIR_Scatter_MV2_Direct},
-           },
-           1, 
-           { 
-             { 0, -1, &MPIR_Scatter_MV2_Direct},
-           },
-         },
-
-         {
-           32,
-           2,
-           {
-             {0, 512, &MPIR_Scatter_MV2_Binomial}, 
-             {512, -1, &MPIR_Scatter_MV2_Direct},
-           },
-           1, 
-           { 
-             { 0, -1, &MPIR_Scatter_MV2_Direct},
-           },
-         },
-
-         {
-           64,
-           2,
-           {
-             {0, 1024, &MPIR_Scatter_MV2_two_level_Direct},
-             {1024, -1, &MPIR_Scatter_MV2_Direct},
-           },
-           1,
-           {
-             { 0, -1, &MPIR_Scatter_MV2_Direct},
-           },
-         },
-
-         {
-           128,
-           4,
-           {
-             {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
-             {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
-             {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
-             {2048, -1, &MPIR_Scatter_MV2_Direct},
-           },
-           1,
-           {
-             { 0, -1, &MPIR_Scatter_MV2_Direct},
-           },
-         },
-
-         {
-           256,
-           4,
-           {
-             {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
-             {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
-             {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
-             {2048, -1,  &MPIR_Scatter_MV2_Direct},
-           },
-           1,
-           {
-             { 0, -1, &MPIR_Scatter_MV2_Direct},
-           },
-         },
-
-         {
-           512,
-           4,
-           {
-             {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
-             {16, 16, &MPIR_Scatter_MV2_two_level_Direct},
-             {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
-             {4096, -1, &MPIR_Scatter_MV2_Direct},
-           },
-           1,
-           {
-             { 0, -1, &MPIR_Scatter_MV2_Binomial},
-           }, 
-         },  
-         {
-           1024,
-           5,
-           {
-             {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
-             {0, 16,  &MPIR_Scatter_MV2_Binomial},
-             {16, 32, &MPIR_Scatter_MV2_Binomial},
-             {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
-             {4096, -1, &MPIR_Scatter_MV2_Direct},
-           },
-           1,
-           {
-             { 0, -1, &MPIR_Scatter_MV2_Binomial},
-           },  
-         },  
-         {
-           2048,
-           7,
-           {
-             {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
-             {0, 16,  &MPIR_Scatter_MV2_two_level_Binomial},
-             {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
-             {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
-             {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
-             {16384, 65536, &MPIR_Scatter_MV2_Direct},
-             {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
-           },
-           6,
-           {
-             {0, 16, &MPIR_Scatter_MV2_Binomial},
-             {16, 128, &MPIR_Scatter_MV2_Binomial},
-             {128, 1024, &MPIR_Scatter_MV2_Binomial},
-             {1024, 16384, &MPIR_Scatter_MV2_Direct},
-             {16384, 65536, &MPIR_Scatter_MV2_Direct},
-             {65536, -1, &MPIR_Scatter_MV2_Direct},
-           },
-         }, 
-        };
-        table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
-        agg_table_sum = 0;
-        for (i = 0; i < mv2_scatter_num_ppn_conf; i++) {
-         agg_table_sum += mv2_size_scatter_tuning_table[i];
-        }
-        mv2_scatter_thresholds_table[0] =
-         malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table));
-        memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0],
-                   (sizeof(mv2_scatter_tuning_table)
-                     * mv2_size_scatter_tuning_table[0]));
-        for (i = 1; i < mv2_scatter_num_ppn_conf; i++) {
-         mv2_scatter_thresholds_table[i] =
-            mv2_scatter_thresholds_table[i - 1]
-            + mv2_size_scatter_tuning_table[i - 1];
-         memcpy(mv2_scatter_thresholds_table[i], table_ptrs[i],
-                      (sizeof(mv2_scatter_tuning_table)
-                       * mv2_size_scatter_tuning_table[i]));
-        }
-        free(table_ptrs);
-   }
-}
-
 int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
                            int sendcnt,
                            MPI_Datatype sendtype,
index abfc786..58104ec 100644 (file)
@@ -1,14 +1,14 @@
-/* selector for collective algorithms based on mvapich decision logic */
+/* selector for collective algorithms based on mvapich decision logic, with calibration from the Stampede cluster at TACC */
 
 /* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This is the tuning used by MVAPICH for Stampede platform based on (MV2_ARCH_INTEL_XEON_E5_2680_16, MV2_HCA_MLX_CX_FDR) */
 
-/* Indicates number of processes per node */
-extern int *mv2_alltoall_table_ppn_conf;
-/* Indicates total number of configurations */
-extern int mv2_alltoall_num_ppn_conf;
+
+
+/************ Alltoall variables and initializers                        */
+
 #define MV2_MAX_NB_THRESHOLDS  32
 typedef struct {
     int min;
@@ -25,24 +25,16 @@ typedef struct {
     mv2_alltoall_tuning_element in_place_algo_table[MV2_MAX_NB_THRESHOLDS];
 } mv2_alltoall_tuning_table;
 
-extern int *mv2_size_alltoall_tuning_table;
-extern mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table;
-extern int mv2_use_old_alltoall;
+int (*MV2_Alltoall_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm_ptr)=NULL;
 
-
-int (*MV2_Alltoall_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
-                              void *recvbuf, int recvcount, MPI_Datatype recvtype,
-                              MPI_Comm comm_ptr)=NULL;
-                              
-                              
+/* Indicates number of processes per node */
 int *mv2_alltoall_table_ppn_conf = NULL;
+/* Indicates total number of configurations */
 int mv2_alltoall_num_ppn_conf = 1;
 int *mv2_size_alltoall_tuning_table = NULL;
 mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL;
 
 
-
-
 #define MPIR_Alltoall_bruck_MV2 smpi_coll_tuned_alltoall_bruck
 #define MPIR_Alltoall_RD_MV2 smpi_coll_tuned_alltoall_rdb
 #define MPIR_Alltoall_Scatter_dest_MV2 smpi_coll_tuned_alltoall_ring
@@ -50,11 +42,251 @@ mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL;
 #define MPIR_Alltoall_inplace_MV2 smpi_coll_tuned_alltoall_ring 
 
 
+static void init_mv2_alltoall_tables_stampede(){ /* Fills the global alltoall tuning tables with three hard-coded tables (ppn conf 1, 2 and 16). */
+int i; /* index over the ppn configurations */
+  int agg_table_sum = 0; /* total number of rows over all three tables */
+  mv2_alltoall_tuning_table **table_ptrs = NULL; /* scratch array: one pointer per local table defined below */
+  mv2_alltoall_num_ppn_conf = 3; /* three configurations are filled in: ppn_conf[0..2] = 1, 2, 16 */
+  mv2_alltoall_thresholds_table = xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
+                                  * mv2_alltoall_num_ppn_conf);
+  table_ptrs = xbt_malloc(sizeof(mv2_alltoall_tuning_table *)
+              * mv2_alltoall_num_ppn_conf);
+  mv2_size_alltoall_tuning_table = xbt_malloc(sizeof(int) *
+                                   mv2_alltoall_num_ppn_conf);
+  mv2_alltoall_table_ppn_conf = xbt_malloc(mv2_alltoall_num_ppn_conf * sizeof(int));
+  mv2_alltoall_table_ppn_conf[0] = 1;
+  mv2_size_alltoall_tuning_table[0] = 6; /* number of rows copied from the 1ppn table below (it has 6) */
+  mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_1ppn[] = { /* row: {int, int, {min, max, function}..., in-place {min, max, function}...}; presumably process count then entry count -- TODO confirm against the struct */
+    {2,
+     1, 
+     {{0, -1, &MPIR_Alltoall_pairwise_MV2},
+     },
+  /* second list of each row: entries using MPIR_Alltoall_inplace_MV2 */
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {4,
+     2,
+     {{0, 262144, &MPIR_Alltoall_Scatter_dest_MV2},
+      {262144, -1, &MPIR_Alltoall_pairwise_MV2},
+     },
+                
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {8,
+     2,
+     {{0, 8, &MPIR_Alltoall_RD_MV2},
+      {8, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {16,
+     3,
+     {{0, 64, &MPIR_Alltoall_RD_MV2},
+      {64, 512, &MPIR_Alltoall_bruck_MV2},
+      {512, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0,-1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {32,
+     3,
+     {{0, 32, &MPIR_Alltoall_RD_MV2},
+      {32, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {64,
+     3,
+     {{0, 8, &MPIR_Alltoall_RD_MV2},
+      {8, 1024, &MPIR_Alltoall_bruck_MV2},
+      {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+        };
+        table_ptrs[0] = mv2_tmp_alltoall_thresholds_table_1ppn;
+        mv2_alltoall_table_ppn_conf[1] = 2;
+        mv2_size_alltoall_tuning_table[1] = 6; /* number of rows copied from the 2ppn table below (it has 6) */
+        mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_2ppn[] = {
+    {4,
+     2,
+     {{0, 32, &MPIR_Alltoall_RD_MV2},
+      {32, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+                
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {8,
+     2,
+     {{0, 64, &MPIR_Alltoall_RD_MV2},
+      {64, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+                
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {16,
+     3,
+     {{0, 64, &MPIR_Alltoall_RD_MV2},
+      {64, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0,-1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {32,
+     3,
+     {{0, 16, &MPIR_Alltoall_RD_MV2},
+      {16, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {64,
+     3,
+     {{0, 8, &MPIR_Alltoall_RD_MV2},
+      {8, 1024, &MPIR_Alltoall_bruck_MV2},
+      {1024, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+
+    {128,
+     3,
+     {{0, 4, &MPIR_Alltoall_RD_MV2},
+      {4, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{0, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+        };
+        table_ptrs[1] = mv2_tmp_alltoall_thresholds_table_2ppn;
+        mv2_alltoall_table_ppn_conf[2] = 16;
+        mv2_size_alltoall_tuning_table[2] = 7; /* number of rows copied from the 16ppn table below (it has 7) */
+        mv2_alltoall_tuning_table mv2_tmp_alltoall_thresholds_table_16ppn[] = {
+    {16,
+     2, 
+     {{0, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, -1,  &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+  
+     {{32768, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {32,
+     2,
+     {{0, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, -1, &MPIR_Alltoall_Scatter_dest_MV2},
+     },
+                
+     {{16384, -1, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {64,
+     3,
+     {{0, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, 16384, &MPIR_Alltoall_Scatter_dest_MV2},
+      {16384, -1, &MPIR_Alltoall_pairwise_MV2},
+     },
+  
+     {{32768, 131072, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {128,
+     2,
+     {{0, 2048, &MPIR_Alltoall_bruck_MV2},
+      {2048, -1, &MPIR_Alltoall_pairwise_MV2},
+     },
+  
+     {{16384,65536, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {256,
+     2,
+     {{0, 1024, &MPIR_Alltoall_bruck_MV2},
+      {1024, -1, &MPIR_Alltoall_pairwise_MV2},
+     },
+  
+     {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+    {512,
+     2,
+     {{0, 1024, &MPIR_Alltoall_bruck_MV2},
+      {1024, -1, &MPIR_Alltoall_pairwise_MV2},
+     },
+  
+     {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+    {1024,
+     2,
+     {{0, 1024, &MPIR_Alltoall_bruck_MV2},
+      {1024, -1, &MPIR_Alltoall_pairwise_MV2},
+     },
+  
+     {{16384, 65536, &MPIR_Alltoall_inplace_MV2},
+     },
+    },
+  
+        };
+  table_ptrs[2] = mv2_tmp_alltoall_thresholds_table_16ppn;
+        agg_table_sum = 0;
+        for (i = 0; i < mv2_alltoall_num_ppn_conf; i++) {
+    agg_table_sum += mv2_size_alltoall_tuning_table[i];
+        }
+        mv2_alltoall_thresholds_table[0] =
+    xbt_malloc(agg_table_sum * sizeof (mv2_alltoall_tuning_table)); /* one allocation sized for the rows of all three tables */
+        memcpy(mv2_alltoall_thresholds_table[0], table_ptrs[0],
+                    (sizeof(mv2_alltoall_tuning_table)
+                     * mv2_size_alltoall_tuning_table[0]));
+        for (i = 1; i < mv2_alltoall_num_ppn_conf; i++) { /* table i starts right after table i-1 inside that same allocation */
+    mv2_alltoall_thresholds_table[i] =
+            mv2_alltoall_thresholds_table[i - 1]
+            + mv2_size_alltoall_tuning_table[i - 1];
+    memcpy(mv2_alltoall_thresholds_table[i], table_ptrs[i],
+                      (sizeof(mv2_alltoall_tuning_table)
+                       * mv2_size_alltoall_tuning_table[i]));
+        }
+        xbt_free(table_ptrs); /* only the scratch pointer array is released; the rows were copied above */
+        
+        
+}
+
 
-/* Indicates number of processes per node */
-extern int *mv2_allgather_table_ppn_conf;
-/* Indicates total number of configurations */
-extern int mv2_allgather_num_ppn_conf;
+/************ Allgather variables and initializers                        */
 
 typedef struct {
     int min;
@@ -74,10 +306,6 @@ typedef struct {
     mv2_allgather_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
 } mv2_allgather_tuning_table;
 
-extern int *mv2_size_allgather_tuning_table;
-extern mv2_allgather_tuning_table **mv2_allgather_thresholds_table;
-extern int mv2_use_old_allgather;
-
 int (*MV2_Allgather_function)(void *sendbuf,
                              int sendcount,
                              MPI_Datatype sendtype,
@@ -96,6 +324,225 @@ mv2_allgather_tuning_table **mv2_allgather_thresholds_table = NULL;
 #define MPIR_Allgather_Ring_MV2 smpi_coll_tuned_allgather_ring
 
 
+static void init_mv2_allgather_tables_stampede(){ /* Build the allgather tuning tables.
+ *
+ * Sets mv2_allgather_num_ppn_conf to 3 and allocates the per-configuration
+ * arrays (table pointers, table sizes, ppn values).  Three local tables of
+ * 6 rows each are declared, for the ppn values 1, 2 and 16 stored in
+ * mv2_allgather_table_ppn_conf.  All rows are then copied into ONE
+ * xbt_malloc'ed buffer: mv2_allgather_thresholds_table[0] is the start of
+ * that buffer and entries [1] and [2] point inside it, so only entry [0]
+ * refers to an allocation of its own.
+ *
+ * Each row is a positional initializer: a number, a flag list, a threshold
+ * count, then {min, max, function} thresholds (the typedef names that list
+ * inter_leader).  NOTE(review): the first number is presumably a process
+ * count and the flag list presumably selects a two-level algorithm per
+ * threshold - the field names are not visible here, confirm against the
+ * typedef.  max == -1 presumably means "no upper bound".
+ *
+ * Nothing allocated here is released by this function except table_ptrs.
+ */
+int i;
+  int agg_table_sum = 0;
+mv2_allgather_tuning_table **table_ptrs = NULL;
+ mv2_allgather_num_ppn_conf = 3;
+        mv2_allgather_thresholds_table
+            = xbt_malloc(sizeof(mv2_allgather_tuning_table *)
+                  * mv2_allgather_num_ppn_conf);
+        table_ptrs = xbt_malloc(sizeof(mv2_allgather_tuning_table *)
+                                 * mv2_allgather_num_ppn_conf);
+        mv2_size_allgather_tuning_table = xbt_malloc(sizeof(int) *
+                                                      mv2_allgather_num_ppn_conf);
+        mv2_allgather_table_ppn_conf 
+            = xbt_malloc(mv2_allgather_num_ppn_conf * sizeof(int));
+        mv2_allgather_table_ppn_conf[0] = 1; /* configuration 0: ppn value 1, 6 rows */
+        mv2_size_allgather_tuning_table[0] = 6;
+        mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_1ppn[] = {
+            {
+                2,
+                {0},
+                1,
+                {
+                    {0, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                4,
+                {0,0},
+                2,
+                {
+                    {0, 262144, &MPIR_Allgather_RD_MV2},
+                    {262144, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                8,
+                {0,0},
+                2,
+                {
+                    {0, 131072, &MPIR_Allgather_RD_MV2},
+                    {131072, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                16,
+                {0,0},
+                2,
+                {
+                    {0, 131072, &MPIR_Allgather_RD_MV2},
+                    {131072, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                32,
+                {0,0},
+                2,
+                {
+                    {0, 65536, &MPIR_Allgather_RD_MV2},
+                    {65536, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                64,
+                {0,0},
+                2,
+                {
+                    {0, 32768, &MPIR_Allgather_RD_MV2},
+                    {32768, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+        };
+        table_ptrs[0] = mv2_tmp_allgather_thresholds_table_1ppn;
+        mv2_allgather_table_ppn_conf[1] = 2; /* configuration 1: ppn value 2, 6 rows */
+        mv2_size_allgather_tuning_table[1] = 6;
+        mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_2ppn[] = {
+            {
+                4,
+                {0,0},
+                2,
+                {
+                    {0, 524288, &MPIR_Allgather_RD_MV2},
+                    {524288, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                8,
+                {0,1,0},
+                2, /* NOTE(review): count is 2 but three flags and three thresholds are listed - confirm which is intended */
+                {
+                    {0, 32768, &MPIR_Allgather_RD_MV2},
+                    {32768, 524288, &MPIR_Allgather_Ring_MV2},
+                    {524288, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                16,
+                {0,1,0},
+                2, /* NOTE(review): same count/list mismatch as the row above */
+                {
+                    {0, 16384, &MPIR_Allgather_RD_MV2},
+                    {16384, 524288, &MPIR_Allgather_Ring_MV2},
+                    {524288, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                32,
+                {1,1,0},
+                2, /* NOTE(review): same count/list mismatch */
+                {
+                    {0, 65536, &MPIR_Allgather_RD_MV2},
+                    {65536, 524288, &MPIR_Allgather_Ring_MV2},
+                    {524288, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                64,
+                {1,1,0},
+                2, /* NOTE(review): same count/list mismatch */
+                {
+                    {0, 32768, &MPIR_Allgather_RD_MV2},
+                    {32768, 524288, &MPIR_Allgather_Ring_MV2},
+                    {524288, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                128,
+                {1,1,0},
+                2, /* NOTE(review): same count/list mismatch */
+                {
+                    {0, 65536, &MPIR_Allgather_RD_MV2},
+                    {65536, 524288, &MPIR_Allgather_Ring_MV2},
+                    {524288, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+        };
+        table_ptrs[1] = mv2_tmp_allgather_thresholds_table_2ppn;
+        mv2_allgather_table_ppn_conf[2] = 16; /* configuration 2: ppn value 16, 6 rows */
+        mv2_size_allgather_tuning_table[2] = 6;
+        mv2_allgather_tuning_table mv2_tmp_allgather_thresholds_table_16ppn[] = {
+            {
+                16,
+                {0,0},
+                2,
+                {
+                    {0, 1024, &MPIR_Allgather_RD_MV2},
+                    {1024, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                32,
+                {0,0},
+                2,
+                {
+                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
+                    {1024, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                64,
+                {0,0},
+                2,
+                {
+                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
+                    {1024, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                128,
+                {0,0},
+                2,
+                {
+                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
+                    {1024, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                256,
+                {0,0},
+                2,
+                {
+                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
+                    {1024, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+            {
+                512,
+                {0,0},
+                2,
+                {
+                    {0, 1024, &MPIR_Allgather_RD_Allgather_Comm_MV2},
+                    {1024, -1, &MPIR_Allgather_Ring_MV2},
+                },
+            },
+
+        };
+        table_ptrs[2] = mv2_tmp_allgather_thresholds_table_16ppn;
+        agg_table_sum = 0; /* total number of rows over all configurations */
+        for (i = 0; i < mv2_allgather_num_ppn_conf; i++) {
+            agg_table_sum += mv2_size_allgather_tuning_table[i];
+        }
+        mv2_allgather_thresholds_table[0] =
+            xbt_malloc(agg_table_sum * sizeof (mv2_allgather_tuning_table)); /* single buffer holding every row */
+        memcpy(mv2_allgather_thresholds_table[0], table_ptrs[0],
+            (sizeof(mv2_allgather_tuning_table)
+                     * mv2_size_allgather_tuning_table[0]));
+        for (i = 1; i < mv2_allgather_num_ppn_conf; i++) {
+            mv2_allgather_thresholds_table[i] =
+            mv2_allgather_thresholds_table[i - 1]
+            + mv2_size_allgather_tuning_table[i - 1]; /* configuration i starts right after configuration i-1 */
+            memcpy(mv2_allgather_thresholds_table[i], table_ptrs[i],
+                      (sizeof(mv2_allgather_tuning_table)
+                       * mv2_size_allgather_tuning_table[i]));
+        }
+        xbt_free(table_ptrs); /* only the temporary pointer array; the rows were copied above */
+}
+
+
+/************ Gather variables and initializers                        */
+
 typedef struct {
     int min;
     int max;
@@ -113,15 +560,6 @@ typedef struct {
     mv2_gather_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
 } mv2_gather_tuning_table;
 
-extern int mv2_size_gather_tuning_table;
-extern mv2_gather_tuning_table * mv2_gather_thresholds_table;
-
-extern int mv2_user_gather_switch_point;
-extern int mv2_use_two_level_gather;
-extern int mv2_gather_direct_system_size_small;
-extern int mv2_gather_direct_system_size_medium;
-extern int mv2_use_direct_gather;
-
 int mv2_size_gather_tuning_table=7;
 mv2_gather_tuning_table * mv2_gather_thresholds_table=NULL; 
 
@@ -142,6 +580,55 @@ MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL;
 #define MPIR_Gather_intra smpi_coll_tuned_gather_mpich
 
 
+static void init_mv2_gather_tables_stampede(){ /* Build the gather tuning table.
+ *
+ * Sets mv2_size_gather_tuning_table to 7, allocates that many
+ * mv2_gather_tuning_table elements with xbt_malloc and fills them by memcpy
+ * from the local array below.
+ *
+ * Each row is a positional initializer: a number (16 .. 1024), a threshold
+ * count followed by that many {min, max, function} thresholds, then a
+ * second count and threshold list (the typedef names the last list
+ * intra_node).  NOTE(review): the leading number is presumably a process
+ * count and max == -1 presumably means "no upper bound" - confirm against
+ * the typedef and the lookup code, neither is visible here.
+ */
+
+ mv2_size_gather_tuning_table=7;
+      mv2_gather_thresholds_table = xbt_malloc(mv2_size_gather_tuning_table*
+            sizeof (mv2_gather_tuning_table)); 
+      mv2_gather_tuning_table mv2_tmp_gather_thresholds_table[]={
+  {16,
+   2,{{0, 524288, &MPIR_Gather_MV2_Direct},
+      {524288, -1, &MPIR_Gather_intra}},
+   1,{{0, -1, &MPIR_Gather_MV2_Direct}}},
+  {32,
+   3,{{0, 16384, &MPIR_Gather_MV2_Direct}, 
+      {16384, 131072, &MPIR_Gather_intra},
+      {131072, -1, &MPIR_Gather_MV2_two_level_Direct}},
+   1,{{0, -1, &MPIR_Gather_intra}}},
+  {64,
+   3,{{0, 256, &MPIR_Gather_MV2_two_level_Direct}, 
+      {256, 16384, &MPIR_Gather_MV2_Direct},
+      {256, -1, &MPIR_Gather_MV2_two_level_Direct}}, /* NOTE(review): min is 256 although the previous range ends at 16384 - confirm */
+   1,{{0, -1, &MPIR_Gather_intra}}},
+  {128,
+   3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
+      {512, 16384, &MPIR_Gather_MV2_Direct},
+      {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
+   1,{{0, -1, &MPIR_Gather_intra}}},
+  {256,
+   3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
+      {512, 16384, &MPIR_Gather_MV2_Direct},
+      {16384, -1, &MPIR_Gather_MV2_two_level_Direct}},
+   1,{{0, -1, &MPIR_Gather_intra}}},
+  {512,
+   3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
+      {512, 16384, &MPIR_Gather_MV2_Direct},
+      {8196, -1, &MPIR_Gather_MV2_two_level_Direct}}, /* NOTE(review): min is 8196 although the previous range ends at 16384 - confirm */
+   1,{{0, -1, &MPIR_Gather_intra}}},
+  {1024,
+   3,{{0, 512, &MPIR_Gather_MV2_two_level_Direct}, 
+      {512, 16384, &MPIR_Gather_MV2_Direct},
+      {8196, -1, &MPIR_Gather_MV2_two_level_Direct}}, /* NOTE(review): same 8196 lower bound as the 512 row - confirm */
+   1,{{0, -1, &MPIR_Gather_intra}}},
+      };
+
+      memcpy(mv2_gather_thresholds_table, mv2_tmp_gather_thresholds_table,
+      mv2_size_gather_tuning_table * sizeof (mv2_gather_tuning_table)); /* the local array has exactly 7 rows, matching the size set above */
+
+}
+
+
+/************ Allgatherv variables and initializers                        */
 
 typedef struct {
     int min;
@@ -162,9 +649,6 @@ typedef struct {
     mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
 } mv2_allgatherv_tuning_table;
 
-extern int mv2_size_allgatherv_tuning_table;
-extern mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table;
-
 int (*MV2_Allgatherv_function)(void *sendbuf,
                                int sendcount,
                                MPI_Datatype sendtype,
@@ -182,6 +666,68 @@ mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL;
 #define MPIR_Allgatherv_Ring_MV2 smpi_coll_tuned_allgatherv_mpich_ring
 
 
+static void init_mv2_allgatherv_tables_stampede(){ /* Build the allgatherv tuning table.
+ *
+ * Sets mv2_size_allgatherv_tuning_table to 6, allocates that many
+ * mv2_allgatherv_tuning_table elements with xbt_malloc and fills them by
+ * memcpy from the local array below.
+ *
+ * Each row is a positional initializer: a number (16 .. 512), a threshold
+ * count, then that many {min, max, function} thresholds (the typedef names
+ * the list inter_leader).  Every row here switches from
+ * MPIR_Allgatherv_Rec_Doubling_MV2 to MPIR_Allgatherv_Ring_MV2 at 512 or
+ * 256.  NOTE(review): the leading number is presumably a process count and
+ * max == -1 presumably means "no upper bound" - confirm against the lookup
+ * code, which is not visible here.
+ */
+ mv2_size_allgatherv_tuning_table = 6;
+ mv2_allgatherv_thresholds_table = xbt_malloc(mv2_size_allgatherv_tuning_table *
+                                                  sizeof (mv2_allgatherv_tuning_table));
+        mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = {
+            {
+                16,
+                2,
+                {
+                    {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
+                    {512, -1, &MPIR_Allgatherv_Ring_MV2},
+                },
+            },
+            {
+                32,
+                2,
+                {
+                    {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
+                    {512, -1, &MPIR_Allgatherv_Ring_MV2},
+                },
+            },
+            {
+                64,
+                2,
+                {
+                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
+                },
+            },
+            {
+                128,
+                2,
+                {
+                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
+                },
+            },
+            {
+                256,
+                2,
+                {
+                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
+                },
+            },
+            {
+                512,
+                2,
+                {
+                    {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+                    {256, -1, &MPIR_Allgatherv_Ring_MV2},
+                },
+            },
+
+        }; 
+        memcpy(mv2_allgatherv_thresholds_table, mv2_tmp_allgatherv_thresholds_table,
+                  mv2_size_allgatherv_tuning_table * sizeof (mv2_allgatherv_tuning_table)); /* the local array has exactly 6 rows, matching the size set above */
+}
+
+
+/************ Allreduce variables and initializers                        */
+
 typedef struct {
     int min;
     int max;
@@ -202,10 +748,6 @@ typedef struct {
     mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
 } mv2_allreduce_tuning_table;
 
-extern int mv2_size_allreduce_tuning_table;
-extern mv2_allreduce_tuning_table *mv2_allreduce_thresholds_table;
-extern int mv2_use_old_allreduce;
-
 
 int (*MV2_Allreduce_function)(void *sendbuf,
                              void *recvbuf,
@@ -266,8 +808,150 @@ static  int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf,
 }
 
 #define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb
-#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_rab1
-
+#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs
+
+
+
+static void init_mv2_allreduce_tables_stampede(){ /* Build the allreduce tuning table.
+ *
+ * Sets mv2_size_allreduce_tuning_table to 8, allocates that many
+ * mv2_allreduce_tuning_table elements with xbt_malloc and fills them by
+ * memcpy from the local array below.
+ *
+ * Each row is a positional initializer: a number (16 .. 2048), a field that
+ * is 0 in every row, a flag list, a threshold count with its
+ * {min, max, function} list, then a second count and list (the typedef
+ * names the last list intra_node).  NOTE(review): the leading number is
+ * presumably a process count, and the meaning of the 0 field and of the
+ * flag list is not visible here - confirm against the typedef.
+ *
+ * NOTE(review): in the 32, 64, 128 and 512 rows the last intra-node
+ * threshold ends at 16384 instead of -1; how larger sizes are resolved
+ * depends on the lookup code, which is not visible here.
+ */
+mv2_size_allreduce_tuning_table = 8;
+      mv2_allreduce_thresholds_table = xbt_malloc(mv2_size_allreduce_tuning_table *
+               sizeof (mv2_allreduce_tuning_table));
+      mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
+  {
+    16,
+    0,
+    {1, 0},
+    2,
+    {
+      {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
+      {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+  {
+    32,
+    0,
+    {1, 1, 0},
+    3,
+    {
+      {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
+      {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+  {
+    64,
+    0,
+    {1, 1, 0},
+    3,
+    {
+      {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+      {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+  {
+    128,
+    0,
+    {1, 1, 0},
+    3,
+    {
+      {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+      {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+  {
+    256,
+    0,
+    {1, 1, 0},
+    3,
+    {
+      {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+      {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+  {
+    512,
+    0,
+    {1, 1, 0},
+    3,
+    {
+      {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+      {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+  {
+    1024,
+    0,
+    {1, 1, 1, 0},
+    4,
+    {
+      {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
+      {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+      {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+  {
+    2048,
+    0,
+    {1, 1, 1, 0},
+    4, /* NOTE(review): count is 4 and four flags are given, but five thresholds are listed below - confirm which is intended */
+    {
+      {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
+      {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
+      {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
+      {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
+      {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+    },
+    2,
+    {
+      {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+      {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+    },
+  },
+      }; 
+      memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
+      mv2_size_allreduce_tuning_table * sizeof (mv2_allreduce_tuning_table)); /* the local array has exactly 8 rows, matching the size set above */
+}
 
 
 /*
@@ -292,16 +976,6 @@ typedef struct {
     mv2_bcast_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
 } mv2_bcast_tuning_table;
 
-extern int mv2_use_pipelined_bcast;
-extern int mv2_pipelined_knomial_factor; 
-extern int mv2_pipelined_zcpy_knomial_factor; 
-extern int zcpy_knomial_factor;
-extern int bcast_segment_size;
-
-extern int mv2_size_bcast_tuning_table;
-extern mv2_bcast_tuning_table *mv2_bcast_thresholds_table;
-extern int mv2_use_old_bcast;
-
 int mv2_size_bcast_tuning_table = 0;
 mv2_bcast_tuning_table *mv2_bcast_thresholds_table = NULL;
 
@@ -315,6 +989,220 @@ int (*MV2_Bcast_intra_node_function) (void *buffer, int count, MPI_Datatype data
                                       
 */
 
+
+/*
+static void init_mv2_bcast_tables_stampede(){
+ //Stampede,
+        mv2_size_bcast_tuning_table=8;
+        mv2_bcast_thresholds_table = xbt_malloc(mv2_size_bcast_tuning_table *
+                                                 sizeof (mv2_bcast_tuning_table));
+
+  mv2_bcast_tuning_table mv2_tmp_bcast_thresholds_table[]={
+    {
+            16,
+            8192, 4, 4,
+            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+            11,
+            {
+              {0, 8, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {8, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {16, 1024, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {1024, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {8192, 16384, &MPIR_Bcast_binomial_MV2, -1},
+              {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {65536, 131072, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+              {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_MV2, -1},
+              {262144, 524288, &MPIR_Bcast_scatter_doubling_allgather_MV2, -1},
+              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
+            },
+            11,
+            {
+              {0, 8, &MPIR_Shmem_Bcast_MV2, 2},
+              {8, 16, &MPIR_Shmem_Bcast_MV2, 4},
+              {16, 1024, &MPIR_Shmem_Bcast_MV2, 2},
+              {1024, 8192, &MPIR_Shmem_Bcast_MV2, 4},
+              {8192, 16384, &MPIR_Shmem_Bcast_MV2, -1},
+              {16384, 32768, &MPIR_Shmem_Bcast_MV2, 4},
+              {32768, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+              {65536, 131072, &MPIR_Shmem_Bcast_MV2, -1},
+              {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+              {262144, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+            }
+    },
+    {
+            32,
+            8192, 4, 4,
+            {1, 1, 1, 1, 1, 1, 1, 1},
+            8,
+            {
+              {0, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {256, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {32768, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {65536, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {131072, 262144, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {524288, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8}
+            },
+            8,
+            {
+              {0, 128, &MPIR_Shmem_Bcast_MV2, 2},
+              {128, 256, &MPIR_Shmem_Bcast_MV2, 4},
+              {256, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+              {32768, 65536, &MPIR_Shmem_Bcast_MV2, 4},
+              {65536, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+              {131072, 262144, &MPIR_Shmem_Bcast_MV2, 8},
+              {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+              {524288, -1, &MPIR_Shmem_Bcast_MV2, 8}
+            }
+    },
+    {
+            64,
+            8192, 4, 4,
+            {1, 1, 1, 1, 1, 1, 1, 1, 1},
+            9,
+            {
+              {0, 2, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {2, 4, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {4, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {32, 128, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {128, 256, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {256, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {4096, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {32768, -1, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2}
+            },
+            9,
+            {
+              {0, 2, &MPIR_Shmem_Bcast_MV2, 4},
+              {2, 4, &MPIR_Shmem_Bcast_MV2, 8},
+              {4, 16, &MPIR_Shmem_Bcast_MV2, 4},
+              {16, 32, &MPIR_Shmem_Bcast_MV2, 8},
+              {32, 128, &MPIR_Shmem_Bcast_MV2, 4},
+              {128, 256, &MPIR_Shmem_Bcast_MV2, 8},
+              {256, 4096, &MPIR_Shmem_Bcast_MV2, 4},
+              {4096, 32768, &MPIR_Shmem_Bcast_MV2, 8},
+              {32768, -1, &MPIR_Shmem_Bcast_MV2, 2}
+            }
+    },
+    {
+            128,
+            8192, 4, 4,
+            {1, 1, 1, 0},
+            4,
+            {
+              {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {16384, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_MV2, -1}
+            },
+            4,
+            {
+              {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+              {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+              {16384, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+              {524288, -1, NULL, -1}
+            }
+    },
+    {
+            256,
+            8192, 4, 4,
+            {1, 1, 1, 1, 1},
+            5,
+            {
+              {0, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {131072, 262144, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1},
+              {262144, 524288, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+            },
+            5,
+            {
+              {0, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+              {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+              {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+              {262144, 524288, &MPIR_Shmem_Bcast_MV2, 2},
+              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+            }
+    },
+    {
+            512,
+            8192, 4, 4,
+            {1, 1, 1, 1, 1},
+            5,
+            {
+              {0, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {16384, 131072, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {131072, 262144, &MPIR_Pipelined_Bcast_MV2, -1},
+              {262144, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+            },
+            5,
+            {
+              {0, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+              {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+              {16384, 131072, &MPIR_Shmem_Bcast_MV2, 2},
+              {131072, 262144, &MPIR_Shmem_Bcast_MV2, -1},
+              {262144, -1, &MPIR_Shmem_Bcast_MV2, -1}
+            }
+    },
+    {
+            1024,
+            8192, 4, 4,
+            {1, 1, 1, 1, 1},
+            5,
+            {
+              {0, 8192, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {8192, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {16384, 65536, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {65536, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+            },
+            5,
+            {
+              {0, 8192, &MPIR_Shmem_Bcast_MV2, 8},
+              {8192, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+              {16384, 65536, &MPIR_Shmem_Bcast_MV2, 2},
+              {65536, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+            }
+    },
+    {
+            2048,
+            8192, 4, 4,
+            {1, 1, 1, 1, 1, 1, 1},
+            7,
+            {
+              {0, 16, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {16, 32, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {32, 4096, &MPIR_Pipelined_Bcast_Zcpy_MV2, 8},
+              {4096, 16384, &MPIR_Pipelined_Bcast_Zcpy_MV2, 4},
+              {16384, 32768, &MPIR_Pipelined_Bcast_Zcpy_MV2, 2},
+              {32768, 524288, &MPIR_Pipelined_Bcast_MV2, -1},
+              {524288, -1, &MPIR_Bcast_scatter_ring_allgather_shm_MV2, -1}
+            },
+            7,
+            {
+              {0, 16, &MPIR_Shmem_Bcast_MV2, 8},
+              {16, 32, &MPIR_Shmem_Bcast_MV2, 4},
+              {32, 4096, &MPIR_Shmem_Bcast_MV2, 8},
+              {4096, 16384, &MPIR_Shmem_Bcast_MV2, 4},
+              {16384, 32768, &MPIR_Shmem_Bcast_MV2, 2},
+              {32768, 524288, &MPIR_Shmem_Bcast_MV2, -1},
+              {524288, -1, &MPIR_Shmem_Bcast_MV2, -1}
+            }
+    }
+  };
+
+        memcpy(mv2_bcast_thresholds_table, mv2_tmp_bcast_thresholds_table,
+                    mv2_size_bcast_tuning_table * sizeof (mv2_bcast_tuning_table));
+}*/
+
+
+/************ Reduce variables and initializers                        */
+
 typedef struct {
     int min;
     int max;
@@ -338,10 +1226,6 @@ typedef struct {
     mv2_reduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
 } mv2_reduce_tuning_table;
 
-extern int mv2_size_reduce_tuning_table;
-extern mv2_reduce_tuning_table *mv2_reduce_thresholds_table;
-extern int mv2_use_old_reduce;
-
 int mv2_size_reduce_tuning_table = 0;
 mv2_reduce_tuning_table *mv2_reduce_thresholds_table = NULL;
 
@@ -366,12 +1250,209 @@ int (*MV2_Reduce_intra_function)( void *sendbuf,
                                  MPI_Comm  comm_ptr)=NULL;
                                  
                                  
-#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_ompi_binomial
-#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_ompi_binomial
-#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_ompi_binomial
+#define MPIR_Reduce_inter_knomial_wrapper_MV2 smpi_coll_tuned_reduce_binomial
+#define MPIR_Reduce_intra_knomial_wrapper_MV2 smpi_coll_tuned_reduce_binomial
+#define MPIR_Reduce_binomial_MV2 smpi_coll_tuned_reduce_binomial
 #define MPIR_Reduce_redscat_gather_MV2 smpi_coll_tuned_reduce_scatter_gather
 #define MPIR_Reduce_shmem_MV2 smpi_coll_tuned_reduce_ompi_basic_linear
 
+
+
+static void init_mv2_reduce_tables_stampede(){ /* Build the reduce tuning table.
+ *
+ * Sets mv2_size_reduce_tuning_table to 8, allocates that many
+ * mv2_reduce_tuning_table elements with xbt_malloc and fills them by memcpy
+ * from the local array below.
+ *
+ * Each row is a positional initializer: a number (16 .. 2048), two fields
+ * that are 4 in every row, a flag list, a threshold count with its
+ * {min, max, function} list, then a second count and list (the typedef
+ * names the last list intra_node).  In every row the counts equal the
+ * number of thresholds listed.  NOTE(review): the leading number is
+ * presumably a process count and the two 4s presumably k-nomial degrees;
+ * the field names are not visible here - confirm against the typedef.
+ */
+ /*Stampede*/
+        mv2_size_reduce_tuning_table = 8;
+        mv2_reduce_thresholds_table = xbt_malloc(mv2_size_reduce_tuning_table *
+                                                  sizeof (mv2_reduce_tuning_table));
+        mv2_reduce_tuning_table mv2_tmp_reduce_thresholds_table[] = {
+    {
+      16,
+      4,
+      4,
+      {1, 0, 0},
+      3,
+      {
+        {0, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
+      },
+      2,
+      {
+        {0, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536,-1,  &MPIR_Reduce_binomial_MV2},
+      },
+    },
+    {
+      32,
+      4,
+      4,
+      {1, 1, 1, 1, 0, 0, 0},
+      7,
+      {
+        {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {32768, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
+      },
+      6,
+      {
+        {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_shmem_MV2},
+        {32768, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_shmem_MV2},
+        {262144,-1,  &MPIR_Reduce_binomial_MV2},
+      },
+    },
+    {
+      64,
+      4,
+      4,
+      {1, 1, 1, 1, 0},
+      5,
+      {
+        {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_redscat_gather_MV2},
+      },
+      5,
+      {
+        {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2},
+      },
+    },
+    {
+      128,
+      4,
+      4,
+      {1, 0, 1, 0, 1, 0},
+      6,
+      {
+        {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
+      },
+      5,
+      {
+        {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2},
+      },
+    },
+    {
+      256,
+      4,
+      4,
+      {1, 1, 1, 0, 1, 1, 0},
+      7,
+      {
+        {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_binomial_MV2},
+        {32768, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_binomial_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
+      },
+      6,
+      {
+        {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 32768, &MPIR_Reduce_shmem_MV2},
+        {32768, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2},
+      },
+    },
+    {
+      512,
+      4,
+      4,
+      {1, 0, 1, 1, 1, 0},
+      6,
+      {
+        {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_binomial_MV2},
+        {262144, 1048576, &MPIR_Reduce_binomial_MV2},
+        {1048576, -1, &MPIR_Reduce_redscat_gather_MV2},
+      },
+      5,
+      {
+        {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2},
+      },
+    },
+    {
+      1024,
+      4,
+      4,
+      {1, 0, 1, 1, 1},
+      5,
+      {
+        {0, 8192, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {8192, 16384, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 262144, &MPIR_Reduce_binomial_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2},
+      },
+      5,
+      {
+        {0, 8192, &MPIR_Reduce_shmem_MV2},
+        {8192, 16384, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {16384, 65536, &MPIR_Reduce_shmem_MV2},
+        {65536, 262144, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {262144, -1, &MPIR_Reduce_binomial_MV2},
+      },
+    },
+    {
+      2048,
+      4,
+      4,
+      {1, 0, 1, 1, 1,1},
+      6,
+      {
+        {0, 2048, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {2048, 4096, &MPIR_Reduce_inter_knomial_wrapper_MV2},
+        {4096, 16384, &MPIR_Reduce_binomial_MV2},
+        {16384, 65536, &MPIR_Reduce_binomial_MV2},
+        {65536, 131072, &MPIR_Reduce_binomial_MV2},
+        {131072, -1, &MPIR_Reduce_binomial_MV2},
+      },
+      6,
+      {
+        {0, 2048, &MPIR_Reduce_shmem_MV2},
+        {2048, 4096, &MPIR_Reduce_shmem_MV2},
+        {4096, 16384, &MPIR_Reduce_shmem_MV2},
+        {16384, 65536, &MPIR_Reduce_intra_knomial_wrapper_MV2},
+        {65536, 131072, &MPIR_Reduce_binomial_MV2},
+        {131072, -1, &MPIR_Reduce_shmem_MV2},
+      },
+    },
+
+        }; 
+        memcpy(mv2_reduce_thresholds_table, mv2_tmp_reduce_thresholds_table,
+        mv2_size_reduce_tuning_table * sizeof (mv2_reduce_tuning_table)); /* the local array has exactly 8 rows, matching the size set above */
+}
+
+/************ Reduce scatter variables and initializers                        */
+
 typedef struct {
     int min;
     int max;
@@ -389,9 +1470,6 @@ typedef struct {
     mv2_red_scat_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
 } mv2_red_scat_tuning_table;
 
-extern int mv2_size_red_scat_tuning_table;
-extern mv2_red_scat_tuning_table *mv2_red_scat_thresholds_table;
-
 int mv2_size_red_scat_tuning_table = 0;
 mv2_red_scat_tuning_table *mv2_red_scat_thresholds_table = NULL;
 
@@ -402,18 +1480,89 @@ int (*MV2_Red_scat_function)(void *sendbuf,
                              MPI_Datatype datatype,
                              MPI_Op op,
                              MPI_Comm comm_ptr);
+                             
+                             
 
-#define MPIR_Reduce_Scatter_Basic_MV2 smpi_coll_tuned_reduce_scatter_mpich_noncomm
+static  int MPIR_Reduce_Scatter_Basic_MV2(void *sendbuf, /* "Basic" reduce-scatter entry used in the tuning table: thin wrapper around smpi_mpi_reduce_scatter */
+                             void *recvbuf,
+                             int *recvcnts,
+                             MPI_Datatype datatype,
+                             MPI_Op op,
+                             MPI_Comm comm)
+{
+    smpi_mpi_reduce_scatter(sendbuf,recvbuf,recvcnts,datatype,op,comm); /* arguments forwarded unchanged; whatever the call returns (if anything) is not inspected */
+    return MPI_SUCCESS; /* unconditionally reports success to the caller */
+}
 #define MPIR_Reduce_scatter_non_comm_MV2 smpi_coll_tuned_reduce_scatter_mpich_noncomm
 #define MPIR_Reduce_scatter_Rec_Halving_MV2 smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving
 #define MPIR_Reduce_scatter_Pair_Wise_MV2 smpi_coll_tuned_reduce_scatter_mpich_pair
 
 
 
-/* Indicates number of processes per node */
-extern int *mv2_scatter_table_ppn_conf;
-/* Indicates total number of configurations */
-extern int mv2_scatter_num_ppn_conf;
+
+static void init_mv2_reduce_scatter_tables_stampede(){ /* Fills the global reduce-scatter tuning table with the 6 hard-coded entries below. */
+        mv2_size_red_scat_tuning_table = 6; /* must match the number of entries in the initializer below: the memcpy at the end reads this many */
+        mv2_red_scat_thresholds_table = xbt_malloc(mv2_size_red_scat_tuning_table *
+                                                  sizeof (mv2_red_scat_tuning_table)); /* heap destination; not released inside this function */
+        mv2_red_scat_tuning_table mv2_tmp_red_scat_thresholds_table[] = { /* local staging array, copied to the heap table at the end */
+            {
+                16, /* first field of the entry — presumably the process count it applies to; TODO confirm against the struct definition */
+                3, /* equals the number of elements listed in the array that follows */
+                {
+                    {0, 64, &MPIR_Reduce_Scatter_Basic_MV2}, /* presumably {min, max, function}; TODO confirm units of min/max */
+                    {64, 65536, &MPIR_Reduce_scatter_Rec_Halving_MV2},
+                    {65536, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2}, /* -1 presumably means "no upper bound" — confirm against the code that reads this table */
+                },
+            },
+            {
+                32,
+                3,
+                {
+                    {0, 64, &MPIR_Reduce_Scatter_Basic_MV2},
+                    {64, 131072, &MPIR_Reduce_scatter_Rec_Halving_MV2},
+                    {131072, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
+                },
+            },
+            {
+                64,
+                3,
+                {
+                    {0, 1024, &MPIR_Reduce_Scatter_Basic_MV2},
+                    {1024, 262144, &MPIR_Reduce_scatter_Rec_Halving_MV2},
+                    {262144, -1, &MPIR_Reduce_scatter_Pair_Wise_MV2},
+                },
+            },
+            {
+                128,
+                2, /* from this entry on only two ranges are listed */
+                {
+                    {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
+                    {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
+                },
+            },
+            {
+                256,
+                2,
+                {
+                    {0, 128, &MPIR_Reduce_Scatter_Basic_MV2},
+                    {128, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
+                },
+            },
+            {
+                512,
+                2,
+                {
+                    {0, 256, &MPIR_Reduce_Scatter_Basic_MV2},
+                    {256, -1, &MPIR_Reduce_scatter_Rec_Halving_MV2},
+                },
+            },
+
+        }; 
+        memcpy(mv2_red_scat_thresholds_table, mv2_tmp_red_scat_thresholds_table, /* copy the staged entries into the heap table */
+                  mv2_size_red_scat_tuning_table * sizeof (mv2_red_scat_tuning_table));
+}
+
+/************ Scatter variables and initializers                        */
 
 typedef struct {
     int min;
@@ -435,9 +1584,6 @@ typedef struct {
     mv2_scatter_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
 } mv2_scatter_tuning_table;
 
-extern int *mv2_size_scatter_tuning_table;
-extern mv2_scatter_tuning_table **mv2_scatter_thresholds_table;
-
 
 int *mv2_scatter_table_ppn_conf = NULL;
 int mv2_scatter_num_ppn_conf = 1;
@@ -475,3 +1621,317 @@ int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf,
 #define MPIR_Scatter_MV2_two_level_Binomial smpi_coll_tuned_scatter_ompi_binomial
 #define MPIR_Scatter_MV2_two_level_Direct smpi_coll_tuned_scatter_ompi_basic_linear
 
+
+
+
+static void init_mv2_scatter_tables_stampede(){ /* Builds the global scatter tuning tables: three hard-coded tables (1, 2 and 16 in the ppn array) packed into one heap allocation. */
+{ /* extra nested block; closed by the first of the two braces at the end of the function */
+    int agg_table_sum = 0;
+    int i;
+    mv2_scatter_tuning_table **table_ptrs = NULL; /* temporary: one pointer per local staging table */
+     mv2_scatter_num_ppn_conf = 3; /* number of configurations filled in below */
+        mv2_scatter_thresholds_table
+    = xbt_malloc(sizeof(mv2_scatter_tuning_table *)
+      * mv2_scatter_num_ppn_conf); /* per-configuration table pointers; kept after return */
+        table_ptrs = xbt_malloc(sizeof(mv2_scatter_tuning_table *)
+                                 * mv2_scatter_num_ppn_conf);
+        mv2_size_scatter_tuning_table = xbt_malloc(sizeof(int) *
+                mv2_scatter_num_ppn_conf); /* per-configuration entry counts */
+        mv2_scatter_table_ppn_conf 
+    = xbt_malloc(mv2_scatter_num_ppn_conf * sizeof(int)); /* per-configuration ppn value */
+        mv2_scatter_table_ppn_conf[0] = 1;
+        mv2_size_scatter_tuning_table[0] = 6; /* matches the 6 entries of the table below */
+        mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_1ppn[] = {
+    {2, /* first field — presumably the process count the entry applies to; TODO confirm against the struct definition */
+     1, /* count of elements in the first array — presumably the inter-leader list; TODO confirm */
+     {
+       {0, -1, &MPIR_Scatter_MV2_Binomial}, /* presumably {min, max, function}, -1 = no upper bound; confirm */
+     },
+     1, /* count of elements in the second array — presumably the intra-node list; TODO confirm */
+     {
+       {0, -1, &MPIR_Scatter_MV2_Binomial},
+     },
+    },
+
+    {4,
+     1, 
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+    },
+  
+    {8,
+     1, 
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+    },
+  
+    {16,
+     1, 
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+    },
+  
+    {32,
+     1, 
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+    },
+  
+    {64,
+     2, 
+     {
+       {0, 32, &MPIR_Scatter_MV2_Binomial},
+       {32, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Binomial},
+     },
+    },
+        };
+        table_ptrs[0] = mv2_tmp_scatter_thresholds_table_1ppn;
+        mv2_scatter_table_ppn_conf[1] = 2;
+        mv2_size_scatter_tuning_table[1] = 6; /* matches the 6 entries of the table below */
+        mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_2ppn[] = {
+    {4,
+     2, 
+     {
+       {0, 4096, &MPIR_Scatter_MV2_Binomial},
+       {4096, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Direct},
+     },
+    },
+  
+    {8,
+     2, 
+     {
+       {0, 512, &MPIR_Scatter_MV2_two_level_Direct},
+       {512, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Binomial},
+     },
+    },
+  
+    {16,
+     2, 
+     {
+       {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
+       {2048, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Binomial},
+     },
+    },
+  
+    {32,
+     2, 
+     {
+       {0, 2048, &MPIR_Scatter_MV2_two_level_Direct},
+       {2048, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Binomial},
+     },
+    },
+  
+    {64,
+     2, 
+     {
+       {0, 8192, &MPIR_Scatter_MV2_two_level_Direct},
+       {8192, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1,
+     {
+       {0, -1, &MPIR_Scatter_MV2_Binomial},
+     },
+    },
+  
+    {128,
+     4, 
+     {
+       {0, 16, &MPIR_Scatter_MV2_Binomial},
+       {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
+       {128, 16384, &MPIR_Scatter_MV2_two_level_Direct},
+       {16384, -1, &MPIR_Scatter_MV2_Direct},
+     },
+     1, /* NOTE(review): count is 1 but two elements are listed below — confirm which is intended */
+     {
+       {0, 128, &MPIR_Scatter_MV2_Direct},
+       {128, -1, &MPIR_Scatter_MV2_Binomial},
+     },
+    },
+        };
+        table_ptrs[1] = mv2_tmp_scatter_thresholds_table_2ppn;
+        mv2_scatter_table_ppn_conf[2] = 16;
+        mv2_size_scatter_tuning_table[2] = 8; /* matches the 8 entries of the table below */
+        mv2_scatter_tuning_table mv2_tmp_scatter_thresholds_table_16ppn[] = {
+    {
+      16,
+      2,
+      { 
+        {0, 256, &MPIR_Scatter_MV2_Binomial}, 
+        {256, -1, &MPIR_Scatter_MV2_Direct},
+      },
+      1, 
+      { 
+        { 0, -1, &MPIR_Scatter_MV2_Direct},
+      },
+    },
+
+    {
+      32,
+      2,
+      {
+        {0, 512, &MPIR_Scatter_MV2_Binomial}, 
+        {512, -1, &MPIR_Scatter_MV2_Direct},
+      },
+      1, 
+      { 
+        { 0, -1, &MPIR_Scatter_MV2_Direct},
+      },
+    },
+
+    {
+      64,
+      2,
+      {
+        {0, 1024, &MPIR_Scatter_MV2_two_level_Direct},
+        {1024, -1, &MPIR_Scatter_MV2_Direct},
+      },
+      1,
+      {
+        { 0, -1, &MPIR_Scatter_MV2_Direct},
+      },
+    },
+
+    {
+      128,
+      4,
+      {
+        {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
+        {0, 16, &MPIR_Scatter_MV2_two_level_Direct}, /* same {0, 16} range as the line above, different function */
+        {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
+        {2048, -1, &MPIR_Scatter_MV2_Direct},
+      },
+      1,
+      {
+        { 0, -1, &MPIR_Scatter_MV2_Direct},
+      },
+    },
+
+    {
+      256,
+      4,
+      {
+        {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
+        {0, 16, &MPIR_Scatter_MV2_two_level_Direct},
+        {16, 2048, &MPIR_Scatter_MV2_two_level_Direct},
+        {2048, -1,  &MPIR_Scatter_MV2_Direct},
+      },
+      1,
+      {
+        { 0, -1, &MPIR_Scatter_MV2_Direct},
+      },
+    },
+
+    {
+      512,
+      4,
+      {
+        {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
+        {16, 16, &MPIR_Scatter_MV2_two_level_Direct}, /* NOTE(review): {16, 16} here, whereas the 128 and 256 entries use {0, 16} in this position — confirm */
+        {16, 4096, &MPIR_Scatter_MV2_two_level_Direct},
+        {4096, -1, &MPIR_Scatter_MV2_Direct},
+      },
+      1,
+      {
+        { 0, -1, &MPIR_Scatter_MV2_Binomial},
+      }, 
+    },  
+    {
+      1024,
+      5,
+      {
+        {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
+        {0, 16,  &MPIR_Scatter_MV2_Binomial},
+        {16, 32, &MPIR_Scatter_MV2_Binomial},
+        {32, 4096, &MPIR_Scatter_MV2_two_level_Direct},
+        {4096, -1, &MPIR_Scatter_MV2_Direct},
+      },
+      1,
+      {
+        { 0, -1, &MPIR_Scatter_MV2_Binomial},
+      },  
+    },  
+    {
+      2048,
+      7,
+      {
+        {0, 16, &MPIR_Scatter_mcst_wrap_MV2},
+        {0, 16,  &MPIR_Scatter_MV2_two_level_Binomial},
+        {16, 128, &MPIR_Scatter_MV2_two_level_Binomial},
+        {128, 1024, &MPIR_Scatter_MV2_two_level_Direct},
+        {1024, 16384, &MPIR_Scatter_MV2_two_level_Direct},
+        {16384, 65536, &MPIR_Scatter_MV2_Direct},
+        {65536, -1, &MPIR_Scatter_MV2_two_level_Direct},
+      },
+      6,
+      {
+        {0, 16, &MPIR_Scatter_MV2_Binomial},
+        {16, 128, &MPIR_Scatter_MV2_Binomial},
+        {128, 1024, &MPIR_Scatter_MV2_Binomial},
+        {1024, 16384, &MPIR_Scatter_MV2_Direct},
+        {16384, 65536, &MPIR_Scatter_MV2_Direct},
+        {65536, -1, &MPIR_Scatter_MV2_Direct},
+      },
+    }, 
+        };
+        table_ptrs[2] = mv2_tmp_scatter_thresholds_table_16ppn;
+        agg_table_sum = 0; /* already 0 from its initializer; reset again before summing */
+        for (i = 0; i < mv2_scatter_num_ppn_conf; i++) {
+    agg_table_sum += mv2_size_scatter_tuning_table[i]; /* total entry count over all configurations */
+        }
+        mv2_scatter_thresholds_table[0] =
+    xbt_malloc(agg_table_sum * sizeof (mv2_scatter_tuning_table)); /* one contiguous buffer holding every configuration's entries */
+        memcpy(mv2_scatter_thresholds_table[0], table_ptrs[0],
+        (sizeof(mv2_scatter_tuning_table)
+                     * mv2_size_scatter_tuning_table[0]));
+        for (i = 1; i < mv2_scatter_num_ppn_conf; i++) {
+    mv2_scatter_thresholds_table[i] =
+            mv2_scatter_thresholds_table[i - 1]
+            + mv2_size_scatter_tuning_table[i - 1]; /* table i starts right after table i-1 inside the same buffer */
+    memcpy(mv2_scatter_thresholds_table[i], table_ptrs[i],
+                      (sizeof(mv2_scatter_tuning_table)
+                       * mv2_size_scatter_tuning_table[i]));
+        }
+        xbt_free(table_ptrs); /* only the temporary pointer array is released; the tables stay allocated */
+   }
+}
+