Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
New collectives for mvapich2 selector : allgatherv, allreduce, alltoallv, barrier
author Augustin Degomme <augustin.degomme@imag.fr>
Wed, 23 Jul 2014 15:35:52 +0000 (17:35 +0200)
committer Augustin Degomme <augustin.degomme@imag.fr>
Thu, 24 Jul 2014 13:23:02 +0000 (15:23 +0200)
buildtools/Cmake/AddTests.cmake
buildtools/Cmake/DefinePackages.cmake
src/smpi/colls/barrier-mvapich2-pair.c [new file with mode: 0644]
src/smpi/colls/colls.h
src/smpi/colls/smpi_mvapich2_selector.c
src/smpi/colls/smpi_mvapich2_selector_stampede.h

index 5991eb7..bb58212 100644 (file)
@@ -378,12 +378,12 @@ IF(NOT enable_memcheck)
                             smp_simple spreading_simple ompi mpich ompi_neighborexchange mvapich2)
       ADD_TESH(tesh-smpi-allgather-coll-${ALLGATHER_COLL} --cfg smpi/allgather:${ALLGATHER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgather --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather allgather_coll.tesh)
     ENDFOREACH()
                             smp_simple spreading_simple ompi mpich ompi_neighborexchange mvapich2)
       ADD_TESH(tesh-smpi-allgather-coll-${ALLGATHER_COLL} --cfg smpi/allgather:${ALLGATHER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgather --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather allgather_coll.tesh)
     ENDFOREACH()
-    FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring)
+    FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring mvapich2)
       ADD_TESH(tesh-smpi-allgatherv-coll-${ALLGATHERV_COLL} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgatherv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv allgatherv_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb
                             rdb smp_binomial smp_binomial_pipeline
       ADD_TESH(tesh-smpi-allgatherv-coll-${ALLGATHERV_COLL} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgatherv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv allgatherv_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb
                             rdb smp_binomial smp_binomial_pipeline
-                            smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented)
+                            smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2)
       ADD_TESH(tesh-smpi-allreduce-coll-${ALLREDUCE_COLL} --cfg smpi/allreduce:${ALLREDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL_LARGE ompi_ring_segmented)
       ADD_TESH(tesh-smpi-allreduce-coll-${ALLREDUCE_COLL} --cfg smpi/allreduce:${ALLREDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL_LARGE ompi_ring_segmented)
@@ -397,7 +397,7 @@ IF(NOT enable_memcheck)
     ENDFOREACH()
     FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier
                             pair_one_barrier  ring ring_light_barrier
     ENDFOREACH()
     FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier
                             pair_one_barrier  ring ring_light_barrier
-                            ring_mpi_barrier ring_one_barrier bruck ompi mpich ompi_basic_linear)
+                            ring_mpi_barrier ring_one_barrier bruck ompi mpich mvapich2 ompi_basic_linear)
       ADD_TESH(tesh-smpi-alltoallv-coll-${ALLTOALLV_COLL} --cfg smpi/alltoallv:${ALLTOALLV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoallv alltoallv_coll.tesh)
     ENDFOREACH()
     FOREACH (BCAST_COLL default arrival_pattern_aware arrival_pattern_aware_wait arrival_scatter
       ADD_TESH(tesh-smpi-alltoallv-coll-${ALLTOALLV_COLL} --cfg smpi/alltoallv:${ALLTOALLV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoallv alltoallv_coll.tesh)
     ENDFOREACH()
     FOREACH (BCAST_COLL default arrival_pattern_aware arrival_pattern_aware_wait arrival_scatter
@@ -414,7 +414,7 @@ IF(NOT enable_memcheck)
     FOREACH (SCATTER_COLL default  ompi mpich ompi_basic_linear ompi_binomial)
       ADD_TESH(tesh-smpi-scatter-coll-${SCATTER_COLL} --cfg smpi/scatter:${SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/scatter --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/scatter scatter_coll.tesh)
     ENDFOREACH()
     FOREACH (SCATTER_COLL default  ompi mpich ompi_basic_linear ompi_binomial)
       ADD_TESH(tesh-smpi-scatter-coll-${SCATTER_COLL} --cfg smpi/scatter:${SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/scatter --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/scatter scatter_coll.tesh)
     ENDFOREACH()
-    FOREACH (BARRIER_COLL default  ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering)
+    FOREACH (BARRIER_COLL default  ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2)
       ADD_TESH(tesh-smpi-barrier-coll-${BARRIER_COLL} --cfg smpi/barrier:${BARRIER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/barrier barrier_coll.tesh)
     ENDFOREACH()
     # END TESH TESTS
       ADD_TESH(tesh-smpi-barrier-coll-${BARRIER_COLL} --cfg smpi/barrier:${BARRIER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/barrier barrier_coll.tesh)
     ENDFOREACH()
     # END TESH TESTS
index c9dfc7e..c5fa83e 100644 (file)
@@ -190,6 +190,7 @@ set(SMPI_SRC
   src/smpi/colls/alltoallv-ring-one-barrier.c
   src/smpi/colls/alltoallv-ring.c
   src/smpi/colls/barrier-ompi.c
   src/smpi/colls/alltoallv-ring-one-barrier.c
   src/smpi/colls/alltoallv-ring.c
   src/smpi/colls/barrier-ompi.c
+  src/smpi/colls/barrier-mvapich2-pair.c
   src/smpi/colls/bcast-NTSB.c
   src/smpi/colls/bcast-NTSL-Isend.c
   src/smpi/colls/bcast-NTSL.c
   src/smpi/colls/bcast-NTSB.c
   src/smpi/colls/bcast-NTSL-Isend.c
   src/smpi/colls/bcast-NTSL.c
diff --git a/src/smpi/colls/barrier-mvapich2-pair.c b/src/smpi/colls/barrier-mvapich2-pair.c
new file mode 100644 (file)
index 0000000..2ef6ad1
--- /dev/null
@@ -0,0 +1,98 @@
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
+ *
+ * Additional copyrights may follow
+ */
+ /* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/* Copyright (c) 2001-2014, The Ohio State University. All rights
+ * reserved.
+ *
+ * This file is part of the MVAPICH2 software package developed by the
+ * team members of The Ohio State University's Network-Based Computing
+ * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * For detailed copyright and licensing information, please refer to the
+ * copyright file COPYRIGHT in the top level MVAPICH2 directory.
+ *
+ */
+
+#include "colls_private.h"
+#include "coll_tuned_topo.h"
+
+int smpi_coll_tuned_barrier_mvapich2_pair(MPI_Comm comm)
+{
+    /*
+     * Barrier built from zero-byte messages.
+     *
+     * The ranks are split into a "core" made of the first pof2 ranks (pof2
+     * being the largest power of two that is <= the communicator size) and
+     * the remaining "extra" ranks.  Each extra rank is paired with the core
+     * rank (rank - pof2); the core ranks run a pairwise XOR exchange among
+     * themselves.
+     *
+     * Always returns MPI_SUCCESS: the return values of the point-to-point
+     * calls are not inspected.
+     */
+    const int nprocs = smpi_comm_size(comm);
+
+    /* A communicator with a single process has nobody to wait for. */
+    if (nprocs == 1)
+        return MPI_SUCCESS;
+
+    const int me = smpi_comm_rank(comm);
+
+    /* Overshoot to the first power of two > nprocs, then step back once. */
+    int pof2 = 1;
+    while (pof2 <= nprocs)
+        pof2 <<= 1;
+    pof2 >>= 1;
+
+    /* Number of ranks that do not fit in the power-of-two core. */
+    const int extra = nprocs - pof2;
+
+    if (me >= pof2) {
+        /* Extra rank: check in with the paired core rank and wait for its
+         * release message. */
+        const int core_peer = me - pof2;
+        smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, core_peer, COLL_TAG_BARRIER,
+                          NULL, 0, MPI_BYTE, core_peer, COLL_TAG_BARRIER,
+                          comm, MPI_STATUS_IGNORE);
+        return MPI_SUCCESS;
+    }
+
+    /* Core rank from here on. */
+    const int has_extra_peer = (me < extra);
+    const int extra_peer = pof2 + me;
+
+    /* Fan-in: wait for the paired extra rank, if there is one. */
+    if (has_extra_peer)
+        smpi_mpi_recv(NULL, 0, MPI_BYTE, extra_peer, COLL_TAG_BARRIER,
+                      comm, MPI_STATUS_IGNORE);
+
+    /* Pairwise exchange inside the core: partner differs by one bit per round. */
+    int mask;
+    for (mask = 1; mask < pof2; mask <<= 1) {
+        const int partner = me ^ mask;
+        smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, partner, COLL_TAG_BARRIER,
+                          NULL, 0, MPI_BYTE, partner, COLL_TAG_BARRIER,
+                          comm, MPI_STATUS_IGNORE);
+    }
+
+    /* Fan-out: release the paired extra rank. */
+    if (has_extra_peer)
+        smpi_mpi_send(NULL, 0, MPI_BYTE, extra_peer, COLL_TAG_BARRIER, comm);
+
+    return MPI_SUCCESS;
+}
index 0207d31..1fbd98b 100644 (file)
@@ -98,6 +98,7 @@ COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi_bruck) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_ring) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_ring) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, automatic)
 
 COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep)
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, automatic)
 
 COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep)
@@ -125,6 +126,7 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, redbcast) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi_ring_segmented) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi_ring_segmented) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic)
 
 COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep)
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic)
 
 COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep)
@@ -179,6 +181,7 @@ COLL_APPLY(action, COLL_ALLTOALLV_SIG, ring_one_barrier) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, automatic)
 
 COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep)
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, automatic)
 
 COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep)
@@ -293,6 +296,8 @@ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_bruck)  COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mpich)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, mpich)   COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair)   COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2)   COLL_sep \
 COLL_APPLY(action, COLL_BARRIER_SIG, automatic)
 
 COLL_BARRIERS(COLL_PROTO, COLL_NOsep)
 COLL_APPLY(action, COLL_BARRIER_SIG, automatic)
 
 COLL_BARRIERS(COLL_PROTO, COLL_NOsep)
index 72d40cd..1442a5c 100644 (file)
@@ -703,8 +703,8 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf,
                     MPI_Datatype recvtype,
                     int root, MPI_Comm  comm)
 {
                     MPI_Datatype recvtype,
                     int root, MPI_Comm  comm)
 {
-    if(mv2_alltoall_table_ppn_conf==NULL)
-        init_mv2_alltoall_tables_stampede();
+    if(mv2_gather_thresholds_table==NULL)
+        init_mv2_gather_tables_stampede();
         
     int mpi_errno = MPI_SUCCESS;
     int range = 0;
         
     int mpi_errno = MPI_SUCCESS;
     int range = 0;
@@ -773,3 +773,422 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf,
     return mpi_errno;
 }
 
     return mpi_errno;
 }
 
+
+
+static void init_mv2_allgatherv_tables_stampede(){
+    /*
+     * Fills mv2_allgatherv_thresholds_table / mv2_size_allgatherv_tuning_table
+     * with the "stampede" allgatherv tuning rows.
+     *
+     * Row layout (see mv2_allgatherv_tuning_table):
+     *   numproc, size_inter_table, { {min, max, function}, ... }
+     * A max of -1 marks an open-ended last threshold.
+     */
+    mv2_allgatherv_tuning_table stampede_rows[] = {
+        { 16, 2, {
+            {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
+            {512, -1, &MPIR_Allgatherv_Ring_MV2},
+        } },
+        { 32, 2, {
+            {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2},
+            {512, -1, &MPIR_Allgatherv_Ring_MV2},
+        } },
+        { 64, 2, {
+            {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+            {256, -1, &MPIR_Allgatherv_Ring_MV2},
+        } },
+        { 128, 2, {
+            {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+            {256, -1, &MPIR_Allgatherv_Ring_MV2},
+        } },
+        { 256, 2, {
+            {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+            {256, -1, &MPIR_Allgatherv_Ring_MV2},
+        } },
+        { 512, 2, {
+            {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2},
+            {256, -1, &MPIR_Allgatherv_Ring_MV2},
+        } },
+    };
+    /* Six rows; derived from the array so the count cannot drift. */
+    const size_t table_bytes = sizeof (stampede_rows);
+
+    mv2_size_allgatherv_tuning_table = sizeof (stampede_rows) / sizeof (stampede_rows[0]);
+    mv2_allgatherv_thresholds_table = malloc(table_bytes);
+    memcpy(mv2_allgatherv_thresholds_table, stampede_rows, table_bytes);
+}
+
+
+
+
+
+
+
+int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                        void *recvbuf, int *recvcounts, int *displs,
+                        MPI_Datatype recvtype, MPI_Comm  comm )
+{
+    /*
+     * Allgatherv selector: picks an algorithm from the tuning table based on
+     * the communicator size and the total number of bytes received, then
+     * runs it and returns its result.
+     *
+     * Side effect: stores the selected function in MV2_Allgatherv_function.
+     */
+    int row = 0;
+    int slot = 0;
+    int total_elems = 0;
+    int nprocs, elem_size, payload_bytes, idx;
+    const mv2_allgatherv_tuning_table *entry;
+
+    /* The table is built lazily on first use. */
+    if (mv2_allgatherv_thresholds_table == NULL)
+        init_mv2_allgatherv_tables_stampede();
+
+    nprocs = smpi_comm_size(comm);
+    for (idx = 0; idx < nprocs; idx++)
+        total_elems += recvcounts[idx];
+
+    elem_size = smpi_datatype_size(recvtype);
+    payload_bytes = total_elems * elem_size;
+
+    /* First row whose numproc is >= nprocs, or the last row if none is. */
+    while (row < mv2_size_allgatherv_tuning_table - 1 &&
+           nprocs > mv2_allgatherv_thresholds_table[row].numproc)
+        row++;
+    entry = &mv2_allgatherv_thresholds_table[row];
+
+    /* First threshold whose (scaled) upper bound covers the payload; an
+     * upper bound of -1 means "no limit". */
+    for (; slot < entry->size_inter_table - 1; slot++) {
+        const int upper = entry->inter_leader[slot].max;
+        if (payload_bytes <= nprocs * upper || upper == -1)
+            break;
+    }
+
+    MV2_Allgatherv_function = entry->inter_leader[slot].MV2_pt_Allgatherv_function;
+
+    /* Anything but recursive doubling is called as selected. */
+    if (MV2_Allgatherv_function != &MPIR_Allgatherv_Rec_Doubling_MV2)
+        return MV2_Allgatherv_function(sendbuf, sendcount, sendtype,
+                                       recvbuf, recvcounts, displs,
+                                       recvtype, comm);
+
+    /* Recursive doubling is only used for power-of-two sizes; other sizes
+     * fall back to Bruck. */
+    if ((nprocs & (nprocs - 1)) == 0)
+        return MPIR_Allgatherv_Rec_Doubling_MV2(sendbuf, sendcount,
+                                                sendtype, recvbuf,
+                                                recvcounts, displs,
+                                                recvtype, comm);
+
+    return MPIR_Allgatherv_Bruck_MV2(sendbuf, sendcount,
+                                     sendtype, recvbuf,
+                                     recvcounts, displs,
+                                     recvtype, comm);
+}
+
+
+static void init_mv2_allreduce_tables_stampede(){
+    /*
+     * Fills mv2_allreduce_thresholds_table / mv2_size_allreduce_tuning_table
+     * with the "stampede" allreduce tuning rows.
+     *
+     * Row layout (see mv2_allreduce_tuning_table):
+     *   numproc,
+     *   mcast_enabled,
+     *   is_two_level_allreduce[] (one flag per inter-leader threshold),
+     *   size_inter_table, inter_leader[] = { {min, max, function}, ... },
+     *   size_intra_table, intra_node[]   = { {min, max, function}, ... }
+     * A max of -1 marks an open-ended last threshold.
+     */
+    mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = {
+        {
+            16,
+            0,
+            {1, 0},
+            2,
+            {
+                {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
+                {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+        {
+            32,
+            0,
+            {1, 1, 0},
+            3,
+            {
+                {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2},
+                {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+        {
+            64,
+            0,
+            {1, 1, 0},
+            3,
+            {
+                {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+                {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+        {
+            128,
+            0,
+            {1, 1, 0},
+            3,
+            {
+                {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+                {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+        {
+            256,
+            0,
+            {1, 1, 0},
+            3,
+            {
+                {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+                {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+        {
+            512,
+            0,
+            {1, 1, 0},
+            3,
+            {
+                {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+                {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+        {
+            1024,
+            0,
+            {1, 1, 1, 0},
+            4,
+            {
+                {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2},
+                {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+                {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+        {
+            2048,
+            0,
+            /* Five inter-leader thresholds follow, so five flags and a
+             * count of 5.  The declared count used to be 4, which left the
+             * last threshold unreachable.  The fifth flag is spelled out as
+             * 0, the value it already had through zero-initialisation. */
+            {1, 1, 1, 0, 0},
+            5,
+            {
+                {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2},
+                {64, 512, &MPIR_Allreduce_reduce_p2p_MV2},
+                {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2},
+                {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2},
+                {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2},
+            },
+            2,
+            {
+                {0, 512, &MPIR_Allreduce_reduce_shmem_MV2},
+                {512, -1, &MPIR_Allreduce_reduce_p2p_MV2},
+            },
+        },
+    };
+
+    mv2_size_allreduce_tuning_table = 8;
+    mv2_allreduce_thresholds_table = malloc(mv2_size_allreduce_tuning_table *
+                                            sizeof (mv2_allreduce_tuning_table));
+    memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table,
+           mv2_size_allreduce_tuning_table * sizeof (mv2_allreduce_tuning_table));
+}
+
+
+int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf,
+                       void *recvbuf,
+                       int count,
+                       MPI_Datatype datatype,
+                       MPI_Op op, MPI_Comm comm)
+{
+    /*
+     * Allreduce selector: picks an algorithm from the tuning table based on
+     * the communicator size and the message size in bytes, then runs it and
+     * returns its result.  A zero count returns MPI_SUCCESS immediately.
+     *
+     * Side effects: stores the selected inter-leader and intra-node
+     * functions in MV2_Allreduce_function and MV2_Allreduce_intra_function.
+     */
+    int row = 0;
+    int inter_slot = 0;
+    int intra_slot = 0;
+    int two_level;
+    int payload_bytes;
+    MPI_Aint elem_size;
+    MPI_Aint lower_bound, extent;
+    const mv2_allreduce_tuning_table *entry;
+    const int nprocs = smpi_comm_size(comm);
+
+    if (count == 0)
+        return MPI_SUCCESS;
+
+    /* The table is built lazily on first use. */
+    if (mv2_allreduce_thresholds_table == NULL)
+        init_mv2_allreduce_tables_stampede();
+
+    elem_size = smpi_datatype_size(datatype);
+    payload_bytes = count * elem_size;
+
+    /* The extent is queried but not used by the selection below. */
+    smpi_datatype_extent(datatype, &lower_bound, &extent);
+
+    /* First row whose numproc is >= nprocs, or the last row if none is. */
+    while (row < mv2_size_allreduce_tuning_table - 1 &&
+           nprocs > mv2_allreduce_thresholds_table[row].numproc)
+        row++;
+    entry = &mv2_allreduce_thresholds_table[row];
+
+    /* Skip leading mcast pointers when mcast is not enabled for this row. */
+    if (entry->mcast_enabled != 1) {
+        while (inter_slot < entry->size_inter_table - 1) {
+            if (entry->inter_leader[inter_slot].MV2_pt_Allreduce_function
+                    != &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2
+                && entry->inter_leader[inter_slot].MV2_pt_Allreduce_function
+                    != &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)
+                break;
+            inter_slot++;
+        }
+    }
+
+    /* Inter-leader threshold: advance while the payload exceeds the upper
+     * bound; an upper bound of -1 means "no limit". */
+    for (; inter_slot < entry->size_inter_table - 1; inter_slot++) {
+        const int upper = entry->inter_leader[inter_slot].max;
+        if (payload_bytes <= upper || upper == -1)
+            break;
+    }
+    two_level = (entry->is_two_level_allreduce[inter_slot] == 1);
+
+    /* Intra-node threshold, same search rule. */
+    for (; intra_slot < entry->size_intra_table - 1; intra_slot++) {
+        const int upper = entry->intra_node[intra_slot].max;
+        if (payload_bytes <= upper || upper == -1)
+            break;
+    }
+
+    MV2_Allreduce_function =
+        entry->inter_leader[inter_slot].MV2_pt_Allreduce_function;
+    MV2_Allreduce_intra_function =
+        entry->intra_node[intra_slot].MV2_pt_Allreduce_function;
+
+    /* The mcast variants are placeholders here: substitute recursive
+     * doubling whenever one of them was selected. */
+    if (MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2
+        || MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)
+        MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2;
+
+    /* Two-level thresholds always run recursive doubling; everything else
+     * runs the selected inter-leader function. */
+    if (two_level)
+        return MPIR_Allreduce_pt2pt_rd_MV2(sendbuf, recvbuf, count,
+                                           datatype, op, comm);
+
+    return MV2_Allreduce_function(sendbuf, recvbuf, count,
+                                  datatype, op, comm);
+}
+
+
+int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps,
+                                              MPI_Datatype sdtype,
+                                              void *rbuf, int *rcounts, int *rdisps,
+                                              MPI_Datatype rdtype,
+                                              MPI_Comm  comm
+                                              )
+{
+    /*
+     * Alltoallv selector: the pairwise algorithm is used for regular calls,
+     * the basic linear algorithm when the send buffer is MPI_IN_PLACE.
+     * Returns whatever the chosen algorithm returns.
+     */
+    if (sbuf != MPI_IN_PLACE)
+        return smpi_coll_tuned_alltoallv_pair(sbuf, scounts, sdisps, sdtype,
+                                              rbuf, rcounts, rdisps, rdtype,
+                                              comm);
+
+    return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype,
+                                                       rbuf, rcounts, rdisps, rdtype,
+                                                       comm);
+}
+
+
+int smpi_coll_tuned_barrier_mvapich2(MPI_Comm  comm)
+{
+    /* Barrier selector: there is a single candidate, the pairwise barrier. */
+    const int status = smpi_coll_tuned_barrier_mvapich2_pair(comm);
+
+    return status;
+}
+
+
+
index b0f6136..1c29af6 100644 (file)
@@ -140,3 +140,131 @@ MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL;
 #define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear
 #define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear
 #define MPIR_Gather_intra smpi_coll_tuned_gather_mpich
 #define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear
 #define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear
 #define MPIR_Gather_intra smpi_coll_tuned_gather_mpich
+
+
+
+/* One message-size threshold of the allgatherv tuning table: the function
+ * to use for sizes in the [min, max] slot.  The selector only reads max,
+ * and treats a max of -1 as "no upper limit". */
+typedef struct {
+    int min;
+    int max;
+    int (*MV2_pt_Allgatherv_function)(void *sendbuf,
+                                      int sendcount,
+                                      MPI_Datatype sendtype,
+                                      void *recvbuf,
+                                      int *recvcounts,
+                                      int *displs,
+                                      MPI_Datatype recvtype,
+                                      MPI_Comm comm);
+} mv2_allgatherv_tuning_element;
+
+/* One row of the allgatherv tuning table. */
+typedef struct {
+    /* Communicator size this row is tuned for. */
+    int numproc;
+    /* Number of meaningful entries in inter_leader[]. */
+    int size_inter_table;
+    mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
+} mv2_allgatherv_tuning_table;
+
+/* Allgatherv tuning table and its row count; both are defined a few lines
+ * below and filled in by the selector's table initialiser. */
+extern int mv2_size_allgatherv_tuning_table;
+extern mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table;
+
+/* Function most recently chosen by the allgatherv selector. */
+int (*MV2_Allgatherv_function)(void *sendbuf,
+                               int sendcount,
+                               MPI_Datatype sendtype,
+                               void *recvbuf,
+                               int *recvcounts,
+                               int *displs,
+                               MPI_Datatype recvtype,
+                               MPI_Comm comm);
+                               
+/* A NULL table pointer is what the selector tests to trigger initialisation. */
+int mv2_size_allgatherv_tuning_table = 0;
+mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL;
+
+/* MVAPICH2 algorithm names mapped onto existing SMPI implementations. */
+#define MPIR_Allgatherv_Rec_Doubling_MV2 smpi_coll_tuned_allgatherv_mpich_rdb
+#define MPIR_Allgatherv_Bruck_MV2 smpi_coll_tuned_allgatherv_ompi_bruck
+#define MPIR_Allgatherv_Ring_MV2 smpi_coll_tuned_allgatherv_mpich_ring
+
+
+/* One message-size threshold of the allreduce tuning table: the function
+ * to use for sizes in the [min, max] slot.  The selector only reads max,
+ * and treats a max of -1 as "no upper limit". */
+typedef struct {
+    int min;
+    int max;
+    int (*MV2_pt_Allreduce_function)(void *sendbuf,
+                                   void *recvbuf,
+                                   int count,
+                                   MPI_Datatype datatype,
+                                   MPI_Op op, MPI_Comm comm);
+} mv2_allreduce_tuning_element;
+
+/* One row of the allreduce tuning table.
+ *   numproc                - communicator size this row is tuned for
+ *   mcast_enabled          - when not 1, the selector skips leading mcast entries
+ *   is_two_level_allreduce - per inter-leader threshold flag (1 = two-level)
+ *   size_inter_table       - number of meaningful entries in inter_leader[]
+ *   size_intra_table       - number of meaningful entries in intra_node[]
+ */
+typedef struct {
+    int numproc; 
+    int mcast_enabled;  
+    int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS];   
+    int size_inter_table;
+    mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS];
+    int size_intra_table;
+    mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS];
+} mv2_allreduce_tuning_table;
+
+/* Allreduce tuning table and its row count; both are defined further down
+ * and filled in by the selector's table initialiser. */
+extern int mv2_size_allreduce_tuning_table;
+extern mv2_allreduce_tuning_table *mv2_allreduce_thresholds_table;
+/* NOTE(review): declared here, but no definition or use is visible in this
+ * change - confirm it is still needed. */
+extern int mv2_use_old_allreduce;
+
+
+/* Inter-leader function most recently chosen by the allreduce selector. */
+int (*MV2_Allreduce_function)(void *sendbuf,
+                             void *recvbuf,
+                             int count,
+                             MPI_Datatype datatype,
+                             MPI_Op op, MPI_Comm comm)=NULL;
+
+
+/* Intra-node function most recently chosen by the allreduce selector.  The
+ * selector stores it but does not call it in the code visible here. */
+int (*MV2_Allreduce_intra_function)( void *sendbuf,
+                             void *recvbuf,
+                             int count,
+                             MPI_Datatype datatype,
+                             MPI_Op op, MPI_Comm comm)=NULL;
+
+/* A NULL table pointer is what the selector tests to trigger initialisation. */
+int mv2_size_allreduce_tuning_table = 0;
+mv2_allreduce_tuning_table *mv2_allreduce_thresholds_table = NULL;
+
+
+
+
+
+/* Placeholder for the mcast two-level allreduce: performs no communication
+ * and returns 0.  Its address is used as a marker in the tuning table. */
+static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2(void *sendbuf, void *recvbuf,
+                                                           int count, MPI_Datatype datatype,
+                                                           MPI_Op op, MPI_Comm comm)
+{
+    return 0;
+}
+
+/* Placeholder for the mcast reduce-scatter/gather allreduce: performs no
+ * communication and returns 0.  Its address is used as a marker by the
+ * selector. */
+static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2(void *sendbuf, void *recvbuf,
+                                                         int count, MPI_Datatype datatype,
+                                                         MPI_Op op, MPI_Comm comm)
+{
+    return 0;
+}
+
+/* Stand-in for the point-to-point reduce step: runs the configured reduce
+ * with root 0 and reports MPI_SUCCESS regardless of that call's result.
+ * NOTE(review): this is a rooted reduce, so presumably only rank 0 ends up
+ * with the result - confirm before calling it as a complete allreduce. */
+static int MPIR_Allreduce_reduce_p2p_MV2(void *sendbuf, void *recvbuf,
+                                         int count, MPI_Datatype datatype,
+                                         MPI_Op op, MPI_Comm comm)
+{
+    const int root = 0;
+
+    mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, root, comm);
+    return MPI_SUCCESS;
+}
+
+/* Stand-in for the shared-memory reduce step: runs the configured reduce
+ * with root 0 and reports MPI_SUCCESS regardless of that call's result.
+ * NOTE(review): this is a rooted reduce, so presumably only rank 0 ends up
+ * with the result - confirm before calling it as a complete allreduce. */
+static int MPIR_Allreduce_reduce_shmem_MV2(void *sendbuf, void *recvbuf,
+                                           int count, MPI_Datatype datatype,
+                                           MPI_Op op, MPI_Comm comm)
+{
+    const int root = 0;
+
+    mpi_coll_reduce_fun(sendbuf, recvbuf, count, datatype, op, root, comm);
+    return MPI_SUCCESS;
+}
+
+/* MVAPICH2 point-to-point allreduce names mapped onto existing SMPI
+ * implementations. */
+#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb
+#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_rab1
+