Add Scatter SMP collective from MVAPICH2
author    Augustin Degomme <augustin.degomme@imag.fr>  Fri, 1 Aug 2014 10:35:18 +0000 (12:35 +0200)
committer Augustin Degomme <augustin.degomme@imag.fr>  Fri, 1 Aug 2014 10:35:18 +0000 (12:35 +0200)
buildtools/Cmake/AddTests.cmake
buildtools/Cmake/DefinePackages.cmake
src/smpi/colls/allgather-mvapich-smp.c
src/smpi/colls/colls.h
src/smpi/colls/gather-mvapich.c
src/smpi/colls/scatter-mvapich-two-level.c [new file with mode: 0644]
src/smpi/colls/smpi_mvapich2_selector.c
src/smpi/colls/smpi_mvapich2_selector_stampede.h

diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake
index 41657bd..36408df 100644
@@ -412,7 +412,7 @@ IF(NOT enable_memcheck)
     FOREACH (REDUCE_SCATTER_COLL default  ompi mpich ompi_basic_recursivehalving ompi_ring mpich_noncomm mpich_pair mvapich2 mpich_rdb impi)
       ADD_TESH(tesh-smpi-reduce-scatter-coll-${REDUCE_SCATTER_COLL} --cfg smpi/reduce_scatter:${REDUCE_SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/reduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/reduce reduce_scatter_coll.tesh)
     ENDFOREACH()
-    FOREACH (SCATTER_COLL default  ompi mpich ompi_basic_linear ompi_binomial mvapich2 impi)
+    FOREACH (SCATTER_COLL default  ompi mpich ompi_basic_linear ompi_binomial mvapich2 mvapich2_two_level_binomial mvapich2_two_level_direct impi)
       ADD_TESH(tesh-smpi-scatter-coll-${SCATTER_COLL} --cfg smpi/scatter:${SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/scatter --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/scatter scatter_coll.tesh)
     ENDFOREACH()
     FOREACH (BARRIER_COLL default  ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2 impi)
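
These two new FOREACH entries generate the tests tesh-smpi-scatter-coll-mvapich2_two_level_binomial and tesh-smpi-scatter-coll-mvapich2_two_level_direct, so the new algorithms are exercised like every other scatter implementation. They are selected through the same option the tests pass on the command line, e.g. --cfg=smpi/scatter:mvapich2_two_level_direct (illustrative invocation; the commit itself only adds the cfg values above).
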
diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake
index 4fd7eb8..0c18d61 100644
@@ -228,6 +228,7 @@ set(SMPI_SRC
   src/smpi/colls/reduce-mvapich-two-level.c
   src/smpi/colls/reduce-rab.c
   src/smpi/colls/scatter-ompi.c
+  src/smpi/colls/scatter-mvapich-two-level.c
   src/smpi/colls/smpi_automatic_selector.c
   src/smpi/colls/smpi_mpich_selector.c
   src/smpi/colls/smpi_intel_mpi_selector.c
diff --git a/src/smpi/colls/allgather-mvapich-smp.c b/src/smpi/colls/allgather-mvapich-smp.c
index b988967..b57a990 100644
@@ -1,4 +1,40 @@
-#include "colls_private.h"
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2009 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ *
+ * Additional copyrights may follow
+ */
+ /* -*- Mode: C; c-basic-offset:4 ; -*- */
+/* Copyright (c) 2001-2014, The Ohio State University. All rights
+ * reserved.
+ *
+ * This file is part of the MVAPICH2 software package developed by the
+ * team members of The Ohio State University's Network-Based Computing
+ * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * For detailed copyright and licensing information, please refer to the
+ * copyright file COPYRIGHT in the top level MVAPICH2 directory.
+ */
+/*
+ *
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+ #include "colls_private.h"
 
 
 
diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h
index 86e50b8..ee84cf5 100644
@@ -300,6 +300,8 @@ COLL_APPLY(action, COLL_SCATTER_SIG, ompi_basic_linear) COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial)  COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, mpich)   COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, mvapich2)   COLL_sep \
+COLL_APPLY(action, COLL_SCATTER_SIG, mvapich2_two_level_binomial)   COLL_sep \
+COLL_APPLY(action, COLL_SCATTER_SIG, mvapich2_two_level_direct)   COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, impi)   COLL_sep \
 COLL_APPLY(action, COLL_SCATTER_SIG, automatic)
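
For context: colls.h keeps the scatter implementations as an x-macro list, so registering a name once lets each "action" (prototype generation, selector table, ...) be applied to every algorithm. A minimal sketch of the pattern, with simplified stand-in names (SCATTER_ALGOS, DECLARE_SCATTER and the scatter_##name signature are illustrative, not the actual SimGrid macros):

    /* x-macro list: each entry is expanded once per action */
    #include <stdio.h>

    #define SCATTER_ALGOS(action)            \
        action(mvapich2)                     \
        action(mvapich2_two_level_binomial)  \
        action(mvapich2_two_level_direct)

    /* action 1: emit one prototype per algorithm */
    #define DECLARE_SCATTER(name) \
        int scatter_##name(const void *sendbuf, void *recvbuf, int count);
    SCATTER_ALGOS(DECLARE_SCATTER)

    /* action 2: emit a name table usable for run-time selection */
    #define NAME_ENTRY(name) #name,
    static const char *scatter_names[] = { SCATTER_ALGOS(NAME_ENTRY) };

    int main(void) {
        for (unsigned i = 0; i < sizeof scatter_names / sizeof *scatter_names; i++)
            printf("%s\n", scatter_names[i]);
        return 0;
    }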
 
diff --git a/src/smpi/colls/gather-mvapich.c b/src/smpi/colls/gather-mvapich.c
index 04cbcaf..c95e25a 100644
@@ -265,8 +265,8 @@ int smpi_coll_tuned_gather_mvapich2_two_level(void *sendbuf,
     }
     leader_comm = smpi_comm_get_leaders_comm(comm);
     int* leaders_map = smpi_comm_get_leaders_map(comm);
-    leader_of_root = leaders_map[root];
-    leader_root = smpi_group_rank(smpi_comm_group(leader_comm),leader_of_root);
+    leader_of_root = smpi_group_rank(smpi_comm_group(comm),leaders_map[root]);
+    leader_root = smpi_group_rank(smpi_comm_group(leader_comm),leaders_map[root]);
     /* leader_root is the rank of the leader of the root in leader_comm. 
      * leader_root is to be used as the root of the inter-leader gather ops 
      */
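
This hunk fixes a rank-translation bug in the existing two-level gather: leaders_map[root] presumably identifies root's node leader as a global process rather than as a rank of comm, so it must be translated through the matching group before use; previously leader_of_root was read from the map unconverted. In standard-MPI terms the translation looks like the following (an analogy only; rank_in_other is a hypothetical helper, not the SMPI internals):

    #include <mpi.h>

    /* Translate a rank known in communicator "from" into the numbering of
     * communicator "to"; returns MPI_UNDEFINED if absent from "to". */
    int rank_in_other(MPI_Comm from, int rank_in_from, MPI_Comm to) {
        MPI_Group gfrom, gto;
        int out;
        MPI_Comm_group(from, &gfrom);
        MPI_Comm_group(to, &gto);
        MPI_Group_translate_ranks(gfrom, 1, &rank_in_from, gto, &out);
        MPI_Group_free(&gfrom);
        MPI_Group_free(&gto);
        return out;
    }
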
diff --git a/src/smpi/colls/scatter-mvapich-two-level.c b/src/smpi/colls/scatter-mvapich-two-level.c
new file mode 100644
index 0000000..705869a
--- /dev/null
+++ b/src/smpi/colls/scatter-mvapich-two-level.c
@@ -0,0 +1,410 @@
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2009 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ *
+ * Additional copyrights may follow
+ */
+ /* -*- Mode: C; c-basic-offset:4 ; -*- */
+/* Copyright (c) 2001-2014, The Ohio State University. All rights
+ * reserved.
+ *
+ * This file is part of the MVAPICH2 software package developed by the
+ * team members of The Ohio State University's Network-Based Computing
+ * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * For detailed copyright and licensing information, please refer to the
+ * copyright file COPYRIGHT in the top level MVAPICH2 directory.
+ */
+/*
+ *
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+#include "colls_private.h"
+
+#define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial
+#define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear
+
+extern int (*MV2_Scatter_intra_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
+    void *recvbuf, int recvcount, MPI_Datatype recvtype,
+    int root, MPI_Comm comm);
+
+int smpi_coll_tuned_scatter_mvapich2_two_level_direct(void *sendbuf,
+                                      int sendcnt,
+                                      MPI_Datatype sendtype,
+                                      void *recvbuf,
+                                      int recvcnt,
+                                      MPI_Datatype recvtype,
+                                      int root, MPI_Comm  comm)
+{
+    int comm_size, rank;
+    int local_rank, local_size;
+    int leader_comm_rank, leader_comm_size;
+    int mpi_errno = MPI_SUCCESS;
+    int recvtype_size, sendtype_size, nbytes;
+    void *tmp_buf = NULL;
+    void *leader_scatter_buf = NULL;
+    MPI_Status status;
+    int leader_root, leader_of_root = -1;
+    MPI_Comm shmem_comm, leader_comm;
+    //if not set (use of the algo directly, without mvapich2 selector)
+    if(MV2_Scatter_intra_function==NULL)
+      MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich;
+    
+    if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){
+      smpi_comm_init_smp(comm);
+    }
+    comm_size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    if (((rank == root) && (recvcnt == 0))
+        || ((rank != root) && (sendcnt == 0))) {
+        return MPI_SUCCESS;
+    }
+
+    /* extract the rank,size information for the intra-node
+     * communicator */
+    shmem_comm = smpi_comm_get_intra_comm(comm);
+    local_rank = smpi_comm_rank(shmem_comm);
+    local_size = smpi_comm_size(shmem_comm);
+
+    if (local_rank == 0) {
+        /* Node leader. Extract the rank, size information for the leader
+         * communicator */
+        leader_comm = smpi_comm_get_leaders_comm(comm);
+        leader_comm_size = smpi_comm_size(leader_comm);
+        leader_comm_rank = smpi_comm_rank(leader_comm);
+    }
+
+    if (local_size == comm_size) {
+        /* purely intra-node scatter. Just use the direct algorithm and we are done */
+        mpi_errno = MPIR_Scatter_MV2_Direct(sendbuf, sendcnt, sendtype,
+                                            recvbuf, recvcnt, recvtype,
+                                            root, comm);
+
+    } else {
+        recvtype_size=smpi_datatype_size(recvtype);
+        sendtype_size=smpi_datatype_size(sendtype);
+
+        if (rank == root) {
+            nbytes = sendcnt * sendtype_size;
+        } else {
+            nbytes = recvcnt * recvtype_size;
+        }
+
+        if (local_rank == 0) {
+            /* Node leader, allocate tmp_buffer */
+            tmp_buf = xbt_malloc(nbytes * local_size);
+        }
+
+        leader_comm = smpi_comm_get_leaders_comm(comm);
+        int* leaders_map = smpi_comm_get_leaders_map(comm);
+        leader_of_root = smpi_group_rank(smpi_comm_group(comm),leaders_map[root]);
+        leader_root = smpi_group_rank(smpi_comm_group(leader_comm),leaders_map[root]);
+        /* leader_root is the rank of the leader of the root in leader_comm.
+         * leader_root is to be used as the root of the inter-leader gather ops
+         */
+
+        if ((local_rank == 0) && (root != rank)
+            && (leader_of_root == rank)) {
+            /* The root of the scatter operation is not the node leader. Recv
+             * data from the node leader */
+            leader_scatter_buf = xbt_malloc(nbytes * comm_size);
+            smpi_mpi_recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE,
+                             root, COLL_TAG_SCATTER, comm, &status);
+
+        }
+
+        if (rank == root && local_rank != 0) {
+            /* The root of the scatter operation is not the node leader. Send
+             * data to the node leader */
+            smpi_mpi_send(sendbuf, sendcnt * comm_size, sendtype,
+                                     leader_of_root, COLL_TAG_SCATTER, comm
+                                     );
+        }
+
+        if (leader_comm_size > 1 && local_rank == 0) {
+            if (!smpi_comm_is_uniform(comm)) {
+                int *displs = NULL;
+                int *sendcnts = NULL;
+                int *node_sizes;
+                int i = 0;
+                node_sizes = smpi_comm_get_non_uniform_map(comm);
+
+                if (root != leader_of_root) {
+                    if (leader_comm_rank == leader_root) {
+                        displs = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts[0] = node_sizes[0] * nbytes;
+                        displs[0] = 0;
+
+                        for (i = 1; i < leader_comm_size; i++) {
+                            displs[i] =
+                                displs[i - 1] + node_sizes[i - 1] * nbytes;
+                            sendcnts[i] = node_sizes[i] * nbytes;
+                        }
+                    }
+                        smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs,
+                                      MPI_BYTE, tmp_buf, nbytes * local_size,
+                                      MPI_BYTE, leader_root, leader_comm);
+                } else {
+                    if (leader_comm_rank == leader_root) {
+                        displs = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts[0] = node_sizes[0] * sendcnt;
+                        displs[0] = 0;
+
+                        for (i = 1; i < leader_comm_size; i++) {
+                            displs[i] =
+                                displs[i - 1] + node_sizes[i - 1] * sendcnt;
+                            sendcnts[i] = node_sizes[i] * sendcnt;
+                        }
+                    }
+                    smpi_mpi_scatterv(sendbuf, sendcnts, displs,
+                                              sendtype, tmp_buf,
+                                              nbytes * local_size, MPI_BYTE,
+                                              leader_root, leader_comm);
+                }
+                if (leader_comm_rank == leader_root) {
+                    xbt_free(displs);
+                    xbt_free(sendcnts);
+                }
+            } else {
+                if (leader_of_root != root) {
+                    mpi_errno =
+                        MPIR_Scatter_MV2_Direct(leader_scatter_buf,
+                                                nbytes * local_size, MPI_BYTE,
+                                                tmp_buf, nbytes * local_size,
+                                                MPI_BYTE, leader_root,
+                                                leader_comm);
+                } else {
+                    mpi_errno =
+                        MPIR_Scatter_MV2_Direct(sendbuf, sendcnt * local_size,
+                                                sendtype, tmp_buf,
+                                                nbytes * local_size, MPI_BYTE,
+                                                leader_root, leader_comm);
+
+                }
+            }
+        }
+        /* The leaders are now done with the inter-leader part. Scatter the data within the nodes */
+
+        if (rank == root && recvbuf == MPI_IN_PLACE) {
+            mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
+                                                (void *)sendbuf, sendcnt, sendtype,
+                                                0, shmem_comm);
+        } else {
+            mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
+                                                recvbuf, recvcnt, recvtype,
+                                                0, shmem_comm);
+        }
+    }
+
+    /* check if multiple threads are calling this collective function */
+    if (comm_size != local_size && local_rank == 0) {
+        xbt_free(tmp_buf);
+        if (leader_of_root == rank && root != rank) {
+            xbt_free(leader_scatter_buf);
+        }
+    }
+    return (mpi_errno);
+}
+
+
+int smpi_coll_tuned_scatter_mvapich2_two_level_binomial(void *sendbuf,
+                                        int sendcnt,
+                                        MPI_Datatype sendtype,
+                                        void *recvbuf,
+                                        int recvcnt,
+                                        MPI_Datatype recvtype,
+                                        int root, MPI_Comm comm)
+{
+    int comm_size, rank;
+    int local_rank, local_size;
+    int leader_comm_rank, leader_comm_size;
+    int mpi_errno = MPI_SUCCESS;
+    int recvtype_size, sendtype_size, nbytes;
+    void *tmp_buf = NULL;
+    void *leader_scatter_buf = NULL;
+    MPI_Status status;
+    int leader_root = -1, leader_of_root = -1;
+    MPI_Comm shmem_comm, leader_comm;
+
+
+    //if not set (use of the algo directly, without mvapich2 selector)
+    if(MV2_Scatter_intra_function==NULL)
+      MV2_Scatter_intra_function=smpi_coll_tuned_scatter_mpich;
+    
+    if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){
+      smpi_comm_init_smp(comm);
+    }
+    comm_size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    if (((rank == root) && (recvcnt == 0))
+        || ((rank != root) && (sendcnt == 0))) {
+        return MPI_SUCCESS;
+    }
+
+    /* extract the rank,size information for the intra-node
+     * communicator */
+    shmem_comm = smpi_comm_get_intra_comm(comm);
+    local_rank = smpi_comm_rank(shmem_comm);
+    local_size = smpi_comm_size(shmem_comm);
+
+    if (local_rank == 0) {
+        /* Node leader. Extract the rank, size information for the leader
+         * communicator */
+        leader_comm = smpi_comm_get_leaders_comm(comm);
+        leader_comm_size = smpi_comm_size(leader_comm);
+        leader_comm_rank = smpi_comm_rank(leader_comm);
+    }
+
+    if (local_size == comm_size) {
+        /* purely intra-node scatter. Just use the direct algorithm and we are done */
+        mpi_errno = MPIR_Scatter_MV2_Direct(sendbuf, sendcnt, sendtype,
+                                            recvbuf, recvcnt, recvtype,
+                                            root, comm);
+
+    } else {
+        recvtype_size=smpi_datatype_size(recvtype);
+        sendtype_size=smpi_datatype_size(sendtype);
+
+        if (rank == root) {
+            nbytes = sendcnt * sendtype_size;
+        } else {
+            nbytes = recvcnt * recvtype_size;
+        }
+
+        if (local_rank == 0) {
+            /* Node leader, allocate tmp_buffer */
+            tmp_buf = xbt_malloc(nbytes * local_size);
+        }
+        leader_comm = smpi_comm_get_leaders_comm(comm);
+        int* leaders_map = smpi_comm_get_leaders_map(comm);
+        leader_of_root = smpi_group_rank(smpi_comm_group(comm),leaders_map[root]);
+        leader_root = smpi_group_rank(smpi_comm_group(leader_comm),leaders_map[root]);
+        /* leader_root is the rank of the leader of the root in leader_comm.
+         * leader_root is to be used as the root of the inter-leader gather ops
+         */
+
+        if ((local_rank == 0) && (root != rank)
+            && (leader_of_root == rank)) {
+            /* The root of the scatter operation is not the node leader. Recv
+             * data from the node leader */
+            leader_scatter_buf = xbt_malloc(nbytes * comm_size);
+            smpi_mpi_recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE,
+                             root, COLL_TAG_SCATTER, comm, &status);
+        }
+
+        if (rank == root && local_rank != 0) {
+            /* The root of the scatter operation is not the node leader. Send
+             * data to the node leader */
+            smpi_mpi_send(sendbuf, sendcnt * comm_size, sendtype,
+                                     leader_of_root, COLL_TAG_SCATTER, comm);
+        }
+
+        if (leader_comm_size > 1 && local_rank == 0) {
+            if (!smpi_comm_is_uniform(comm)) {
+                int *displs = NULL;
+                int *sendcnts = NULL;
+                int *node_sizes;
+                int i = 0;
+                node_sizes = smpi_comm_get_non_uniform_map(comm);
+
+                if (root != leader_of_root) {
+                    if (leader_comm_rank == leader_root) {
+                        displs = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts[0] = node_sizes[0] * nbytes;
+                        displs[0] = 0;
+
+                        for (i = 1; i < leader_comm_size; i++) {
+                            displs[i] =
+                                displs[i - 1] + node_sizes[i - 1] * nbytes;
+                            sendcnts[i] = node_sizes[i] * nbytes;
+                        }
+                    }
+                        smpi_mpi_scatterv(leader_scatter_buf, sendcnts, displs,
+                                      MPI_BYTE, tmp_buf, nbytes * local_size,
+                                      MPI_BYTE, leader_root, leader_comm);
+                } else {
+                    if (leader_comm_rank == leader_root) {
+                        displs = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts = xbt_malloc(sizeof (int) * leader_comm_size);
+                        sendcnts[0] = node_sizes[0] * sendcnt;
+                        displs[0] = 0;
+
+                        for (i = 1; i < leader_comm_size; i++) {
+                            displs[i] =
+                                displs[i - 1] + node_sizes[i - 1] * sendcnt;
+                            sendcnts[i] = node_sizes[i] * sendcnt;
+                        }
+                    }
+                    smpi_mpi_scatterv(sendbuf, sendcnts, displs,
+                                              sendtype, tmp_buf,
+                                              nbytes * local_size, MPI_BYTE,
+                                              leader_root, leader_comm);
+                }
+                if (leader_comm_rank == leader_root) {
+                    xbt_free(displs);
+                    xbt_free(sendcnts);
+                }
+            } else {
+                if (leader_of_root != root) {
+                    mpi_errno =
+                        MPIR_Scatter_MV2_Binomial(leader_scatter_buf,
+                                                  nbytes * local_size, MPI_BYTE,
+                                                  tmp_buf, nbytes * local_size,
+                                                  MPI_BYTE, leader_root,
+                                                  leader_comm);
+                } else {
+                    mpi_errno =
+                        MPIR_Scatter_MV2_Binomial(sendbuf, sendcnt * local_size,
+                                                  sendtype, tmp_buf,
+                                                  nbytes * local_size, MPI_BYTE,
+                                                  leader_root, leader_comm);
+
+                }
+            }
+        }
+        /* The leaders are now done with the inter-leader part. Scatter the data within the nodes */
+
+        if (rank == root && recvbuf == MPI_IN_PLACE) {
+            mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
+                                                (void *)sendbuf, sendcnt, sendtype,
+                                                0, shmem_comm);
+        } else {
+            mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
+                                                recvbuf, recvcnt, recvtype,
+                                                0, shmem_comm);
+        }
+
+    }
+
+
+    /* check if multiple threads are calling this collective function */
+    if (comm_size != local_size && local_rank == 0) {
+        xbt_free(tmp_buf);
+        if (leader_of_root == rank && root != rank) {
+            xbt_free(leader_scatter_buf);
+        }
+    }
+
+    return (mpi_errno);
+}
+
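
The new algorithm proceeds in three stages: (1) if the root is not its node's leader, it ships its entire send buffer to that leader; (2) the node leaders scatter among themselves over leader_comm, using the direct (or binomial) helper when all nodes are the same size and smpi_mpi_scatterv with per-node byte counts otherwise; (3) each leader scatters its slice within its node over shmem_comm through MV2_Scatter_intra_function. A worked example of the non-uniform layout computed in the scatterv branch above (node sizes are hypothetical; the loop mirrors the sendcnts/displs code):

    #include <stdio.h>

    int main(void) {
        int node_sizes[] = { 4, 2, 6 };  /* hypothetical ranks per node */
        int nbytes = 100;                /* bytes destined to each rank */
        int n = sizeof node_sizes / sizeof *node_sizes;
        int displs[3], sendcnts[3];

        sendcnts[0] = node_sizes[0] * nbytes;   /* leader 0: 400 bytes */
        displs[0] = 0;
        for (int i = 1; i < n; i++) {
            displs[i] = displs[i - 1] + node_sizes[i - 1] * nbytes;
            sendcnts[i] = node_sizes[i] * nbytes;
        }
        for (int i = 0; i < n; i++)     /* prints 400@0, 200@400, 600@600 */
            printf("leader %d: %d bytes at offset %d\n", i, sendcnts[i], displs[i]);
        return 0;
    }
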
diff --git a/src/smpi/colls/smpi_mvapich2_selector.c b/src/smpi/colls/smpi_mvapich2_selector.c
index 54b6782..c423a5b 100644
@@ -690,7 +690,7 @@ int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
     void *recvbuf,
     int recvcnt,
     MPI_Datatype recvtype,
-    int root, MPI_Comm comm_ptr)
+    int root, MPI_Comm comm)
 {
   int range = 0, range_threshold = 0, range_threshold_intra = 0;
   int mpi_errno = MPI_SUCCESS;
@@ -699,16 +699,20 @@ int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
   int recvtype_size, sendtype_size;
   int partial_sub_ok = 0;
   int conf_index = 0;
-  //  int local_size = -1;
-  //  int i;
-  //   MPI_Comm shmem_comm;
+    int local_size = -1;
+    int i;
+     MPI_Comm shmem_comm;
   //    MPID_Comm *shmem_commptr=NULL;
   if(mv2_scatter_thresholds_table==NULL)
     init_mv2_scatter_tables_stampede();
 
-  comm_size = smpi_comm_size(comm_ptr);
+  if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){
+    smpi_comm_init_smp(comm);
+  }
+  
+  comm_size = smpi_comm_size(comm);
 
-  rank = smpi_comm_rank(comm_ptr);
+  rank = smpi_comm_rank(comm);
 
   if (rank == root) {
       sendtype_size=smpi_datatype_size(sendtype);
@@ -717,29 +721,28 @@ int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
       recvtype_size=smpi_datatype_size(recvtype);
       nbytes = recvcnt * recvtype_size;
   }
-  /*
+  
     // check if safe to use partial subscription mode 
-    if (comm_ptr->ch.shmem_coll_ok == 1 && comm_ptr->ch.is_uniform) {
+    if (smpi_comm_is_uniform(comm)) {
 
-        shmem_comm = comm_ptr->ch.shmem_comm;
-        MPID_Comm_get_ptr(shmem_comm, shmem_commptr);
-        local_size = shmem_commptr->local_size;
+        shmem_comm = smpi_comm_get_intra_comm(comm);
+        local_size = smpi_comm_size(shmem_comm);
         i = 0;
         if (mv2_scatter_table_ppn_conf[0] == -1) {
             // Indicating user defined tuning 
             conf_index = 0;
-            goto conf_check_end;
+        }else{
+            do {
+                if (local_size == mv2_scatter_table_ppn_conf[i]) {
+                    conf_index = i;
+                    partial_sub_ok = 1;
+                    break;
+                }
+                i++;
+            } while(i < mv2_scatter_num_ppn_conf);
         }
-        do {
-            if (local_size == mv2_scatter_table_ppn_conf[i]) {
-                conf_index = i;
-                partial_sub_ok = 1;
-                break;
-            }
-            i++;
-        } while(i < mv2_scatter_num_ppn_conf);
     }
-   */
+   
   if (partial_sub_ok != 1) {
       conf_index = 0;
   }
@@ -772,9 +775,9 @@ int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
 
   if(MV2_Scatter_function == &MPIR_Scatter_mcst_wrap_MV2) {
 #if defined(_MCST_SUPPORT_)
-      if(comm_ptr->ch.is_mcast_ok == 1
+      if(comm->ch.is_mcast_ok == 1
           && mv2_use_mcast_scatter == 1
-          && comm_ptr->ch.shmem_coll_ok == 1) {
+          && comm->ch.shmem_coll_ok == 1) {
           MV2_Scatter_function = &MPIR_Scatter_mcst_MV2;
       } else
 #endif /*#if defined(_MCST_SUPPORT_) */
@@ -792,25 +795,24 @@ int smpi_coll_tuned_scatter_mvapich2(void *sendbuf,
 
   if( (MV2_Scatter_function == &MPIR_Scatter_MV2_two_level_Direct) ||
       (MV2_Scatter_function == &MPIR_Scatter_MV2_two_level_Binomial)) {
-      /* if( comm_ptr->ch.shmem_coll_ok == 1 &&
-             comm_ptr->ch.is_global_block == 1 ) {
+       if( smpi_comm_is_blocked(comm)) {
              MV2_Scatter_intra_function = mv2_scatter_thresholds_table[conf_index][range].intra_node[range_threshold_intra]
                                 .MV2_pt_Scatter_function;
 
              mpi_errno =
                    MV2_Scatter_function(sendbuf, sendcnt, sendtype,
                                         recvbuf, recvcnt, recvtype, root,
-                                        comm_ptr);
-         } else {*/
+                                        comm);
+         } else {
       mpi_errno = MPIR_Scatter_MV2_Binomial(sendbuf, sendcnt, sendtype,
           recvbuf, recvcnt, recvtype, root,
-          comm_ptr);
+          comm);
 
-      //}
+      }
   } else {
       mpi_errno = MV2_Scatter_function(sendbuf, sendcnt, sendtype,
           recvbuf, recvcnt, recvtype, root,
-          comm_ptr);
+          comm);
   }
   return (mpi_errno);
 }
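
The selector changes above do three things: build the SMP communicators on first use, re-enable the partial-subscription check by matching the per-node size against the tuning table's ppn configurations, and guard the two-level paths behind smpi_comm_is_blocked(comm) (ranks contiguous per node), falling back to the plain binomial scatter otherwise. A miniature of the message-size dispatch such tuning tables implement (table contents and function names invented for illustration):

    #include <stdio.h>

    typedef int (*scatter_fn)(int nbytes);
    static int scatter_binomial(int n)         { printf("binomial: %d B\n", n); return 0; }
    static int scatter_two_level_direct(int n) { printf("two-level: %d B\n", n); return 0; }

    struct range { int max_bytes; scatter_fn fn; };        /* -1: unbounded */
    static const struct range table[] = {
        {  512, scatter_binomial },
        {   -1, scatter_two_level_direct },
    };

    int main(void) {
        int nbytes = 4096, i = 0;
        while (table[i].max_bytes != -1 && nbytes > table[i].max_bytes)
            i++;                                           /* picks two-level */
        return table[i].fn(nbytes);
    }
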
diff --git a/src/smpi/colls/smpi_mvapich2_selector_stampede.h b/src/smpi/colls/smpi_mvapich2_selector_stampede.h
index 2bf0f1c..143d3c7 100644
@@ -1629,8 +1629,8 @@ int MPIR_Scatter_mcst_wrap_MV2(void *sendbuf,
 
 #define MPIR_Scatter_MV2_Binomial smpi_coll_tuned_scatter_ompi_binomial
 #define MPIR_Scatter_MV2_Direct smpi_coll_tuned_scatter_ompi_basic_linear
-#define MPIR_Scatter_MV2_two_level_Binomial smpi_coll_tuned_scatter_ompi_binomial
-#define MPIR_Scatter_MV2_two_level_Direct smpi_coll_tuned_scatter_ompi_basic_linear
+#define MPIR_Scatter_MV2_two_level_Binomial smpi_coll_tuned_scatter_mvapich2_two_level_binomial
+#define MPIR_Scatter_MV2_two_level_Direct smpi_coll_tuned_scatter_mvapich2_two_level_direct
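
These two redefinitions are what actually plug the new file into the selector: MPIR_Scatter_MV2_two_level_Binomial and MPIR_Scatter_MV2_two_level_Direct previously aliased the single-level OMPI algorithms as placeholders, and now expand at preprocessing time to smpi_coll_tuned_scatter_mvapich2_two_level_binomial and smpi_coll_tuned_scatter_mvapich2_two_level_direct from scatter-mvapich-two-level.c.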