Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
add Allreduce SMP collective from MVAPICH2
author Augustin Degomme <augustin.degomme@imag.fr>
Fri, 1 Aug 2014 08:50:02 +0000 (10:50 +0200)
committer Augustin Degomme <augustin.degomme@imag.fr>
Fri, 1 Aug 2014 08:51:10 +0000 (10:51 +0200)
buildtools/Cmake/AddTests.cmake
buildtools/Cmake/DefinePackages.cmake
src/smpi/colls/allreduce-mvapich-two-level.c [new file with mode: 0644]
src/smpi/colls/colls.h
src/smpi/colls/smpi_mvapich2_selector.c
src/smpi/colls/smpi_mvapich2_selector_stampede.h

index ccb0fd1..0b8441c 100644 (file)
@@ -384,7 +384,7 @@ IF(NOT enable_memcheck)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb
                             rdb smp_binomial smp_binomial_pipeline
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb
                             rdb smp_binomial smp_binomial_pipeline
-                            smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2 mvapich2_rs impi)
+                            smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2 mvapich2_rs mvapich2_two_level impi)
       ADD_TESH(tesh-smpi-allreduce-coll-${ALLREDUCE_COLL} --cfg smpi/allreduce:${ALLREDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL_LARGE ompi_ring_segmented)
       ADD_TESH(tesh-smpi-allreduce-coll-${ALLREDUCE_COLL} --cfg smpi/allreduce:${ALLREDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLREDUCE_COLL_LARGE ompi_ring_segmented)
index 21a4ec0..f9f9695 100644 (file)
@@ -170,6 +170,7 @@ set(SMPI_SRC
   src/smpi/colls/allreduce-smp-rsag-rab.c
   src/smpi/colls/allreduce-smp-rsag.c
   src/smpi/colls/allreduce-mvapich-rs.c
   src/smpi/colls/allreduce-smp-rsag-rab.c
   src/smpi/colls/allreduce-smp-rsag.c
   src/smpi/colls/allreduce-mvapich-rs.c
+  src/smpi/colls/allreduce-mvapich-two-level.c
   src/smpi/colls/alltoall-2dmesh.c
   src/smpi/colls/alltoall-3dmesh.c
 #  src/smpi/colls/alltoall-bruck.c
   src/smpi/colls/alltoall-2dmesh.c
   src/smpi/colls/alltoall-3dmesh.c
 #  src/smpi/colls/alltoall-bruck.c
diff --git a/src/smpi/colls/allreduce-mvapich-two-level.c b/src/smpi/colls/allreduce-mvapich-two-level.c
new file mode 100644 (file)
index 0000000..7ebb0b5
--- /dev/null
@@ -0,0 +1,170 @@
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2009 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ *
+ * Additional copyrights may follow
+ */
+ /* -*- Mode: C; c-basic-offset:4 ; -*- */
+/* Copyright (c) 2001-2014, The Ohio State University. All rights
+ * reserved.
+ *
+ * This file is part of the MVAPICH2 software package developed by the
+ * team members of The Ohio State University's Network-Based Computing
+ * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * For detailed copyright and licensing information, please refer to the
+ * copyright file COPYRIGHT in the top level MVAPICH2 directory.
+ */
+/*
+ *
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#include "colls_private.h"
+
+#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb
+#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs
+
+extern int (*MV2_Allreduce_function)(void *sendbuf,
+    void *recvbuf,
+    int count,
+    MPI_Datatype datatype,
+    MPI_Op op, MPI_Comm comm);
+
+
+extern int (*MV2_Allreduce_intra_function)( void *sendbuf,
+    void *recvbuf,
+    int count,
+    MPI_Datatype datatype,
+    MPI_Op op, MPI_Comm comm);
+    
+static  int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf,
+    void *recvbuf,
+    int count,
+    MPI_Datatype datatype,
+    MPI_Op op, MPI_Comm  comm)
+{
+  mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm);
+  return MPI_SUCCESS;
+}
+
+static  int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf,
+    void *recvbuf,
+    int count,
+    MPI_Datatype datatype,
+    MPI_Op op, MPI_Comm  comm)
+{
+  mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm);
+  return MPI_SUCCESS;
+}
+    
+    
+/* General two-level allreduce: intra-node step on shmem_comm, allreduce among node leaders on leader_comm, then broadcast on shmem_comm */
+int smpi_coll_tuned_allreduce_mvapich2_two_level(void *sendbuf,
+                             void *recvbuf,
+                             int count,
+                             MPI_Datatype datatype,
+                             MPI_Op op, MPI_Comm comm)
+{
+    int mpi_errno = MPI_SUCCESS;
+    int total_size = 0;
+    MPI_Aint true_lb, true_extent;
+    MPI_Comm shmem_comm = MPI_COMM_NULL, leader_comm = MPI_COMM_NULL;
+    int local_rank = -1, local_size = 0;
+
+    // Fall back to default functions if the pointers are not set (algorithm used directly, without the mvapich2 selector)
+    if(MV2_Allreduce_intra_function==NULL)
+      MV2_Allreduce_intra_function = smpi_coll_tuned_allreduce_mpich;
+    if(MV2_Allreduce_function==NULL)
+      MV2_Allreduce_function = smpi_coll_tuned_allreduce_rdb;
+    
+    if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){
+      smpi_comm_init_smp(comm);
+    }
+  
+    if (count == 0) {
+        return MPI_SUCCESS;
+    }
+    smpi_datatype_extent(datatype, &true_lb,
+                                       &true_extent);
+
+    total_size = smpi_comm_size(comm);
+    shmem_comm = smpi_comm_get_intra_comm(comm);
+    local_rank = smpi_comm_rank(shmem_comm);
+    local_size = smpi_comm_size(shmem_comm);
+
+    leader_comm = smpi_comm_get_leaders_comm(comm);
+
+    if (local_rank == 0) {
+        if (sendbuf != MPI_IN_PLACE) {
+            smpi_datatype_copy(sendbuf, count, datatype, recvbuf,
+                                       count, datatype);
+        }
+    }
+
+    /* Doing the shared memory gather and reduction by the leader */
+    if (local_rank == 0) {
+        if ((MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_shmem_MV2) || 
+              (MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_p2p_MV2) ) {
+        mpi_errno =
+        MV2_Allreduce_intra_function(sendbuf, recvbuf, count, datatype,
+                                     op, comm);
+        }
+        else {
+        mpi_errno =
+        MV2_Allreduce_intra_function(sendbuf, recvbuf, count, datatype,
+                                     op, shmem_comm);
+        }
+
+        if (local_size != total_size) {
+        void* sendtmpbuf = (char *)xbt_malloc(count*smpi_datatype_get_extent(datatype));
+      smpi_datatype_copy(recvbuf, count, datatype,sendtmpbuf, count, datatype);
+            /* inter-node allreduce */
+            if(MV2_Allreduce_function == &MPIR_Allreduce_pt2pt_rd_MV2){
+                mpi_errno =
+                    MPIR_Allreduce_pt2pt_rd_MV2(sendtmpbuf, recvbuf, count, datatype, op,
+                                      leader_comm);
+            } else {
+                mpi_errno =
+                    MPIR_Allreduce_pt2pt_rs_MV2(sendtmpbuf, recvbuf, count, datatype, op,
+                                      leader_comm);
+            }
+            xbt_free(sendtmpbuf);
+        }
+    } else {
+        /* Non-leader ranks: take part in the intra-node step via MV2_Allreduce_intra_function */
+        if ((MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_shmem_MV2) || 
+              (MV2_Allreduce_intra_function == &MPIR_Allreduce_reduce_p2p_MV2) ) {
+        mpi_errno =
+        MV2_Allreduce_intra_function(sendbuf, recvbuf, count, datatype,
+                                     op, comm);
+        }
+        else {
+        mpi_errno =
+        MV2_Allreduce_intra_function(sendbuf, recvbuf, count, datatype,
+                                     op, shmem_comm);
+        }
+    }
+
+    /* Broadcasting the message from leader to the rest */
+    /* Note: shared memory broadcast could improve the performance */
+    mpi_errno = mpi_coll_bcast_fun(recvbuf, count, datatype, 0, shmem_comm);
+
+    return (mpi_errno);
+
+}
index 78d04b8..782f367 100644 (file)
@@ -133,6 +133,7 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi_ring_segmented) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2_rs) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2_rs) COLL_sep \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2_two_level) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic)
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, impi) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab) COLL_sep \
 COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic)
index 14335a5..c3ab5ef 100644 (file)
@@ -359,7 +359,7 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf,
   int nbytes = 0;
   int range = 0, range_threshold = 0, range_threshold_intra = 0;
   int is_two_level = 0;
   int nbytes = 0;
   int range = 0, range_threshold = 0, range_threshold_intra = 0;
   int is_two_level = 0;
-  //int is_commutative = 0;
+  int is_commutative = 0;
   MPI_Aint true_lb, true_extent;
 
   sendtype_size=smpi_datatype_size(datatype);
   MPI_Aint true_lb, true_extent;
 
   sendtype_size=smpi_datatype_size(datatype);
@@ -427,16 +427,16 @@ int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf,
 
     if(is_two_level == 1){
         // check if shm is ready, if not use other algorithm first
 
     if(is_two_level == 1){
         // check if shm is ready, if not use other algorithm first
-        /*if ((comm->ch.shmem_coll_ok == 1)
-                    && (mv2_enable_shmem_allreduce)
-                    && (is_commutative)
-                    && (mv2_enable_shmem_collectives)) {
-                    mpi_errno = MPIR_Allreduce_two_level_MV2(sendbuf, recvbuf, count,
+        if (is_commutative) {
+          if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){
+            smpi_comm_init_smp(comm);
+          }
+          mpi_errno = MPIR_Allreduce_two_level_MV2(sendbuf, recvbuf, count,
                                                      datatype, op, comm);
                                                      datatype, op, comm);
-                } else {*/
+                } else {
         mpi_errno = MPIR_Allreduce_pt2pt_rd_MV2(sendbuf, recvbuf, count,
             datatype, op, comm);
         mpi_errno = MPIR_Allreduce_pt2pt_rd_MV2(sendbuf, recvbuf, count,
             datatype, op, comm);
-        // }
+        }
     } else {
         mpi_errno = MV2_Allreduce_function(sendbuf, recvbuf, count,
             datatype, op, comm);
     } else {
         mpi_errno = MV2_Allreduce_function(sendbuf, recvbuf, count,
             datatype, op, comm);
index 8503dbe..07614ea 100644 (file)
@@ -818,7 +818,7 @@ static  int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf,
 
 #define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb
 #define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs
 
 #define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb
 #define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_mvapich2_rs
-
+#define MPIR_Allreduce_two_level_MV2 smpi_coll_tuned_allreduce_mvapich2_two_level
 
 
 static void init_mv2_allreduce_tables_stampede(){
 
 
 static void init_mv2_allreduce_tables_stampede(){