Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Add and use mvapich's scatter_dest alltoall algorithm
author Augustin Degomme <augustin.degomme@imag.fr>
Thu, 24 Jul 2014 14:41:28 +0000 (16:41 +0200)
committer Augustin Degomme <augustin.degomme@imag.fr>
Thu, 24 Jul 2014 14:41:28 +0000 (16:41 +0200)
buildtools/Cmake/AddTests.cmake
buildtools/Cmake/DefinePackages.cmake
src/smpi/colls/alltoall-mvapich-scatter-dest.c [new file with mode: 0644]
src/smpi/colls/colls.h
src/smpi/colls/smpi_mvapich2_selector_stampede.h

index cdcc52c..f0374e8 100644 (file)
@@ -392,7 +392,7 @@ IF(NOT enable_memcheck)
     FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_one_barrier pair_light_barrier
                            pair_mpi_barrier rdb ring ring_light_barrier
                            ring_mpi_barrier ring_one_barrier
-                           bruck basic_linear ompi mpich mvapich2)
+                           bruck basic_linear ompi mpich mvapich2 mvapich2_scatter_dest)
       ADD_TESH(tesh-smpi-alltoall-coll-${ALLTOALL_COLL} --cfg smpi/alltoall:${ALLTOALL_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoall --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoall alltoall_coll.tesh)
     ENDFOREACH()
     FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier
index 7188af5..81aeb63 100644 (file)
@@ -180,6 +180,7 @@ set(SMPI_SRC
   src/smpi/colls/alltoall-ring-mpi-barrier.c
   src/smpi/colls/alltoall-ring-one-barrier.c
   src/smpi/colls/alltoall-ring.c
+  src/smpi/colls/alltoall-mvapich-scatter-dest.c
   src/smpi/colls/alltoallv-bruck.c
   src/smpi/colls/alltoallv-ompi-basic-linear.c
   src/smpi/colls/alltoallv-pair-light-barrier.c
diff --git a/src/smpi/colls/alltoall-mvapich-scatter-dest.c b/src/smpi/colls/alltoall-mvapich-scatter-dest.c
new file mode 100644 (file)
index 0000000..521247d
--- /dev/null
@@ -0,0 +1,136 @@
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2012 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2009      University of Houston. All rights reserved.
+ *
+ * Additional copyrights may follow
+ */
+
+/*
+ *
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+/* Copyright (c) 2001-2014, The Ohio State University. All rights
+ * reserved.
+ *
+ * This file is part of the MVAPICH2 software package developed by the
+ * team members of The Ohio State University's Network-Based Computing
+ * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
+ *
+ * For detailed copyright and licensing information, please refer to the
+ * copyright file COPYRIGHT in the top level MVAPICH2 directory.
+ *
+ */
+/* isend/irecv pairs posted per phase; this value is correct on Stampede */
+#define MV2_ALLTOALL_THROTTLE_FACTOR         4
+#include "colls_private.h"
+
+/* All-to-all with scattered destinations (algorithm taken from MVAPICH2).
+ * Each rank exchanges one block per peer through isend/irecv, posting at most
+ * MV2_ALLTOALL_THROTTLE_FACTOR pairs per phase and waiting for the whole phase
+ * before starting the next. Returns MPI_SUCCESS at once when recvcount is 0,
+ * otherwise the final value of mpi_errno. */
+int smpi_coll_tuned_alltoall_mvapich2_scatter_dest(
+                            void *sendbuf,
+                            int sendcount,
+                            MPI_Datatype sendtype,
+                            void *recvbuf,
+                            int recvcount,
+                            MPI_Datatype recvtype,
+                            MPI_Comm comm)
+{
+    int          comm_size, i, j;
+    MPI_Aint     sendtype_extent = 0, recvtype_extent = 0;
+    int mpi_errno=MPI_SUCCESS;
+    int dst, rank, ii, ss, bblock;
+    MPI_Request *reqarray;
+    MPI_Status *starray;
+
+    if (recvcount == 0) return MPI_SUCCESS;
+
+    comm_size =  smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    /* Get extent of send and recv types */
+    recvtype_extent = smpi_datatype_get_extent(recvtype);
+    sendtype_extent = smpi_datatype_get_extent(sendtype);
+
+    /* Medium-size message. Use isend/irecv with scattered
+     destinations. Use Tony Ladd's modification to post only
+     a small number of isends/irecvs at a time. */
+    /* FIXME: This converts the Alltoall to a set of blocking phases.
+     Two alternatives should be considered:
+     1) the choice of communication pattern could try to avoid
+     contending routes in each phase
+     2) rather than wait for all communication to finish (waitall),
+     we could maintain constant queue size by using waitsome
+     and posting new isend/irecv as others complete.  This avoids
+     synchronization delays at the end of each block (when
+     there are only a few isend/irecvs left)
+     */
+    /* If throttle_factor is n, each process posts n pairs of isend/irecv
+     in each iteration. Stampede is configured with this value
+     (mv2_coll_param.alltoall_throttle_factor in MVAPICH2). */
+    bblock = MV2_ALLTOALL_THROTTLE_FACTOR;
+    if (bblock >= comm_size) bblock = comm_size;
+
+    /* One slot per request of a phase: the irecvs first, then the isends. */
+    reqarray = xbt_malloc(2*bblock*sizeof *reqarray);
+    starray = xbt_malloc(2*bblock*sizeof *starray);
+    for (ii=0; ii<comm_size; ii+=bblock) {
+        ss = comm_size-ii < bblock ? comm_size-ii : bblock;
+        /* do the communication -- post ss sends and receives: */
+        for ( i=0; i<ss; i++ ) {
+            dst = (rank+i+ii) % comm_size;
+            /* Cast first so the byte offset is computed in MPI_Aint, not int. */
+            reqarray[i]=smpi_mpi_irecv((char *)recvbuf +
+                                      (MPI_Aint)dst*recvcount*recvtype_extent,
+                                      recvcount, recvtype, dst,
+                                      COLL_TAG_ALLTOALL, comm);
+        }
+        for ( i=0; i<ss; i++ ) {
+            dst = (rank-i-ii+comm_size) % comm_size;
+            reqarray[i+ss]=smpi_mpi_isend((char *)sendbuf +
+                                          (MPI_Aint)dst*sendcount*sendtype_extent,
+                                          sendcount, sendtype, dst,
+                                          COLL_TAG_ALLTOALL, comm);
+        }
+
+        /* ... then wait for them to finish: */
+        smpi_mpi_waitall(2*ss,reqarray,starray);
+        /* --BEGIN ERROR HANDLING-- */
+        /* NOTE(review): nothing above assigns mpi_errno, so this is never taken. */
+        if (mpi_errno == MPI_ERR_IN_STATUS) {
+                for (j=0; j<2*ss; j++) {
+                     if (starray[j].MPI_ERROR != MPI_SUCCESS) {
+                         mpi_errno = starray[j].MPI_ERROR;
+                     }
+                }
+        }
+        /* --END ERROR HANDLING-- */
+    }
+
+    /* Release the per-call request and status arrays. */
+    xbt_free(reqarray);
+    xbt_free(starray);
+    return (mpi_errno);
+}
index dae8570..5752334 100644 (file)
@@ -155,6 +155,7 @@ COLL_APPLY(action, COLL_ALLTOALL_SIG, ring_light_barrier) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, ring_mpi_barrier) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, ring_one_barrier) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, mvapich2) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALL_SIG, mvapich2_scatter_dest) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, ompi) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALL_SIG, automatic)
index db05c60..079c3b6 100644 (file)
@@ -37,7 +37,7 @@ mv2_alltoall_tuning_table **mv2_alltoall_thresholds_table = NULL;
 
 #define MPIR_Alltoall_bruck_MV2 smpi_coll_tuned_alltoall_bruck
 #define MPIR_Alltoall_RD_MV2 smpi_coll_tuned_alltoall_rdb
-#define MPIR_Alltoall_Scatter_dest_MV2 smpi_coll_tuned_alltoall_ring
+#define MPIR_Alltoall_Scatter_dest_MV2 smpi_coll_tuned_alltoall_mvapich2_scatter_dest
 #define MPIR_Alltoall_pairwise_MV2 smpi_coll_tuned_alltoall_pair
 #define MPIR_Alltoall_inplace_MV2 smpi_coll_tuned_alltoall_ring