From b533e2f7a6f6ebf750a96243804688169d2e6d9e Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Thu, 24 Jul 2014 16:41:28 +0200 Subject: [PATCH] Add and use mvapich's scatter_dest alltoall algorithm --- buildtools/Cmake/AddTests.cmake | 2 +- buildtools/Cmake/DefinePackages.cmake | 1 + .../colls/alltoall-mvapich-scatter-dest.c | 136 ++++++++++++++++++ src/smpi/colls/colls.h | 1 + .../colls/smpi_mvapich2_selector_stampede.h | 2 +- 5 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 src/smpi/colls/alltoall-mvapich-scatter-dest.c diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake index cdcc52c6ce..f0374e88ea 100644 --- a/buildtools/Cmake/AddTests.cmake +++ b/buildtools/Cmake/AddTests.cmake @@ -392,7 +392,7 @@ IF(NOT enable_memcheck) FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_one_barrier pair_light_barrier pair_mpi_barrier rdb ring ring_light_barrier ring_mpi_barrier ring_one_barrier - bruck basic_linear ompi mpich mvapich2) + bruck basic_linear ompi mpich mvapich2 mvapich2_scatter_dest) ADD_TESH(tesh-smpi-alltoall-coll-${ALLTOALL_COLL} --cfg smpi/alltoall:${ALLTOALL_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoall --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoall alltoall_coll.tesh) ENDFOREACH() FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake index 7188af58e3..81aeb638c4 100644 --- a/buildtools/Cmake/DefinePackages.cmake +++ b/buildtools/Cmake/DefinePackages.cmake @@ -180,6 +180,7 @@ set(SMPI_SRC src/smpi/colls/alltoall-ring-mpi-barrier.c src/smpi/colls/alltoall-ring-one-barrier.c src/smpi/colls/alltoall-ring.c + src/smpi/colls/alltoall-mvapich-scatter-dest.c src/smpi/colls/alltoallv-bruck.c src/smpi/colls/alltoallv-ompi-basic-linear.c src/smpi/colls/alltoallv-pair-light-barrier.c diff --git a/src/smpi/colls/alltoall-mvapich-scatter-dest.c b/src/smpi/colls/alltoall-mvapich-scatter-dest.c new file mode 100644 index 0000000000..521247df59 --- /dev/null +++ b/src/smpi/colls/alltoall-mvapich-scatter-dest.c @@ -0,0 +1,136 @@ +/* Copyright (c) 2013-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2012 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2009 University of Houston. All rights reserved. + * + * Additional copyrights may follow + */ + +/* + * + * (C) 2001 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ +/* Copyright (c) 2001-2014, The Ohio State University. All rights + * reserved. + * + * This file is part of the MVAPICH2 software package developed by the + * team members of The Ohio State University's Network-Based Computing + * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda. + * + * For detailed copyright and licensing information, please refer to the + * copyright file COPYRIGHT in the top level MVAPICH2 directory. + * + */ + +//correct on stampede +#define MV2_ALLTOALL_THROTTLE_FACTOR 4 + +#include "colls_private.h" + +int smpi_coll_tuned_alltoall_mvapich2_scatter_dest( + void *sendbuf, + int sendcount, + MPI_Datatype sendtype, + void *recvbuf, + int recvcount, + MPI_Datatype recvtype, + MPI_Comm comm) +{ + int comm_size, i, j; + MPI_Aint sendtype_extent = 0, recvtype_extent = 0; + int mpi_errno=MPI_SUCCESS; + int dst, rank; + MPI_Request *reqarray; + MPI_Status *starray; + + if (recvcount == 0) return MPI_SUCCESS; + + comm_size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); + + /* Get extent of send and recv types */ + recvtype_extent = smpi_datatype_get_extent(recvtype); + sendtype_extent = smpi_datatype_get_extent(sendtype); + + /* Medium-size message. Use isend/irecv with scattered + destinations. Use Tony Ladd's modification to post only + a small number of isends/irecvs at a time. */ + /* FIXME: This converts the Alltoall to a set of blocking phases. + Two alternatives should be considered: + 1) the choice of communication pattern could try to avoid + contending routes in each phase + 2) rather than wait for all communication to finish (waitall), + we could maintain constant queue size by using waitsome + and posting new isend/irecv as others complete. This avoids + synchronization delays at the end of each block (when + there are only a few isend/irecvs left) + */ + int ii, ss, bblock; + + //Stampede is configured with + bblock = MV2_ALLTOALL_THROTTLE_FACTOR;//mv2_coll_param.alltoall_throttle_factor; + + if (bblock >= comm_size) bblock = comm_size; + /* If throttle_factor is n, each process posts n pairs of isend/irecv + in each iteration. */ + + /* FIXME: This should use the memory macros (there are storage + leaks here if there is an error, for example) */ + reqarray= (MPI_Request*)xbt_malloc(2*bblock*sizeof(MPI_Request)); + + starray=(MPI_Status *)xbt_malloc(2*bblock*sizeof(MPI_Status)); + + for (ii=0; ii