From 12e2600d416ea922cdaa3fdefe2522260e7312d1 Mon Sep 17 00:00:00 2001
From: Augustin Degomme
Date: Wed, 19 Jun 2013 13:08:42 +0200
Subject: [PATCH] ompi selector was using the wrong algo for alltoallv, add
 the right one and use it

---
 buildtools/Cmake/AddTests.cmake              |   2 +-
 buildtools/Cmake/DefinePackages.cmake        |   1 +
 src/smpi/colls/alltoallv-ompi-basic-linear.c | 100 +++++++++++++++++++
 src/smpi/colls/colls.h                       |   3 +-
 src/smpi/colls/smpi_openmpi_selector.c       |   2 +-
 5 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100644 src/smpi/colls/alltoallv-ompi-basic-linear.c

diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake
index 0b72e1185c..c8a7fb300b 100644
--- a/buildtools/Cmake/AddTests.cmake
+++ b/buildtools/Cmake/AddTests.cmake
@@ -401,7 +401,7 @@ if(NOT enable_memcheck)
   FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier
                           pair_one_barrier ring ring_light_barrier
-                          ring_mpi_barrier ring_one_barrier bruck ompi)
+                          ring_mpi_barrier ring_one_barrier bruck ompi ompi_basic_linear)
     ADD_TEST(smpi-alltoallv-coll-${ALLTOALLV_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/alltoallv:${ALLTOALLV_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoallv_coll.tesh)
   ENDFOREACH()
diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake
index f8436b725c..5f96eab068 100644
--- a/buildtools/Cmake/DefinePackages.cmake
+++ b/buildtools/Cmake/DefinePackages.cmake
@@ -174,6 +174,7 @@ set(SMPI_SRC
   src/smpi/colls/alltoallv-ring-mpi-barrier.c
   src/smpi/colls/alltoallv-ring-one-barrier.c
   src/smpi/colls/alltoallv-bruck.c
+  src/smpi/colls/alltoallv-ompi-basic-linear.c
   src/smpi/colls/bcast-arrival-nb.c
   src/smpi/colls/bcast-arrival-pattern-aware.c
   src/smpi/colls/bcast-arrival-pattern-aware-wait.c
diff --git a/src/smpi/colls/alltoallv-ompi-basic-linear.c b/src/smpi/colls/alltoallv-ompi-basic-linear.c
new file mode 100644
index 0000000000..f90471d3cd
--- /dev/null
+++ b/src/smpi/colls/alltoallv-ompi-basic-linear.c
@@ -0,0 +1,100 @@
+
+#include "colls_private.h"
+#define MCA_COLL_BASE_TAG_ALLTOALLV 111
+/*
+ * Linear functions are copied from the basic coll module. For
+ * some small number of nodes and/or small data sizes they are just as
+ * fast as tuned/tree based segmenting operations and as such may be
+ * selected by the decision functions. These are copied into this module
+ * due to the way we select modules in V1. i.e. in V2 we will handle this
+ * differently and so will not have to duplicate code.
+ * GEF Oct05 after asking Jeff.
+ */
+int
+smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps,
+                                            MPI_Datatype sdtype,
+                                            void *rbuf, int *rcounts, int *rdisps,
+                                            MPI_Datatype rdtype,
+                                            MPI_Comm comm)
+{
+    int i, size, rank;
+    char *psnd, *prcv;
+    int nreqs;
+    ptrdiff_t sext, rext;
+    MPI_Request *preq;
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+    MPI_Request *ireqs = xbt_malloc(sizeof(MPI_Request) * size * 2);
+    XBT_DEBUG("coll:tuned:alltoallv_intra_basic_linear rank %d", rank);
+
+    sext = smpi_datatype_get_extent(sdtype);
+    rext = smpi_datatype_get_extent(rdtype);
+
+    /* Simple optimization - handle send to self first */
+    psnd = ((char *) sbuf) + (sdisps[rank] * sext);
+    prcv = ((char *) rbuf) + (rdisps[rank] * rext);
+    if (0 != scounts[rank]) {
+        smpi_datatype_copy(psnd, scounts[rank], sdtype,
+                           prcv, rcounts[rank], rdtype);
+    }
+
+    /* If only one process, we're done. */
+    if (1 == size) {
+        return MPI_SUCCESS;
+    }
+
+    /* Now, initiate all send/recv to/from others. */
+    nreqs = 0;
+    preq = ireqs;
+
+    /* Post all receives first */
+    for (i = 0; i < size; ++i) {
+        if (i == rank || 0 == rcounts[i]) {
+            continue;
+        }
+
+        prcv = ((char *) rbuf) + (rdisps[i] * rext);
+
+        *preq = smpi_irecv_init(prcv, rcounts[i], rdtype,
+                                i, MCA_COLL_BASE_TAG_ALLTOALLV, comm);
+        preq++;
+        ++nreqs;
+    }
+
+    /* Now post all sends */
+    for (i = 0; i < size; ++i) {
+        if (i == rank || 0 == scounts[i]) {
+            continue;
+        }
+
+        psnd = ((char *) sbuf) + (sdisps[i] * sext);
+        *preq = smpi_isend_init(psnd, scounts[i], sdtype,
+                                i, MCA_COLL_BASE_TAG_ALLTOALLV, comm);
+        preq++;
+        ++nreqs;
+    }
+
+    /* Start your engines. This will never return an error. */
+    smpi_mpi_startall(nreqs, ireqs);
+
+    /* Wait for them all. If there's an error, note that we don't care
+     * what the error was -- just that there *was* an error. The PML
+     * will finish all requests, even if one or more of them fail.
+     * i.e., by the end of this call, all the requests are free-able.
+     * So free them anyway -- even if there was an error, and return the
+     * error after we free everything. */
+    smpi_mpi_waitall(nreqs, ireqs, MPI_STATUSES_IGNORE);
+
+    /* Free the requests. */
+    for (i = 0; i < nreqs; ++i) {
+        if (ireqs[i] != MPI_REQUEST_NULL)
+            smpi_mpi_request_free(&ireqs[i]);
+    }
+
+    return MPI_SUCCESS;
+}
+
diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h
index 3de368157b..c870515df3 100644
--- a/src/smpi/colls/colls.h
+++ b/src/smpi/colls/colls.h
@@ -166,7 +166,8 @@ COLL_APPLY(action, COLL_ALLTOALLV_SIG, ring_light_barrier) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ring_mpi_barrier) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ring_one_barrier) COLL_sep \
 COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi) COLL_sep \
-COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich)
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear)

 COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep)

diff --git a/src/smpi/colls/smpi_openmpi_selector.c b/src/smpi/colls/smpi_openmpi_selector.c
index 939b643577..8109fbacc3 100644
--- a/src/smpi/colls/smpi_openmpi_selector.c
+++ b/src/smpi/colls/smpi_openmpi_selector.c
@@ -94,7 +94,7 @@ int smpi_coll_tuned_alltoallv_ompi(void *sbuf, int *scounts, int *sdisps,
                                    )
 {
     /* For starters, just keep the original algorithm. */
-    return smpi_coll_tuned_alltoallv_pair(sbuf, scounts, sdisps, sdtype,
+    return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype,
                                           rbuf, rcounts, rdisps, rdtype,
                                           comm);
 }
--
2.20.1
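
Note, not part of the commit: a minimal sketch of a standalone program that would exercise the new code path. The program and its file name are hypothetical; only the smpi/alltoallv configuration key and the ompi_basic_linear value come from the patch above (see the AddTests.cmake hunk), while the smpirun arguments (hostfile, platform file, process count) are assumptions.

/* alltoallv_linear_check.c -- hypothetical example, not shipped with this patch.
 * Rank r sends i+1 integers to each rank i, so every rank receives rank+1
 * integers from each peer; each received element must carry the sender's rank,
 * which makes the uneven (v-variant) exchange easy to verify. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
  int rank, size, i;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int *scounts = malloc(size * sizeof(int));
  int *rcounts = malloc(size * sizeof(int));
  int *sdisps  = malloc(size * sizeof(int));
  int *rdisps  = malloc(size * sizeof(int));
  int stotal = 0, rtotal = 0;
  for (i = 0; i < size; i++) {
    scounts[i] = i + 1;      /* uneven counts: this is what alltoallv is for */
    rcounts[i] = rank + 1;   /* matches what each peer sends to this rank */
    sdisps[i] = stotal; stotal += scounts[i];
    rdisps[i] = rtotal; rtotal += rcounts[i];
  }
  int *sbuf = malloc(stotal * sizeof(int));
  int *rbuf = malloc(rtotal * sizeof(int));
  for (i = 0; i < stotal; i++)
    sbuf[i] = rank;          /* every outgoing element carries the sender's rank */

  MPI_Alltoallv(sbuf, scounts, sdisps, MPI_INT,
                rbuf, rcounts, rdisps, MPI_INT, MPI_COMM_WORLD);

  /* the first element received from rank i must be i */
  for (i = 0; i < size; i++)
    if (rbuf[rdisps[i]] != i)
      printf("rank %d: unexpected data from rank %d\n", rank, i);

  free(sbuf); free(rbuf);
  free(scounts); free(rcounts); free(sdisps); free(rdisps);
  MPI_Finalize();
  return 0;
}

Built with smpicc and run through smpirun, something along the lines of

  smpirun -np 4 -hostfile hostfile -platform platform.xml --cfg=smpi/alltoallv:ompi_basic_linear ./alltoallv_linear_check

would route MPI_Alltoallv through smpi_coll_tuned_alltoallv_ompi_basic_linear directly, and with the selector change in smpi_openmpi_selector.c, --cfg=smpi/alltoallv:ompi now reaches the same implementation as well (hostfile and platform.xml stand in for an actual SimGrid deployment and platform description).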