From: Augustin Degomme
Date: Tue, 11 Jun 2013 09:02:21 +0000 (+0200)
Subject: add new allgather algo from ompi
X-Git-Tag: v3_9_90~276^2~16
X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/d77e8454ea1c527ebec2b4629dcd3e90ecccf51e

add new allgather algo from ompi
---

diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake
index 139057c397..e7c8abb716 100644
--- a/buildtools/Cmake/AddTests.cmake
+++ b/buildtools/Cmake/AddTests.cmake
@@ -373,7 +373,7 @@ if(NOT enable_memcheck)
   FOREACH (ALLGATHER_COLL default 2dmesh 3dmesh bruck GB loosely_lr lr
            NTSLR NTSLR_NB pair rdb rhv ring SMP_NTS
-           smp_simple spreading_simple ompi)
+           smp_simple spreading_simple ompi ompi_neighborexchange)
     ADD_TEST(smpi-allgather-coll-${ALLGATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgather:${ALLGATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather_coll.tesh)
   ENDFOREACH()
   FOREACH (ALLGATHERV_COLL default GB pair ring ompi)
diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake
index 866eb4580e..ce0f758b6f 100644
--- a/buildtools/Cmake/DefinePackages.cmake
+++ b/buildtools/Cmake/DefinePackages.cmake
@@ -128,6 +128,7 @@ set(SMPI_SRC
   src/smpi/colls/allgather-SMP-NTS.c
   src/smpi/colls/allgather-smp-simple.c
   src/smpi/colls/allgather-spreading-simple.c
+  src/smpi/colls/allgather-ompi-neighborexchange.c
   src/smpi/colls/allgatherv-GB.c
   src/smpi/colls/allgatherv-pair.c
   src/smpi/colls/allgatherv-ring.c
diff --git a/src/smpi/colls/allgather-ompi-neighborexchange.c b/src/smpi/colls/allgather-ompi-neighborexchange.c
new file mode 100644
index 0000000000..238163ebaf
--- /dev/null
+++ b/src/smpi/colls/allgather-ompi-neighborexchange.c
@@ -0,0 +1,175 @@
+/*
+ * ompi_coll_tuned_allgather_intra_neighborexchange
+ *
+ * Function:     allgather using N/2 steps (O(N))
+ * Accepts:      Same arguments as MPI_Allgather
+ * Returns:      MPI_SUCCESS or error code
+ *
+ * Description:  Neighbor Exchange algorithm for allgather.
+ *               Described by Chen et al. in
+ *               "Performance Evaluation of Allgather Algorithms on
+ *               Terascale Linux Cluster with Fast Ethernet",
+ *               Proceedings of the Eighth International Conference on
+ *               High-Performance Computing in Asia-Pacific Region
+ *               (HPCASIA'05), 2005
+ *
+ *               Rank r exchanges a message with one of its neighbors and
+ *               forwards the data further in the next step.
+ *
+ *               No additional memory requirements.
+ *
+ * Limitations:  The algorithm works only on an even number of processes.
+ *               For an odd number of processes we switch to the ring algorithm.
+ *
+ * Example on 6 nodes:
+ *  Initial state
+ *    #     0      1      2      3      4      5
+ *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]
+ *   Step 0:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [ ]    [ ]    [ ]    [ ]
+ *         [1]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [2]    [ ]    [ ]
+ *         [ ]    [ ]    [3]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [4]
+ *         [ ]    [ ]    [ ]    [ ]    [5]    [5]
+ *   Step 1:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [ ]    [ ]    [0]
+ *         [1]    [1]    [1]    [ ]    [ ]    [1]
+ *         [ ]    [2]    [2]    [2]    [2]    [ ]
+ *         [ ]    [3]    [3]    [3]    [3]    [ ]
+ *         [4]    [ ]    [ ]    [4]    [4]    [4]
+ *         [5]    [ ]    [ ]    [5]    [5]    [5]
+ *   Step 2:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [0]    [0]    [0]
+ *         [1]    [1]    [1]    [1]    [1]    [1]
+ *         [2]    [2]    [2]    [2]    [2]    [2]
+ *         [3]    [3]    [3]    [3]    [3]    [3]
+ *         [4]    [4]    [4]    [4]    [4]    [4]
+ *         [5]    [5]    [5]    [5]    [5]    [5]
+ */
+
+ #include "colls_private.h"
+ #define MCA_COLL_BASE_TAG_ALLGATHER 555
+int
+smpi_coll_tuned_allgather_ompi_neighborexchange(void *sbuf, int scount,
+                                                MPI_Datatype sdtype,
+                                                void* rbuf, int rcount,
+                                                MPI_Datatype rdtype,
+                                                MPI_Comm comm
+)
+{
+    int line = -1;
+    int rank, size;
+    int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
+    int i, even_rank;
+    int err = 0;
+    ptrdiff_t slb, rlb, sext, rext;
+    char *tmpsend = NULL, *tmprecv = NULL;
+
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    if (size % 2) {
+        XBT_DEBUG(
+                     "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
+                     size);
+        return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
+                                              rbuf, rcount, rdtype,
+                                              comm);
+    }
+
+    XBT_DEBUG(
+                 "coll:tuned:allgather_intra_neighborexchange rank %d", rank);
+
+    err = smpi_datatype_extent (sdtype, &slb, &sext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    err = smpi_datatype_extent (rdtype, &rlb, &rext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    /* Initialization step:
+       - if the send buffer is not MPI_IN_PLACE, copy the send buffer to
+         the appropriate block of the receive buffer
+    */
+    tmprecv = (char*) rbuf + rank * rcount * rext;
+    if (MPI_IN_PLACE != sbuf) {
+        tmpsend = (char*) sbuf;
+        smpi_datatype_copy (tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
+    }
+
+    /* Determine neighbors, the order in which blocks will arrive, etc. */
+    even_rank = !(rank % 2);
+    if (even_rank) {
+        neighbor[0] = (rank + 1) % size;
+        neighbor[1] = (rank - 1 + size) % size;
+        recv_data_from[0] = rank;
+        recv_data_from[1] = rank;
+        offset_at_step[0] = (+2);
+        offset_at_step[1] = (-2);
+    } else {
+        neighbor[0] = (rank - 1 + size) % size;
+        neighbor[1] = (rank + 1) % size;
+        recv_data_from[0] = neighbor[0];
+        recv_data_from[1] = neighbor[0];
+        offset_at_step[0] = (-2);
+        offset_at_step[1] = (+2);
+    }
+
+    /* Communication loop:
+       - First step is special: exchange a single block with neighbor[0].
+       - Rest of the steps:
+         update recv_data_from according to offset, and
+         exchange two blocks with the appropriate neighbor.
+         The send location becomes the previous receive location.
+    */
+    tmprecv = (char*)rbuf + neighbor[0] * rcount * rext;
+    tmpsend = (char*)rbuf + rank * rcount * rext;
+    /* Sendreceive */
+    smpi_mpi_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
+                      MCA_COLL_BASE_TAG_ALLGATHER,
+                      tmprecv, rcount, rdtype, neighbor[0],
+                      MCA_COLL_BASE_TAG_ALLGATHER,
+                      comm, MPI_STATUS_IGNORE);
+
+    /* Determine initial sending location */
+    if (even_rank) {
+        send_data_from = rank;
+    } else {
+        send_data_from = recv_data_from[0];
+    }
+
+    for (i = 1; i < (size / 2); i++) {
+        const int i_parity = i % 2;
+        recv_data_from[i_parity] =
+            (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
+
+        tmprecv = (char*)rbuf + recv_data_from[i_parity] * rcount * rext;
+        tmpsend = (char*)rbuf + send_data_from * rcount * rext;
+
+        /* Sendreceive */
+        smpi_mpi_sendrecv(tmpsend, 2 * rcount, rdtype,
+                          neighbor[i_parity],
+                          MCA_COLL_BASE_TAG_ALLGATHER,
+                          tmprecv, 2 * rcount, rdtype,
+                          neighbor[i_parity],
+                          MCA_COLL_BASE_TAG_ALLGATHER,
+                          comm, MPI_STATUS_IGNORE);
+
+        send_data_from = recv_data_from[i_parity];
+    }
+
+    return MPI_SUCCESS;
+
+ err_hndl:
+    XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d",
+               __FILE__, line, err, rank);
+    return err;
+}
diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h
index 9af9ad3751..6743650295 100644
--- a/src/smpi/colls/colls.h
+++ b/src/smpi/colls/colls.h
@@ -45,7 +45,8 @@ COLL_APPLY(action, COLL_ALLGATHER_SIG, ring) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, SMP_NTS) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, smp_simple) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHER_SIG, spreading_simple) COLL_sep \
-COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi)
+COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi_neighborexchange)

 COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep)
diff --git a/src/smpi/colls/smpi_openmpi_selector.c b/src/smpi/colls/smpi_openmpi_selector.c
index 55f9879e51..2ea07e3178 100644
--- a/src/smpi/colls/smpi_openmpi_selector.c
+++ b/src/smpi/colls/smpi_openmpi_selector.c
@@ -426,15 +426,15 @@ int smpi_coll_tuned_allgather_ompi(void *sbuf, int scount,
                                                          comm);
         }
     } else {
-        //if (communicator_size % 2) {
+        if (communicator_size % 2) {
             return smpi_coll_tuned_allgather_ring(sbuf, scount, sdtype,
                                                       rbuf, rcount, rdtype,
                                                       comm);
-        /*} else {
-           return smpi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
+        } else {
+           return smpi_coll_tuned_allgather_ompi_neighborexchange(sbuf, scount, sdtype,
                                                      rbuf, rcount, rdtype,
-                                                     comm, module);
-        }*/
+                                                     comm);
+        }
     }

 #if defined(USE_MPICH2_DECISION)
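
For reference, and not part of the commit itself: besides the openmpi selector patched above, the new routine can be exercised the same way the new test does, by selecting it at run time with the smpi/allgather configuration item. The program below is a hypothetical minimal caller; only MPI_Allgather and the standard SMPI workflow (smpicc/smpirun) are assumed, and the file name is made up.

    /* allgather_ne_demo.c -- hypothetical caller, not part of this commit.
     * Each rank contributes one int; after MPI_Allgather every rank holds
     * the contributions of all ranks, whichever algorithm is selected. */
    #include <mpi.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char *argv[])
    {
        int rank, size, i;
        int *recvbuf;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        recvbuf = malloc(size * sizeof(int));   /* one block per rank */

        /* same signature the SMPI collective implements */
        MPI_Allgather(&rank, 1, MPI_INT, recvbuf, 1, MPI_INT, MPI_COMM_WORLD);

        if (rank == 0)
            for (i = 0; i < size; i++)
                printf("block %d = %d\n", i, recvbuf[i]);

        free(recvbuf);
        MPI_Finalize();
        return 0;
    }

Run with an even process count (e.g. smpirun -np 6 ... --cfg=smpi/allgather:ompi_neighborexchange, the flag the new tesh test passes) to reach the neighbor-exchange code; with an odd count the guard at the top of the function falls back to smpi_coll_tuned_allgather_ring.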
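The subtle part of the routine is the index bookkeeping (recv_data_from and offset_at_step). The following plain-C sketch, again hypothetical and independent of SMPI, replays that bookkeeping for the 6-node example from the header comment and checks that every rank ends up holding every block:

    /* ne_bookkeeping.c -- hypothetical sketch, not part of this commit.
     * Replays the neighbor-exchange block bookkeeping for an even SIZE and
     * verifies that each rank finishes with all SIZE blocks. */
    #include <stdio.h>

    #define SIZE 6                     /* must be even, as in the algorithm */

    static int has[SIZE][SIZE];        /* has[r][b] != 0: rank r holds block b */

    int main(void)
    {
        int neighbor[SIZE][2], offset[SIZE][2], recv_from[SIZE][2];
        int r, b, i;

        for (r = 0; r < SIZE; r++) {
            has[r][r] = 1;             /* each rank starts with its own block */
            if (r % 2 == 0) {          /* even rank: pairs with r+1 first */
                neighbor[r][0] = (r + 1) % SIZE;
                neighbor[r][1] = (r - 1 + SIZE) % SIZE;
                recv_from[r][0] = recv_from[r][1] = r;
                offset[r][0] = +2;  offset[r][1] = -2;
            } else {                   /* odd rank: pairs with r-1 first */
                neighbor[r][0] = (r - 1 + SIZE) % SIZE;
                neighbor[r][1] = (r + 1) % SIZE;
                recv_from[r][0] = recv_from[r][1] = neighbor[r][0];
                offset[r][0] = -2;  offset[r][1] = +2;
            }
        }

        /* Step 0: exchange a single block with neighbor[0] */
        for (r = 0; r < SIZE; r++)
            has[r][neighbor[r][0]] = 1;

        /* Steps 1 .. SIZE/2-1: exchange a pair of consecutive blocks.
         * recv_from starts even and moves by +/-2, so it stays even and the
         * pair received from neighbor[r][p] is blocks recv_from, recv_from+1. */
        for (i = 1; i < SIZE / 2; i++) {
            const int p = i % 2;
            for (r = 0; r < SIZE; r++) {
                recv_from[r][p] = (recv_from[r][p] + offset[r][p] + SIZE) % SIZE;
                has[r][recv_from[r][p]] = 1;
                has[r][recv_from[r][p] + 1] = 1;
            }
        }

        for (r = 0; r < SIZE; r++)
            for (b = 0; b < SIZE; b++)
                if (!has[r][b]) {
                    printf("rank %d is missing block %d\n", r, b);
                    return 1;
                }
        printf("all %d ranks hold all %d blocks\n", SIZE, SIZE);
        return 0;
    }

Tracing SIZE=6 through this sketch reproduces the three tables in the header comment: after step 0 adjacent pairs share two blocks, and each later step forwards the pair received most recently, which is why send_data_from in the real code is simply set to the previous step's receive location.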