From bd59fd0ffbd1daf8f480840be2b6d8ce12d4a339 Mon Sep 17 00:00:00 2001 From: Augustin Degomme Date: Tue, 17 Sep 2013 17:50:30 +0200 Subject: [PATCH] remove redundant algo --- buildtools/Cmake/AddTests.cmake | 4 +- buildtools/Cmake/DefinePackages.cmake | 1 - src/smpi/colls/allreduce-NTS.c | 95 --------------------------- src/smpi/colls/colls.h | 1 - 4 files changed, 2 insertions(+), 99 deletions(-) delete mode 100644 src/smpi/colls/allreduce-NTS.c diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake index 8076d74ab1..84aff1bc6d 100644 --- a/buildtools/Cmake/AddTests.cmake +++ b/buildtools/Cmake/AddTests.cmake @@ -394,7 +394,7 @@ if(NOT enable_memcheck) ADD_TEST(smpi-gather-coll-${GATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/gather:${GATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/gather_coll.tesh) ENDFOREACH() - FOREACH (ALLGATHER_COLL default 2dmesh 3dmesh bruck GB loosely_lr lr + FOREACH (ALLGATHER_COLL default 2dmesh 3dmesh bruck GB loosely_lr NTSLR NTSLR_NB pair rdb rhv ring SMP_NTS smp_simple spreading_simple ompi mpich ompi_neighborexchange) ADD_TEST(smpi-allgather-coll-${ALLGATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgather:${ALLGATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather_coll.tesh) @@ -404,7 +404,7 @@ if(NOT enable_memcheck) ADD_TEST(smpi-allgatherv-coll-${ALLGATHERV_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv_coll.tesh) ENDFOREACH() - FOREACH (ALLREDUCE_COLL default lr NTS rab1 rab2 rab_rdb + FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb rab_rsag rdb smp_binomial smp_binomial_pipeline smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented) ADD_TEST(smpi-allreduce-coll-${ALLREDUCE_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allreduce:${ALLREDUCE_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce_coll.tesh) diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake index 3b1b370bf9..b89bc38028 100644 --- a/buildtools/Cmake/DefinePackages.cmake +++ b/buildtools/Cmake/DefinePackages.cmake @@ -138,7 +138,6 @@ set(SMPI_SRC src/smpi/colls/allgatherv-mpich-rdb.c src/smpi/colls/allgatherv-mpich-ring.c src/smpi/colls/allreduce-lr.c - src/smpi/colls/allreduce-NTS.c src/smpi/colls/allreduce-rab1.c src/smpi/colls/allreduce-rab2.c src/smpi/colls/allreduce-rab-rdb.c diff --git a/src/smpi/colls/allreduce-NTS.c b/src/smpi/colls/allreduce-NTS.c deleted file mode 100644 index 85c790b1f8..0000000000 --- a/src/smpi/colls/allreduce-NTS.c +++ /dev/null @@ -1,95 +0,0 @@ -#include "colls_private.h" -/* IMPLEMENTED BY PITCH PATARASUK - Non-topoloty-specific all-reduce operation designed bandwidth optimally */ - -/* ** NOTE ** - Use -DMPICH2_REDUCTION if this code does not compile. - MPICH1 code also work on MPICH2 on our cluster and the performance are similar. - This code assume commutative and associative reduce operator (MPI_SUM, MPI_MAX, etc). -*/ - -//#include - -int -smpi_coll_tuned_allreduce_NTS(void *sbuf, void *rbuf, int rcount, - MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) -{ - int tag = COLL_TAG_ALLREDUCE; - MPI_Status status; - int rank, i, size, count; - int send_offset, recv_offset; - int remainder, remainder_flag, remainder_offset; - - rank = smpi_comm_rank(comm); - size = smpi_comm_size(comm); - - /* make it compatible with all data type */ - MPI_Aint extent; - extent = smpi_datatype_get_extent(dtype); - - /* when communication size is smaller than number of process (not support) */ - if (rcount < size) { - return mpi_coll_allreduce_fun(sbuf, rbuf, rcount, dtype, op, comm); - } - - /* when communication size is not divisible by number of process: - call the native implementation for the remain chunk at the end of the operation */ - if (rcount % size != 0) { - remainder = rcount % size; - remainder_flag = 1; - remainder_offset = (rcount / size) * size * extent; - } else { - remainder = remainder_flag = remainder_offset = 0; - } - - /* size of each point-to-point communication is equal to the size of the whole message - divided by number of processes - */ - count = rcount / size; - - /* our ALL-REDUCE implementation - 1. copy (partial of)send_buf to recv_buf - 2. use logical ring reduce-scatter - 3. use logical ring all-gather - */ - - // copy partial data - send_offset = ((rank - 1 + size) % size) * count * extent; - recv_offset = ((rank - 1 + size) % size) * count * extent; - smpi_mpi_sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1, - (char *) rbuf + recv_offset, count, dtype, rank, tag - 1, comm, - &status); - - // reduce-scatter - for (i = 0; i < (size - 1); i++) { - send_offset = ((rank - 1 - i + size) % size) * count * extent; - recv_offset = ((rank - 2 - i + size) % size) * count * extent; - smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), - tag + i, (char *) rbuf + recv_offset, count, dtype, - ((rank + size - 1) % size), tag + i, comm, &status); - - // compute result to rbuf+recv_offset - smpi_op_apply(op, (char *)sbuf + recv_offset, (char *)rbuf + recv_offset, &count, &dtype); - } - - // all-gather - for (i = 0; i < (size - 1); i++) { - send_offset = ((rank - i + size) % size) * count * extent; - recv_offset = ((rank - 1 - i + size) % size) * count * extent; - smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), - tag + i, (char *) rbuf + recv_offset, count, dtype, - ((rank + size - 1) % size), tag + i, comm, &status); - } - - /* when communication size is not divisible by number of process: - call the native implementation for the remain chunk at the end of the operation */ - if (remainder_flag) { - XBT_WARN("MPI_allreduce_NTS use default MPI_allreduce."); - smpi_mpi_allreduce((char *) sbuf + remainder_offset, - (char *) rbuf + remainder_offset, remainder, dtype, op, - comm); - return MPI_SUCCESS; - } - - return MPI_SUCCESS; -} diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h index c7c739bf70..6461a5a665 100644 --- a/src/smpi/colls/colls.h +++ b/src/smpi/colls/colls.h @@ -102,7 +102,6 @@ COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep) #define COLL_ALLREDUCES(action, COLL_sep) \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, lr) COLL_sep \ -COLL_APPLY(action, COLL_ALLREDUCE_SIG, NTS) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab1) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab2) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rdb) COLL_sep \ -- 2.20.1