From: Augustin Degomme Date: Wed, 23 Jul 2014 15:35:52 +0000 (+0200) Subject: New collectives for mvapich2 selector : allgatherv, allreduce, alltoallv, barrier X-Git-Tag: v3_12~893^2~7 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/b57bbad96670d85481e86a4152b701af0517521a New collectives for mvapich2 selector : allgatherv, allreduce, alltoallv, barrier --- diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake index 5991eb7243..bb58212baf 100644 --- a/buildtools/Cmake/AddTests.cmake +++ b/buildtools/Cmake/AddTests.cmake @@ -378,12 +378,12 @@ IF(NOT enable_memcheck) smp_simple spreading_simple ompi mpich ompi_neighborexchange mvapich2) ADD_TESH(tesh-smpi-allgather-coll-${ALLGATHER_COLL} --cfg smpi/allgather:${ALLGATHER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgather --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather allgather_coll.tesh) ENDFOREACH() - FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring) + FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring mvapich2) ADD_TESH(tesh-smpi-allgatherv-coll-${ALLGATHERV_COLL} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allgatherv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv allgatherv_coll.tesh) ENDFOREACH() FOREACH (ALLREDUCE_COLL default lr rab1 rab2 rab_rdb rdb smp_binomial smp_binomial_pipeline - smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented) + smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast ompi mpich ompi_ring_segmented mvapich2) ADD_TESH(tesh-smpi-allreduce-coll-${ALLREDUCE_COLL} --cfg smpi/allreduce:${ALLREDUCE_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/allreduce --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce allreduce_coll.tesh) ENDFOREACH() FOREACH (ALLREDUCE_COLL_LARGE ompi_ring_segmented) @@ 
-397,7 +397,7 @@ IF(NOT enable_memcheck) ENDFOREACH() FOREACH (ALLTOALLV_COLL default pair pair_light_barrier pair_mpi_barrier pair_one_barrier ring ring_light_barrier - ring_mpi_barrier ring_one_barrier bruck ompi mpich ompi_basic_linear) + ring_mpi_barrier ring_one_barrier bruck ompi mpich mvapich2 ompi_basic_linear) ADD_TESH(tesh-smpi-alltoallv-coll-${ALLTOALLV_COLL} --cfg smpi/alltoallv:${ALLTOALLV_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/alltoallv --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/alltoallv alltoallv_coll.tesh) ENDFOREACH() FOREACH (BCAST_COLL default arrival_pattern_aware arrival_pattern_aware_wait arrival_scatter @@ -414,7 +414,7 @@ IF(NOT enable_memcheck) FOREACH (SCATTER_COLL default ompi mpich ompi_basic_linear ompi_binomial) ADD_TESH(tesh-smpi-scatter-coll-${SCATTER_COLL} --cfg smpi/scatter:${SCATTER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/scatter --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/scatter scatter_coll.tesh) ENDFOREACH() - FOREACH (BARRIER_COLL default ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering) + FOREACH (BARRIER_COLL default ompi mpich ompi_basic_linear ompi_tree ompi_bruck ompi_recursivedoubling ompi_doublering mvapich2_pair mvapich2) ADD_TESH(tesh-smpi-barrier-coll-${BARRIER_COLL} --cfg smpi/barrier:${BARRIER_COLL} --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/barrier --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/barrier barrier_coll.tesh) ENDFOREACH() # END TESH TESTS diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake index c9dfc7e703..c5fa83e4af 100644 --- a/buildtools/Cmake/DefinePackages.cmake +++ b/buildtools/Cmake/DefinePackages.cmake @@ -190,6 +190,7 @@ set(SMPI_SRC src/smpi/colls/alltoallv-ring-one-barrier.c src/smpi/colls/alltoallv-ring.c src/smpi/colls/barrier-ompi.c + src/smpi/colls/barrier-mvapich2-pair.c src/smpi/colls/bcast-NTSB.c src/smpi/colls/bcast-NTSL-Isend.c src/smpi/colls/bcast-NTSL.c diff 
--git a/src/smpi/colls/barrier-mvapich2-pair.c b/src/smpi/colls/barrier-mvapich2-pair.c new file mode 100644 index 0000000000..2ef6ad1f9a --- /dev/null +++ b/src/smpi/colls/barrier-mvapich2-pair.c @@ -0,0 +1,98 @@ +/* Copyright (c) 2013-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * + * Additional copyrights may follow + */ + + /* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * (C) 2001 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +/* Copyright (c) 2001-2014, The Ohio State University. All rights + * reserved. + * + * This file is part of the MVAPICH2 software package developed by the + * team members of The Ohio State University's Network-Based Computing + * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda. + * + * For detailed copyright and licensing information, please refer to the + * copyright file COPYRIGHT in the top level MVAPICH2 directory. 
+ * + */ + +#include "colls_private.h" +#include "coll_tuned_topo.h" + +int smpi_coll_tuned_barrier_mvapich2_pair(MPI_Comm comm) +{ + + int size, rank; + int d, dst, src; + int mpi_errno = MPI_SUCCESS; + + size = smpi_comm_size(comm); + /* Trivial barriers return immediately */ + if (size == 1) + return MPI_SUCCESS; + + rank = smpi_comm_rank(comm); + int N2_prev = 1; + /* N2_prev = greatest power of two <= size of Comm */ + for( N2_prev = 1; N2_prev <= size; N2_prev <<= 1 ); + N2_prev >>= 1; + + int surfeit = size - N2_prev; + + /* Perform a combine-like operation */ + if (rank < N2_prev) { + if (rank < surfeit) { + /* get the fanin letter from the upper "half" process: */ + dst = N2_prev + rank; + smpi_mpi_recv(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, + comm, MPI_STATUS_IGNORE); + } + + /* combine on embedded N2_prev power-of-two processes */ + for (d = 1; d < N2_prev; d <<= 1) { + dst = (rank ^ d); + smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, NULL, + 0, MPI_BYTE, dst, COLL_TAG_BARRIER, comm, + MPI_STATUS_IGNORE); + } + + /* fanout data to nodes above N2_prev... 
*/ + if (rank < surfeit) { + dst = N2_prev + rank; + smpi_mpi_send(NULL, 0, MPI_BYTE, dst, COLL_TAG_BARRIER, + comm); + } + } else { + /* fanin data to power of 2 subset */ + src = rank - N2_prev; + smpi_mpi_sendrecv(NULL, 0, MPI_BYTE, src, COLL_TAG_BARRIER, + NULL, 0, MPI_BYTE, src, COLL_TAG_BARRIER, + comm, MPI_STATUS_IGNORE); + } + + return mpi_errno; + +} diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h index 0207d313e0..1fbd98b74f 100644 --- a/src/smpi/colls/colls.h +++ b/src/smpi/colls/colls.h @@ -98,6 +98,7 @@ COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi_bruck) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_ring) COLL_sep \ +COLL_APPLY(action, COLL_ALLGATHERV_SIG, mvapich2) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHERV_SIG, automatic) COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep) @@ -125,6 +126,7 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, redbcast) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi_ring_segmented) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \ +COLL_APPLY(action, COLL_ALLREDUCE_SIG, mvapich2) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic) COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep) @@ -179,6 +181,7 @@ COLL_APPLY(action, COLL_ALLTOALLV_SIG, ring_one_barrier) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear) COLL_sep \ +COLL_APPLY(action, COLL_ALLTOALLV_SIG, mvapich2) COLL_sep \ COLL_APPLY(action, COLL_ALLTOALLV_SIG, automatic) COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep) @@ -293,6 +296,8 @@ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_bruck) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \ 
COLL_APPLY(action, COLL_BARRIER_SIG, mpich) COLL_sep \ +COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2_pair) COLL_sep \ +COLL_APPLY(action, COLL_BARRIER_SIG, mvapich2) COLL_sep \ COLL_APPLY(action, COLL_BARRIER_SIG, automatic) COLL_BARRIERS(COLL_PROTO, COLL_NOsep) diff --git a/src/smpi/colls/smpi_mvapich2_selector.c b/src/smpi/colls/smpi_mvapich2_selector.c index 72d40cd595..1442a5c3e0 100644 --- a/src/smpi/colls/smpi_mvapich2_selector.c +++ b/src/smpi/colls/smpi_mvapich2_selector.c @@ -703,8 +703,8 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf, MPI_Datatype recvtype, int root, MPI_Comm comm) { - if(mv2_alltoall_table_ppn_conf==NULL) - init_mv2_alltoall_tables_stampede(); + if(mv2_gather_thresholds_table==NULL) + init_mv2_gather_tables_stampede(); int mpi_errno = MPI_SUCCESS; int range = 0; @@ -773,3 +773,422 @@ int smpi_coll_tuned_gather_mvapich2(void *sendbuf, return mpi_errno; } + + +static void init_mv2_allgatherv_tables_stampede(){ + mv2_size_allgatherv_tuning_table = 6; + mv2_allgatherv_thresholds_table = malloc(mv2_size_allgatherv_tuning_table * + sizeof (mv2_allgatherv_tuning_table)); + mv2_allgatherv_tuning_table mv2_tmp_allgatherv_thresholds_table[] = { + { + 16, + 2, + { + {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, + {512, -1, &MPIR_Allgatherv_Ring_MV2}, + }, + }, + { + 32, + 2, + { + {0, 512, &MPIR_Allgatherv_Rec_Doubling_MV2}, + {512, -1, &MPIR_Allgatherv_Ring_MV2}, + }, + }, + { + 64, + 2, + { + {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, + {256, -1, &MPIR_Allgatherv_Ring_MV2}, + }, + }, + { + 128, + 2, + { + {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, + {256, -1, &MPIR_Allgatherv_Ring_MV2}, + }, + }, + { + 256, + 2, + { + {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, + {256, -1, &MPIR_Allgatherv_Ring_MV2}, + }, + }, + { + 512, + 2, + { + {0, 256, &MPIR_Allgatherv_Rec_Doubling_MV2}, + {256, -1, &MPIR_Allgatherv_Ring_MV2}, + }, + }, + + }; + memcpy(mv2_allgatherv_thresholds_table, mv2_tmp_allgatherv_thresholds_table, + 
mv2_size_allgatherv_tuning_table * sizeof (mv2_allgatherv_tuning_table)); +} + + + + + + + +int smpi_coll_tuned_allgatherv_mvapich2(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int *recvcounts, int *displs, + MPI_Datatype recvtype, MPI_Comm comm ) +{ + int mpi_errno = MPI_SUCCESS; + int range = 0, comm_size, total_count, recvtype_size, i; + int range_threshold = 0; + int nbytes = 0; + + if(mv2_allgatherv_thresholds_table==NULL) + init_mv2_allgatherv_tables_stampede(); + + comm_size = smpi_comm_size(comm); + total_count = 0; + for (i = 0; i < comm_size; i++) + total_count += recvcounts[i]; + + recvtype_size=smpi_datatype_size(recvtype); + nbytes = total_count * recvtype_size; + + /* Search for the corresponding system size inside the tuning table */ + while ((range < (mv2_size_allgatherv_tuning_table - 1)) && + (comm_size > mv2_allgatherv_thresholds_table[range].numproc)) { + range++; + } + /* Search for corresponding inter-leader function */ + while ((range_threshold < (mv2_allgatherv_thresholds_table[range].size_inter_table - 1)) + && (nbytes > + comm_size * mv2_allgatherv_thresholds_table[range].inter_leader[range_threshold].max) + && (mv2_allgatherv_thresholds_table[range].inter_leader[range_threshold].max != + -1)) { + range_threshold++; + } + /* Set inter-leader pt */ + MV2_Allgatherv_function = + mv2_allgatherv_thresholds_table[range].inter_leader[range_threshold]. 
+ MV2_pt_Allgatherv_function; + + if (MV2_Allgatherv_function == &MPIR_Allgatherv_Rec_Doubling_MV2) + { + if(!(comm_size & (comm_size - 1))) + { + mpi_errno = + MPIR_Allgatherv_Rec_Doubling_MV2(sendbuf, sendcount, + sendtype, recvbuf, + recvcounts, displs, + recvtype, comm); + } else { + mpi_errno = + MPIR_Allgatherv_Bruck_MV2(sendbuf, sendcount, + sendtype, recvbuf, + recvcounts, displs, + recvtype, comm); + } + } else { + mpi_errno = + MV2_Allgatherv_function(sendbuf, sendcount, sendtype, + recvbuf, recvcounts, displs, + recvtype, comm); + } + + return mpi_errno; +} + + +static void init_mv2_allreduce_tables_stampede(){ +mv2_size_allreduce_tuning_table = 8; + mv2_allreduce_thresholds_table = malloc(mv2_size_allreduce_tuning_table * + sizeof (mv2_allreduce_tuning_table)); + mv2_allreduce_tuning_table mv2_tmp_allreduce_thresholds_table[] = { + { + 16, + 0, + {1, 0}, + 2, + { + {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2}, + {1024, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, + {1024, -1, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + { + 32, + 0, + {1, 1, 0}, + 3, + { + {0, 1024, &MPIR_Allreduce_pt2pt_rd_MV2}, + {1024, 16384, &MPIR_Allreduce_pt2pt_rd_MV2}, + {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 1024, &MPIR_Allreduce_reduce_shmem_MV2}, + {1024, 16384, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + { + 64, + 0, + {1, 1, 0}, + 3, + { + {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2}, + {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2}, + {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, + {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + { + 128, + 0, + {1, 1, 0}, + 3, + { + {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2}, + {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2}, + {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, + {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + { + 256, + 0, + {1, 1, 0}, + 3, + { 
+ {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2}, + {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2}, + {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, + {512, -1, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + { + 512, + 0, + {1, 1, 0}, + 3, + { + {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2}, + {512, 16384, &MPIR_Allreduce_pt2pt_rd_MV2}, + {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, + {512, 16384, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + { + 1024, + 0, + {1, 1, 1, 0}, + 4, + { + {0, 512, &MPIR_Allreduce_pt2pt_rd_MV2}, + {512, 8192, &MPIR_Allreduce_pt2pt_rd_MV2}, + {8192, 65536, &MPIR_Allreduce_pt2pt_rs_MV2}, + {65536, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, + {512, -1, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + { + 2048, + 0, + {1, 1, 1, 0}, + 4, + { + {0, 64, &MPIR_Allreduce_pt2pt_rd_MV2}, + {64, 512, &MPIR_Allreduce_reduce_p2p_MV2}, + {512, 4096, &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2}, + {4096, 16384, &MPIR_Allreduce_pt2pt_rs_MV2}, + {16384, -1, &MPIR_Allreduce_pt2pt_rs_MV2}, + }, + 2, + { + {0, 512, &MPIR_Allreduce_reduce_shmem_MV2}, + {512, -1, &MPIR_Allreduce_reduce_p2p_MV2}, + }, + }, + + }; + memcpy(mv2_allreduce_thresholds_table, mv2_tmp_allreduce_thresholds_table, + mv2_size_allreduce_tuning_table * sizeof (mv2_allreduce_tuning_table)); +} + + +int smpi_coll_tuned_allreduce_mvapich2(void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm) +{ + + int mpi_errno = MPI_SUCCESS; + //int rank = 0, + int comm_size = 0; + + comm_size = smpi_comm_size(comm); + //rank = smpi_comm_rank(comm); + + if (count == 0) { + return MPI_SUCCESS; + } + + if (mv2_allreduce_thresholds_table == NULL) + init_mv2_allreduce_tables_stampede(); + + /* check if multiple threads are calling this collective function */ + + MPI_Aint sendtype_size = 0; + int nbytes = 0; + int range = 0, 
range_threshold = 0, range_threshold_intra = 0; + int is_two_level = 0; + //int is_commutative = 0; + MPI_Aint true_lb, true_extent; + + sendtype_size=smpi_datatype_size(datatype); + nbytes = count * sendtype_size; + + smpi_datatype_extent(datatype, &true_lb, &true_extent); + //MPI_Op *op_ptr; + //is_commutative = smpi_op_is_commute(op); + + { + /* Search for the corresponding system size inside the tuning table */ + while ((range < (mv2_size_allreduce_tuning_table - 1)) && + (comm_size > mv2_allreduce_thresholds_table[range].numproc)) { + range++; + } + /* Search for corresponding inter-leader function */ + /* skip mcast pointers if mcast is not available */ + if(mv2_allreduce_thresholds_table[range].mcast_enabled != 1){ + while ((range_threshold < (mv2_allreduce_thresholds_table[range].size_inter_table - 1)) + && ((mv2_allreduce_thresholds_table[range]. + inter_leader[range_threshold].MV2_pt_Allreduce_function + == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2) || + (mv2_allreduce_thresholds_table[range]. 
+ inter_leader[range_threshold].MV2_pt_Allreduce_function + == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2) + )) { + range_threshold++; + } + } + while ((range_threshold < (mv2_allreduce_thresholds_table[range].size_inter_table - 1)) + && (nbytes > + mv2_allreduce_thresholds_table[range].inter_leader[range_threshold].max) + && (mv2_allreduce_thresholds_table[range].inter_leader[range_threshold].max != -1)) { + range_threshold++; + } + if(mv2_allreduce_thresholds_table[range].is_two_level_allreduce[range_threshold] == 1){ + is_two_level = 1; + } + /* Search for corresponding intra-node function */ + while ((range_threshold_intra < + (mv2_allreduce_thresholds_table[range].size_intra_table - 1)) + && (nbytes > + mv2_allreduce_thresholds_table[range].intra_node[range_threshold_intra].max) + && (mv2_allreduce_thresholds_table[range].intra_node[range_threshold_intra].max != + -1)) { + range_threshold_intra++; + } + + MV2_Allreduce_function = mv2_allreduce_thresholds_table[range].inter_leader[range_threshold] + .MV2_pt_Allreduce_function; + + MV2_Allreduce_intra_function = mv2_allreduce_thresholds_table[range].intra_node[range_threshold_intra] + .MV2_pt_Allreduce_function; + + /* check if mcast is ready, otherwise replace mcast with other algorithm */ + if((MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_redscat_gather_MV2)|| + (MV2_Allreduce_function == &MPIR_Allreduce_mcst_reduce_two_level_helper_MV2)){ + { + MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2; + } + if(is_two_level != 1) { + MV2_Allreduce_function = &MPIR_Allreduce_pt2pt_rd_MV2; + } + } + + if(is_two_level == 1){ + // check if shm is ready, if not use other algorithm first + /*if ((comm->ch.shmem_coll_ok == 1) + && (mv2_enable_shmem_allreduce) + && (is_commutative) + && (mv2_enable_shmem_collectives)) { + mpi_errno = MPIR_Allreduce_two_level_MV2(sendbuf, recvbuf, count, + datatype, op, comm); + } else {*/ + mpi_errno = MPIR_Allreduce_pt2pt_rd_MV2(sendbuf, recvbuf, count, + datatype, op, 
comm); + // } + } else { + mpi_errno = MV2_Allreduce_function(sendbuf, recvbuf, count, + datatype, op, comm); + } + } + + //comm->ch.intra_node_done=0; + + return (mpi_errno); + + +} + + +int smpi_coll_tuned_alltoallv_mvapich2(void *sbuf, int *scounts, int *sdisps, + MPI_Datatype sdtype, + void *rbuf, int *rcounts, int *rdisps, + MPI_Datatype rdtype, + MPI_Comm comm + ) +{ + +if (sbuf == MPI_IN_PLACE) { + return smpi_coll_tuned_alltoallv_ompi_basic_linear(sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps,rdtype, + comm); + } else /* For starters, just keep the original algorithm. */ + return smpi_coll_tuned_alltoallv_pair(sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps,rdtype, + comm); +} + + +int smpi_coll_tuned_barrier_mvapich2(MPI_Comm comm) +{ + return smpi_coll_tuned_barrier_mvapich2_pair(comm); +} + + + diff --git a/src/smpi/colls/smpi_mvapich2_selector_stampede.h b/src/smpi/colls/smpi_mvapich2_selector_stampede.h index b0f6136bfa..1c29af6a4c 100644 --- a/src/smpi/colls/smpi_mvapich2_selector_stampede.h +++ b/src/smpi/colls/smpi_mvapich2_selector_stampede.h @@ -140,3 +140,131 @@ MV2_Gather_function_ptr MV2_Gather_intra_node_function = NULL; #define MPIR_Gather_MV2_Direct smpi_coll_tuned_gather_ompi_basic_linear #define MPIR_Gather_MV2_two_level_Direct smpi_coll_tuned_gather_ompi_basic_linear #define MPIR_Gather_intra smpi_coll_tuned_gather_mpich + + + +typedef struct { + int min; + int max; + int (*MV2_pt_Allgatherv_function)(void *sendbuf, + int sendcount, + MPI_Datatype sendtype, + void *recvbuf, + int *recvcounts, + int *displs, + MPI_Datatype recvtype, + MPI_Comm commg); +} mv2_allgatherv_tuning_element; + +typedef struct { + int numproc; + int size_inter_table; + mv2_allgatherv_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS]; +} mv2_allgatherv_tuning_table; + +extern int mv2_size_allgatherv_tuning_table; +extern mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table; + +int (*MV2_Allgatherv_function)(void *sendbuf, + int sendcount, + 
MPI_Datatype sendtype, + void *recvbuf, + int *recvcounts, + int *displs, + MPI_Datatype recvtype, + MPI_Comm comm); + +int mv2_size_allgatherv_tuning_table = 0; +mv2_allgatherv_tuning_table *mv2_allgatherv_thresholds_table = NULL; + +#define MPIR_Allgatherv_Rec_Doubling_MV2 smpi_coll_tuned_allgatherv_mpich_rdb +#define MPIR_Allgatherv_Bruck_MV2 smpi_coll_tuned_allgatherv_ompi_bruck +#define MPIR_Allgatherv_Ring_MV2 smpi_coll_tuned_allgatherv_mpich_ring + + +typedef struct { + int min; + int max; + int (*MV2_pt_Allreduce_function)(void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm); +} mv2_allreduce_tuning_element; + +typedef struct { + int numproc; + int mcast_enabled; + int is_two_level_allreduce[MV2_MAX_NB_THRESHOLDS]; + int size_inter_table; + mv2_allreduce_tuning_element inter_leader[MV2_MAX_NB_THRESHOLDS]; + int size_intra_table; + mv2_allreduce_tuning_element intra_node[MV2_MAX_NB_THRESHOLDS]; +} mv2_allreduce_tuning_table; + +extern int mv2_size_allreduce_tuning_table; +extern mv2_allreduce_tuning_table *mv2_allreduce_thresholds_table; +extern int mv2_use_old_allreduce; + + +int (*MV2_Allreduce_function)(void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm)=NULL; + + +int (*MV2_Allreduce_intra_function)( void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm)=NULL; + +int mv2_size_allreduce_tuning_table = 0; +mv2_allreduce_tuning_table *mv2_allreduce_thresholds_table = NULL; + + + + + +static int MPIR_Allreduce_mcst_reduce_two_level_helper_MV2( void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm) +{ + return 0; +} + +static int MPIR_Allreduce_mcst_reduce_redscat_gather_MV2( void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm) +{ + return 0; +} + +static int MPIR_Allreduce_reduce_p2p_MV2( void *sendbuf, + void *recvbuf, + int count, + 
MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm) +{ + mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + return MPI_SUCCESS; +} + +static int MPIR_Allreduce_reduce_shmem_MV2( void *sendbuf, + void *recvbuf, + int count, + MPI_Datatype datatype, + MPI_Op op, MPI_Comm comm) +{ + mpi_coll_reduce_fun(sendbuf,recvbuf,count,datatype,op,0,comm); + return MPI_SUCCESS; +} + +#define MPIR_Allreduce_pt2pt_rd_MV2 smpi_coll_tuned_allreduce_rdb +#define MPIR_Allreduce_pt2pt_rs_MV2 smpi_coll_tuned_allreduce_rab1 +