From 9f4593d014139381fd8977a84e76f0094fb2fac8 Mon Sep 17 00:00:00 2001
From: Augustin Degomme
Date: Tue, 11 Jun 2013 17:49:46 +0200
Subject: [PATCH] add gather collectives from ompi

---
 buildtools/Cmake/DefinePackages.cmake  |   1 +
 src/include/smpi/smpi_interface.h      |   9 +
 src/simgrid/sg_config.c                |   9 +
 src/smpi/colls/colls.h                 |  16 +
 src/smpi/colls/gather-ompi.c           | 410 +++++++++++++++++++++++++
 src/smpi/colls/smpi_openmpi_selector.c |  45 ++-
 src/smpi/smpi_coll.c                   |  14 +-
 src/smpi/smpi_global.c                 |   6 +
 src/smpi/smpi_pmpi.c                   |   2 +-
 9 files changed, 485 insertions(+), 27 deletions(-)
 create mode 100644 src/smpi/colls/gather-ompi.c

diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake
index 2e31a12106..eab571d577 100644
--- a/buildtools/Cmake/DefinePackages.cmake
+++ b/buildtools/Cmake/DefinePackages.cmake
@@ -195,6 +195,7 @@ set(SMPI_SRC
   src/smpi/colls/reduce-NTSL.c
   src/smpi/colls/reduce-scatter-gather.c
   src/smpi/colls/reduce-ompi.c
+  src/smpi/colls/gather-ompi.c
   )
 
 if(SMPI_F2C)
diff --git a/src/include/smpi/smpi_interface.h b/src/include/smpi/smpi_interface.h
index 9c11e29a4e..77736f9d8f 100644
--- a/src/include/smpi/smpi_interface.h
+++ b/src/include/smpi/smpi_interface.h
@@ -22,6 +22,15 @@ typedef struct mpi_coll_description {
   void *coll;
 } s_mpi_coll_description_t, *mpi_coll_description_t;
 
+
+/** \ingroup MPI gather
+ *  \brief The list of all available gather collectives
+ */
+XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_gather_description[];
+XBT_PUBLIC_DATA(int (*mpi_coll_gather_fun)
+                (void *, int, MPI_Datatype, void *, int, MPI_Datatype,
+                 int, MPI_Comm));
+
 /** \ingroup MPI allgather
  *  \brief The list of all available allgather collectives
  */
diff --git a/src/simgrid/sg_config.c b/src/simgrid/sg_config.c
index f0ea9761ad..797a0ae112 100644
--- a/src/simgrid/sg_config.c
+++ b/src/simgrid/sg_config.c
@@ -247,6 +247,9 @@ static void _sg_cfg_cb__coll(const char *category,
   /* New Module missing */
   find_coll_description(table, val);
 }
+static void _sg_cfg_cb__coll_gather(const char *name, int pos){
+  _sg_cfg_cb__coll("gather", mpi_coll_gather_description, name, pos);
+}
 static void _sg_cfg_cb__coll_allgather(const char *name, int pos){
   _sg_cfg_cb__coll("allgather", mpi_coll_allgather_description, name, pos);
 }
@@ -758,6 +761,12 @@ void sg_config_init(int *argc, char **argv)
                    "Which collective selector to use",
                    xbt_cfgelm_string, &default_value, 1, 1, NULL, NULL);
 
+
+  xbt_cfg_register(&_sg_cfg_set, "smpi/gather",
+                   "Which collective to use for gather",
+                   xbt_cfgelm_string, NULL, 1, 1, &_sg_cfg_cb__coll_gather,
+                   NULL);
+
   xbt_cfg_register(&_sg_cfg_set, "smpi/allgather",
                    "Which collective to use for allgather",
                    xbt_cfgelm_string, NULL, 1, 1, &_sg_cfg_cb__coll_allgather,
diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h
index ee51cacf82..4495b4d9ba 100644
--- a/src/smpi/colls/colls.h
+++ b/src/smpi/colls/colls.h
@@ -20,6 +20,22 @@
 #define COLL_NOsep
 #define COLL_NOTHING(...)
+/************* + * GATHER * + *************/ +#define COLL_GATHER_SIG gather, int, \ + (void *send_buff, int send_count, MPI_Datatype send_type, \ + void *recv_buff, int recv_count, MPI_Datatype recv_type, \ + int root, MPI_Comm comm) + +#define COLL_GATHERS(action, COLL_sep) \ +COLL_APPLY(action, COLL_GATHER_SIG, ompi) COLL_sep \ +COLL_APPLY(action, COLL_GATHER_SIG, ompi_basic_linear) COLL_sep \ +COLL_APPLY(action, COLL_GATHER_SIG, ompi_binomial) COLL_sep \ +COLL_APPLY(action, COLL_GATHER_SIG, ompi_linear_sync) \ + + +COLL_GATHERS(COLL_PROTO, COLL_NOsep) /************* * ALLGATHER * diff --git a/src/smpi/colls/gather-ompi.c b/src/smpi/colls/gather-ompi.c new file mode 100644 index 0000000000..22e6e63dd9 --- /dev/null +++ b/src/smpi/colls/gather-ompi.c @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2009 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "colls_private.h" +#include "coll_tuned_topo.h" + +#define MCA_COLL_BASE_TAG_GATHER 333 +/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain, + * gather_intra_pipeline, segmentation? */ +int +smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, + MPI_Datatype sdtype, + void *rbuf, int rcount, + MPI_Datatype rdtype, + int root, + MPI_Comm comm) +{ + int line = -1; + int i; + int rank; + int vrank; + int size; + int total_recv = 0; + char *ptmp = NULL; + char *tempbuf = NULL; + int err; + ompi_coll_tree_t* bmtree; + MPI_Status status; + MPI_Aint sextent, slb, strue_lb, strue_extent; + MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; + + + size = smpi_comm_size(comm); + rank = smpi_comm_rank(comm); + + XBT_DEBUG( + "smpi_coll_tuned_gather_ompi_binomial rank %d", rank); + + /* create the binomial tree */ + // COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root ); + bmtree = ompi_coll_tuned_topo_build_in_order_bmtree(comm, root); + // data->cached_in_order_bmtree; + + smpi_datatype_extent(sdtype, &slb, &sextent); + smpi_datatype_extent(sdtype, &strue_lb, &strue_extent); + + vrank = (rank - root + size) % size; + + if (rank == root) { + smpi_datatype_extent(rdtype, &rlb, &rextent); + smpi_datatype_extent(rdtype, &rtrue_lb, &rtrue_extent); + if (0 == root){ + /* root on 0, just use the recv buffer */ + ptmp = (char *) rbuf; + if (sbuf != MPI_IN_PLACE) { + err = smpi_datatype_copy(sbuf, scount, sdtype, + ptmp, rcount, rdtype); + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + } + } else { + /* root is not on 0, allocate temp buffer for recv, + * rotate data at the end */ + tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent); + if (NULL == tempbuf) { + err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl; + } + + ptmp = tempbuf - rlb; + if (sbuf != MPI_IN_PLACE) { + /* copy from sbuf to temp buffer */ + err = smpi_datatype_copy(sbuf, scount, sdtype, + ptmp, rcount, rdtype); + if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + } else { + /* copy from rbuf to temp buffer */ + err = smpi_datatype_copy((char *) rbuf + rank*rextent*rcount, rcount, rdtype, ptmp, rcount, 
rdtype );
+            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+        }
+    }
+    total_recv = rcount;
+    } else if (!(vrank % 2)) {
+    /* other non-leaf nodes, allocate temp buffer for data received from
+     * children; the most we need is half of the total data elements due
+     * to the property of the binomial tree */
+    tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
+    if (NULL == tempbuf) {
+        err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
+    }
+
+    ptmp = tempbuf - slb;
+    /* local copy to tempbuf */
+    err = smpi_datatype_copy(sbuf, scount, sdtype,
+                             ptmp, scount, sdtype);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    /* use sdtype,scount as rdtype,rcount since they are ignored on
+     * non-root procs */
+    rdtype = sdtype;
+    rcount = scount;
+    rextent = sextent;
+    total_recv = rcount;
+    } else {
+    /* leaf nodes, no temp buffer needed, use sdtype,scount as
+     * rdtype,rcount since they are ignored on non-root procs */
+    ptmp = (char *) sbuf;
+    total_recv = scount;
+    }
+
+    if (!(vrank % 2)) {
+    /* all non-leaf nodes recv from children */
+    for (i = 0; i < bmtree->tree_nextsize; i++) {
+        int mycount = 0, vkid;
+        /* figure out how much data I have to send to this child */
+        vkid = (bmtree->tree_next[i] - root + size) % size;
+        mycount = vkid - vrank;
+        if (mycount > (size - vkid))
+            mycount = size - vkid;
+        mycount *= rcount;
+
+        XBT_DEBUG(
+            "smpi_coll_tuned_gather_ompi_binomial rank %d recv %d mycount = %d",
+            rank, bmtree->tree_next[i], mycount);
+
+        smpi_mpi_recv(ptmp + total_recv*rextent, rcount*size-total_recv, rdtype,
+                      bmtree->tree_next[i], MCA_COLL_BASE_TAG_GATHER,
+                      comm, &status);
+
+        total_recv += mycount;
+    }
+    }
+
+    if (rank != root) {
+    /* all nodes except root send to parents */
+    XBT_DEBUG(
+        "smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n",
+        rank, bmtree->tree_prev, total_recv);
+
+    smpi_mpi_send(ptmp, total_recv, sdtype,
+                  bmtree->tree_prev,
+                  MCA_COLL_BASE_TAG_GATHER,
+                  comm);
+    }
+    if (rank == root) {
+      if (root != 0) {
+    /* rotate received data on root if root != 0 */
+    err = smpi_datatype_copy(ptmp, rcount*(size - root), rdtype,
+                             (char *) rbuf + rextent*root*rcount, rcount*(size - root), rdtype );
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+
+    err = smpi_datatype_copy( ptmp + rextent*rcount*(size-root), rcount*root,rdtype,
+                              (char *) rbuf,rcount*root,rdtype);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    free(tempbuf);
+      }
+    } else if (!(vrank % 2)) {
+    /* other non-leaf nodes */
+    free(tempbuf);
+    }
+    return MPI_SUCCESS;
+
+ err_hndl:
+    if (NULL != tempbuf)
+        free(tempbuf);
+
+    XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d",
+               __FILE__, line, err, rank);
+    return err;
+}
+
+/*
+ * gather_intra_linear_sync
+ *
+ * Function: - synchronized gather: each non-root rank waits for a
+ *             zero-byte message from the root, then sends its data in
+ *             two segments (a small first segment, then the rest)
+ * Accepts:  - same arguments as MPI_Gather() (the first segment size
+ *             is derived internally from the block size)
+ * Returns:  - MPI_SUCCESS or error code
+ */
+int
+smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount,
+                                        MPI_Datatype sdtype,
+                                        void *rbuf, int rcount,
+                                        MPI_Datatype rdtype,
+                                        int root,
+                                        MPI_Comm comm)
+{
+    int i;
+    int ret, line;
+    int rank, size;
+    int first_segment_count;
+    size_t typelng;
+    MPI_Aint extent;
+    MPI_Aint lb;
+
+    int first_segment_size=0;
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    size_t dsize, block_size;
+    if (rank == root) {
+        dsize= smpi_datatype_size(rdtype);
+        block_size = dsize * rcount;
+    } else {
+        dsize=smpi_datatype_size(sdtype);
+        block_size = dsize * scount;
+    }
+
+    /* 92160 is the large_block_size threshold of the OMPI selector */
+    if (block_size > 92160){
+        first_segment_size = 32768;
+    } else {
+        first_segment_size = 1024;
+    }
+
+    XBT_DEBUG(
+        "smpi_coll_tuned_gather_ompi_linear_sync rank %d, segment %d", rank, first_segment_size);
+
+    if (rank != root) {
+        /* Non-root processes:
+           - receive zero byte message from the root,
+           - send the first segment of the data synchronously,
+           - send the second segment of the data.
+        */
+
+        typelng= smpi_datatype_size(sdtype);
+        smpi_datatype_extent(sdtype, &lb, &extent);
+        first_segment_count = scount;
+        COLL_TUNED_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng,
+                                      first_segment_count );
+
+        smpi_mpi_recv(sbuf, 0, MPI_BYTE, root,
+                      MCA_COLL_BASE_TAG_GATHER,
+                      comm, MPI_STATUS_IGNORE);
+
+        smpi_mpi_send(sbuf, first_segment_count, sdtype, root,
+                      MCA_COLL_BASE_TAG_GATHER,
+                      comm);
+
+        smpi_mpi_send((char*)sbuf + extent * first_segment_count,
+                      (scount - first_segment_count), sdtype,
+                      root, MCA_COLL_BASE_TAG_GATHER,
+                      comm);
+    }
+
+    else {
+        /* Root process,
+           - For every non-root node:
+             - post irecv for the first segment of the message
+             - send zero byte message to signal node to send the message
+             - post irecv for the second segment of the message
+             - wait for the first segment to complete
+           - Copy local data if necessary
+           - Waitall for all the second segments to complete.
+        */
+        char *ptmp;
+        MPI_Request *reqs = NULL, first_segment_req;
+        reqs = (MPI_Request *) calloc(size, sizeof(MPI_Request ));
+        if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; }
+
+        typelng=smpi_datatype_size(rdtype);
+        smpi_datatype_extent(rdtype, &lb, &extent);
+        first_segment_count = rcount;
+        COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
+                                      first_segment_count );
+
+        ptmp = (char *) rbuf;
+        for (i = 0; i < size; ++i) {
+            if (i == rank) {
+                /* skip myself */
+                reqs[i] = MPI_REQUEST_NULL;
+                continue;
+            }
+
+            /* irecv for the first segment from i */
+            ptmp = (char*)rbuf + i * rcount * extent;
+            first_segment_req = smpi_mpi_irecv(ptmp, first_segment_count, rdtype, i,
+                                               MCA_COLL_BASE_TAG_GATHER, comm
+                                               );
+
+            /* send sync message */
+            smpi_mpi_send(rbuf, 0, MPI_BYTE, i,
+                          MCA_COLL_BASE_TAG_GATHER,
+                          comm);
+
+            /* irecv for the second segment */
+            ptmp = (char*)rbuf + (i * rcount + first_segment_count) * extent;
+            reqs[i]=smpi_mpi_irecv(ptmp, (rcount - first_segment_count),
+                                   rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm
+                                   );
+
+            /* wait on the first segment to complete */
+            smpi_mpi_wait(&first_segment_req, MPI_STATUS_IGNORE);
+        }
+
+        /* copy local data if necessary */
+        if (MPI_IN_PLACE != sbuf) {
+            ret = smpi_datatype_copy(sbuf, scount, sdtype,
+                                     (char*)rbuf + rank * rcount * extent,
+                                     rcount, rdtype);
+            if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
+        }
+
+        /* wait for all second segments to complete */
+        ret = smpi_mpi_waitall(size, reqs, MPI_STATUSES_IGNORE);
+
+        free(reqs);
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
+ error_hndl:
+    XBT_DEBUG(
+        "ERROR_HNDL: node %d file %s line %d error %d\n",
+        rank, __FILE__, line, ret );
+    return ret;
+}
+
+/*
+ * Linear functions are copied from the BASIC coll module.
+ * They do not segment the message and are simple implementations,
+ * but for some small numbers of nodes and/or small data sizes they
+ * are just as fast as tuned/tree-based segmenting operations,
+ * and as such may be selected by the decision functions.
+ * These are copied into this module due to the way we select modules
+ * in V1, i.e. in V2 we will handle this differently and so will not
+ * have to duplicate code.
+ * JPG following the examples from other coll_tuned implementations. Dec06.
+ */
+
+/* copied function (with appropriate renaming) starts here */
+/*
+ * gather_intra
+ *
+ * Function: - basic gather operation
+ * Accepts:  - same arguments as MPI_Gather()
+ * Returns:  - MPI_SUCCESS or error code
+ */
+int
+smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount,
+                                         MPI_Datatype sdtype,
+                                         void *rbuf, int rcount,
+                                         MPI_Datatype rdtype,
+                                         int root,
+                                         MPI_Comm comm)
+{
+    int i;
+    int err;
+    int rank;
+    int size;
+    char *ptmp;
+    MPI_Aint incr;
+    MPI_Aint extent;
+    MPI_Aint lb;
+
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    /* Everyone but root sends data and returns. */
+    XBT_DEBUG(
+        "smpi_coll_tuned_gather_ompi_basic_linear rank %d", rank);
+
+    if (rank != root) {
+        smpi_mpi_send(sbuf, scount, sdtype, root,
+                      MCA_COLL_BASE_TAG_GATHER,
+                      comm);
+        return MPI_SUCCESS;
+    }
+
+    /* I am the root, loop receiving the data. */
+
+    smpi_datatype_extent(rdtype, &lb, &extent);
+    incr = extent * rcount;
+    for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
+        if (i == rank) {
+            if (MPI_IN_PLACE != sbuf) {
+                err = smpi_datatype_copy(sbuf, scount, sdtype,
+                                         ptmp, rcount, rdtype);
+            } else {
+                err = MPI_SUCCESS;
+            }
+        } else {
+            smpi_mpi_recv(ptmp, rcount, rdtype, i,
+                          MCA_COLL_BASE_TAG_GATHER,
+                          comm, MPI_STATUS_IGNORE);
+            err = MPI_SUCCESS;
+        }
+        if (MPI_SUCCESS != err) {
+            return err;
+        }
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
+}
diff --git a/src/smpi/colls/smpi_openmpi_selector.c b/src/smpi/colls/smpi_openmpi_selector.c
index 36f901feae..72c3699278 100644
--- a/src/smpi/colls/smpi_openmpi_selector.c
+++ b/src/smpi/colls/smpi_openmpi_selector.c
@@ -511,19 +511,19 @@ int smpi_coll_tuned_allgatherv_ompi(void *sbuf, int scount,
         }
     }
 }
-/*
+
 int smpi_coll_tuned_gather_ompi(void *sbuf, int scount,
                                 MPI_Datatype sdtype,
                                 void* rbuf, int rcount,
                                 MPI_Datatype rdtype,
                                 int root,
-                                MPI_Comm comm,
+                                MPI_Comm comm
                                 )
 {
-    const int large_segment_size = 32768;
-    const int small_segment_size = 1024;
+    //const int large_segment_size = 32768;
+    //const int small_segment_size = 1024;
 
-    const size_t large_block_size = 92160;
+    //const size_t large_block_size = 92160;
     const size_t intermediate_block_size = 6000;
     const size_t small_block_size = 1024;
 
@@ -533,46 +533,43 @@ int smpi_coll_tuned_gather_ompi(void *sbuf, int scount,
     int communicator_size, rank;
     size_t dsize, block_size;
 
-    OPAL_OUTPUT((smpi_coll_tuned_stream,
-                 "smpi_coll_tuned_gather_ompi"));
+    XBT_DEBUG("smpi_coll_tuned_gather_ompi");
 
     communicator_size = smpi_comm_size(comm);
-    rank = ompi_comm_rank(comm);
+    rank = smpi_comm_rank(comm);
 
     // Determine block size
     if (rank == root) {
-        ompi_datatype_type_size(rdtype, &dsize);
+        dsize = smpi_datatype_size(rdtype);
         block_size = dsize * rcount;
     } else {
-        ompi_datatype_type_size(sdtype, &dsize);
+        dsize = smpi_datatype_size(sdtype);
        block_size = dsize * scount;
     }
 
-    if (block_size > large_block_size) {
-        return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
-                                                         rbuf, rcount, rdtype,
-                                                         root, comm, module,
-                                                         large_segment_size);
+/*    if (block_size > large_block_size) {*/
+/*        return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype, */
+/*                                                         rbuf, rcount, rdtype, */
+/*                                                         root, comm);*/
 
-    } else if (block_size > intermediate_block_size) {
-        return smpi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
+/*    } else*/ if (block_size > intermediate_block_size) {
+        return smpi_coll_tuned_gather_ompi_linear_sync (sbuf, scount, sdtype,
                                                          rbuf, rcount, rdtype,
- root, comm, module, - small_segment_size); + root, comm); } else if ((communicator_size > large_communicator_size) || ((communicator_size > small_communicator_size) && (block_size < small_block_size))) { - return smpi_coll_tuned_gather_intra_binomial (sbuf, scount, sdtype, + return smpi_coll_tuned_gather_ompi_binomial (sbuf, scount, sdtype, rbuf, rcount, rdtype, - root, comm, module); + root, comm); } // Otherwise, use basic linear - return smpi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype, + return smpi_coll_tuned_gather_ompi_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, - root, comm, module); -}*/ + root, comm); +} /* int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, MPI_Datatype sdtype, diff --git a/src/smpi/smpi_coll.c b/src/smpi/smpi_coll.c index 503c060ece..f242c77232 100644 --- a/src/smpi/smpi_coll.c +++ b/src/smpi/smpi_coll.c @@ -14,6 +14,15 @@ #include "colls/colls.h" #include "simgrid/sg_config.h" +s_mpi_coll_description_t mpi_coll_gather_description[] = { + {"default", + "gather default collective", + smpi_mpi_gather}, +COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), + {NULL, NULL, NULL} /* this array must be NULL terminated */ +}; + + s_mpi_coll_description_t mpi_coll_allgather_description[] = { {"default", "allgather default collective", @@ -62,7 +71,7 @@ COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), s_mpi_coll_description_t mpi_coll_bcast_description[] = { {"default", - "allgather default collective", + "bcast default collective", smpi_mpi_bcast}, COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {NULL, NULL, NULL} /* this array must be NULL terminated */ @@ -70,7 +79,7 @@ COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), s_mpi_coll_description_t mpi_coll_reduce_description[] = { {"default", - "allgather default collective", + "reduce default collective", smpi_mpi_reduce}, COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {NULL, NULL, NULL} /* this array must be NULL terminated */ @@ -126,6 +135,7 @@ int find_coll_description(s_mpi_coll_description_t * table, XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI (coll)"); +int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); int (*mpi_coll_allgatherv_fun)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); diff --git a/src/smpi/smpi_global.c b/src/smpi/smpi_global.c index 7fca6d7bf9..189bac1afd 100644 --- a/src/smpi/smpi_global.c +++ b/src/smpi/smpi_global.c @@ -360,6 +360,12 @@ int smpi_main(int (*realmain) (int argc, char *argv[]),int argc, char *argv[]) SIMIX_function_register_default(realmain); SIMIX_launch_application(argv[2]); + int gather_id = find_coll_description(mpi_coll_gather_description, + sg_cfg_get_string("smpi/gather")); + mpi_coll_gather_fun = (int (*)(void *, int, MPI_Datatype, + void*, int, MPI_Datatype, int, MPI_Comm)) + mpi_coll_gather_description[gather_id].coll; + int allgather_id = find_coll_description(mpi_coll_allgather_description, sg_cfg_get_string("smpi/allgather")); mpi_coll_allgather_fun = (int (*)(void *, int, MPI_Datatype, diff --git a/src/smpi/smpi_pmpi.c b/src/smpi/smpi_pmpi.c index 988f0ad59c..783395b2ef 100644 --- a/src/smpi/smpi_pmpi.c +++ b/src/smpi/smpi_pmpi.c @@ -1609,7 +1609,7 @@ int PMPI_Gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, || recvtype == MPI_DATATYPE_NULL) { 
retval = MPI_ERR_TYPE; } else { - smpi_mpi_gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, + mpi_coll_gather_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); retval = MPI_SUCCESS; } -- 2.20.1
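
Usage note (not part of the patch itself): once this change is applied, the algorithm behind MPI_Gather under SMPI is selected at run time through the new "smpi/gather" configuration item, whose valid values are the names registered in mpi_coll_gather_description (default, ompi, ompi_basic_linear, ompi_binomial, ompi_linear_sync). Below is a minimal C sketch to exercise that selection; the file name, platform/hostfile paths, and exact smpirun flags are illustrative assumptions and may differ between SimGrid versions.

/* gather_test.c -- toy program to exercise the new gather algorithms.
 * Hypothetical build/run commands (paths are placeholders):
 *   smpicc gather_test.c -o gather_test
 *   smpirun -np 8 -platform platform.xml -hostfile hostfile \
 *           --cfg=smpi/gather:ompi_binomial ./gather_test
 */
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
  int rank, size;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int sendval = rank;               /* each rank contributes its own rank */
  int *recvbuf = NULL;
  if (rank == 0)                    /* only the root needs a receive buffer */
    recvbuf = malloc(size * sizeof(int));

  /* With this patch, PMPI_Gather dispatches through mpi_coll_gather_fun,
   * i.e. whichever implementation the "smpi/gather" flag selected. */
  MPI_Gather(&sendval, 1, MPI_INT, recvbuf, 1, MPI_INT, 0, MPI_COMM_WORLD);

  if (rank == 0) {
    for (int i = 0; i < size; i++)
      printf("recvbuf[%d] = %d\n", i, recvbuf[i]);
    free(recvbuf);
  }
  MPI_Finalize();
  return 0;
}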