From f070773e633fc07bb6be25b23b7ded7657479e7f Mon Sep 17 00:00:00 2001 From: Paul Bédaride Date: Fri, 12 Apr 2013 14:35:12 +0200 Subject: [PATCH] Reactivate some mpi collectives --- buildtools/Cmake/AddTests.cmake | 8 +- buildtools/Cmake/DefinePackages.cmake | 10 +-- src/smpi/colls/allgather-2dmesh.c | 18 ++--- src/smpi/colls/allgather-3dmesh.c | 29 +++---- src/smpi/colls/allgather-bruck.c | 80 +++---------------- src/smpi/colls/allreduce-rab-rdb.c | 23 ++---- .../colls/allreduce-smp-binomial-pipeline.c | 24 +----- src/smpi/colls/allreduce-smp-binomial.c | 10 --- src/smpi/colls/alltoallv-ring.c | 2 +- src/smpi/colls/colls.h | 10 +-- 10 files changed, 52 insertions(+), 162 deletions(-) diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake index bf7b8fbd3b..db3a149160 100644 --- a/buildtools/Cmake/AddTests.cmake +++ b/buildtools/Cmake/AddTests.cmake @@ -370,15 +370,17 @@ if(NOT enable_memcheck) ADD_TEST(smpi-replay ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/smpi --cd ${CMAKE_BINARY_DIR}/examples/smpi ${CMAKE_HOME_DIRECTORY}/examples/smpi/replay/smpi_replay.tesh) endif() - FOREACH (ALLGATHER_COLL default GB loosely_lr lr NTSLR NTSLR_NB pair rdb rhv ring SMP_NTS + FOREACH (ALLGATHER_COLL default 2dmesh 3dmesh bruck GB loosely_lr lr + NTSLR NTSLR_NB pair rdb rhv ring SMP_NTS smp_simple spreading_simple) ADD_TEST(smpi-allgather-coll-${ALLGATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgather:${ALLGATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather_coll.tesh) ENDFOREACH() FOREACH (ALLGATHERV_COLL default pair ring) ADD_TEST(smpi-allgatherv-coll-${ALLGATHERV_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv_coll.tesh) ENDFOREACH() - FOREACH (ALLREDUCE_COLL default lr NTS rab1 rab2 rab_rsag rdb smp_binomial smp_rdb smp_rsag - smp_rsag_lr smp_rsag_rab redbcast) + FOREACH (ALLREDUCE_COLL default lr NTS rab1 rab2 rab_rdb + rab_rsag rdb smp_binomial smp_binomial_pipeline + smp_rdb smp_rsag smp_rsag_lr smp_rsag_rab redbcast) ADD_TEST(smpi-allreduce-coll-${ALLREDUCE_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allreduce:${ALLREDUCE_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allreduce_coll.tesh) ENDFOREACH() FOREACH (ALLTOALL_COLL 2dmesh 3dmesh pair pair_one_barrier pair_light_barrier diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake index b2f2b7a222..0fd308c88b 100644 --- a/buildtools/Cmake/DefinePackages.cmake +++ b/buildtools/Cmake/DefinePackages.cmake @@ -111,9 +111,9 @@ set(SMPI_SRC src/smpi/smpi_pmpi.c src/smpi/smpi_replay.c src/smpi/colls/colls_global.c - #src/smpi/colls/allgather-2dmesh.c - #src/smpi/colls/allgather-3dmesh.c - #src/smpi/colls/allgather-bruck.c + src/smpi/colls/allgather-2dmesh.c + src/smpi/colls/allgather-3dmesh.c + src/smpi/colls/allgather-bruck.c src/smpi/colls/allgather-GB.c src/smpi/colls/allgather-loosely-lr.c src/smpi/colls/allgather-lr.c @@ -132,13 +132,13 @@ set(SMPI_SRC src/smpi/colls/allreduce-NTS.c src/smpi/colls/allreduce-rab1.c src/smpi/colls/allreduce-rab2.c - #src/smpi/colls/allreduce-rab-rdb.c + src/smpi/colls/allreduce-rab-rdb.c #src/smpi/colls/allreduce-rab-reduce-scatter.c src/smpi/colls/allreduce-rab-rsag.c src/smpi/colls/allreduce-rdb.c src/smpi/colls/allreduce-redbcast.c
src/smpi/colls/allreduce-smp-binomial.c - #src/smpi/colls/allreduce-smp-binomial-pipeline.c + src/smpi/colls/allreduce-smp-binomial-pipeline.c src/smpi/colls/allreduce-smp-rdb.c src/smpi/colls/allreduce-smp-rsag.c src/smpi/colls/allreduce-smp-rsag-lr.c diff --git a/src/smpi/colls/allgather-2dmesh.c b/src/smpi/colls/allgather-2dmesh.c index 91c9cfbcb8..b5f50420e0 100644 --- a/src/smpi/colls/allgather-2dmesh.c +++ b/src/smpi/colls/allgather-2dmesh.c @@ -110,8 +110,6 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype int i, src, dst, rank, num_procs; int X, Y, send_offset, recv_offset; int my_row_base, my_col_base, src_row_base, block_size, num_reqs; - int success = 0; - int failure = 1; int tag = 1; rank = smpi_comm_rank(comm); @@ -135,7 +133,7 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype // do local allgather/local copy recv_offset = rank * block_size; - MPIR_Localcopy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset, + smpi_datatype_copy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset, recv_count, recv_type); // do row-wise comm @@ -144,8 +142,8 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype if (src == rank) continue; recv_offset = src * block_size; - MPIC_Irecv((char *)recv_buff + recv_offset, recv_count, recv_type, src, tag, - comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count, recv_type, src, tag, + comm); } @@ -153,7 +151,7 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype dst = i + my_row_base; if (dst == rank) continue; - MPIC_Send(send_buff, send_count, send_type, dst, tag, comm); + smpi_mpi_send(send_buff, send_count, send_type, dst, tag, comm); } smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE); @@ -167,8 +165,8 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype continue; src_row_base = (src / Y) * Y; recv_offset = src_row_base * block_size; - MPIC_Irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, - comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, + comm); } for (i = 0; i < X; i++) { @@ -176,7 +174,7 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype if (dst == rank) continue; send_offset = my_row_base * block_size; - MPIC_Send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag, + smpi_mpi_send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag, comm); } @@ -184,5 +182,5 @@ smpi_coll_tuned_allgather_2dmesh(void *send_buff, int send_count, MPI_Datatype free(req); - return success; + return MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather-3dmesh.c b/src/smpi/colls/allgather-3dmesh.c index e75c9c3d8e..dfd2ace9e6 100644 --- a/src/smpi/colls/allgather-3dmesh.c +++ b/src/smpi/colls/allgather-3dmesh.c @@ -97,8 +97,6 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, int i, src, dst, rank, num_procs, block_size, my_z_base; int my_z, X, Y, Z, send_offset, recv_offset; int two_dsize, my_row_base, my_col_base, src_row_base, src_z_base, num_reqs; - int success = 0; - int failure = 1; int tag = 1; rank = smpi_comm_rank(comm); @@ -124,17 +122,12 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, block_size = extent * send_count; req = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request)); - if (!req) { - printf("allgather-3dmesh-shoot.c:85: 
cannot allocate memory\n"); - MPI_Finalize(); - exit(failure); - } req_ptr = req; // do local allgather/local copy recv_offset = rank * block_size; - MPIR_Localcopy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset, + smpi_datatype_copy(send_buff, send_count, send_type, (char *)recv_buff + recv_offset, recv_count, recv_type); // do rowwise comm @@ -143,15 +136,15 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, if (src == rank) continue; recv_offset = src * block_size; - MPIC_Irecv((char *)recv_buff + recv_offset, send_count, recv_type, src, tag, - comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, send_count, recv_type, src, tag, + comm); } for (i = 0; i < Y; i++) { dst = i + my_row_base; if (dst == rank) continue; - MPIC_Send(send_buff, send_count, send_type, dst, tag, comm); + smpi_mpi_send(send_buff, send_count, send_type, dst, tag, comm); } smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE); @@ -166,8 +159,8 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, src_row_base = (src / X) * X; recv_offset = src_row_base * block_size; - MPIC_Irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, - comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * Y, recv_type, src, tag, + comm); } send_offset = my_row_base * block_size; @@ -176,7 +169,7 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, dst = (i * Y + my_col_base); if (dst == rank) continue; - MPIC_Send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag, + smpi_mpi_send((char *)recv_buff + send_offset, send_count * Y, send_type, dst, tag, comm); } @@ -189,19 +182,19 @@ int smpi_coll_tuned_allgather_3dmesh(void *send_buff, int send_count, recv_offset = (src_z_base * block_size); - MPIC_Irecv((char *)recv_buff + recv_offset, recv_count * two_dsize, recv_type, - src, tag, comm, req_ptr++); + *(req_ptr++) = smpi_mpi_irecv((char *)recv_buff + recv_offset, recv_count * two_dsize, recv_type, + src, tag, comm); } for (i = 1; i < Z; i++) { dst = (rank + i * two_dsize) % num_procs; send_offset = my_z_base * block_size; - MPIC_Send((char *)recv_buff + send_offset, send_count * two_dsize, send_type, + smpi_mpi_send((char *)recv_buff + send_offset, send_count * two_dsize, send_type, dst, tag, comm); } smpi_mpi_waitall(Z - 1, req, MPI_STATUSES_IGNORE); free(req); - return success; + return MPI_SUCCESS; } diff --git a/src/smpi/colls/allgather-bruck.c b/src/smpi/colls/allgather-bruck.c index 4ca844c5ab..681690a63b 100644 --- a/src/smpi/colls/allgather-bruck.c +++ b/src/smpi/colls/allgather-bruck.c @@ -71,10 +71,9 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, MPI_Aint recv_extent; // local int variables - int i, src, dst, rank, num_procs, count, remainder; + int src, dst, rank, num_procs, count, remainder; int tag = 1; int pof2 = 1; - int success = 0; // local string variables char *tmp_buff; @@ -91,20 +90,15 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, count = recv_count; tmp_buff = (char *) xbt_malloc(num_procs * recv_count * recv_extent); - if (!tmp_buff) { - printf("allgather-bruck:54: cannot allocate memory\n"); - MPI_Finalize(); - exit(0); - } - // perform a local copy - MPIR_Localcopy(send_ptr, send_count, send_type, tmp_buff, recv_count, - recv_type); + // perform a local copy + smpi_datatype_copy(send_ptr, send_count, send_type, + tmp_buff, recv_count, recv_type); while (pof2 <= (num_procs / 2)) { src 
= (rank + pof2) % num_procs; dst = (rank - pof2 + num_procs) % num_procs; - MPIC_Sendrecv(tmp_buff, count, recv_type, dst, tag, + smpi_mpi_sendrecv(tmp_buff, count, recv_type, dst, tag, tmp_buff + count * recv_extent, count, recv_type, src, tag, comm, &status); count *= 2; @@ -116,76 +110,20 @@ int smpi_coll_tuned_allgather_bruck(void *send_buff, int send_count, src = (rank + pof2) % num_procs; dst = (rank - pof2 + num_procs) % num_procs; - MPIC_Sendrecv(tmp_buff, remainder * recv_count, recv_type, dst, tag, + smpi_mpi_sendrecv(tmp_buff, remainder * recv_count, recv_type, dst, tag, tmp_buff + count * recv_extent, remainder * recv_count, recv_type, src, tag, comm, &status); } - MPIC_Sendrecv(tmp_buff, (num_procs - rank) * recv_count, recv_type, rank, + smpi_mpi_sendrecv(tmp_buff, (num_procs - rank) * recv_count, recv_type, rank, tag, recv_ptr + rank * recv_count * recv_extent, (num_procs - rank) * recv_count, recv_type, rank, tag, comm, &status); if (rank) - MPIC_Sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent, + smpi_mpi_sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent, rank * recv_count, recv_type, rank, tag, recv_ptr, rank * recv_count, recv_type, rank, tag, comm, &status); free(tmp_buff); - return success; -} - -/*#include "ompi_bindings.h" - -int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount, - MPI_Datatype sdtype, - void* rbuf, int rcount, - MPI_Datatype rdtype, - MPI_Comm comm) -{ - int line = -1, err = 0; - int rank, size, step; - int sendto, recvfrom; - void * tmpsend, *tmprecv; - ptrdiff_t lb, sext, rext; - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoall_intra_pairwise rank %d", rank)); - - err = ompi_datatype_get_extent (sdtype, &lb, &sext); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - err = ompi_datatype_get_extent (rdtype, &lb, &rext); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - - - // Perform pairwise exchange - starting from 1 so the local copy is last - for (step = 1; step < size + 1; step++) { - - // Determine sender and receiver for this step. 
- sendto = (rank + step) % size; - recvfrom = (rank + size - step) % size; - - // Determine sending and receiving locations - tmpsend = (char*)sbuf + sendto * sext * scount; - tmprecv = (char*)rbuf + recvfrom * rext * rcount; - - // send and receive - err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto, - MCA_COLL_BASE_TAG_ALLTOALL, - tmprecv, rcount, rdtype, recvfrom, - MCA_COLL_BASE_TAG_ALLTOALL, - comm, MPI_STATUS_IGNORE, rank); - if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - } - - return MPI_SUCCESS; - - err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, - err, rank)); - return err; + return MPI_SUCCESS; } -*/ diff --git a/src/smpi/colls/allreduce-rab-rdb.c b/src/smpi/colls/allreduce-rab-rdb.c index 6d49e711da..9d5ee9c784 100644 --- a/src/smpi/colls/allreduce-rab-rdb.c +++ b/src/smpi/colls/allreduce-rab-rdb.c @@ -4,31 +4,20 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { - int nprocs, rank, type_size, tag = 543; + int nprocs, rank, tag = 543; int mask, dst, pof2, newrank, rem, newdst, i, send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps; - MPI_Aint lb, extent; + MPI_Aint extent; MPI_Status status; void *tmp_buf = NULL; -#ifdef MPICH2_REDUCTION - MPI_User_function *uop = MPIR_Op_table[op % 16 - 1]; -#else - MPI_User_function *uop; - struct MPIR_OP *op_ptr; - op_ptr = (MPI_User_function *) MPIR_ToPointer(op); - uop = op_ptr->op; -#endif - nprocs = smpi_comm_size(comm); rank = smpi_comm_rank(comm); extent = smpi_datatype_get_extent(dtype); tmp_buf = (void *) xbt_malloc(count * extent); - MPIR_Localcopy(sbuff, count, dtype, rbuff, count, dtype); - - type_size = smpi_datatype_size(dtype); + smpi_datatype_copy(sbuff, count, dtype, rbuff, count, dtype); // find nearest power-of-two less than or equal to comm_size pof2 = 1; @@ -60,7 +49,7 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, // do the reduction on received data. since the // ordering is right, it doesn't matter whether // the operation is commutative or not. - (*uop) (tmp_buf, rbuff, &count, &dtype); + smpi_op_apply(op, tmp_buf, rbuff, &count, &dtype); // change the rank newrank = rank / 2; @@ -129,8 +118,8 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, // This algorithm is used only for predefined ops // and predefined ops are always commutative. - (*uop) ((char *) tmp_buf + disps[recv_idx] * extent, - (char *) rbuff + disps[recv_idx] * extent, &recv_cnt, &dtype); + smpi_op_apply(op, (char *) tmp_buf + disps[recv_idx] * extent, + (char *) rbuff + disps[recv_idx] * extent, &recv_cnt, &dtype); // update send_idx for next iteration send_idx = recv_idx; diff --git a/src/smpi/colls/allreduce-smp-binomial-pipeline.c b/src/smpi/colls/allreduce-smp-binomial-pipeline.c index 05ecfd6cf8..93d72bb5ca 100644 --- a/src/smpi/colls/allreduce-smp-binomial-pipeline.c +++ b/src/smpi/colls/allreduce-smp-binomial-pipeline.c @@ -29,18 +29,6 @@ int allreduce_smp_binomial_pipeline_segment_size = 4096; This code assume commutative and associative reduce operator (MPI_SUM, MPI_MAX, etc). */ -#ifndef MPICH2 -extern void *MPIR_ToPointer(); - -struct MPIR_OP { - MPI_User_function *op; - int commute, permanent; -}; - -#else -extern MPI_User_function *MPIR_Op_table[]; -#endif - /* This fucntion performs all-reduce operation as follow. 
** in a pipeline fashion ** 1) binomial_tree reduce inside each SMP node @@ -60,14 +48,6 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, MPI_Status status; int num_core = NUM_CORE; - MPI_User_function *uop; -#ifndef MPICH2 - struct MPIR_OP *op_ptr = MPIR_ToPointer(op); - uop = (MPI_User_function *) op_ptr->op; -#else - uop = MPIR_Op_table[op % 16 - 1]; -#endif - comm_size = smpi_comm_size(comm); rank = smpi_comm_rank(comm); MPI_Aint extent; @@ -111,7 +91,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, if (src < comm_size) { recv_offset = phase * pcount * extent; smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status); - (*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype); + smpi_op_apply(op, tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype); } } else { send_offset = phase * pcount * extent; @@ -135,7 +115,7 @@ int smpi_coll_tuned_allreduce_smp_binomial_pipeline(void *send_buf, if (src < comm_size) { recv_offset = (phase - 1) * pcount * extent; smpi_mpi_recv(tmp_buf, pcount, dtype, src, tag, comm, &status); - (*uop) (tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype); + smpi_op_apply(op, tmp_buf, (char *)recv_buf + recv_offset, &pcount, &dtype); } } else { dst = (inter_rank & (~mask)) * num_core; diff --git a/src/smpi/colls/allreduce-smp-binomial.c b/src/smpi/colls/allreduce-smp-binomial.c index cf68dfb647..ed9b68c55d 100644 --- a/src/smpi/colls/allreduce-smp-binomial.c +++ b/src/smpi/colls/allreduce-smp-binomial.c @@ -36,16 +36,6 @@ int smpi_coll_tuned_allreduce_smp_binomial(void *send_buf, void *recv_buf, int mask, src, dst; int num_core = NUM_CORE; MPI_Status status; - /* - #ifdef MPICH2_REDUCTION - MPI_User_function * uop = MPIR_Op_table[op % 16 - 1]; - #else - MPI_User_function *uop; - struct MPIR_OP *op_ptr; - op_ptr = MPIR_ToPointer(op); - uop = op_ptr->op; - #endif - */ comm_size=smpi_comm_size(comm); rank=smpi_comm_rank(comm); diff --git a/src/smpi/colls/alltoallv-ring.c b/src/smpi/colls/alltoallv-ring.c index e958bb4991..dcd4b35376 100644 --- a/src/smpi/colls/alltoallv-ring.c +++ b/src/smpi/colls/alltoallv-ring.c @@ -22,7 +22,7 @@ int smpi_coll_tuned_alltoallv_ring(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, - void *recv_buff,int *recv_counts, int *recv_disps, + void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, MPI_Comm comm) { diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h index d09c6d077d..a2ceb932cf 100644 --- a/src/smpi/colls/colls.h +++ b/src/smpi/colls/colls.h @@ -30,9 +30,9 @@ MPI_Comm comm) #define COLL_ALLGATHERS(action, COLL_sep) \ -COLL_NOTHING(COLL_APPLY(action, COLL_ALLGATHER_SIG, 2dmesh) COLL_sep) \ -COLL_NOTHING(COLL_APPLY(action, COLL_ALLGATHER_SIG, 3dmesh) COLL_sep) \ -COLL_NOTHING(COLL_APPLY(action, COLL_ALLGATHER_SIG, bruck) COLL_sep) \ +COLL_APPLY(action, COLL_ALLGATHER_SIG, 2dmesh) COLL_sep \ +COLL_APPLY(action, COLL_ALLGATHER_SIG, 3dmesh) COLL_sep \ +COLL_APPLY(action, COLL_ALLGATHER_SIG, bruck) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, GB) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, loosely_lr) COLL_sep \ COLL_APPLY(action, COLL_ALLGATHER_SIG, lr) COLL_sep \ @@ -74,12 +74,12 @@ COLL_APPLY(action, COLL_ALLREDUCE_SIG, lr) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, NTS) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab1) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab2) COLL_sep \ -COLL_NOTHING(COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rdb) COLL_sep) \ +COLL_APPLY(action, 
COLL_ALLREDUCE_SIG, rab_rdb) COLL_sep \ COLL_NOTHING(COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_reduce_scatter) COLL_sep) \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rab_rsag) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, rdb) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial) COLL_sep \ -COLL_NOTHING(COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial_pipeline) COLL_sep) \ +COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_binomial_pipeline) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_rdb) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_rsag) COLL_sep \ COLL_APPLY(action, COLL_ALLREDUCE_SIG, smp_rsag_lr) COLL_sep \ -- 2.20.1
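Notes on the recurring patterns above (annotation; the sketches below are illustrative, not part of the patch).

Every C hunk applies the same mechanical substitution: the MPICH-era helpers (MPIR_Localcopy, MPIC_Send, and MPIC_Irecv, which filled a request slot through an out-parameter) become SMPI's own primitives (smpi_datatype_copy, smpi_mpi_send, and smpi_mpi_irecv, which returns the MPI_Request), and the hand-rolled allocation checks that called MPI_Finalize()/exit() on failure are dropped, xbt_malloc being the abort-on-failure allocator. A minimal sketch of the resulting post/send/wait shape, assuming the helper signatures exactly as they appear in the allgather-2dmesh hunk; the row-geometry parameters are illustrative only:

#include "colls.h"  /* assumed to declare the smpi_* helpers used below */

/* One row-wise exchange round in the style of allgather-2dmesh.c after
 * this patch: copy the local block, post an irecv per row peer, send
 * the local block to each of them, then wait on all requests.  Assumes
 * rank lies in the row [my_row_base, my_row_base + Y), so exactly Y-1
 * requests are posted. */
static void row_allgather_sketch(void *send_buff, int send_count,
                                 MPI_Datatype send_type,
                                 char *recv_buff, int recv_count,
                                 MPI_Datatype recv_type,
                                 int Y, int my_row_base, int block_size,
                                 MPI_Comm comm)
{
  int i, tag = 1;
  int rank = smpi_comm_rank(comm);
  MPI_Request *req = xbt_malloc((Y - 1) * sizeof(MPI_Request));
  MPI_Request *req_ptr = req;

  /* local copy of this rank's own block (was MPIR_Localcopy) */
  smpi_datatype_copy(send_buff, send_count, send_type,
                     recv_buff + rank * block_size, recv_count, recv_type);

  for (i = 0; i < Y; i++) {
    int src = i + my_row_base;
    if (src == rank)
      continue;
    /* smpi_mpi_irecv returns the request, so the slot is filled by
       assignment instead of through an out-parameter as MPIC_Irecv did */
    *(req_ptr++) = smpi_mpi_irecv(recv_buff + src * block_size,
                                  recv_count, recv_type, src, tag, comm);
  }
  for (i = 0; i < Y; i++) {
    int dst = i + my_row_base;
    if (dst == rank)
      continue;
    smpi_mpi_send(send_buff, send_count, send_type, dst, tag, comm);
  }
  smpi_mpi_waitall(Y - 1, req, MPI_STATUSES_IGNORE);
  free(req);
}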
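The reduction kernels (allreduce-rab-rdb, allreduce-smp-binomial, allreduce-smp-binomial-pipeline) additionally lose their #ifdef MPICH2 plumbing: instead of resolving an MPI_User_function through MPIR_Op_table or MPIR_ToPointer and invoking it as (*uop)(in, inout, &count, &dtype), they call smpi_op_apply on the MPI_Op itself. A sketch of the combine step under the same signature assumptions, with buffer names invented for illustration:

/* Fold a received partial result (tmp_buf) into the accumulator
 * (rbuff).  smpi_op_apply(op, in, inout, &count, &dtype) applies op
 * element-wise and leaves the result in inout, the same calling
 * convention as the (*uop)(...) indirection it replaces. */
static void combine_sketch(MPI_Op op, void *tmp_buf, void *rbuff,
                           int count, MPI_Datatype dtype)
{
  smpi_op_apply(op, tmp_buf, rbuff, &count, &dtype);
}

Besides being portable inside the simulator, this drops the removed code's assumption that op indexes one of the first 16 predefined operations (the op % 16 - 1 table lookup).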
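On the algorithmic side, the reactivated Bruck allgather assembles its result rank-relative: after the doubling rounds (plus one remainder exchange when num_procs is not a power of two), block i of tmp_buff holds block (rank + i) mod num_procs of the final result; with 5 ranks, for instance, rank 2 holds blocks 2,3,4,0,1. The two closing smpi_mpi_sendrecv calls to self are therefore a local rotation into the receive buffer, not communication. Condensed, under the same assumptions as the sketches above:

/* Final rotation of allgather-bruck.c: the first num_procs-rank blocks
 * of tmp_buff (blocks rank..num_procs-1) go to offset rank, the
 * remaining rank blocks (blocks 0..rank-1) go to the front.  Both
 * transfers have src == dst == rank. */
static void bruck_rotate_sketch(char *tmp_buff, char *recv_ptr,
                                int recv_count, MPI_Datatype recv_type,
                                MPI_Aint recv_extent, MPI_Comm comm)
{
  int tag = 1;
  int rank = smpi_comm_rank(comm);
  int num_procs = smpi_comm_size(comm);
  MPI_Status status;

  smpi_mpi_sendrecv(tmp_buff, (num_procs - rank) * recv_count, recv_type,
                    rank, tag,
                    recv_ptr + rank * recv_count * recv_extent,
                    (num_procs - rank) * recv_count, recv_type, rank, tag,
                    comm, &status);
  if (rank)  /* rank 0's buffer is already in order */
    smpi_mpi_sendrecv(tmp_buff + (num_procs - rank) * recv_count * recv_extent,
                      rank * recv_count, recv_type, rank, tag,
                      recv_ptr, rank * recv_count, recv_type, rank, tag,
                      comm, &status);
}

With the sources compiled again in DefinePackages.cmake and their COLL_NOTHING wrappers removed from colls.h, each algorithm re-enters the generated selection table and can be chosen at run time the way the new tests do, e.g. --cfg smpi/allgather:bruck or --cfg smpi/allreduce:rab_rdb.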