From 56bcf2dd190eb7d40a0d2e542d2c8f51f7ebd350 Mon Sep 17 00:00:00 2001 From: Frederic Suter Date: Fri, 1 Jul 2016 09:00:07 +0200 Subject: [PATCH] chasing blockers in collectives (part 1) --- src/smpi/colls/allgather-pair.c | 6 ++--- src/smpi/colls/allgather-rdb.c | 6 ++--- src/smpi/colls/allgather-rhv.c | 5 ++-- src/smpi/colls/allgatherv-mpich-rdb.c | 14 ++++++----- src/smpi/colls/allgatherv-ompi-bruck.c | 8 +++--- src/smpi/colls/allgatherv-pair.c | 6 ++--- src/smpi/colls/allreduce-mvapich-rs.c | 25 ++++++++++--------- .../colls/allreduce-ompi-ring-segmented.c | 15 +++++------ src/smpi/colls/allreduce-rab-rdb.c | 9 ++++--- src/smpi/colls/allreduce-rab1.c | 9 ++++--- 10 files changed, 54 insertions(+), 49 deletions(-) diff --git a/src/smpi/colls/allgather-pair.c b/src/smpi/colls/allgather-pair.c index 46cde2dea5..6111f2820c 100644 --- a/src/smpi/colls/allgather-pair.c +++ b/src/smpi/colls/allgather-pair.c @@ -72,15 +72,15 @@ smpi_coll_tuned_allgather_pair(void *send_buff, int send_count, { MPI_Aint extent; - int i, src, dst, rank, num_procs; + unsigned int i, src, dst; int tag = COLL_TAG_ALLGATHER; MPI_Status status; char *send_ptr = (char *) send_buff; char *recv_ptr = (char *) recv_buff; - rank = smpi_comm_rank(comm); - num_procs = smpi_comm_size(comm); + unsigned int rank = smpi_comm_rank(comm); + unsigned int num_procs = smpi_comm_size(comm); if((num_procs&(num_procs-1))) THROWF(arg_error,0, "allgather pair algorithm can't be used with non power of two number of processes ! "); diff --git a/src/smpi/colls/allgather-rdb.c b/src/smpi/colls/allgather-rdb.c index 06e2f4bb78..84acf3d99f 100644 --- a/src/smpi/colls/allgather-rdb.c +++ b/src/smpi/colls/allgather-rdb.c @@ -17,7 +17,7 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, MPI_Aint send_chunk, recv_chunk; // local int variables - int i, j, k, dst, rank, num_procs, send_offset, recv_offset, tree_root; + unsigned int i, j, k, dst, send_offset, recv_offset, tree_root; int dst_tree_root, rank_tree_root, last_recv_count = 0, num_procs_completed; int offset, tmp_mask; int tag = COLL_TAG_ALLGATHER; @@ -30,8 +30,8 @@ smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count, char *recv_ptr = (char *) rbuf; // get size of the communicator, followed by rank - num_procs = smpi_comm_size(comm); - rank = smpi_comm_rank(comm); + unsigned int num_procs = smpi_comm_size(comm); + unsigned int rank = smpi_comm_rank(comm); // get size of single element's type for send buffer and recv buffer send_chunk = smpi_datatype_get_extent(send_type); diff --git a/src/smpi/colls/allgather-rhv.c b/src/smpi/colls/allgather-rhv.c index c2e0cb43ff..3aaf479d9a 100644 --- a/src/smpi/colls/allgather-rhv.c +++ b/src/smpi/colls/allgather-rhv.c @@ -20,18 +20,17 @@ smpi_coll_tuned_allgather_rhv(void *sbuf, int send_count, // local int variables int i, dst, send_base_offset, recv_base_offset, send_chunk, recv_chunk, send_offset, recv_offset; - int rank, num_procs; int tag = COLL_TAG_ALLGATHER; int mask; int curr_count; // get size of the communicator, followed by rank - num_procs = smpi_comm_size(comm); + unsigned int num_procs = smpi_comm_size(comm); if((num_procs&(num_procs-1))) THROWF(arg_error,0, "allgather rhv algorithm can't be used with non power of two number of processes ! "); - rank = smpi_comm_rank(comm); + unsigned int rank = smpi_comm_rank(comm); // get size of single element's type for send buffer and recv buffer s_extent = smpi_datatype_get_extent(send_type); diff --git a/src/smpi/colls/allgatherv-mpich-rdb.c b/src/smpi/colls/allgatherv-mpich-rdb.c index fcf9079e5b..a3614e5f69 100644 --- a/src/smpi/colls/allgatherv-mpich-rdb.c +++ b/src/smpi/colls/allgatherv-mpich-rdb.c @@ -17,23 +17,24 @@ int smpi_coll_tuned_allgatherv_mpich_rdb ( MPI_Datatype recvtype, MPI_Comm comm) { - int comm_size, rank, j, i; + int j, i; MPI_Status status; MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb; int curr_cnt, dst, total_count; void *tmp_buf, *tmp_buf_rl; - int mask, dst_tree_root, my_tree_root, position, + unsigned int mask, dst_tree_root, my_tree_root, position, send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k, offset, tmp_mask, tree_root; - comm_size = smpi_comm_size(comm); - rank = smpi_comm_rank(comm); + unsigned int comm_size = smpi_comm_size(comm); + unsigned int rank = smpi_comm_rank(comm); total_count = 0; for (i=0; i= (TYPELNG)) && \ ((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \ size_t residual; \ @@ -152,8 +152,8 @@ if (0 != SPLIT_INDEX) { \ EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \ } \ - - #include "colls_private.h" + +#include "colls_private.h" int smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count, MPI_Datatype dtype, @@ -162,11 +162,12 @@ smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count, { int ret = MPI_SUCCESS; int line; - int rank, size, k, recv_from, send_to; + int k, recv_from, send_to; int early_blockcount, late_blockcount, split_rank; int segcount, max_segcount; int num_phases, phase; - int block_count, inbi; + int block_count; + unsigned int inbi; size_t typelng; char *tmpsend = NULL, *tmprecv = NULL; char *inbuf[2] = {NULL, NULL}; @@ -174,8 +175,8 @@ smpi_coll_tuned_allreduce_ompi_ring_segmented(void *sbuf, void *rbuf, int count, ptrdiff_t block_offset, max_real_segsize; MPI_Request reqs[2] = {NULL, NULL}; const size_t segsize = 1 << 20; /* 1 MB */ - size = smpi_comm_size(comm); - rank = smpi_comm_rank(comm); + unsigned int size = smpi_comm_size(comm); + unsigned int rank = smpi_comm_rank(comm); XBT_DEBUG("coll:tuned:allreduce_intra_ring_segmented rank %d, count %d", rank, count); diff --git a/src/smpi/colls/allreduce-rab-rdb.c b/src/smpi/colls/allreduce-rab-rdb.c index 5dce91e93b..7499785afb 100644 --- a/src/smpi/colls/allreduce-rab-rdb.c +++ b/src/smpi/colls/allreduce-rab-rdb.c @@ -10,15 +10,16 @@ int smpi_coll_tuned_allreduce_rab_rdb(void *sbuff, void *rbuff, int count, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { - int nprocs, rank, tag = COLL_TAG_ALLREDUCE; - int mask, dst, pof2, newrank, rem, newdst, i, + int tag = COLL_TAG_ALLREDUCE; + unsigned int mask, pof2; + int dst, newrank, rem, newdst, i, send_idx, recv_idx, last_idx, send_cnt, recv_cnt, *cnts, *disps; MPI_Aint extent; MPI_Status status; void *tmp_buf = NULL; - nprocs = smpi_comm_size(comm); - rank = smpi_comm_rank(comm); + unsigned int nprocs = smpi_comm_size(comm); + unsigned int rank = smpi_comm_rank(comm); extent = smpi_datatype_get_extent(dtype); tmp_buf = (void *) smpi_get_tmp_sendbuffer(count * extent); diff --git a/src/smpi/colls/allreduce-rab1.c b/src/smpi/colls/allreduce-rab1.c index 0a04e54b38..173d3e530d 100644 --- a/src/smpi/colls/allreduce-rab1.c +++ b/src/smpi/colls/allreduce-rab1.c @@ -14,13 +14,14 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, { MPI_Status status; MPI_Aint extent; - int tag = COLL_TAG_ALLREDUCE, rank, nprocs, send_size, newcnt, share; - int pof2 = 1, mask, send_idx, recv_idx, dst, send_cnt, recv_cnt; + int tag = COLL_TAG_ALLREDUCE, send_size, newcnt, share; + unsigned int pof2 = 1, mask; + int send_idx, recv_idx, dst, send_cnt, recv_cnt; void *recv, *tmp_buf; - rank = smpi_comm_rank(comm); - nprocs = smpi_comm_size(comm); + unsigned int rank = smpi_comm_rank(comm); + unsigned int nprocs = smpi_comm_size(comm); if((nprocs&(nprocs-1))) THROWF(arg_error,0, "allreduce rab1 algorithm can't be used with non power of two number of processes ! "); -- 2.20.1