1 #include "colls_private.h"
4 smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count,
5 MPI_Datatype send_type, void *rbuf,
6 int recv_count, MPI_Datatype recv_type,
11 MPI_Aint send_chunk, recv_chunk;
13 // local int variables
14 int i, j, k, dst, rank, num_procs, send_offset, recv_offset, tree_root;
15 int dst_tree_root, rank_tree_root, last_recv_count = 0, num_procs_completed;
17 int tag = COLL_TAG_ALLGATHER;
20 int curr_count = recv_count;
22 // local string variables
23 char *send_ptr = (char *) sbuf;
24 char *recv_ptr = (char *) rbuf;
26 // get size of the communicator, followed by rank
27 num_procs = smpi_comm_size(comm);
28 rank = smpi_comm_rank(comm);
30 // get size of single element's type for send buffer and recv buffer
31 send_chunk = smpi_datatype_get_extent(send_type);
32 recv_chunk = smpi_datatype_get_extent(recv_type);
34 // multiply size of each element by number of elements to send or recv
35 send_chunk *= send_count;
36 recv_chunk *= recv_count;
38 // perform a local copy
39 smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag,
40 recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag,
44 while (mask < num_procs) {
46 dst_tree_root = dst >> i;
48 rank_tree_root = rank >> i;
50 send_offset = rank_tree_root * send_chunk;
51 recv_offset = dst_tree_root * recv_chunk;
53 if (dst < num_procs) {
54 smpi_mpi_sendrecv(recv_ptr + send_offset, curr_count, send_type, dst,
55 tag, recv_ptr + recv_offset, mask * recv_count,
56 recv_type, dst, tag, comm, &status);
57 last_recv_count = smpi_mpi_get_count(&status, recv_type);
58 curr_count += last_recv_count;
61 if (dst_tree_root + mask > num_procs) {
62 num_procs_completed = num_procs - rank_tree_root - mask;
63 /* num_procs_completed is the number of processes in this
64 subtree that have all the data. Send data to others
65 in a tree fashion. First find root of current tree
66 that is being divided into two. k is the number of
67 least-significant bits in this process's rank that
68 must be zeroed out to find the rank of the root */
78 offset = recv_chunk * (rank_tree_root + mask);
82 dst = rank ^ tmp_mask;
84 tree_root = rank >> k;
87 /* send only if this proc has data and destination
88 doesn't have data. at any step, multiple processes
89 can send if they have the data */
91 && (rank < tree_root + num_procs_completed)
92 && (dst >= tree_root + num_procs_completed)) {
93 smpi_mpi_send(recv_ptr + offset, last_recv_count, recv_type, dst,
96 /* last_recv_cnt was set in the previous
97 receive. that's the amount of data to be
100 /* recv only if this proc. doesn't have data and sender
102 else if ((dst < rank)
103 && (dst < tree_root + num_procs_completed)
104 && (rank >= tree_root + num_procs_completed)) {
105 smpi_mpi_recv(recv_ptr + offset,
106 recv_count * num_procs_completed,
107 recv_type, dst, tag, comm, &status);
108 // num_procs_completed is also equal to the no. of processes
109 // whose data we don't have
110 last_recv_count = smpi_mpi_get_count(&status, recv_type);
111 curr_count += last_recv_count;