1 /* Copyright (c) 2013-2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include "colls_private.h"
10 smpi_coll_tuned_allgather_rdb(void *sbuf, int send_count,
11 MPI_Datatype send_type, void *rbuf,
12 int recv_count, MPI_Datatype recv_type,
17 MPI_Aint send_chunk, recv_chunk;
19 // local int variables
20 int i, j, k, dst, rank, num_procs, send_offset, recv_offset, tree_root;
21 int dst_tree_root, rank_tree_root, last_recv_count = 0, num_procs_completed;
23 int tag = COLL_TAG_ALLGATHER;
26 int curr_count = recv_count;
28 // local string variables
29 char *send_ptr = (char *) sbuf;
30 char *recv_ptr = (char *) rbuf;
32 // get size of the communicator, followed by rank
33 num_procs = smpi_comm_size(comm);
34 rank = smpi_comm_rank(comm);
36 // get size of single element's type for send buffer and recv buffer
37 send_chunk = smpi_datatype_get_extent(send_type);
38 recv_chunk = smpi_datatype_get_extent(recv_type);
40 // multiply size of each element by number of elements to send or recv
41 send_chunk *= send_count;
42 recv_chunk *= recv_count;
44 // perform a local copy
45 smpi_mpi_sendrecv(send_ptr, send_count, send_type, rank, tag,
46 recv_ptr + rank * recv_chunk, recv_count, recv_type, rank, tag,
50 while (mask < num_procs) {
52 dst_tree_root = dst >> i;
54 rank_tree_root = rank >> i;
56 send_offset = rank_tree_root * send_chunk;
57 recv_offset = dst_tree_root * recv_chunk;
59 if (dst < num_procs) {
60 smpi_mpi_sendrecv(recv_ptr + send_offset, curr_count, send_type, dst,
61 tag, recv_ptr + recv_offset, mask * recv_count,
62 recv_type, dst, tag, comm, &status);
63 last_recv_count = smpi_mpi_get_count(&status, recv_type);
64 curr_count += last_recv_count;
67 if (dst_tree_root + mask > num_procs) {
68 num_procs_completed = num_procs - rank_tree_root - mask;
69 /* num_procs_completed is the number of processes in this
70 subtree that have all the data. Send data to others
71 in a tree fashion. First find root of current tree
72 that is being divided into two. k is the number of
73 least-significant bits in this process's rank that
74 must be zeroed out to find the rank of the root */
84 offset = recv_chunk * (rank_tree_root + mask);
88 dst = rank ^ tmp_mask;
90 tree_root = rank >> k;
93 /* send only if this proc has data and destination
94 doesn't have data. at any step, multiple processes
95 can send if they have the data */
97 && (rank < tree_root + num_procs_completed)
98 && (dst >= tree_root + num_procs_completed)) {
99 smpi_mpi_send(recv_ptr + offset, last_recv_count, recv_type, dst,
102 /* last_recv_cnt was set in the previous
103 receive. that's the amount of data to be
106 /* recv only if this proc. doesn't have data and sender
108 else if ((dst < rank)
109 && (dst < tree_root + num_procs_completed)
110 && (rank >= tree_root + num_procs_completed)) {
111 smpi_mpi_recv(recv_ptr + offset,
112 recv_count * num_procs_completed,
113 recv_type, dst, tag, comm, &status);
114 // num_procs_completed is also equal to the no. of processes
115 // whose data we don't have
116 last_recv_count = smpi_mpi_get_count(&status, recv_type);
117 curr_count += last_recv_count;