/* Copyright (c) 2013-2023. The SimGrid Team. All rights reserved. */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */

/* Short or medium size message and power-of-two no. of processes. Use
 * recursive doubling algorithm */
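
/* In round t each rank exchanges everything it currently holds with partner
 * rank ^ (1 << t); with a power-of-two process count, ceil(log2(p)) rounds
 * leave every rank holding all blocks. E.g. with 4 ranks: round 0 pairs
 * 0-1 and 2-3, round 1 pairs 0-2 and 1-3. */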

#include "../colls_private.hpp"
#include "smpi_status.hpp"

namespace simgrid::smpi {

int allgatherv__mpich_rdb(
  const void *sendbuf,
  int sendcount,
  MPI_Datatype sendtype,
  void *recvbuf,
  const int *recvcounts,
  const int *displs,
  MPI_Datatype recvtype,
  MPI_Comm comm)
{
  unsigned int j, i;
  MPI_Status status;
  MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb;
  unsigned int curr_cnt, dst, total_count;
  unsigned int mask, dst_tree_root, my_tree_root, position,
      send_offset, recv_offset, last_recv_cnt = 0, nprocs_completed, k,
      offset, tmp_mask, tree_root;

  unsigned int comm_size = comm->size();
  unsigned int rank = comm->rank();

  total_count = 0;
  for (i=0; i<comm_size; i++)
    total_count += recvcounts[i];

  if (total_count == 0)
    return MPI_ERR_COUNT;

  recvtype_extent = recvtype->get_extent();

  /* need to receive contiguously into tmp_buf because
     displs could make the recvbuf noncontiguous */
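  /* e.g. recvcounts = {1,1,1} with displs = {0,4,8} leaves gaps in recvbuf,
     while tmp_buf packs the three blocks back to back */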

  recvtype->extent(&recvtype_true_lb, &recvtype_true_extent);

  unsigned char* tmp_buf_rl = smpi_get_tmp_sendbuffer(total_count * std::max(recvtype_true_extent, recvtype_extent));

  /* adjust for potential negative lower bound in datatype */
  unsigned char* tmp_buf = tmp_buf_rl - recvtype_true_lb;
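  /* a copy addressed at tmp_buf + n * recvtype_extent touches memory starting
     true_lb bytes later, i.e. at tmp_buf_rl + n * recvtype_extent, safely
     inside the allocation */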

  /* copy local data into right location in tmp_buf */
  position = 0;
  for (i=0; i<rank; i++)
    position += recvcounts[i];
  if (sendbuf != MPI_IN_PLACE) {
    Datatype::copy(sendbuf, sendcount, sendtype, tmp_buf + position * recvtype_extent, recvcounts[rank], recvtype);
  } else {
    /* if in_place specified, local data is found in recvbuf */
    Datatype::copy(static_cast<char*>(recvbuf) + displs[rank] * recvtype_extent, recvcounts[rank], recvtype,
                   tmp_buf + position * recvtype_extent, recvcounts[rank], recvtype);
  }
  curr_cnt = recvcounts[rank];
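  /* curr_cnt counts the elements this rank currently holds in tmp_buf and
     grows by whatever each exchange actually delivers */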

  mask = 0x1;
  i = 0;
  while (mask < comm_size) {
    dst = rank ^ mask;

    /* find offset into send and recv buffers. zero out
       the least significant "i" bits of rank and dst to
       find root of src and dst subtrees. Use ranks of
       roots as index to send from and recv into buffer */

    dst_tree_root = dst >> i;
    dst_tree_root <<= i;

    my_tree_root = rank >> i;
    my_tree_root <<= i;
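    /* e.g. in round i == 2, ranks 4..7 share tree root 4 (0b100):
       6 >> 2 == 1, then 1 << 2 == 4 */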

    if (dst < comm_size) {
      send_offset = 0;
      for (j=0; j<my_tree_root; j++)
        send_offset += recvcounts[j];

      recv_offset = 0;
      for (j=0; j<dst_tree_root; j++)
        recv_offset += recvcounts[j];

      Request::sendrecv(tmp_buf + send_offset * recvtype_extent, curr_cnt, recvtype, dst, COLL_TAG_ALLGATHERV,
                        tmp_buf + recv_offset * recvtype_extent, total_count - recv_offset, recvtype, dst,
                        COLL_TAG_ALLGATHERV, comm, &status);
      /* for convenience, recv is posted for a bigger amount
         than will be sent */
      last_recv_cnt = Status::get_count(&status, recvtype);
      curr_cnt += last_recv_cnt;
    }
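    /* each block lives at its final position in tmp_buf, so the merged
       subtrees need no further shuffling */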

    /* if some processes in this process's subtree in this step
       did not have any destination process to communicate with
       because of non-power-of-two, we need to send them the
       data that they would normally have received from those
       processes. That is, the haves in this subtree must send to
       the havenots. We use a logarithmic
       recursive-halving algorithm for this. */

    /* This part of the code will not currently be
       executed because we are not using recursive
       doubling for non power of two. Mark it as experimental
       so that it doesn't show up as red in the coverage
       tests. */

    /* --BEGIN EXPERIMENTAL-- */
    if (dst_tree_root + mask > comm_size) {
      nprocs_completed = comm_size - my_tree_root - mask;
      /* nprocs_completed is the number of processes in this
         subtree that have all the data. Send data to others
         in a tree fashion. First find root of current tree
         that is being divided into two. k is the number of
         least-significant bits in this process's rank that
         must be zeroed out to find the rank of the root */
      j = mask;
      k = 0;
      while (j) {
        j >>= 1;
        k++;
      }
      k--;
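      /* k is now log2(mask): e.g. mask == 0b1000 (8) gives k == 3 */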

      tmp_mask = mask >> 1;

      while (tmp_mask) {
        dst = rank ^ tmp_mask;

        tree_root = rank >> k;
        tree_root <<= k;

        /* send only if this proc has data and destination
           doesn't have data. at any step, multiple processes
           can send if they have the data */
        if ((dst > rank)
            && (rank < tree_root + nprocs_completed)
            && (dst >= tree_root + nprocs_completed)) {

          offset = 0;
          for (j=0; j<(my_tree_root+mask); j++)
            offset += recvcounts[j];
          offset *= recvtype_extent;

          Request::send(tmp_buf + offset, last_recv_cnt, recvtype, dst, COLL_TAG_ALLGATHERV, comm);
          /* last_recv_cnt was set in the previous
             receive. that's the amount of data to be
             sent now. */
        }
        /* recv only if this proc. doesn't have data and sender
           has data */
        else if ((dst < rank) &&
                 (dst < tree_root + nprocs_completed) &&
                 (rank >= tree_root + nprocs_completed)) {
          offset = 0;
          for (j=0; j<(my_tree_root+mask); j++)
            offset += recvcounts[j];

          Request::recv(tmp_buf + offset * recvtype_extent, total_count - offset, recvtype, dst, COLL_TAG_ALLGATHERV,
                        comm, &status);
          /* for convenience, recv is posted for a
             bigger amount than will be sent */
          last_recv_cnt = Status::get_count(&status, recvtype);
          curr_cnt += last_recv_cnt;
        }
        tmp_mask >>= 1;
        k--;
      }
    }
    /* --END EXPERIMENTAL-- */

    mask <<= 1;
    i++;
  }
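
  /* every rank now holds all total_count elements of the gather, ordered by
     rank, in tmp_buf */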
  /* copy data from tmp_buf to recvbuf */
  position = 0;
  for (j=0; j<comm_size; j++) {
    if ((sendbuf != MPI_IN_PLACE) || (j != rank)) {
      /* not necessary to copy if in_place and
         j==rank. otherwise copy. */
      Datatype::copy(tmp_buf + position * recvtype_extent, recvcounts[j], recvtype,
                     static_cast<char*>(recvbuf) + displs[j] * recvtype_extent, recvcounts[j], recvtype);
    }
    position += recvcounts[j];
  }

  smpi_free_tmp_buffer(tmp_buf_rl);
  return MPI_SUCCESS;
}

} // namespace simgrid::smpi