1 /* Copyright (c) 2013-2019. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
6 /* Short or medium size message and power-of-two no. of processes. Use
7 * recursive doubling algorithm */
9 #include "../colls_private.hpp"
10 #include "smpi_status.hpp"
/* MPI_Allgatherv via MPICH's recursive-doubling algorithm: every rank packs
 * its contribution into a contiguous temporary buffer, then in log2(P) rounds
 * exchanges everything it currently holds with the partner at distance `mask`
 * (dst = rank ^ mask), doubling the amount of data held each round; finally
 * the assembled result is scattered into recvbuf at the caller's displacements.
 * NOTE(review): this listing is elided — declarations of i/j/status and the
 * initializations of mask, position, total_count, send_offset, recv_offset,
 * offset and k live on lines not shown here; comments below that depend on
 * those lines are marked as assumptions to confirm against the full file. */
16 int Coll_allgatherv_mpich_rdb::allgatherv (
19   MPI_Datatype sendtype,
23   MPI_Datatype recvtype,
28   MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb;
/* NOTE(review): these counters are unsigned here (MPICH's reference code uses
 * int); watch for wrap-around in subtractions such as
 * comm_size - my_tree_root - mask below — confirm against the full file. */
29   unsigned int curr_cnt, dst, total_count;
/* tmp_buf_rl is the real allocation; tmp_buf is the lb-adjusted alias. */
30   void *tmp_buf, *tmp_buf_rl;
31   unsigned int mask, dst_tree_root, my_tree_root, position,
32     send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k,
33     offset, tmp_mask, tree_root;
35   unsigned int comm_size = comm->size();
36   unsigned int rank = comm->rank();
/* Total number of elements gathered over all ranks (sum of recvcounts);
 * presumably zero-initialized on an elided line — verify. */
39   for (i=0; i<comm_size; i++)
40     total_count += recvcounts[i];
45   recvtype_extent=recvtype->get_extent();
47   /* need to receive contiguously into tmp_buf because
48      displs could make the recvbuf noncontiguous */
/* True extent/lb describe the actual memory footprint of one element,
 * independent of artificial MPI_LB/MPI_UB markers in the type map. */
50   recvtype->extent(&recvtype_true_lb, &recvtype_true_extent);
/* Size with max(true extent, extent) so the buffer can hold total_count
 * elements whichever of the two measures is larger for this datatype. */
52   tmp_buf_rl = (void*)smpi_get_tmp_sendbuffer(total_count * std::max(recvtype_true_extent, recvtype_extent));
54   /* adjust for potential negative lower bound in datatype */
55   tmp_buf = (void *)((char*)tmp_buf_rl - recvtype_true_lb);
57   /* copy local data into right location in tmp_buf */
/* position = number of elements owned by ranks below us, i.e. the index of
 * our own slot in the contiguous tmp_buf; assumes position starts at 0 on an
 * elided line — verify. */
59   for (i=0; i<rank; i++)
60     position += recvcounts[i];
/* Local contribution comes from sendbuf, or — for MPI_IN_PLACE — from the
 * caller-specified slot of recvbuf (else-branch below). */
61   if (sendbuf != MPI_IN_PLACE)
63     Datatype::copy(sendbuf, sendcount, sendtype,
64                    ((char *)tmp_buf + position*
66                    recvcounts[rank], recvtype);
70   /* if in_place specified, local data is found in recvbuf */
71     Datatype::copy(((char *)recvbuf +
72                    displs[rank]*recvtype_extent),
73                    recvcounts[rank], recvtype,
74                    ((char *)tmp_buf + position*
76                    recvcounts[rank], recvtype);
/* curr_cnt tracks how many elements this rank currently holds in tmp_buf;
 * it roughly doubles every round of the exchange loop below. */
78   curr_cnt = recvcounts[rank];
/* Recursive-doubling exchange loop; mask presumably starts at 1 and the
 * partner dst = rank ^ mask is computed on elided lines — verify. */
82   while (mask < comm_size) {
85     /* find offset into send and recv buffers. zero out
86        the least significant "i" bits of rank and dst to
87        find root of src and dst subtrees. Use ranks of
88        roots as index to send from and recv into buffer */
90     dst_tree_root = dst >> i;
93     my_tree_root = rank >> i;
/* Partner exists only for dst < comm_size (guards the non-power-of-two
 * case, where some ranks have no partner this round). */
96     if (dst < comm_size) {
/* send_offset / recv_offset = element index of our subtree-root's data and
 * of the partner subtree's data in tmp_buf; both presumably reset to 0 each
 * iteration on elided lines — verify. */
98       for (j=0; j<my_tree_root; j++)
99         send_offset += recvcounts[j];
102       for (j=0; j<dst_tree_root; j++)
103         recv_offset += recvcounts[j];
/* Exchange everything we hold; the receive count (total_count - recv_offset)
 * is only an upper bound — the amount actually received is read from status
 * right below. */
105       Request::sendrecv(((char *)tmp_buf + send_offset * recvtype_extent),
106                         curr_cnt, recvtype, dst,
108                         ((char *)tmp_buf + recv_offset * recvtype_extent),
109                         total_count - recv_offset, recvtype, dst,
112       /* for convenience, recv is posted for a bigger amount
114       last_recv_cnt=Status::get_count(&status, recvtype);
115       curr_cnt += last_recv_cnt;
118     /* if some processes in this process's subtree in this step
119        did not have any destination process to communicate with
120        because of non-power-of-two, we need to send them the
121        data that they would normally have received from those
122        processes. That is, the haves in this subtree must send to
123        the havenots. We use a logarithmic
124        recursive-halfing algorithm for this. */
126     /* This part of the code will not currently be
127        executed because we are not using recursive
128        doubling for non power of two. Mark it as experimental
129        so that it doesn't show up as red in the coverage
132     /* --BEGIN EXPERIMENTAL-- */
133     if (dst_tree_root + mask > comm_size) {
/* NOTE(review): unsigned subtraction — if my_tree_root + mask could exceed
 * comm_size this wraps; presumably excluded by the algorithm's invariants
 * (dead code today per the comment above) — confirm. */
134       nprocs_completed = comm_size - my_tree_root - mask;
135       /* nprocs_completed is the number of processes in this
136          subtree that have all the data. Send data to others
137          in a tree fashion. First find root of current tree
138          that is being divided into two. k is the number of
139          least-significant bits in this process's rank that
140          must be zeroed out to find the rank of the root */
/* Recursive-halving patch-up: k is computed on elided lines; each pass
 * halves tmp_mask to pick a closer partner. */
149         tmp_mask = mask >> 1;
152         dst = rank ^ tmp_mask;
154         tree_root = rank >> k;
157         /* send only if this proc has data and destination
158            doesn't have data. at any step, multiple processes
159            can send if they have the data */
161             (rank < tree_root + nprocs_completed)
162             && (dst >= tree_root + nprocs_completed)) {
/* offset = byte position of the partner-subtree data we forward; presumably
 * reset to 0 on an elided line — verify. */
165           for (j=0; j<(my_tree_root+mask); j++)
166             offset += recvcounts[j];
/* Note: here offset is converted to BYTES (unlike the element-count offset
 * used in the receive branch below). */
167           offset *= recvtype_extent;
169           Request::send(((char *)tmp_buf + offset),
172                         COLL_TAG_ALLGATHERV, comm);
173           /* last_recv_cnt was set in the previous
174              receive. that's the amount of data to be
177         /* recv only if this proc. doesn't have data and sender
179         else if ((dst < rank) &&
180                  (dst < tree_root + nprocs_completed) &&
181                  (rank >= tree_root + nprocs_completed)) {
184           for (j=0; j<(my_tree_root+mask); j++)
185             offset += recvcounts[j];
/* Here offset is still in ELEMENTS; the byte conversion happens in the
 * pointer arithmetic below. Recv count is again only an upper bound. */
187           Request::recv(((char *)tmp_buf + offset * recvtype_extent),
188                         total_count - offset, recvtype,
189                         dst, COLL_TAG_ALLGATHERV,
191           /* for convenience, recv is posted for a
192              bigger amount than will be sent */
193           last_recv_cnt=Status::get_count(&status, recvtype);
194           curr_cnt += last_recv_cnt;
200     /* --END EXPERIMENTAL-- */
206   /* copy data from tmp_buf to recvbuf */
/* Unpack the fully-assembled contiguous buffer into recvbuf, honoring the
 * caller's displacements; position presumably reset to 0 on an elided
 * line — verify. */
208   for (j=0; j<comm_size; j++) {
209     if ((sendbuf != MPI_IN_PLACE) || (j != rank)) {
210       /* not necessary to copy if in_place and
211          j==rank. otherwise copy. */
212       Datatype::copy(((char *)tmp_buf + position*recvtype_extent),
213                      recvcounts[j], recvtype,
214                      ((char *)recvbuf + displs[j]*recvtype_extent),
215                      recvcounts[j], recvtype);
217     position += recvcounts[j];
/* Free the real allocation (tmp_buf_rl), not the lb-adjusted alias. */
220   smpi_free_tmp_buffer(tmp_buf_rl);