1 /* Copyright (c) 2013-2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 /* Short or medium size message and power-of-two no. of processes. Use
8 * recursive doubling algorithm */
9 #include "colls_private.h"
/* MPICH-derived MPI_Allgatherv using the recursive-doubling algorithm
 * (per the file header: short/medium messages, power-of-two process
 * counts), ported to SimGrid's SMPI collective layer.
 *
 * NOTE(review): this excerpt is incomplete -- the embedded listing numbers
 * skip lines.  The parameter list (presumably sendbuf, sendcount, recvbuf,
 * recvcounts, displs, comm), an MPI_Status declaration, initializations of
 * total_count, position, mask, i, dst, send_offset, recv_offset, offset and
 * k, several else branches and closing braces, and the final return are not
 * visible here.  Comments below describe only what the visible lines
 * establish; elided behavior is flagged as an assumption. */
10 int smpi_coll_tuned_allgatherv_mpich_rdb (
13 MPI_Datatype sendtype,
17 MPI_Datatype recvtype,
20 int comm_size, rank, j, i;
22 MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb;
23 int curr_cnt, dst, total_count;
24 void *tmp_buf, *tmp_buf_rl;
25 int mask, dst_tree_root, my_tree_root, position,
26 send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k,
27 offset, tmp_mask, tree_root;
29 comm_size = smpi_comm_size(comm);
30 rank = smpi_comm_rank(comm);
/* total_count = sum of every rank's receive count, i.e. the size of the
 * fully gathered result.  NOTE(review): the zero-initialization of
 * total_count is not visible in this excerpt -- presumably in an elided
 * line; confirm. */
33 for (i=0; i<comm_size; i++)
34 total_count += recvcounts[i];
/* NOTE(review): reference MPICH treats an all-empty gather as success;
 * returning MPI_ERR_COUNT here is a deviation -- confirm it is intended. */
36 if (total_count == 0) return MPI_ERR_COUNT;
38 recvtype_extent=smpi_datatype_get_extent( recvtype);
40 /* need to receive contiguously into tmp_buf because
41 displs could make the recvbuf noncontiguous */
43 smpi_datatype_extent(recvtype, &recvtype_true_lb, &recvtype_true_extent);
/* Scratch buffer sized with max(true extent, extent) per element so it can
 * hold total_count elements of recvtype regardless of padding/lb. */
45 tmp_buf_rl= (void*)smpi_get_tmp_sendbuffer(total_count*(max(recvtype_true_extent,recvtype_extent)));
47 /* adjust for potential negative lower bound in datatype */
48 tmp_buf = (void *)((char*)tmp_buf_rl - recvtype_true_lb);
50 /* copy local data into right location in tmp_buf */
/* position = sum of recvcounts of all lower ranks = this rank's element
 * offset within the packed tmp_buf.  NOTE(review): position's
 * zero-initialization is not visible in this excerpt -- confirm. */
52 for (i=0; i<rank; i++) position += recvcounts[i];
/* Stage our own contribution: from sendbuf normally, or (in the elided
 * else branch) from our slice of recvbuf when MPI_IN_PLACE was given. */
53 if (sendbuf != MPI_IN_PLACE)
55 smpi_datatype_copy(sendbuf, sendcount, sendtype,
56 ((char *)tmp_buf + position*
58 recvcounts[rank], recvtype);
62 /* if in_place specified, local data is found in recvbuf */
63 smpi_datatype_copy(((char *)recvbuf +
64 displs[rank]*recvtype_extent),
65 recvcounts[rank], recvtype,
66 ((char *)tmp_buf + position*
68 recvcounts[rank], recvtype);
/* curr_cnt tracks how many elements this rank currently holds in tmp_buf;
 * it grows by the amount actually received at each doubling step. */
70 curr_cnt = recvcounts[rank];
/* Main recursive-doubling loop: at each step, exchange everything held so
 * far with the partner rank ^ mask.  NOTE(review): the initializations of
 * mask (presumably 0x1), i (step counter) and dst (rank ^ mask), and the
 * end-of-loop updates (mask <<= 1, i++), are not visible -- confirm. */
74 while (mask < comm_size) {
77 /* find offset into send and recv buffers. zero out
78 the least significant "i" bits of rank and dst to
79 find root of src and dst subtrees. Use ranks of
80 roots as index to send from and recv into buffer */
82 dst_tree_root = dst >> i;
85 my_tree_root = rank >> i;
/* Partner exists only in the power-of-two portion of the communicator. */
88 if (dst < comm_size) {
/* send_offset/recv_offset: element offsets of the two subtree roots'
 * data in the packed buffer.  NOTE(review): their zero-initializations
 * are not visible in this excerpt. */
90 for (j=0; j<my_tree_root; j++)
91 send_offset += recvcounts[j];
94 for (j=0; j<dst_tree_root; j++)
95 recv_offset += recvcounts[j];
/* Exchange curr_cnt held elements; the receive is posted for the upper
 * bound (everything past recv_offset) and the true count is read back
 * from the status below.  NOTE(review): the tag and &status arguments
 * and the MPI_Status declaration are elided from this excerpt. */
97 smpi_mpi_sendrecv(((char *)tmp_buf + send_offset * recvtype_extent),
98 curr_cnt, recvtype, dst,
100 ((char *)tmp_buf + recv_offset * recvtype_extent),
101 total_count - recv_offset, recvtype, dst,
104 /* for convenience, recv is posted for a bigger amount
106 last_recv_cnt=smpi_mpi_get_count(&status, recvtype);
107 curr_cnt += last_recv_cnt;
110 /* if some processes in this process's subtree in this step
111 did not have any destination process to communicate with
112 because of non-power-of-two, we need to send them the
113 data that they would normally have received from those
114 processes. That is, the haves in this subtree must send to
115 the havenots. We use a logarithmic
116 recursive-halfing algorithm for this. */
118 /* This part of the code will not currently be
119 executed because we are not using recursive
120 doubling for non power of two. Mark it as experimental
121 so that it doesn't show up as red in the coverage
124 /* --BEGIN EXPERIMENTAL-- */
/* Non-power-of-two fixup: forward data down the incomplete subtree via
 * recursive halving.  NOTE(review): the computation of k (number of
 * low-order bits zeroed to find the subtree root) and the inner
 * while-loop header over tmp_mask are not visible in this excerpt. */
125 if (dst_tree_root + mask > comm_size) {
126 nprocs_completed = comm_size - my_tree_root - mask;
127 /* nprocs_completed is the number of processes in this
128 subtree that have all the data. Send data to others
129 in a tree fashion. First find root of current tree
130 that is being divided into two. k is the number of
131 least-significant bits in this process's rank that
132 must be zeroed out to find the rank of the root */
141 tmp_mask = mask >> 1;
144 dst = rank ^ tmp_mask;
146 tree_root = rank >> k;
149 /* send only if this proc has data and destination
150 doesn't have data. at any step, multiple processes
151 can send if they have the data */
/* NOTE(review): the first operand of this condition (presumably
 * dst > rank) is elided from this excerpt. */
153 (rank < tree_root + nprocs_completed)
154 && (dst >= tree_root + nprocs_completed)) {
/* offset: element offset of the partner subtree's block; scaled to
 * bytes here (unlike the recv branch, which scales at the call site).
 * NOTE(review): offset's zero-initialization is not visible. */
157 for (j=0; j<(my_tree_root+mask); j++)
158 offset += recvcounts[j];
159 offset *= recvtype_extent;
/* Forward exactly what we last received (last_recv_cnt, elided count
 * argument) to the have-not process. */
161 smpi_mpi_send(((char *)tmp_buf + offset),
164 COLL_TAG_ALLGATHERV, comm);
165 /* last_recv_cnt was set in the previous
166 receive. that's the amount of data to be
169 /* recv only if this proc. doesn't have data and sender
171 else if ((dst < rank) &&
172 (dst < tree_root + nprocs_completed) &&
173 (rank >= tree_root + nprocs_completed)) {
176 for (j=0; j<(my_tree_root+mask); j++)
177 offset += recvcounts[j];
/* Receive posted for the upper bound; actual count read from status. */
179 smpi_mpi_recv(((char *)tmp_buf + offset * recvtype_extent),
180 total_count - offset, recvtype,
181 dst, COLL_TAG_ALLGATHERV,
183 /* for convenience, recv is posted for a
184 bigger amount than will be sent */
185 last_recv_cnt=smpi_mpi_get_count(&status, recvtype);
186 curr_cnt += last_recv_cnt;
192 /* --END EXPERIMENTAL-- */
198 /* copy data from tmp_buf to recvbuf */
/* Unpack: walk the packed tmp_buf rank by rank, scattering each block to
 * the caller-specified displacement in recvbuf.  NOTE(review): the reset
 * of position to 0 before this loop is not visible in this excerpt. */
200 for (j=0; j<comm_size; j++) {
201 if ((sendbuf != MPI_IN_PLACE) || (j != rank)) {
202 /* not necessary to copy if in_place and
203 j==rank. otherwise copy. */
204 smpi_datatype_copy(((char *)tmp_buf + position*recvtype_extent),
205 recvcounts[j], recvtype,
206 ((char *)recvbuf + displs[j]*recvtype_extent),
207 recvcounts[j], recvtype);
209 position += recvcounts[j];
/* Release the scratch buffer via its real (un-adjusted) base pointer.
 * NOTE(review): the function's return statement is past the end of this
 * excerpt -- presumably MPI_SUCCESS; confirm. */
212 smpi_free_tmp_buffer(tmp_buf_rl);