1 /* Copyright (c) 2013-2014. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 /* Short or medium size message and power-of-two no. of processes. Use
8 * recursive doubling algorithm */
9 #include "colls_private.h"
/* MPICH-derived recursive-doubling MPI_Allgatherv for SMPI.
 * Strategy (as visible in this fragment): pack this rank's contribution
 * into a contiguous scratch buffer, repeatedly exchange the
 * currently-held chunk with a partner whose rank differs in one more
 * bit each round, then scatter the fully assembled vector back into
 * recvbuf at the caller-supplied displacements.
 *
 * NOTE(review): this excerpt is NON-CONTIGUOUS -- the embedded original
 * line numbers (10, 13, 17, 22, ...) jump, so the full parameter list,
 * the declarations of i/j/status, the initializations of total_count,
 * position, mask, send_offset, recv_offset and offset, the loop-update
 * statements (mask <<= 1, i++, etc.), several closing braces and the
 * final return are NOT visible here.  Comments below describe only what
 * the visible lines establish; do not edit this code without the full
 * file. */
10 int smpi_coll_tuned_allgatherv_mpich_rdb (
13 MPI_Datatype sendtype,
17 MPI_Datatype recvtype,
/* Working state: datatype extents for byte-address arithmetic, the
 * scratch buffer (tmp_buf_rl = raw allocation, tmp_buf = lower-bound-
 * adjusted alias), and recursive-doubling bookkeeping (mask, subtree
 * roots, element offsets, element counts). */
22 MPI_Aint recvtype_extent, recvtype_true_extent, recvtype_true_lb;
23 int curr_cnt, dst, total_count;
24 void *tmp_buf, *tmp_buf_rl;
25 unsigned int mask, dst_tree_root, my_tree_root, position,
26 send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k,
27 offset, tmp_mask, tree_root;
29 unsigned int comm_size = smpi_comm_size(comm);
30 unsigned int rank = smpi_comm_rank(comm);
/* total_count = sum of all ranks' contributions, i.e. the element size
 * of the fully gathered vector (initialization of total_count is in an
 * elided line -- presumably 0; confirm in the full source). */
33 for (i=0; i<comm_size; i++)
34 total_count += recvcounts[i];
39 recvtype_extent=smpi_datatype_get_extent( recvtype);
41 /* need to receive contiguously into tmp_buf because
42 displs could make the recvbuf noncontiguous */
44 smpi_datatype_extent(recvtype, &recvtype_true_lb, &recvtype_true_extent);
/* Scratch sized with the larger of true/declared extent so either form
 * of address arithmetic stays in bounds. */
46 tmp_buf_rl= (void*)smpi_get_tmp_sendbuffer(total_count*(MAX(recvtype_true_extent,recvtype_extent)));
48 /* adjust for potential negative lower bound in datatype */
49 tmp_buf = (void *)((char*)tmp_buf_rl - recvtype_true_lb);
51 /* copy local data into right location in tmp_buf */
/* position = element offset of this rank's slot in the packed vector
 * (prefix sum of recvcounts; its zero-initialization is elided). */
53 for (i=0; i<rank; i++)
54 position += recvcounts[i];
55 if (sendbuf != MPI_IN_PLACE)
57 smpi_datatype_copy(sendbuf, sendcount, sendtype,
58 ((char *)tmp_buf + position*
60 recvcounts[rank], recvtype);
64 /* if in_place specified, local data is found in recvbuf */
65 smpi_datatype_copy(((char *)recvbuf +
66 displs[rank]*recvtype_extent),
67 recvcounts[rank], recvtype,
68 ((char *)tmp_buf + position*
70 recvcounts[rank], recvtype);
/* curr_cnt tracks how many elements this rank currently holds in
 * tmp_buf; it grows by the measured receive count each round. */
72 curr_cnt = recvcounts[rank];
/* Main recursive-doubling loop.  Each iteration pairs rank with
 * dst = rank ^ mask (the dst assignment and the mask/i updates are in
 * elided lines) and the partners swap everything they hold so far. */
76 while (mask < comm_size) {
79 /* find offset into send and recv buffers. zero out
80 the least significant "i" bits of rank and dst to
81 find root of src and dst subtrees. Use ranks of
82 roots as index to send from and recv into buffer */
84 dst_tree_root = dst >> i;
87 my_tree_root = rank >> i;
/* Partner exists only in the power-of-two portion of the communicator;
 * stragglers are handled by the experimental block below. */
90 if (dst < comm_size) {
/* send_offset/recv_offset = element offsets of the two subtrees' data
 * (prefix sums over recvcounts; zero-initializations are elided). */
92 for (j=0; j<my_tree_root; j++)
93 send_offset += recvcounts[j];
96 for (j=0; j<dst_tree_root; j++)
97 recv_offset += recvcounts[j];
/* Exchange: send the curr_cnt elements held so far; post the receive
 * for the whole remaining tail (total_count - recv_offset) since the
 * exact incoming count is not known a priori. */
99 smpi_mpi_sendrecv(((char *)tmp_buf + send_offset * recvtype_extent),
100 curr_cnt, recvtype, dst,
102 ((char *)tmp_buf + recv_offset * recvtype_extent),
103 total_count - recv_offset, recvtype, dst,
106 /* for convenience, recv is posted for a bigger amount
108 last_recv_cnt=smpi_mpi_get_count(&status, recvtype);
109 curr_cnt += last_recv_cnt;
112 /* if some processes in this process's subtree in this step
113 did not have any destination process to communicate with
114 because of non-power-of-two, we need to send them the
115 data that they would normally have received from those
116 processes. That is, the haves in this subtree must send to
117 the havenots. We use a logarithmic
118 recursive-halfing algorithm for this. */
120 /* This part of the code will not currently be
121 executed because we are not using recursive
122 doubling for non power of two. Mark it as experimental
123 so that it doesn't show up as red in the coverage
126 /* --BEGIN EXPERIMENTAL-- */
127 if (dst_tree_root + mask > comm_size) {
128 nprocs_completed = comm_size - my_tree_root - mask;
129 /* nprocs_completed is the number of processes in this
130 subtree that have all the data. Send data to others
131 in a tree fashion. First find root of current tree
132 that is being divided into two. k is the number of
133 least-significant bits in this process's rank that
134 must be zeroed out to find the rank of the root */
/* Recursive halving: tmp_mask shrinks each inner step (the enclosing
 * loop over tmp_mask and the computation of k are in elided lines). */
143 tmp_mask = mask >> 1;
146 dst = rank ^ tmp_mask;
148 tree_root = rank >> k;
151 /* send only if this proc has data and destination
152 doesn't have data. at any step, multiple processes
153 can send if they have the data */
/* NOTE(review): the opening of this if-condition (the line before
 * "(rank < ...)") is elided; presumably it also tests dst > rank --
 * confirm against the full source. */
155 (rank < tree_root + nprocs_completed)
156 && (dst >= tree_root + nprocs_completed)) {
/* offset = byte offset of the second half's data (offset's
 * zero-initialization is elided); converted to bytes here because
 * smpi_mpi_send below takes the raw char* without an extent scale. */
159 for (j=0; j<(my_tree_root+mask); j++)
160 offset += recvcounts[j];
161 offset *= recvtype_extent;
163 smpi_mpi_send(((char *)tmp_buf + offset),
166 COLL_TAG_ALLGATHERV, comm);
167 /* last_recv_cnt was set in the previous
168 receive. that's the amount of data to be
171 /* recv only if this proc. doesn't have data and sender
173 else if ((dst < rank) &&
174 (dst < tree_root + nprocs_completed) &&
175 (rank >= tree_root + nprocs_completed)) {
/* Receiver recomputes the same element offset (kept in elements here;
 * scaled by recvtype_extent inside the recv call below). */
178 for (j=0; j<(my_tree_root+mask); j++)
179 offset += recvcounts[j];
181 smpi_mpi_recv(((char *)tmp_buf + offset * recvtype_extent),
182 total_count - offset, recvtype,
183 dst, COLL_TAG_ALLGATHERV,
185 /* for convenience, recv is posted for a
186 bigger amount than will be sent */
187 last_recv_cnt=smpi_mpi_get_count(&status, recvtype);
188 curr_cnt += last_recv_cnt;
194 /* --END EXPERIMENTAL-- */
200 /* copy data from tmp_buf to recvbuf */
/* Unpack: walk the packed vector in rank order, copying each rank's
 * slice to recvbuf at its caller-supplied displacement (position is
 * advanced for every j, even when the copy is skipped for in-place). */
202 for (j=0; j<comm_size; j++) {
203 if ((sendbuf != MPI_IN_PLACE) || (j != rank)) {
204 /* not necessary to copy if in_place and
205 j==rank. otherwise copy. */
206 smpi_datatype_copy(((char *)tmp_buf + position*recvtype_extent),
207 recvcounts[j], recvtype,
208 ((char *)recvbuf + displs[j]*recvtype_extent),
209 recvcounts[j], recvtype);
211 position += recvcounts[j];
/* Free the RAW allocation (tmp_buf_rl), not the lb-adjusted alias. */
214 smpi_free_tmp_buffer(tmp_buf_rl);