Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
forgot one file
[simgrid.git] / src / smpi / colls / allgatherv-mpich-rdb.c
1         /* Short or medium size message and power-of-two no. of processes. Use
2          * recursive doubling algorithm */   
3 #include "colls_private.h"
4 #define MPIR_ALLGATHERV_TAG 222
5 int smpi_coll_tuned_allgatherv_mpich_rdb ( 
6     void *sendbuf,
7     int sendcount,
8     MPI_Datatype sendtype,
9     void *recvbuf,
10     int *recvcounts,
11     int *displs,
12     MPI_Datatype recvtype,
13     MPI_Comm comm)
14 {
15     int        comm_size, rank, j, i;
16     MPI_Status status;
17     MPI_Aint  recvtype_extent, recvtype_true_extent, recvtype_true_lb;
18     int curr_cnt, dst, total_count; 
19     void *tmp_buf;
20     int mask, dst_tree_root, my_tree_root, is_homogeneous, position,  
21         send_offset, recv_offset, last_recv_cnt=0, nprocs_completed, k,
22         offset, tmp_mask, tree_root;
23
24     comm_size = smpi_comm_size(comm);
25     rank = smpi_comm_rank(comm);
26     
27     total_count = 0;
28     for (i=0; i<comm_size; i++)
29         total_count += recvcounts[i];
30
31     if (total_count == 0) return MPI_ERR_COUNT;
32     
33     recvtype_extent=smpi_datatype_get_extent( recvtype);
34
35         is_homogeneous = 1;
36         
37         if (is_homogeneous) {
38             /* need to receive contiguously into tmp_buf because
39                displs could make the recvbuf noncontiguous */
40
41             smpi_datatype_extent(recvtype, &recvtype_true_lb, &recvtype_true_extent);
42
43             tmp_buf= (void*)xbt_malloc(total_count*(max(recvtype_true_extent,recvtype_extent)));
44
45             /* adjust for potential negative lower bound in datatype */
46             tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);
47
48             /* copy local data into right location in tmp_buf */ 
49             position = 0;
50             for (i=0; i<rank; i++) position += recvcounts[i];
51             if (sendbuf != MPI_IN_PLACE)
52             {
53                 smpi_datatype_copy(sendbuf, sendcount, sendtype,
54                                            ((char *)tmp_buf + position*
55                                             recvtype_extent), 
56                                            recvcounts[rank], recvtype);
57             }
58             else
59             {
60                 /* if in_place specified, local data is found in recvbuf */ 
61                 smpi_datatype_copy(((char *)recvbuf +
62                                             displs[rank]*recvtype_extent), 
63                                            recvcounts[rank], recvtype,
64                                            ((char *)tmp_buf + position*
65                                             recvtype_extent), 
66                                            recvcounts[rank], recvtype);
67     }
68             curr_cnt = recvcounts[rank];
69             
70             mask = 0x1;
71             i = 0;
72             while (mask < comm_size) {
73                 dst = rank ^ mask;
74                 
75                 /* find offset into send and recv buffers. zero out 
76                    the least significant "i" bits of rank and dst to 
77                    find root of src and dst subtrees. Use ranks of 
78                    roots as index to send from and recv into buffer */ 
79                 
80                 dst_tree_root = dst >> i;
81                 dst_tree_root <<= i;
82                 
83                 my_tree_root = rank >> i;
84                 my_tree_root <<= i;
85                 
86                 if (dst < comm_size) {
87                     send_offset = 0;
88                     for (j=0; j<my_tree_root; j++)
89                         send_offset += recvcounts[j];
90                     
91                     recv_offset = 0;
92                     for (j=0; j<dst_tree_root; j++)
93                         recv_offset += recvcounts[j];
94
95                     smpi_mpi_sendrecv(((char *)tmp_buf + send_offset * recvtype_extent),
96                                                  curr_cnt, recvtype, dst,
97                                                  MPIR_ALLGATHERV_TAG,  
98                                                  ((char *)tmp_buf + recv_offset * recvtype_extent),
99                                                  total_count - recv_offset, recvtype, dst,
100                                                  MPIR_ALLGATHERV_TAG,
101                                                  comm, &status);
102                         /* for convenience, recv is posted for a bigger amount
103                            than will be sent */
104                         last_recv_cnt=smpi_mpi_get_count(&status, recvtype);
105                     curr_cnt += last_recv_cnt;
106                 }
107                 
108                 /* if some processes in this process's subtree in this step
109                    did not have any destination process to communicate with
110                    because of non-power-of-two, we need to send them the
111                    data that they would normally have received from those
112                    processes. That is, the haves in this subtree must send to
113                    the havenots. We use a logarithmic
114                    recursive-halfing algorithm for this. */
115                 
116                 /* This part of the code will not currently be
117                  executed because we are not using recursive
118                  doubling for non power of two. Mark it as experimental
119                  so that it doesn't show up as red in the coverage
120                  tests. */  
121
122                 /* --BEGIN EXPERIMENTAL-- */
123                 if (dst_tree_root + mask > comm_size) {
124                     nprocs_completed = comm_size - my_tree_root - mask;
125                     /* nprocs_completed is the number of processes in this
126                        subtree that have all the data. Send data to others
127                        in a tree fashion. First find root of current tree
128                        that is being divided into two. k is the number of
129                        least-significant bits in this process's rank that
130                        must be zeroed out to find the rank of the root */ 
131                     j = mask;
132                     k = 0;
133                     while (j) {
134                         j >>= 1;
135                         k++;
136                     }
137                     k--;
138                     
139                     tmp_mask = mask >> 1;
140                     
141                     while (tmp_mask) {
142                         dst = rank ^ tmp_mask;
143                         
144                         tree_root = rank >> k;
145                         tree_root <<= k;
146                         
147                         /* send only if this proc has data and destination
148                            doesn't have data. at any step, multiple processes
149                            can send if they have the data */
150                         if ((dst > rank) && 
151                             (rank < tree_root + nprocs_completed)
152                             && (dst >= tree_root + nprocs_completed)) {
153
154                             offset = 0;
155                             for (j=0; j<(my_tree_root+mask); j++)
156                                 offset += recvcounts[j];
157                             offset *= recvtype_extent;
158
159                             smpi_mpi_send(((char *)tmp_buf + offset),
160                                                      last_recv_cnt,
161                                                      recvtype, dst,
162                                                      MPIR_ALLGATHERV_TAG, comm);
163                             /* last_recv_cnt was set in the previous
164                                receive. that's the amount of data to be
165                                sent now. */
166                         }
167                         /* recv only if this proc. doesn't have data and sender
168                            has data */
169                         else if ((dst < rank) && 
170                                  (dst < tree_root + nprocs_completed) &&
171                                  (rank >= tree_root + nprocs_completed)) {
172
173                             offset = 0;
174                             for (j=0; j<(my_tree_root+mask); j++)
175                                 offset += recvcounts[j];
176
177                             smpi_mpi_recv(((char *)tmp_buf + offset * recvtype_extent),
178                                                      total_count - offset, recvtype,
179                                                      dst, MPIR_ALLGATHERV_TAG,
180                                                      comm, &status);
181                                 /* for convenience, recv is posted for a
182                                    bigger amount than will be sent */
183                                 last_recv_cnt=smpi_mpi_get_count(&status, recvtype);
184                             curr_cnt += last_recv_cnt;
185                         }
186                         tmp_mask >>= 1;
187                         k--;
188                     }
189                 }
190                 /* --END EXPERIMENTAL-- */
191                 
192                 mask <<= 1;
193                 i++;
194             }
195
196             /* copy data from tmp_buf to recvbuf */
197             position = 0;
198             for (j=0; j<comm_size; j++) {
199                 if ((sendbuf != MPI_IN_PLACE) || (j != rank)) {
200                     /* not necessary to copy if in_place and
201                        j==rank. otherwise copy. */
202                     smpi_datatype_copy(((char *)tmp_buf + position*recvtype_extent),
203                                                recvcounts[j], recvtype,
204                                                ((char *)recvbuf + displs[j]*recvtype_extent),
205                                                recvcounts[j], recvtype);
206                 }
207                 position += recvcounts[j];
208             }
209         }
210 return MPI_SUCCESS;
211 }