Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Concatenate nested namespaces (sonar).
[simgrid.git] / src / smpi / colls / scatter / scatter-mvapich-two-level.cpp
1 /* Copyright (c) 2013-2022. The SimGrid Team.
2  * All rights reserved.                                                     */
3
4 /* This program is free software; you can redistribute it and/or modify it
5  * under the terms of the license (GNU LGPL) which comes with this package. */
6
7 /*
8  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
9  *                         University Research and Technology
10  *                         Corporation.  All rights reserved.
11  * Copyright (c) 2004-2009 The University of Tennessee and The University
12  *                         of Tennessee Research Foundation.  All rights
13  *                         reserved.
14  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
15  *                         University of Stuttgart.  All rights reserved.
16  * Copyright (c) 2004-2005 The Regents of the University of California.
17  *                         All rights reserved.
18  *
19  * Additional copyrights may follow
20  */
21  /* -*- Mode: C; c-basic-offset:4 ; -*- */
22 /* Copyright (c) 2001-2014, The Ohio State University. All rights
23  * reserved.
24  *
25  * This file is part of the MVAPICH2 software package developed by the
26  * team members of The Ohio State University's Network-Based Computing
27  * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
28  *
29  * For detailed copyright and licensing information, please refer to the
30  * copyright file COPYRIGHT in the top level MVAPICH2 directory.
31  */
32 /*
33  *
34  *  (C) 2001 by Argonne National Laboratory.
35  *      See COPYRIGHT in top-level directory.
36  */
#include "../colls_private.hpp"

#include <vector>
38
39 #define MPIR_Scatter_MV2_Binomial scatter__ompi_binomial
40 #define MPIR_Scatter_MV2_Direct scatter__ompi_basic_linear
41
42 extern int (*MV2_Scatter_intra_function) (const void *sendbuf, int sendcount, MPI_Datatype sendtype,
43     void *recvbuf, int recvcount, MPI_Datatype recvtype,
44     int root, MPI_Comm comm);
45
46 namespace simgrid::smpi {
47
48 int scatter__mvapich2_two_level_direct(const void *sendbuf,
49                                        int sendcnt,
50                                        MPI_Datatype sendtype,
51                                        void *recvbuf,
52                                        int recvcnt,
53                                        MPI_Datatype recvtype,
54                                        int root, MPI_Comm  comm)
55 {
56     int comm_size, rank;
57     int local_rank, local_size;
58     int leader_comm_rank = -1, leader_comm_size = -1;
59     int mpi_errno = MPI_SUCCESS;
60     int recvtype_size, sendtype_size, nbytes;
61     unsigned char* tmp_buf            = nullptr;
62     unsigned char* leader_scatter_buf = nullptr;
63     MPI_Status status;
64     int leader_root, leader_of_root = -1;
65     MPI_Comm shmem_comm, leader_comm;
66     //if not set (use of the algo directly, without mvapich2 selector)
67     if (MV2_Scatter_intra_function == nullptr)
68       MV2_Scatter_intra_function = scatter__mpich;
69
70     if(comm->get_leaders_comm()==MPI_COMM_NULL){
71       comm->init_smp();
72     }
73     comm_size = comm->size();
74     rank = comm->rank();
75
76     if (((rank == root) && (recvcnt == 0))
77         || ((rank != root) && (sendcnt == 0))) {
78         return MPI_SUCCESS;
79     }
80
81     /* extract the rank,size information for the intra-node
82      * communicator */
83     shmem_comm = comm->get_intra_comm();
84     local_rank = shmem_comm->rank();
85     local_size = shmem_comm->size();
86
87     if (local_rank == 0) {
88         /* Node leader. Extract the rank, size information for the leader
89          * communicator */
90         leader_comm = comm->get_leaders_comm();
91         leader_comm_size = leader_comm->size();
92         leader_comm_rank = leader_comm->rank();
93     }
94
95     if (local_size == comm_size) {
96         /* purely intra-node scatter. Just use the direct algorithm and we are done */
97         mpi_errno = MPIR_Scatter_MV2_Direct(sendbuf, sendcnt, sendtype,
98                                             recvbuf, recvcnt, recvtype,
99                                             root, comm);
100
101     } else {
102         recvtype_size=recvtype->size();
103         sendtype_size=sendtype->size();
104
105         if (rank == root) {
106             nbytes = sendcnt * sendtype_size;
107         } else {
108             nbytes = recvcnt * recvtype_size;
109         }
110
111         if (local_rank == 0) {
112             /* Node leader, allocate tmp_buffer */
113             tmp_buf = smpi_get_tmp_sendbuffer(nbytes * local_size);
114         }
115
116         leader_comm = comm->get_leaders_comm();
117         int* leaders_map = comm->get_leaders_map();
118         leader_of_root = comm->group()->rank(leaders_map[root]);
119         leader_root = leader_comm->group()->rank(leaders_map[root]);
120         /* leader_root is the rank of the leader of the root in leader_comm.
121          * leader_root is to be used as the root of the inter-leader gather ops
122          */
123
124         if ((local_rank == 0) && (root != rank)
125             && (leader_of_root == rank)) {
126             /* The root of the scatter operation is not the node leader. Recv
127              * data from the node leader */
128             leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size);
129             Request::recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE,
130                              root, COLL_TAG_SCATTER, comm, &status);
131
132         }
133
134         if (rank == root && local_rank != 0) {
135             /* The root of the scatter operation is not the node leader. Send
136              * data to the node leader */
137             Request::send(sendbuf, sendcnt * comm_size, sendtype,
138                                      leader_of_root, COLL_TAG_SCATTER, comm
139                                      );
140         }
141
142         if (leader_comm_size > 1 && local_rank == 0) {
143           if (not comm->is_uniform()) {
144             int* displs   = nullptr;
145             int* sendcnts = nullptr;
146             int* node_sizes;
147             int i      = 0;
148             node_sizes = comm->get_non_uniform_map();
149
150             if (root != leader_of_root) {
151               if (leader_comm_rank == leader_root) {
152                 displs      = new int[leader_comm_size];
153                 sendcnts    = new int[leader_comm_size];
154                 sendcnts[0] = node_sizes[0] * nbytes;
155                 displs[0]   = 0;
156
157                 for (i = 1; i < leader_comm_size; i++) {
158                   displs[i]   = displs[i - 1] + node_sizes[i - 1] * nbytes;
159                   sendcnts[i] = node_sizes[i] * nbytes;
160                 }
161               }
162               colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
163                               leader_root, leader_comm);
164             } else {
165               if (leader_comm_rank == leader_root) {
166                 displs      = new int[leader_comm_size];
167                 sendcnts    = new int[leader_comm_size];
168                 sendcnts[0] = node_sizes[0] * sendcnt;
169                 displs[0]   = 0;
170
171                 for (i = 1; i < leader_comm_size; i++) {
172                   displs[i]   = displs[i - 1] + node_sizes[i - 1] * sendcnt;
173                   sendcnts[i] = node_sizes[i] * sendcnt;
174                 }
175               }
176               colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
177                               leader_comm);
178             }
179             if (leader_comm_rank == leader_root) {
180               delete[] displs;
181               delete[] sendcnts;
182             }
183             } else {
184                 if (leader_of_root != root) {
185                     mpi_errno =
186                         MPIR_Scatter_MV2_Direct(leader_scatter_buf,
187                                                 nbytes * local_size, MPI_BYTE,
188                                                 tmp_buf, nbytes * local_size,
189                                                 MPI_BYTE, leader_root,
190                                                 leader_comm);
191                 } else {
192                     mpi_errno =
193                         MPIR_Scatter_MV2_Direct(sendbuf, sendcnt * local_size,
194                                                 sendtype, tmp_buf,
195                                                 nbytes * local_size, MPI_BYTE,
196                                                 leader_root, leader_comm);
197
198                 }
199             }
200         }
201         /* The leaders are now done with the inter-leader part. Scatter the data within the nodes */
202
203         if (rank == root && recvbuf == MPI_IN_PLACE) {
204             mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
205                                                 (void *)sendbuf, sendcnt, sendtype,
206                                                 0, shmem_comm);
207         } else {
208             mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
209                                                 recvbuf, recvcnt, recvtype,
210                                                 0, shmem_comm);
211         }
212     }
213
214     /* check if multiple threads are calling this collective function */
215     if (comm_size != local_size && local_rank == 0) {
216         smpi_free_tmp_buffer(tmp_buf);
217         if (leader_of_root == rank && root != rank) {
218             smpi_free_tmp_buffer(leader_scatter_buf);
219         }
220     }
221     return (mpi_errno);
222 }
223
224
225 int scatter__mvapich2_two_level_binomial(const void *sendbuf,
226                                          int sendcnt,
227                                          MPI_Datatype sendtype,
228                                          void *recvbuf,
229                                          int recvcnt,
230                                          MPI_Datatype recvtype,
231                                          int root, MPI_Comm comm)
232 {
233     int comm_size, rank;
234     int local_rank, local_size;
235     int leader_comm_rank = -1, leader_comm_size = -1;
236     int mpi_errno = MPI_SUCCESS;
237     int recvtype_size, sendtype_size, nbytes;
238     unsigned char* tmp_buf            = nullptr;
239     unsigned char* leader_scatter_buf = nullptr;
240     MPI_Status status;
241     int leader_root = -1, leader_of_root = -1;
242     MPI_Comm shmem_comm, leader_comm;
243
244
245     //if not set (use of the algo directly, without mvapich2 selector)
246     if (MV2_Scatter_intra_function == nullptr)
247       MV2_Scatter_intra_function = scatter__mpich;
248
249     if(comm->get_leaders_comm()==MPI_COMM_NULL){
250       comm->init_smp();
251     }
252     comm_size = comm->size();
253     rank = comm->rank();
254
255     if (((rank == root) && (recvcnt == 0))
256         || ((rank != root) && (sendcnt == 0))) {
257         return MPI_SUCCESS;
258     }
259
260     /* extract the rank,size information for the intra-node
261      * communicator */
262     shmem_comm = comm->get_intra_comm();
263     local_rank = shmem_comm->rank();
264     local_size = shmem_comm->size();
265
266     if (local_rank == 0) {
267         /* Node leader. Extract the rank, size information for the leader
268          * communicator */
269         leader_comm = comm->get_leaders_comm();
270         leader_comm_size = leader_comm->size();
271         leader_comm_rank = leader_comm->rank();
272     }
273
274     if (local_size == comm_size) {
275         /* purely intra-node scatter. Just use the direct algorithm and we are done */
276         mpi_errno = MPIR_Scatter_MV2_Direct(sendbuf, sendcnt, sendtype,
277                                             recvbuf, recvcnt, recvtype,
278                                             root, comm);
279
280     } else {
281         recvtype_size=recvtype->size();
282         sendtype_size=sendtype->size();
283
284         if (rank == root) {
285             nbytes = sendcnt * sendtype_size;
286         } else {
287             nbytes = recvcnt * recvtype_size;
288         }
289
290         if (local_rank == 0) {
291             /* Node leader, allocate tmp_buffer */
292             tmp_buf = smpi_get_tmp_sendbuffer(nbytes * local_size);
293         }
294         leader_comm = comm->get_leaders_comm();
295         int* leaders_map = comm->get_leaders_map();
296         leader_of_root = comm->group()->rank(leaders_map[root]);
297         leader_root = leader_comm->group()->rank(leaders_map[root]);
298         /* leader_root is the rank of the leader of the root in leader_comm.
299          * leader_root is to be used as the root of the inter-leader gather ops
300          */
301
302         if ((local_rank == 0) && (root != rank)
303             && (leader_of_root == rank)) {
304             /* The root of the scatter operation is not the node leader. Recv
305              * data from the node leader */
306             leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size);
307             Request::recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE,
308                              root, COLL_TAG_SCATTER, comm, &status);
309         }
310
311         if (rank == root && local_rank != 0) {
312             /* The root of the scatter operation is not the node leader. Send
313              * data to the node leader */
314             Request::send(sendbuf, sendcnt * comm_size, sendtype,
315                                      leader_of_root, COLL_TAG_SCATTER, comm);
316         }
317
318         if (leader_comm_size > 1 && local_rank == 0) {
319           if (not comm->is_uniform()) {
320             int* displs   = nullptr;
321             int* sendcnts = nullptr;
322             int* node_sizes;
323             int i      = 0;
324             node_sizes = comm->get_non_uniform_map();
325
326             if (root != leader_of_root) {
327               if (leader_comm_rank == leader_root) {
328                 displs      = new int[leader_comm_size];
329                 sendcnts    = new int[leader_comm_size];
330                 sendcnts[0] = node_sizes[0] * nbytes;
331                 displs[0]   = 0;
332
333                 for (i = 1; i < leader_comm_size; i++) {
334                   displs[i]   = displs[i - 1] + node_sizes[i - 1] * nbytes;
335                   sendcnts[i] = node_sizes[i] * nbytes;
336                 }
337               }
338               colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
339                               leader_root, leader_comm);
340             } else {
341               if (leader_comm_rank == leader_root) {
342                 displs      = new int[leader_comm_size];
343                 sendcnts    = new int[leader_comm_size];
344                 sendcnts[0] = node_sizes[0] * sendcnt;
345                 displs[0]   = 0;
346
347                 for (i = 1; i < leader_comm_size; i++) {
348                   displs[i]   = displs[i - 1] + node_sizes[i - 1] * sendcnt;
349                   sendcnts[i] = node_sizes[i] * sendcnt;
350                 }
351               }
352               colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
353                               leader_comm);
354             }
355             if (leader_comm_rank == leader_root) {
356               delete[] displs;
357               delete[] sendcnts;
358             }
359             } else {
360                 if (leader_of_root != root) {
361                     mpi_errno =
362                         MPIR_Scatter_MV2_Binomial(leader_scatter_buf,
363                                                   nbytes * local_size, MPI_BYTE,
364                                                   tmp_buf, nbytes * local_size,
365                                                   MPI_BYTE, leader_root,
366                                                   leader_comm);
367                 } else {
368                     mpi_errno =
369                         MPIR_Scatter_MV2_Binomial(sendbuf, sendcnt * local_size,
370                                                   sendtype, tmp_buf,
371                                                   nbytes * local_size, MPI_BYTE,
372                                                   leader_root, leader_comm);
373
374                 }
375             }
376         }
377         /* The leaders are now done with the inter-leader part. Scatter the data within the nodes */
378
379         if (rank == root && recvbuf == MPI_IN_PLACE) {
380             mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
381                                                 (void *)sendbuf, sendcnt, sendtype,
382                                                 0, shmem_comm);
383         } else {
384             mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
385                                                 recvbuf, recvcnt, recvtype,
386                                                 0, shmem_comm);
387         }
388
389     }
390
391
392     /* check if multiple threads are calling this collective function */
393     if (comm_size != local_size && local_rank == 0) {
394         smpi_free_tmp_buffer(tmp_buf);
395         if (leader_of_root == rank && root != rank) {
396             smpi_free_tmp_buffer(leader_scatter_buf);
397         }
398     }
399
400     return (mpi_errno);
401 }
402
403 } // namespace simgrid::smpi