1 /* Copyright (c) 2013-2017. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
9 * University Research and Technology
10 * Corporation. All rights reserved.
11 * Copyright (c) 2004-2009 The University of Tennessee and The University
12 * of Tennessee Research Foundation. All rights
14 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
15 * University of Stuttgart. All rights reserved.
16 * Copyright (c) 2004-2005 The Regents of the University of California.
17 * All rights reserved.
19 * Additional copyrights may follow
21 /* -*- Mode: C; c-basic-offset:4 ; -*- */
22 /* Copyright (c) 2001-2014, The Ohio State University. All rights
25 * This file is part of the MVAPICH2 software package developed by the
26 * team members of The Ohio State University's Network-Based Computing
27 * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
29 * For detailed copyright and licensing information, please refer to the
30 * copyright file COPYRIGHT in the top level MVAPICH2 directory.
34 * (C) 2001 by Argonne National Laboratory.
35 * See COPYRIGHT in top-level directory.
37 #include "../colls_private.h"
// Aliases: the MVAPICH2 helper names used in this file are mapped onto the
// scatter entry points of two other collective implementations.
39 #define MPIR_Scatter_MV2_Binomial Coll_scatter_ompi_binomial::scatter
40 #define MPIR_Scatter_MV2_Direct Coll_scatter_ompi_basic_linear::scatter
// Function pointer declared here and defined elsewhere. The scatter functions
// of this file set it to Coll_scatter_mpich::scatter when they find it NULL.
42 extern int (*MV2_Scatter_intra_function) (void *sendbuf, int sendcount, MPI_Datatype sendtype,
43 void *recvbuf, int recvcount, MPI_Datatype recvtype,
44 int root, MPI_Comm comm);
// Two-level scatter, direct flavour. The visible code works in two phases:
//   1. node leaders (local_rank == 0) exchange data over leader_comm, using
//      MPIR_Scatter_MV2_Direct, or Colls::scatterv when comm->is_uniform()
//      is false;
//   2. the data held in tmp_buf is then redistributed through
//      MV2_Scatter_intra_function (presumably on the intra-node communicator,
//      TODO confirm: the trailing arguments of those calls are not visible).
// When all ranks of comm share one node (local_size == comm_size) the call is
// forwarded to MPIR_Scatter_MV2_Direct with the caller arguments.
//
// sendbuf, sendcnt, sendtype: data and layout used on the root side.
// recvbuf, recvcnt, recvtype: destination and layout used on the other ranks;
//                             recvbuf is compared against MPI_IN_PLACE on root.
// root: rank in comm that holds the data to scatter.
// comm: communicator on which the scatter runs.
// Result: mpi_errno is filled from the Direct and intra-node calls; the return
// statement is not visible here, presumably it returns mpi_errno (TODO confirm).
49 int Coll_scatter_mvapich2_two_level_direct::scatter(void *sendbuf,
51 MPI_Datatype sendtype,
54 MPI_Datatype recvtype,
55 int root, MPI_Comm comm)
58 int local_rank, local_size;
59 int leader_comm_rank = -1, leader_comm_size = -1;
60 int mpi_errno = MPI_SUCCESS;
61 int recvtype_size, sendtype_size, nbytes;
63 void *leader_scatter_buf = NULL;
65 int leader_root, leader_of_root = -1;
66 MPI_Comm shmem_comm, leader_comm;
67 //if not set (use of the algo directly, without mvapich2 selector)
68 if(MV2_Scatter_intra_function==NULL)
69 MV2_Scatter_intra_function=Coll_scatter_mpich::scatter;
// The leaders communicator does not exist yet. The statement guarded by this
// test is not visible here; presumably it builds the SMP communicators
// (TODO confirm).
71 if(comm->get_leaders_comm()==MPI_COMM_NULL){
74 comm_size = comm->size();
// Zero-count test: recvcnt on root, sendcnt on the other ranks. The guarded
// statement is not visible here, presumably an early return (TODO confirm).
77 if (((rank == root) && (recvcnt == 0))
78 || ((rank != root) && (sendcnt == 0))) {
82 /* extract the rank,size information for the intra-node
84 shmem_comm = comm->get_intra_comm();
85 local_rank = shmem_comm->rank();
86 local_size = shmem_comm->size();
88 if (local_rank == 0) {
89 /* Node leader. Extract the rank, size information for the leader
91 leader_comm = comm->get_leaders_comm();
92 leader_comm_size = leader_comm->size();
93 leader_comm_rank = leader_comm->rank();
96 if (local_size == comm_size) {
97 /* purely intra-node scatter. Just use the direct algorithm and we are done */
98 mpi_errno = MPIR_Scatter_MV2_Direct(sendbuf, sendcnt, sendtype,
99 recvbuf, recvcnt, recvtype,
// Per-rank payload in bytes. Two assignments are visible, one from the send
// side and one from the receive side; the condition choosing between them is
// not visible here, presumably rank == root (TODO confirm).
103 recvtype_size=recvtype->size();
104 sendtype_size=sendtype->size();
107 nbytes = sendcnt * sendtype_size;
109 nbytes = recvcnt * recvtype_size;
112 if (local_rank == 0) {
113 /* Node leader, allocate tmp_buffer */
114 tmp_buf = smpi_get_tmp_sendbuffer(nbytes * local_size);
// leaders_map[root] is translated twice: into a rank of comm (leader_of_root)
// and into a rank of leader_comm (leader_root).
117 leader_comm = comm->get_leaders_comm();
118 int* leaders_map = comm->get_leaders_map();
119 leader_of_root = comm->group()->rank(leaders_map[root]);
120 leader_root = leader_comm->group()->rank(leaders_map[root]);
121 /* leader_root is the rank of the leader of the root in leader_comm.
122 * leader_root is to be used as the root of the inter-leader gather ops
125 if ((local_rank == 0) && (root != rank)
126 && (leader_of_root == rank)) {
127 /* The root of the scatter operation is not the node leader. Recv
128 * data from the node leader */
// NOTE(review): the recv below takes its data from root, not from a node
// leader. This rank is the leader of the root node and receives the whole
// send buffer as nbytes * comm_size raw bytes.
129 leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size);
130 Request::recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE,
131 root, COLL_TAG_SCATTER, comm, &status);
135 if (rank == root && local_rank != 0) {
136 /* The root of the scatter operation is not the node leader. Send
137 * data to the node leader */
// Matching send: sendcnt * comm_size elements of sendtype go to leader_of_root.
138 Request::send(sendbuf, sendcnt * comm_size, sendtype,
139 leader_of_root, COLL_TAG_SCATTER, comm
// Inter-leader phase: entered only by node leaders, and only when leader_comm
// holds more than one process.
143 if (leader_comm_size > 1 && local_rank == 0) {
// Non-uniform case: counts and displacements for a scatterv are derived from
// node_sizes (presumably the number of processes per node, TODO confirm).
144 if (not comm->is_uniform()) {
146 int* sendcnts = NULL;
149 node_sizes = comm->get_non_uniform_map();
// Root is not a node leader: the source is leader_scatter_buf and the counts
// and displacements are expressed in bytes.
151 if (root != leader_of_root) {
152 if (leader_comm_rank == leader_root) {
153 displs = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
154 sendcnts = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
155 sendcnts[0] = node_sizes[0] * nbytes;
158 for (i = 1; i < leader_comm_size; i++) {
159 displs[i] = displs[i - 1] + node_sizes[i - 1] * nbytes;
160 sendcnts[i] = node_sizes[i] * nbytes;
163 Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
164 leader_root, leader_comm);
// Other arm (presumably the else of the test on root != leader_of_root): the
// source is sendbuf and the counts and displacements are in sendtype elements.
166 if (leader_comm_rank == leader_root) {
167 displs = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
168 sendcnts = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
169 sendcnts[0] = node_sizes[0] * sendcnt;
172 for (i = 1; i < leader_comm_size; i++) {
173 displs[i] = displs[i - 1] + node_sizes[i - 1] * sendcnt;
174 sendcnts[i] = node_sizes[i] * sendcnt;
177 Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
// NOTE(review): displs and sendcnts come from xbt_malloc on the leader root.
// Their release is not visible here, presumably done under this test
// (TODO confirm).
180 if (leader_comm_rank == leader_root) {
// Branch used when the non-uniform test fails: each leader receives
// nbytes * local_size bytes into tmp_buf through MPIR_Scatter_MV2_Direct.
185 if (leader_of_root != root) {
187 MPIR_Scatter_MV2_Direct(leader_scatter_buf,
188 nbytes * local_size, MPI_BYTE,
189 tmp_buf, nbytes * local_size,
190 MPI_BYTE, leader_root,
194 MPIR_Scatter_MV2_Direct(sendbuf, sendcnt * local_size,
196 nbytes * local_size, MPI_BYTE,
197 leader_root, leader_comm);
202 /* The leaders are now done with the inter-leader part. Scatter the data within the nodes */
// With MPI_IN_PLACE on root, sendbuf is passed as the receive buffer of the
// intra-node call; otherwise recvbuf is used.
204 if (rank == root && recvbuf == MPI_IN_PLACE) {
205 mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
206 (void *)sendbuf, sendcnt, sendtype,
209 mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
210 recvbuf, recvcnt, recvtype,
215 /* check if multiple threads are calling this collective function */
// NOTE(review): the comment above does not describe the code that follows.
// The visible statements only release buffers: node leaders free tmp_buf, and
// the leader that received the data of a non-leader root also frees
// leader_scatter_buf.
216 if (comm_size != local_size && local_rank == 0) {
217 smpi_free_tmp_buffer(tmp_buf);
218 if (leader_of_root == rank && root != rank) {
219 smpi_free_tmp_buffer(leader_scatter_buf);
// Two-level scatter, binomial flavour. The visible code works in two phases:
//   1. node leaders (local_rank == 0) exchange data over leader_comm, using
//      MPIR_Scatter_MV2_Binomial, or Colls::scatterv when comm->is_uniform()
//      is false;
//   2. the data held in tmp_buf is then redistributed through
//      MV2_Scatter_intra_function (presumably on the intra-node communicator,
//      TODO confirm: the trailing arguments of those calls are not visible).
// When all ranks of comm share one node (local_size == comm_size) the call is
// forwarded to MPIR_Scatter_MV2_Direct with the caller arguments.
//
// sendbuf, sendcnt, sendtype: data and layout used on the root side.
// recvbuf, recvcnt, recvtype: destination and layout used on the other ranks;
//                             recvbuf is compared against MPI_IN_PLACE on root.
// root: rank in comm that holds the data to scatter.
// comm: communicator on which the scatter runs.
// Result: mpi_errno is filled from the Direct and intra-node calls; the return
// statement is not visible here, presumably it returns mpi_errno (TODO confirm).
226 int Coll_scatter_mvapich2_two_level_binomial::scatter(void *sendbuf,
228 MPI_Datatype sendtype,
231 MPI_Datatype recvtype,
232 int root, MPI_Comm comm)
235 int local_rank, local_size;
236 int leader_comm_rank = -1, leader_comm_size = -1;
237 int mpi_errno = MPI_SUCCESS;
238 int recvtype_size, sendtype_size, nbytes;
239 void *tmp_buf = NULL;
240 void *leader_scatter_buf = NULL;
242 int leader_root = -1, leader_of_root = -1;
243 MPI_Comm shmem_comm, leader_comm;
246 //if not set (use of the algo directly, without mvapich2 selector)
247 if(MV2_Scatter_intra_function==NULL)
248 MV2_Scatter_intra_function=Coll_scatter_mpich::scatter;
// The leaders communicator does not exist yet. The statement guarded by this
// test is not visible here; presumably it builds the SMP communicators
// (TODO confirm).
250 if(comm->get_leaders_comm()==MPI_COMM_NULL){
253 comm_size = comm->size();
// Zero-count test: recvcnt on root, sendcnt on the other ranks. The guarded
// statement is not visible here, presumably an early return (TODO confirm).
256 if (((rank == root) && (recvcnt == 0))
257 || ((rank != root) && (sendcnt == 0))) {
261 /* extract the rank,size information for the intra-node
263 shmem_comm = comm->get_intra_comm();
264 local_rank = shmem_comm->rank();
265 local_size = shmem_comm->size();
267 if (local_rank == 0) {
268 /* Node leader. Extract the rank, size information for the leader
270 leader_comm = comm->get_leaders_comm();
271 leader_comm_size = leader_comm->size();
272 leader_comm_rank = leader_comm->rank();
275 if (local_size == comm_size) {
276 /* purely intra-node scatter. Just use the direct algorithm and we are done */
277 mpi_errno = MPIR_Scatter_MV2_Direct(sendbuf, sendcnt, sendtype,
278 recvbuf, recvcnt, recvtype,
// Per-rank payload in bytes. Two assignments are visible, one from the send
// side and one from the receive side; the condition choosing between them is
// not visible here, presumably rank == root (TODO confirm).
282 recvtype_size=recvtype->size();
283 sendtype_size=sendtype->size();
286 nbytes = sendcnt * sendtype_size;
288 nbytes = recvcnt * recvtype_size;
291 if (local_rank == 0) {
292 /* Node leader, allocate tmp_buffer */
293 tmp_buf = smpi_get_tmp_sendbuffer(nbytes * local_size);
// leaders_map[root] is translated twice: into a rank of comm (leader_of_root)
// and into a rank of leader_comm (leader_root).
295 leader_comm = comm->get_leaders_comm();
296 int* leaders_map = comm->get_leaders_map();
297 leader_of_root = comm->group()->rank(leaders_map[root]);
298 leader_root = leader_comm->group()->rank(leaders_map[root]);
299 /* leader_root is the rank of the leader of the root in leader_comm.
300 * leader_root is to be used as the root of the inter-leader gather ops
303 if ((local_rank == 0) && (root != rank)
304 && (leader_of_root == rank)) {
305 /* The root of the scatter operation is not the node leader. Recv
306 * data from the node leader */
// NOTE(review): the recv below takes its data from root, not from a node
// leader. This rank is the leader of the root node and receives the whole
// send buffer as nbytes * comm_size raw bytes.
307 leader_scatter_buf = smpi_get_tmp_sendbuffer(nbytes * comm_size);
308 Request::recv(leader_scatter_buf, nbytes * comm_size, MPI_BYTE,
309 root, COLL_TAG_SCATTER, comm, &status);
312 if (rank == root && local_rank != 0) {
313 /* The root of the scatter operation is not the node leader. Send
314 * data to the node leader */
// Matching send: sendcnt * comm_size elements of sendtype go to leader_of_root.
315 Request::send(sendbuf, sendcnt * comm_size, sendtype,
316 leader_of_root, COLL_TAG_SCATTER, comm);
// Inter-leader phase: entered only by node leaders, and only when leader_comm
// holds more than one process.
319 if (leader_comm_size > 1 && local_rank == 0) {
// Non-uniform case: counts and displacements for a scatterv are derived from
// node_sizes (presumably the number of processes per node, TODO confirm).
320 if (not comm->is_uniform()) {
322 int* sendcnts = NULL;
325 node_sizes = comm->get_non_uniform_map();
// Root is not a node leader: the source is leader_scatter_buf and the counts
// and displacements are expressed in bytes.
327 if (root != leader_of_root) {
328 if (leader_comm_rank == leader_root) {
329 displs = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
330 sendcnts = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
331 sendcnts[0] = node_sizes[0] * nbytes;
334 for (i = 1; i < leader_comm_size; i++) {
335 displs[i] = displs[i - 1] + node_sizes[i - 1] * nbytes;
336 sendcnts[i] = node_sizes[i] * nbytes;
339 Colls::scatterv(leader_scatter_buf, sendcnts, displs, MPI_BYTE, tmp_buf, nbytes * local_size, MPI_BYTE,
340 leader_root, leader_comm);
// Other arm (presumably the else of the test on root != leader_of_root): the
// source is sendbuf and the counts and displacements are in sendtype elements.
342 if (leader_comm_rank == leader_root) {
343 displs = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
344 sendcnts = static_cast<int*>(xbt_malloc(sizeof(int) * leader_comm_size));
345 sendcnts[0] = node_sizes[0] * sendcnt;
348 for (i = 1; i < leader_comm_size; i++) {
349 displs[i] = displs[i - 1] + node_sizes[i - 1] * sendcnt;
350 sendcnts[i] = node_sizes[i] * sendcnt;
353 Colls::scatterv(sendbuf, sendcnts, displs, sendtype, tmp_buf, nbytes * local_size, MPI_BYTE, leader_root,
// NOTE(review): displs and sendcnts come from xbt_malloc on the leader root.
// Their release is not visible here, presumably done under this test
// (TODO confirm).
356 if (leader_comm_rank == leader_root) {
// Branch used when the non-uniform test fails: each leader receives
// nbytes * local_size bytes into tmp_buf through MPIR_Scatter_MV2_Binomial.
361 if (leader_of_root != root) {
363 MPIR_Scatter_MV2_Binomial(leader_scatter_buf,
364 nbytes * local_size, MPI_BYTE,
365 tmp_buf, nbytes * local_size,
366 MPI_BYTE, leader_root,
370 MPIR_Scatter_MV2_Binomial(sendbuf, sendcnt * local_size,
372 nbytes * local_size, MPI_BYTE,
373 leader_root, leader_comm);
378 /* The leaders are now done with the inter-leader part. Scatter the data within the nodes */
// With MPI_IN_PLACE on root, sendbuf is passed as the receive buffer of the
// intra-node call; otherwise recvbuf is used.
380 if (rank == root && recvbuf == MPI_IN_PLACE) {
381 mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
382 (void *)sendbuf, sendcnt, sendtype,
385 mpi_errno = MV2_Scatter_intra_function(tmp_buf, nbytes, MPI_BYTE,
386 recvbuf, recvcnt, recvtype,
393 /* check if multiple threads are calling this collective function */
// NOTE(review): the comment above does not describe the code that follows.
// The visible statements only release buffers: node leaders free tmp_buf, and
// the leader that received the data of a non-leader root also frees
// leader_scatter_buf.
394 if (comm_size != local_size && local_rank == 0) {
395 smpi_free_tmp_buffer(tmp_buf);
396 if (leader_of_root == rank && root != rank) {
397 smpi_free_tmp_buffer(leader_scatter_buf);