src/smpi/colls/bcast/bcast-mvapich-smp.cpp
/* Copyright (c) 2013-2017. The SimGrid Team.
 * All rights reserved.                                                     */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */

/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2009 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 *
 * Additional copyrights may follow
 */
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* Copyright (c) 2001-2014, The Ohio State University. All rights
 * reserved.
 *
 * This file is part of the MVAPICH2 software package developed by the
 * team members of The Ohio State University's Network-Based Computing
 * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
 *
 * For detailed copyright and licensing information, please refer to the
 * copyright file COPYRIGHT in the top level MVAPICH2 directory.
 */
/*
 *
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
#include "../colls_private.h"

extern int (*MV2_Bcast_function) (void *buffer, int count, MPI_Datatype datatype,
                                  int root, MPI_Comm comm_ptr);

extern int (*MV2_Bcast_intra_node_function) (void *buffer, int count, MPI_Datatype datatype,
                                             int root, MPI_Comm comm_ptr);

extern int zcpy_knomial_factor;
extern int mv2_pipelined_zcpy_knomial_factor;
extern int bcast_segment_size;
extern int mv2_inter_node_knomial_factor;
extern int mv2_intra_node_knomial_factor;
extern int mv2_bcast_two_level_system_size;
#define INTRA_NODE_ROOT 0

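/* Map the MPIR_* helper names used by the original MVAPICH2 code onto the
 * broadcast implementations available in SimGrid's SMPI collectives layer. */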
#define MPIR_Pipelined_Bcast_Zcpy_MV2 Coll_bcast_mpich::bcast
#define MPIR_Pipelined_Bcast_MV2 Coll_bcast_mpich::bcast
#define MPIR_Bcast_binomial_MV2 Coll_bcast_binomial_tree::bcast
#define MPIR_Bcast_scatter_ring_allgather_shm_MV2 Coll_bcast_scatter_LR_allgather::bcast
#define MPIR_Bcast_scatter_doubling_allgather_MV2 Coll_bcast_scatter_rdb_allgather::bcast
#define MPIR_Bcast_scatter_ring_allgather_MV2 Coll_bcast_scatter_LR_allgather::bcast
#define MPIR_Shmem_Bcast_MV2 Coll_bcast_mpich::bcast
#define MPIR_Bcast_tune_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Bcast_inter_node_helper_MV2 Coll_bcast_mvapich2_inter_node::bcast
#define MPIR_Knomial_Bcast_intra_node_MV2 Coll_bcast_mvapich2_knomial_intra_node::bcast
#define MPIR_Bcast_intra_MV2 Coll_bcast_mvapich2_intra_node::bcast

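/* MVAPICH2 tuning constants: communicator-size and message-size thresholds
 * (plus one algorithm switch) that drive the algorithm selection below. */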
#define mv2_bcast_two_level_system_size  64
#define mv2_bcast_short_msg             16384
#define mv2_bcast_large_msg            512*1024
#define mv2_knomial_intra_node_threshold 131072
#define mv2_scatter_rd_inter_leader_bcast 1

namespace simgrid{
namespace smpi{
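/* Inter-node phase of the two-level SMP-aware broadcast: if the root is not a
 * node leader, the message is first handed to the leader of the root's node;
 * the node leaders then broadcast it among themselves over leader_comm. */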
int Coll_bcast_mvapich2_inter_node::bcast(void *buffer,
                                          int count,
                                          MPI_Datatype datatype,
                                          int root,
                                          MPI_Comm  comm)
{
    int rank;
    int mpi_errno = MPI_SUCCESS;
    MPI_Comm shmem_comm, leader_comm;
    int local_rank, local_size, global_rank = -1;
    int leader_root, leader_of_root;

    rank = comm->rank();
    //comm_size = comm->size();

    if (MV2_Bcast_function == NULL) {
      MV2_Bcast_function = Coll_bcast_mpich::bcast;
    }

    if (MV2_Bcast_intra_node_function == NULL) {
      MV2_Bcast_intra_node_function = Coll_bcast_mpich::bcast;
    }

    if (comm->get_leaders_comm() == MPI_COMM_NULL) {
      comm->init_smp();
    }

    shmem_comm = comm->get_intra_comm();
    local_rank = shmem_comm->rank();
    local_size = shmem_comm->size();

    leader_comm = comm->get_leaders_comm();

    if ((local_rank == 0) && (local_size > 1)) {
      global_rank = leader_comm->rank();
    }

    int* leaders_map = comm->get_leaders_map();
    leader_of_root = comm->group()->rank(leaders_map[root]);
    leader_root = leader_comm->group()->rank(leaders_map[root]);

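    /* If the root is not the leader of its own node, hand the buffer over to
     * that leader so the leader-level broadcast below can start from leader_root. */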
    if (local_size > 1) {
        if ((local_rank == 0) && (root != rank) && (leader_root == global_rank)) {
            Request::recv(buffer, count, datatype, root,
                          COLL_TAG_BCAST, comm, MPI_STATUS_IGNORE);
        }
        if ((local_rank != 0) && (root == rank)) {
            Request::send(buffer, count, datatype,
                          leader_of_root, COLL_TAG_BCAST, comm);
        }
    }
#if defined(_MCST_SUPPORT_)
    if (comm_ptr->ch.is_mcast_ok) {
        mpi_errno = MPIR_Mcast_inter_node_MV2(buffer, count, datatype, root, comm_ptr,
                                              errflag);
        if (mpi_errno == MPI_SUCCESS) {
            goto fn_exit;
        } else {
            goto fn_fail;
        }
    }
#endif
/*
    if (local_rank == 0) {
        leader_comm = comm->get_leaders_comm();
        root = leader_root;
    }

    if (MV2_Bcast_function == &MPIR_Pipelined_Bcast_MV2) {
        mpi_errno = MPIR_Pipelined_Bcast_MV2(buffer, count, datatype,
                                             root, comm);
    } else if (MV2_Bcast_function == &MPIR_Bcast_scatter_ring_allgather_shm_MV2) {
        mpi_errno = MPIR_Bcast_scatter_ring_allgather_shm_MV2(buffer, count,
                                                              datatype, root,
                                                              comm);
    } else */{
        if (local_rank == 0) {
      /*      if (MV2_Bcast_function == &MPIR_Knomial_Bcast_inter_node_wrapper_MV2) {
                mpi_errno = MPIR_Knomial_Bcast_inter_node_wrapper_MV2(buffer, count,
                                                              datatype, root,
                                                              comm);
            } else {*/
                mpi_errno = MV2_Bcast_function(buffer, count, datatype,
                                               leader_root, leader_comm);
          //  }
        }
    }

    return mpi_errno;
}

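/* Intra-node k-nomial broadcast with radix mv2_intra_node_knomial_factor:
 * each rank first receives the buffer from its k-nomial parent, then forwards
 * it to up to (factor - 1) children per level using non-blocking sends. */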
int Coll_bcast_mvapich2_knomial_intra_node::bcast(void *buffer,
                                                  int count,
                                                  MPI_Datatype datatype,
                                                  int root, MPI_Comm  comm)
{
    int local_size = 0, rank;
    int mpi_errno = MPI_SUCCESS;
    MPI_Request *reqarray = NULL;
    MPI_Status *starray = NULL;
    int src, dst, mask, relative_rank;
    int k;
    if (MV2_Bcast_function == NULL) {
      MV2_Bcast_function = Coll_bcast_mpich::bcast;
    }

    if (MV2_Bcast_intra_node_function == NULL) {
      MV2_Bcast_intra_node_function = Coll_bcast_mpich::bcast;
    }

    if (comm->get_leaders_comm() == MPI_COMM_NULL) {
      comm->init_smp();
    }

    local_size = comm->size();
    rank = comm->rank();

    reqarray = (MPI_Request *)xbt_malloc(2 * mv2_intra_node_knomial_factor * sizeof(MPI_Request));

    starray = (MPI_Status *)xbt_malloc(2 * mv2_intra_node_knomial_factor * sizeof(MPI_Status));

    /* intra-node k-nomial bcast  */
    if (local_size > 1) {
        relative_rank = (rank >= root) ? rank - root : rank - root + local_size;
        mask = 0x1;

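        /* Phase 1: non-root ranks locate their k-nomial parent and receive the
         * buffer from it; the root falls through with mask grown past local_size. */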
        while (mask < local_size) {
            if (relative_rank % (mv2_intra_node_knomial_factor * mask)) {
                src = relative_rank / (mv2_intra_node_knomial_factor * mask) *
                    (mv2_intra_node_knomial_factor * mask) + root;
                if (src >= local_size) {
                    src -= local_size;
                }

                Request::recv(buffer, count, datatype, src,
                              COLL_TAG_BCAST, comm,
                              MPI_STATUS_IGNORE);
                break;
            }
            mask *= mv2_intra_node_knomial_factor;
        }
        mask /= mv2_intra_node_knomial_factor;

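        /* Phase 2: going back down the tree, forward the buffer to up to
         * (factor - 1) children per level with non-blocking sends, then wait
         * for all of them to complete before descending further. */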
        while (mask > 0) {
            int reqs = 0;
            for (k = 1; k < mv2_intra_node_knomial_factor; k++) {
                if (relative_rank + mask * k < local_size) {
                    dst = rank + mask * k;
                    if (dst >= local_size) {
                        dst -= local_size;
                    }
                    reqarray[reqs++] = Request::isend(buffer, count, datatype, dst,
                                                      COLL_TAG_BCAST, comm);
                }
            }
            Request::waitall(reqs, reqarray, starray);

            mask /= mv2_intra_node_knomial_factor;
        }
    }
    xbt_free(reqarray);
    xbt_free(starray);
    return mpi_errno;
}

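/* Entry point of the MVAPICH2 SMP-aware broadcast: choose between the
 * two-level (leader + shared-memory) scheme and the flat binomial or
 * scatter-allgather algorithms, based on message size and communicator size. */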
int Coll_bcast_mvapich2_intra_node::bcast(void *buffer,
                                          int count,
                                          MPI_Datatype datatype,
                                          int root, MPI_Comm  comm)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_size;
    int two_level_bcast = 1;
    size_t nbytes = 0;
    int is_homogeneous, is_contig;
    MPI_Aint type_size;
    void *tmp_buf = NULL;
    MPI_Comm shmem_comm;

    if (count == 0)
        return MPI_SUCCESS;
    if (MV2_Bcast_function == NULL) {
      MV2_Bcast_function = Coll_bcast_mpich::bcast;
    }

    if (MV2_Bcast_intra_node_function == NULL) {
      MV2_Bcast_intra_node_function = Coll_bcast_mpich::bcast;
    }

    if (comm->get_leaders_comm() == MPI_COMM_NULL) {
      comm->init_smp();
    }

    comm_size = comm->size();
    // rank = comm->rank();
/*
    if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN)*/
        is_contig = 1;
/*    else {
        MPID_Datatype_get_ptr(datatype, dtp);
        is_contig = dtp->is_contig;
    }
*/
    is_homogeneous = 1;
#ifdef MPID_HAS_HETERO
    if (comm_ptr->is_hetero)
        is_homogeneous = 0;
#endif

    /* MPI_Type_size() might not give the accurate size of the packed
     * datatype for heterogeneous systems (because of padding, encoding,
     * etc). On the other hand, MPI_Pack_size() can become very
     * expensive, depending on the implementation, especially for
     * heterogeneous systems. We want to use MPI_Type_size() wherever
     * possible, and MPI_Pack_size() in other places.
     */
    //if (is_homogeneous) {
        type_size = datatype->size();
    //}
/*    else {*/
/*        MPIR_Pack_size_impl(1, datatype, &type_size);*/
/*    }*/
    nbytes = (size_t) (count) * (type_size);
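    /* On small systems (comm_size <= mv2_bcast_two_level_system_size) the
     * two-level scheme is only used for mid-sized messages; larger systems
     * always take the two-level path. */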
    if (comm_size <= mv2_bcast_two_level_system_size) {
        if (nbytes > mv2_bcast_short_msg && nbytes < mv2_bcast_large_msg) {
            two_level_bcast = 1;
        } else {
            two_level_bcast = 0;
        }
    }

    if (two_level_bcast == 1
#if defined(_MCST_SUPPORT_)
            || comm_ptr->ch.is_mcast_ok
#endif
        ) {

        if (not is_contig || not is_homogeneous) {
            tmp_buf = (void*)smpi_get_tmp_sendbuffer(nbytes);

            /* TODO: Pipeline the packing and communication */
            // position = 0;
            /*            if (rank == root) {*/
            /*                mpi_errno =*/
            /*                    MPIR_Pack_impl(buffer, count, datatype, tmp_buf, nbytes, &position);*/
            /*                if (mpi_errno)*/
            /*                    MPIU_ERR_POP(mpi_errno);*/
            /*            }*/
        }

        shmem_comm = comm->get_intra_comm();
        if (not is_contig || not is_homogeneous) {
            mpi_errno = MPIR_Bcast_inter_node_helper_MV2(tmp_buf, nbytes, MPI_BYTE, root, comm);
        } else {
            mpi_errno =
                MPIR_Bcast_inter_node_helper_MV2(buffer, count, datatype, root,
                                                 comm);
        }

        /* We are now done with the inter-node phase */
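        /* Intra-node phase: messages up to mv2_knomial_intra_node_threshold use
         * the plain shared-memory bcast, larger ones the k-nomial tree over
         * shmem_comm. */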
        if (nbytes <= mv2_knomial_intra_node_threshold) {
            if (not is_contig || not is_homogeneous) {
                mpi_errno = MPIR_Shmem_Bcast_MV2(tmp_buf, nbytes, MPI_BYTE, root, shmem_comm);
            } else {
                mpi_errno = MPIR_Shmem_Bcast_MV2(buffer, count, datatype,
                                                 root, shmem_comm);
            }
        } else {
            if (not is_contig || not is_homogeneous) {
                mpi_errno = MPIR_Knomial_Bcast_intra_node_MV2(tmp_buf, nbytes, MPI_BYTE, INTRA_NODE_ROOT, shmem_comm);
            } else {
                mpi_errno =
                    MPIR_Knomial_Bcast_intra_node_MV2(buffer, count,
                                                      datatype,
                                                      INTRA_NODE_ROOT,
                                                      shmem_comm);
            }
        }

    } else {
        if (nbytes <= mv2_bcast_short_msg) {
            mpi_errno = MPIR_Bcast_binomial_MV2(buffer, count, datatype, root,
                                                comm);
        } else {
            if (mv2_scatter_rd_inter_leader_bcast) {
                mpi_errno = MPIR_Bcast_scatter_ring_allgather_MV2(buffer, count,
                                                                  datatype,
                                                                  root,
                                                                  comm);
            } else {
                mpi_errno =
                    MPIR_Bcast_scatter_doubling_allgather_MV2(buffer, count,
                                                              datatype, root,
                                                              comm);
            }
        }
    }

    return mpi_errno;
}

}
}