1 /* Copyright (c) 2013-2018. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
8 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
9 * University Research and Technology
10 * Corporation. All rights reserved.
11 * Copyright (c) 2004-2012 The University of Tennessee and The University
12 * of Tennessee Research Foundation. All rights
14 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
15 * University of Stuttgart. All rights reserved.
16 * Copyright (c) 2004-2005 The Regents of the University of California.
17 * All rights reserved.
18 * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
19 * Copyright (c) 2009 University of Houston. All rights reserved.
21 * Additional copyrights may follow
26 * (C) 2001 by Argonne National Laboratory.
27 * See COPYRIGHT in top-level directory.
29 /* Copyright (c) 2001-2014, The Ohio State University. All rights
32 * This file is part of the MVAPICH2 software package developed by the
33 * team members of The Ohio State University's Network-Based Computing
34 * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda.
36 * For detailed copyright and licensing information, please refer to the
37 * copyright file COPYRIGHT in the top level MVAPICH2 directory.
41 #include "../colls_private.hpp"
/* Radix settings for the k-nomial reduce. Both variables are only declared
 * here (extern); their definitions are not part of this file excerpt. */
44 extern int mv2_reduce_intra_knomial_factor;
45 extern int mv2_reduce_inter_knomial_factor;
/* Radix assigned to either factor above when reduce() finds it negative. */
47 #define SMPI_DEFAULT_KNOMIAL_FACTOR 4
49 // int mv2_reduce_knomial_factor = 2;
/* Works out where the calling process sits in a k-nomial reduction tree of
 * radix reduce_knomial_factor over comm.
 *
 * Results are written through the pointer arguments:
 *   dst                 - rank this process sends its partial result to; it is
 *                         only assigned inside the "has a destination" branch
 *   expected_send_count - value of send_iter at the end (its increment is not
 *                         visible in this listing)
 *   expected_recv_count - value of recv_iter at the end, i.e. the number of
 *                         entries written to the source array
 *   src_array           - buffer obtained from smpi_get_tmp_sendbuffer() that
 *                         holds the ranks to receive from; it is handed to the
 *                         caller and not released here in the visible code
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * assignments to rank and lroot, the resets of mask and recv_iter, the return
 * statement). The comments below describe only the visible statements. */
53 static int MPIR_Reduce_knomial_trace(int root, int reduce_knomial_factor,
54 MPI_Comm comm, int *dst, int *expected_send_count,
55 int *expected_recv_count, int **src_array)
57 int mask=0x1, k, comm_size, src, rank, relative_rank, lroot=0;
59 int recv_iter=0, send_iter=0;
60 int *knomial_reduce_src_array=NULL;
61 comm_size = comm->size();
// Rank expressed relative to lroot and wrapped modulo comm_size.
// NOTE(review): presumably lroot is set to root before this point - confirm.
65 relative_rank = (rank - lroot + comm_size) % comm_size;
67 /* First compute to whom we need to send data */
// mask starts at 1 and is multiplied by the radix once per tree level.
68 while (mask < comm_size) {
// Non-zero remainder: relative_rank is not a multiple of factor*mask at this
// level, so a destination is computed for this process.
69 if (relative_rank % (reduce_knomial_factor*mask)) {
// Destination = relative_rank rounded down to a multiple of factor*mask,
// then offset by root.
70 *dst = relative_rank/(reduce_knomial_factor*mask)*
71 (reduce_knomial_factor*mask)+root;
// Offsetting by root can exceed the last rank; the handling inside this
// branch is not visible here (presumably a wrap by comm_size - confirm).
72 if (*dst >= comm_size) {
78 mask *= reduce_knomial_factor;
// Step mask back by one radix factor after the search loop.
80 mask /= reduce_knomial_factor;
82 /* Now compute how many children we have in the knomial-tree */
// Candidate children sit at relative_rank + mask*k for k in [1, factor);
// only candidates that fall inside the communicator pass the test.
85 for(k=1;k<reduce_knomial_factor;k++) {
86 if (relative_rank + mask*k < comm_size) {
90 mask /= reduce_knomial_factor;
93 /* Finally, fill up the src array */
// Buffer sized for recv_iter ints. Any guard around this allocation is not
// visible in this listing.
95 knomial_reduce_src_array = static_cast<int*>(smpi_get_tmp_sendbuffer(sizeof(int)*recv_iter));
// Same candidate test as the counting pass above.
101 for(k=1;k<reduce_knomial_factor;k++) {
102 if (relative_rank + mask*k < comm_size) {
// The computation of src and the body of this range check are not visible here.
104 if (src >= comm_size) {
// recv_iter is used as the write index, so it is presumably reset to 0 after
// the counting pass (reset not visible - confirm).
107 knomial_reduce_src_array[recv_iter++] = src;
110 mask /= reduce_knomial_factor;
// Publish the results to the caller.
113 *expected_recv_count = recv_iter;
114 *expected_send_count = send_iter;
115 *src_array = knomial_reduce_src_array;
/* Reduce collective driven by the k-nomial tree that
 * MPIR_Reduce_knomial_trace() computes for the calling process.
 *
 * Visible flow: copy the local contribution into recvbuf, post one
 * non-blocking receive per expected source into its own scratch buffer, fold
 * each completed buffer into recvbuf with op, then send recvbuf to dst with
 * isend followed by waitall.
 *
 * Returns MPI_SUCCESS immediately when count is 0.
 *
 * NOTE(review): this listing omits some original lines (most of the parameter
 * list, braces, the conditions guarding several statements, the final
 * return). The comments below describe only the visible statements. */
121 int Coll_reduce_mvapich2_knomial::reduce (
125 MPI_Datatype datatype,
130 int mpi_errno = MPI_SUCCESS;
131 int rank, is_commutative;
133 MPI_Request send_request;
135 MPI_Aint true_lb, true_extent, extent;
// dst starts at -1 and is only overwritten by the trace when it computes a
// destination for this process.
137 int recv_iter=0, dst=-1, expected_send_count, expected_recv_count;
140 MPI_Request *requests=NULL;
// Nothing to reduce.
143 if (count == 0) return MPI_SUCCESS;
147 /* Create a temporary buffer */
// Lower bound and both extents of the datatype; the larger extent is used to
// size every scratch buffer below.
149 datatype->extent(&true_lb, &true_extent);
150 extent = datatype->get_extent();
// A null op is treated as commutative.
152 is_commutative = (op==MPI_OP_NULL || op->is_commutative());
// Scratch receive buffer, shifted by -true_lb; the free at the end of the
// function adds true_lb back. Any condition guarding this allocation is not
// visible in this listing.
155 recvbuf = (void*)smpi_get_tmp_recvbuffer(count * std::max(extent, true_extent));
156 recvbuf = (void *)((char*)recvbuf - true_lb);
// The local copy is skipped only for the root when it passed MPI_IN_PLACE.
159 if ((rank != root) || (sendbuf != MPI_IN_PLACE)) {
160 mpi_errno = Datatype::copy(sendbuf, count, datatype, recvbuf,
// Negative radix settings are replaced by the default.
165 if(mv2_reduce_intra_knomial_factor<0)
167 mv2_reduce_intra_knomial_factor = SMPI_DEFAULT_KNOMIAL_FACTOR;
// NOTE(review): the inter-node factor is defaulted here, but only the
// intra-node factor is passed to the trace in the visible code - confirm.
169 if(mv2_reduce_inter_knomial_factor<0)
171 mv2_reduce_inter_knomial_factor = SMPI_DEFAULT_KNOMIAL_FACTOR;
// Obtain dst, the send/receive counts and the list of source ranks.
175 MPIR_Reduce_knomial_trace(root, mv2_reduce_intra_knomial_factor, comm,
176 &dst, &expected_send_count, &expected_recv_count, &src_array);
178 if(expected_recv_count > 0 ) {
// One scratch buffer and one request slot per expected source.
179 tmp_buf = static_cast<void**>(xbt_malloc(sizeof(void *)*expected_recv_count));
180 requests = static_cast<MPI_Request*>(xbt_malloc(sizeof(MPI_Request)*expected_recv_count));
181 for(k=0; k < expected_recv_count; k++ ) {
182 tmp_buf[k] = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
// Same -true_lb shift as applied to recvbuf above.
183 tmp_buf[k] = (void *)((char*)tmp_buf[k] - true_lb);
// Post the receives; sources are taken from src_array in reverse order.
186 while(recv_iter < expected_recv_count) {
187 src = src_array[expected_recv_count - (recv_iter+1)];
189 requests[recv_iter]=Request::irecv (tmp_buf[recv_iter], count, datatype ,src,
190 COLL_TAG_REDUCE, comm);
// Second pass over the receives. recv_iter is reused as the loop counter, so
// it is presumably reset to 0 between the two loops (reset not visible).
196 while(recv_iter < expected_recv_count) {
// index selects which scratch buffer is folded into recvbuf below.
197 index=Request::waitany(expected_recv_count, requests,
// Only this commutative branch is visible, and a null op skips the apply.
// NOTE(review): confirm how non-commutative ops are handled.
201 if (is_commutative) {
202 if(op!=MPI_OP_NULL) op->apply( tmp_buf[index], recvbuf, &count, datatype);
// Release the per-source scratch buffers.
// NOTE(review): tmp_buf[k] was shifted by -true_lb when it was set up, and
// unlike the recvbuf free below the shift is not undone here - confirm this
// is correct for datatypes with a non-zero true_lb.
206 for(k=0; k < expected_recv_count; k++ ) {
207 smpi_free_tmp_buffer(tmp_buf[k]);
// src_array is null-checked here; the body of the check is not visible.
213 if(src_array != NULL) {
// Forward the partial result to dst and wait for that single send. The
// condition guarding the send is not visible in this listing.
218 send_request=Request::isend(recvbuf,count, datatype, dst,
219 COLL_TAG_REDUCE,comm);
221 Request::waitall(1, &send_request, &status);
// Undo the -true_lb shift before releasing the scratch receive buffer.
223 smpi_free_tmp_buffer((void *)((char*)recvbuf + true_lb));
226 /* --END ERROR HANDLING-- */