Exa.: size=13 ==> n=3, r=5 (i.e. size == 13 == 2**n+r == 2**3 + 5)
- The algorithm needs for the execution of one Colls::reduce
+ The algorithm needs for the execution of one colls::reduce
- for r==0
exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d)
7: { [(a+b)+(c+d)] + [(e+f)+(g+h)] } + { [(i+j)+k] + [l+m] } for H
-For Colls::allreduce:
+For colls::allreduce:
------------------
Step 6.1)
on all nodes 0..12
-For Colls::reduce:
+For colls::reduce:
---------------
Step 6.0)
otherwise the new protocol is used (see variable Ldb).
3) These lines show the bandwidth (= buffer length / execution time)
for both protocols.
- 4) This line shows that the limit is choosen well if the ratio is
- between 0.95 (loosing 5% for buffer length near and >=limit)
+ 4) This line shows that the limit is chosen well if the ratio is
+ between 0.95 (losing 5% for buffer length near and >=limit)
and 1.10 (not gaining 10% for buffer length near and <limit).
5) This line shows that the new protocol is 2..7 times faster
for long counts.
REDUCE_LIMITS
namespace simgrid{
namespace smpi{
-static int MPI_I_anyReduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype mpi_datatype, MPI_Op mpi_op, int root, MPI_Comm comm, int is_all)
+static int MPI_I_anyReduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype mpi_datatype, MPI_Op mpi_op,
+ int root, MPI_Comm comm, bool is_all)
{
char *scr1buf, *scr2buf, *scr3buf, *xxx, *sendbuf, *recvbuf;
int myrank, size, x_base, x_size, computed, idx;
} /* new_prot */
/*otherwise:*/
if (is_all)
- return( Colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) );
+ return (colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm));
else
- return( Colls::reduce(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) );
+ return (colls::reduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, root, comm));
}
#endif /*REDUCE_LIMITS*/
/* "rab" reduce entry point (this change renames Coll_reduce_rab::reduce to the
 * free function reduce__rab). It forwards Sendbuf, Recvbuf, count, datatype, op,
 * root and comm unchanged to MPI_I_anyReduce and returns its result.
 * The last argument is is_all: false (0 before this change) requests the
 * reduce variant rather than the allreduce one. */
-int Coll_reduce_rab::reduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
+int reduce__rab(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
{
- return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, root, comm, 0) );
+ return MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, root, comm, false);
}
/* "rab" allreduce entry point (this change renames Coll_allreduce_rab::allreduce
 * to the free function allreduce__rab). It forwards Sendbuf, Recvbuf, count,
 * datatype, op and comm to MPI_I_anyReduce and returns its result.
 * is_all is passed as true (1 before this change). The root argument is passed
 * as -1, presumably a placeholder because this entry point takes no root
 * parameter -- TODO confirm against the body of MPI_I_anyReduce. */
-int Coll_allreduce_rab::allreduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+int allreduce__rab(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
- return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, -1, comm, 1) );
+ return MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, -1, comm, true);
}
}
}