/* Fast reduce and allreduce algorithm for longer buffers and predefined
operations.
- This algorithm is explaned with the example of 13 nodes.
+ This algorithm is explained with the example of 13 nodes.
The nodes are numbered 0, 1, 2, ... 12.
The sendbuf content is a, b, c, ... m.
The buffer array is notated with ABCDEFGH, this means that
Exa.: size=13 ==> n=3, r=5 (i.e. size == 13 == 2**n+r == 2**3 + 5)
- The algoritm needs for the execution of one Colls::reduce
+ The algorithm needs for the execution of one Colls::reduce
- for r==0
exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d)
2) This line shows the limit for the count argument.
If count < limit then the vendor protocol is used,
otherwise the new protocol is used (see variable Ldb).
- 3) These lines show the bandwidth (=bufer length / execution time)
+ 3) These lines show the bandwidth (= buffer length / execution time)
for both protocols.
     4) This line shows that the limit is chosen well if the ratio is
        between 0.95 (losing 5% for buffer length near and >=limit)
#endif
#endif
-typedef enum {MPIM_SHORT, MPIM_INT, MPIM_LONG, MPIM_UNSIGNED_SHORT,
- MPIM_UNSIGNED, MPIM_UNSIGNED_LONG, MPIM_UNSIGNED_LONG_LONG, MPIM_FLOAT,
- MPIM_DOUBLE, MPIM_BYTE} MPIM_Datatype;
-
-typedef enum {MPIM_MAX, MPIM_MIN, MPIM_SUM, MPIM_PROD,
- MPIM_LAND, MPIM_BAND, MPIM_LOR, MPIM_BOR,
- MPIM_LXOR, MPIM_BXOR} MPIM_Op;
+enum MPIM_Datatype {
+ MPIM_SHORT,
+ MPIM_INT,
+ MPIM_LONG,
+ MPIM_UNSIGNED_SHORT,
+ MPIM_UNSIGNED,
+ MPIM_UNSIGNED_LONG,
+ MPIM_UNSIGNED_LONG_LONG,
+ MPIM_FLOAT,
+ MPIM_DOUBLE,
+ MPIM_BYTE
+};
+
+enum MPIM_Op {
+ MPIM_MAX,
+ MPIM_MIN,
+ MPIM_SUM,
+ MPIM_PROD,
+ MPIM_LAND,
+ MPIM_BAND,
+ MPIM_LOR,
+ MPIM_BOR,
+ MPIM_LXOR,
+ MPIM_BXOR
+};
#define MPI_I_DO_OP_C_INTEGER(MPI_I_do_op_TYPE,TYPE) \
static void MPI_I_do_op_TYPE(TYPE* b1,TYPE* b2,TYPE* rslt, int cnt,MPIM_Op op)\
{ int i; \
REDUCE_LIMITS
namespace simgrid{
namespace smpi{
-static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype mpi_datatype, MPI_Op mpi_op, int root, MPI_Comm comm, int is_all)
+static int MPI_I_anyReduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype mpi_datatype, MPI_Op mpi_op, int root, MPI_Comm comm, int is_all)
{
char *scr1buf, *scr2buf, *scr3buf, *xxx, *sendbuf, *recvbuf;
int myrank, size, x_base, x_size, computed, idx;
else if(mpi_datatype==MPI_DOUBLE ) datatype=MPIM_DOUBLE;
else if(mpi_datatype==MPI_BYTE ) datatype=MPIM_BYTE;
else
- THROWF(arg_error,0, "reduce rab algorithm can't be used with this datatype ! ");
+ throw std::invalid_argument("reduce rab algorithm can't be used with this datatype!");
if (mpi_op==MPI_MAX ) op=MPIM_MAX;
else if(mpi_op==MPI_MIN ) op=MPIM_MIN;
MPI_Type_extent(mpi_datatype, &typelng);
scrlng = typelng * count;
#ifdef NO_CACHE_OPTIMIZATION
- scr1buf = static_cast<char*>(xbt_malloc(scrlng));
- scr2buf = static_cast<char*>(xbt_malloc(scrlng));
- scr3buf = static_cast<char*>(xbt_malloc(scrlng));
+ scr1buf = new char[scrlng];
+ scr2buf = new char[scrlng];
+ scr3buf = new char[scrlng];
#else
# ifdef SCR_LNG_OPTIM
scrlng = SCR_LNG_OPTIM(scrlng);
# endif
- scr2buf = static_cast<char*>(xbt_malloc(3*scrlng)); /* To test cache problems. */
+ scr2buf = new char[3 * scrlng]; /* To test cache problems. */
scr1buf = scr2buf + 1*scrlng; /* scr1buf and scr3buf must not*/
scr3buf = scr2buf + 2*scrlng; /* be used for malloc because */
/* they are interchanged below.*/
# endif
n = 0; x_size = 1;
while (2*x_size <= size) { n++; x_size = x_size * 2; }
- /* x_sixe == 2**n */
+ /* x_size == 2**n */
r = size - x_size;
/*...step 2 */
}
# ifdef NO_CACHE_TESTING
- xbt_free(scr1buf); xbt_free(scr2buf); xbt_free(scr3buf);
+ delete[] scr1buf;
+ delete[] scr2buf;
+ delete[] scr3buf;
# else
- xbt_free(scr2buf); /* scr1buf and scr3buf are part of scr2buf */
+ delete[] scr2buf; /* scr1buf and scr3buf are part of scr2buf */
# endif
return(MPI_SUCCESS);
} /* new_prot */
#endif /*REDUCE_LIMITS*/
-int Coll_reduce_rab::reduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
+int Coll_reduce_rab::reduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
{
return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, root, comm, 0) );
}
-int Coll_allreduce_rab::allreduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+int Coll_allreduce_rab::allreduce(const void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, -1, comm, 1) );
}