* but this header must not be removed.
*/
-#include "../colls_private.h"
-#include <stdio.h>
-#include <stdlib.h>
+#include "../colls_private.hpp"
+#include <cstdio>
+#include <cstdlib>
#define REDUCE_NEW_ALWAYS 1
Exa.: size=13 ==> n=3, r=5 (i.e. size == 13 == 2**n+r == 2**3 + 5)
- The algoritm needs for the execution of one mpi_coll_reduce_fun
+ The algorithm needs for the execution of one Colls::reduce
- for r==0
exec_time = n*(L1+L2) + buf_lng * (1-1/2**n) * (T1 + T2 + O/d)
7: { [(a+b)+(c+d)] + [(e+f)+(g+h)] } + { [(i+j)+k] + [l+m] } for H
-For mpi_coll_allreduce_fun:
+For Colls::allreduce:
------------------
Step 6.1)
on all nodes 0..12
-For mpi_coll_reduce_fun:
+For Colls::reduce:
---------------
Step 6.0)
#endif
#endif
-typedef enum {MPIM_SHORT, MPIM_INT, MPIM_LONG, MPIM_UNSIGNED_SHORT,
- MPIM_UNSIGNED, MPIM_UNSIGNED_LONG, MPIM_UNSIGNED_LONG_LONG, MPIM_FLOAT,
- MPIM_DOUBLE, MPIM_BYTE} MPIM_Datatype;
-
-typedef enum {MPIM_MAX, MPIM_MIN, MPIM_SUM, MPIM_PROD,
- MPIM_LAND, MPIM_BAND, MPIM_LOR, MPIM_BOR,
- MPIM_LXOR, MPIM_BXOR} MPIM_Op;
+enum MPIM_Datatype { /* File-local element-type tags. NOTE(review): presumably one tag per basic MPI datatype handled by the typed reduction helpers; the mapping code is outside this hunk - confirm. */
+ MPIM_SHORT,
+ MPIM_INT,
+ MPIM_LONG,
+ MPIM_UNSIGNED_SHORT,
+ MPIM_UNSIGNED,
+ MPIM_UNSIGNED_LONG,
+ MPIM_UNSIGNED_LONG_LONG,
+ MPIM_FLOAT,
+ MPIM_DOUBLE,
+ MPIM_BYTE
+};
+
+enum MPIM_Op { /* Reduction-operator tags; a value of this type is the `op` argument of the helpers generated by MPI_I_DO_OP_C_INTEGER. */
+ MPIM_MAX,
+ MPIM_MIN,
+ MPIM_SUM,
+ MPIM_PROD,
+ MPIM_LAND,
+ MPIM_BAND,
+ MPIM_LOR,
+ MPIM_BOR,
+ MPIM_LXOR,
+ MPIM_BXOR
+};
#define MPI_I_DO_OP_C_INTEGER(MPI_I_do_op_TYPE,TYPE) \
static void MPI_I_do_op_TYPE(TYPE* b1,TYPE* b2,TYPE* rslt, int cnt,MPIM_Op op)\
{ int i; \
}
REDUCE_LIMITS
-
+namespace simgrid{
+namespace smpi{
static int MPI_I_anyReduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype mpi_datatype, MPI_Op mpi_op, int root, MPI_Comm comm, int is_all)
{
char *scr1buf, *scr2buf, *scr3buf, *xxx, *sendbuf, *recvbuf;
MPI_Type_extent(mpi_datatype, &typelng);
scrlng = typelng * count;
#ifdef NO_CACHE_OPTIMIZATION
- scr1buf = static_cast<char*>(xbt_malloc(scrlng));
- scr2buf = static_cast<char*>(xbt_malloc(scrlng));
- scr3buf = static_cast<char*>(xbt_malloc(scrlng));
+ scr1buf = new char[scrlng]; /* NO_CACHE_OPTIMIZATION build: three separate scratch arrays of scrlng bytes each. */
+ scr2buf = new char[scrlng];
+ scr3buf = new char[scrlng];
#else
# ifdef SCR_LNG_OPTIM
scrlng = SCR_LNG_OPTIM(scrlng);
# endif
- scr2buf = static_cast<char*>(xbt_malloc(3*scrlng)); /* To test cache problems. */
+ scr2buf = new char[3 * scrlng]; /* To test cache problems. */
scr1buf = scr2buf + 1*scrlng; /* scr1buf and scr3buf must not*/
scr3buf = scr2buf + 2*scrlng; /* be used for malloc because */
/* they are interchanged below.*/
}
# ifdef NO_CACHE_TESTING
- xbt_free(scr1buf); xbt_free(scr2buf); xbt_free(scr3buf);
+ delete[] scr1buf; /* NOTE(review): the allocations above are selected by NO_CACHE_OPTIMIZATION, but this release path is selected by NO_CACHE_TESTING; if only one of the two macros is defined, the delete[] calls do not pair up with the new[] calls - confirm the macros are always set together. */
+ delete[] scr2buf;
+ delete[] scr3buf;
# else
- xbt_free(scr2buf); /* scr1buf and scr3buf are part of scr2buf */
+ delete[] scr2buf; /* scr1buf and scr3buf are part of scr2buf */
# endif
return(MPI_SUCCESS);
} /* new_prot */
/*otherwise:*/
if (is_all)
- return( mpi_coll_allreduce_fun(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) );
+ return( Colls::allreduce(Sendbuf, Recvbuf, count, mpi_datatype, mpi_op, comm) ); /* is_all != 0: hand the call to Colls::allreduce (no root argument). */
else
- return( mpi_coll_reduce_fun(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) );
+ return( Colls::reduce(Sendbuf,Recvbuf, count,mpi_datatype,mpi_op, root, comm) ); /* is_all == 0: hand the call to Colls::reduce with the caller's root. */
}
#endif /*REDUCE_LIMITS*/
-int smpi_coll_tuned_reduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
+int Coll_reduce_rab::reduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) /* Rooted variant: forwards every argument to MPI_I_anyReduce with is_all = 0 and returns its result. */
{
return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, root, comm, 0) );
}
-int smpi_coll_tuned_allreduce_rab(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
+int Coll_allreduce_rab::allreduce(void* Sendbuf, void* Recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) /* Unrooted variant: forwards to MPI_I_anyReduce with -1 in the root slot and is_all = 1, and returns its result. */
{
return( MPI_I_anyReduce(Sendbuf, Recvbuf, count, datatype, op, -1, comm, 1) );
}
+}
+}