COLL_APPLY(action, COLL_GATHER_SIG, ompi_basic_linear) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, ompi_binomial) COLL_sep \
COLL_APPLY(action, COLL_GATHER_SIG, ompi_linear_sync) COLL_sep \
-COLL_APPLY(action, COLL_GATHER_SIG, mpich) \
+COLL_APPLY(action, COLL_GATHER_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_GATHER_SIG, automatic)
COLL_APPLY(action, COLL_ALLGATHER_SIG, spreading_simple) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHER_SIG, ompi_neighborexchange) COLL_sep \
-COLL_APPLY(action, COLL_ALLGATHER_SIG, mpich)
+COLL_APPLY(action, COLL_ALLGATHER_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHER_SIG, automatic)
COLL_ALLGATHERS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi_neighborexchange) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi_bruck) COLL_sep \
COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich) COLL_sep \
-COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb)
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, automatic)
COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_ALLREDUCE_SIG, redbcast) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_ALLREDUCE_SIG, ompi_ring_segmented) COLL_sep \
-COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich)
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLREDUCE_SIG, automatic)
COLL_ALLREDUCES(COLL_PROTO, COLL_NOsep)
#define COLL_ALLTOALL_SIG alltoall, int, \
(void *send_buff, int send_count, MPI_Datatype send_type, \
void *recv_buff, int recv_count, MPI_Datatype recv_type, \
- MPI_Comm com)
+ MPI_Comm comm) /* Signature of the alltoall collective: name, return type, parameter list.
+ * The communicator parameter is called `comm`, the identifier that the body
+ * generated by AUTOMATIC_COLL_BENCH uses to refer to the communicator. */
#define COLL_ALLTOALLS(action, COLL_sep) \
COLL_APPLY(action, COLL_ALLTOALL_SIG, 2dmesh) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, ring_one_barrier) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, simple) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALL_SIG, ompi) COLL_sep \
-COLL_APPLY(action, COLL_ALLTOALL_SIG, mpich)COLL_sep \
-COLL_APPLY(action, COLL_ALLTOALL_SIG, ompi_pairwise)
+COLL_APPLY(action, COLL_ALLTOALL_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALL_SIG, ompi_pairwise) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALL_SIG, automatic) /* List of alltoall algorithms: `action` is applied to each entry and
+ * COLL_sep is placed between entries, so the last entry carries no separator.
+ * `automatic` is the benchmarking variant produced by AUTOMATIC_COLL_BENCH. */
COLL_ALLTOALLS(COLL_PROTO, COLL_NOsep)
#define COLL_ALLTOALLV_SIG alltoallv, int, \
(void *send_buff, int *send_counts, int *send_disps, MPI_Datatype send_type, \
void *recv_buff, int *recv_counts, int *recv_disps, MPI_Datatype recv_type, \
- MPI_Comm com)
+ MPI_Comm comm) /* Signature of the alltoallv collective: name, return type, parameter list.
+ * The communicator parameter is called `comm`, the identifier that the body
+ * generated by AUTOMATIC_COLL_BENCH uses to refer to the communicator. */
#define COLL_ALLTOALLVS(action, COLL_sep) \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, bruck) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, ring_mpi_barrier) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, ring_one_barrier) COLL_sep \
COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi) COLL_sep \
-COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich)COLL_sep \
-COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear)
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, ompi_basic_linear) COLL_sep \
+COLL_APPLY(action, COLL_ALLTOALLV_SIG, automatic) /* List of alltoallv algorithms: `action` is applied to each entry and
+ * COLL_sep is placed between entries, so the last entry carries no separator.
+ * `automatic` is the benchmarking variant produced by AUTOMATIC_COLL_BENCH. */
COLL_ALLTOALLVS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_BCAST_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, ompi_split_bintree) COLL_sep \
COLL_APPLY(action, COLL_BCAST_SIG, ompi_pipeline) COLL_sep \
-COLL_APPLY(action, COLL_BCAST_SIG, mpich)
+COLL_APPLY(action, COLL_BCAST_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_BCAST_SIG, automatic)
COLL_BCASTS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_REDUCE_SIG, ompi_in_order_binary) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, ompi_binary) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SIG, ompi_binomial) COLL_sep \
-COLL_APPLY(action, COLL_REDUCE_SIG, mpich)
+COLL_APPLY(action, COLL_REDUCE_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_REDUCE_SIG, automatic)
COLL_REDUCES(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_pair) COLL_sep \
COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_rdb) COLL_sep \
-COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_noncomm)
+COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, mpich_noncomm) COLL_sep \
+COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, automatic)
+
COLL_REDUCE_SCATTERS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_SCATTER_SIG, ompi) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, ompi_basic_linear) COLL_sep \
COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial) COLL_sep \
-COLL_APPLY(action, COLL_SCATTER_SIG, mpich)
+COLL_APPLY(action, COLL_SCATTER_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_SCATTER_SIG, automatic)
COLL_SCATTERS(COLL_PROTO, COLL_NOsep)
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_bruck) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_recursivedoubling) COLL_sep \
COLL_APPLY(action, COLL_BARRIER_SIG, ompi_doublering) COLL_sep \
-COLL_APPLY(action, COLL_BARRIER_SIG, mpich)
+COLL_APPLY(action, COLL_BARRIER_SIG, mpich) COLL_sep \
+COLL_APPLY(action, COLL_BARRIER_SIG, automatic)
COLL_BARRIERS(COLL_PROTO, COLL_NOsep)
--- /dev/null
+#include "colls_private.h"
+#include <limits.h>
+#include "mc/mc_private.h"
+
+/*
+ * Quick autotuning of a collective operation.
+ *
+ * AUTOMATIC_COLL_BENCH(cat, ret, args, args2) defines the function
+ *     ret smpi_coll_tuned_<cat>_automatic(<args>)
+ * which calls, one after the other, every algorithm listed in
+ * mpi_coll_<cat>_description (except the "automatic" and "default" entries)
+ * and times each call with SIMIX_get_clock().
+ *
+ *   cat   - collective name; pasted into identifiers and used for traces/logs
+ *   ret   - return type of the generated function
+ *   args  - parenthesized parameter list; must declare a parameter named `comm`
+ *   args2 - parenthesized argument list forwarded to every candidate algorithm
+ *
+ * Every rank remembers the algorithm that was fastest locally.  The duration
+ * of each algorithm is also reduced with MPI_MAX onto rank 0, which remembers
+ * the algorithm whose slowest rank was the fastest.  The outcome is only
+ * reported through XBT_WARN.
+ *
+ * The generated function returns MPI_SUCCESS when an algorithm was selected
+ * and MPI_ERR_INTERN otherwise.
+ */
+#define AUTOMATIC_COLL_BENCH(cat, ret, args, args2)\
+  ret smpi_coll_tuned_ ## cat ## _ ## automatic(COLL_UNPAREN args)\
+{\
+  /* INT_MAX serves as the "slower than anything" starting value of both minima. */\
+  double time1, time2, time_min=INT_MAX;\
+  int min_coll=-1, global_coll=-1;\
+  int i;\
+  double buf_in, buf_out, max_min=INT_MAX;\
+  for (i = 0; mpi_coll_##cat##_description[i].name; i++){\
+    /* The "automatic" and "default" entries are not benchmarked. */\
+    if(!strcmp(mpi_coll_##cat##_description[i].name, "automatic"))continue;\
+    if(!strcmp(mpi_coll_##cat##_description[i].name, "default"))continue;\
+    smpi_mpi_barrier(comm);\
+    if (TRACE_is_enabled()){\
+      type_t type = PJ_type_get_or_null (#cat, PJ_type_get_root());\
+      if (!type){\
+        type=PJ_type_event_new(#cat, PJ_type_get_root());\
+      }\
+      /* "rank-" followed by any int fits in 25 bytes; a stack buffer needs no free(). */\
+      char cont_name[25];\
+      snprintf(cont_name, sizeof(cont_name), "rank-%d", smpi_process_index());\
+      val_t value = PJ_value_get_or_new(mpi_coll_##cat##_description[i].name,"1.0 1.0 1.0", type);\
+      new_pajeNewEvent (SIMIX_get_clock(), PJ_container_get(cont_name), type, value);\
+    }\
+    time1 = SIMIX_get_clock();\
+    ((int (*) args)\
+        mpi_coll_##cat##_description[i].coll) args2 ;\
+    time2 = SIMIX_get_clock();\
+    buf_out=time2-time1;\
+    /* buf_in is only read on rank 0, the root of this MPI_MAX reduction. */\
+    smpi_mpi_reduce((void*)&buf_out,(void*)&buf_in, 1, MPI_DOUBLE, MPI_MAX, 0,comm );\
+    if(buf_out<time_min){\
+      min_coll=i;\
+      time_min=buf_out;\
+    }\
+    if(smpi_comm_rank(comm)==0 && buf_in<max_min){\
+      max_min=buf_in;\
+      global_coll=i;\
+    }\
+  }\
+  /* Nothing was selected: leave before the table gets indexed with -1. */\
+  if(min_coll==-1){\
+    return MPI_ERR_INTERN;\
+  }\
+  if(smpi_comm_rank(comm)==0){\
+    XBT_WARN("For rank 0, the quickest was %s : %lf , but global was %s : %lf at max",\
+             mpi_coll_##cat##_description[min_coll].name, time_min,\
+             (global_coll!=-1)?mpi_coll_##cat##_description[global_coll].name:"(none)", max_min);\
+  }else{\
+    XBT_WARN("The quickest %s was %s on rank %d and took %lf", #cat,\
+             mpi_coll_##cat##_description[min_coll].name, smpi_comm_rank(comm), time_min);\
+  }\
+  return MPI_SUCCESS;\
+}
+
+
+/*
+ * One smpi_coll_tuned_<collective>_automatic() definition per collective.
+ * The parenthesized list is forwarded as the call arguments to every
+ * benchmarked algorithm, so its names have to be the parameter names declared
+ * by the corresponding COLL_*_SIG.
+ * No ';' follows the invocations: each one expands to a complete function
+ * definition, and a stray ';' at file scope is not valid ISO C.
+ */
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLGATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLTOALL_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_BCAST_SIG, (buf, count, datatype, root, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SIG, (buf, rbuf, count, datatype, op, root, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_REDUCE_SCATTER_SIG, (sbuf, rbuf, rcounts, dtype, op, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_SCATTER_SIG, (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm))
+COLL_APPLY(AUTOMATIC_COLL_BENCH, COLL_BARRIER_SIG, (comm))