X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/8e43c615b2475d0174be55f95f930ca30988279e..c7cad16a187e10f78f94c63a490f474676702424:/src/smpi/colls/smpi_intel_mpi_selector.cpp diff --git a/src/smpi/colls/smpi_intel_mpi_selector.cpp b/src/smpi/colls/smpi_intel_mpi_selector.cpp index 43138e341f..6bec031ed8 100644 --- a/src/smpi/colls/smpi_intel_mpi_selector.cpp +++ b/src/smpi/colls/smpi_intel_mpi_selector.cpp @@ -1,6 +1,6 @@ /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */ -/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team. +/* Copyright (c) 2009-2010, 2013-2017. The SimGrid Team. * All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it @@ -41,31 +41,33 @@ MPI_Allreduce 4 - Topology aware Reduce + Bcast algorithm 5 - Binomial gather + scatter algorithm 6 - Topology aware binominal gather + scatter algorithm -7 - Shumilin's ring algorithm +7 - Shumilin's ring algorithm 8 - Ring algorithm as Shumilin's ring algorithm is unknown, default to ring' */ +namespace simgrid{ +namespace smpi{ int (*intel_allreduce_functions_table[])(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) ={ - smpi_coll_tuned_allreduce_rdb, - smpi_coll_tuned_allreduce_rab1, - smpi_coll_tuned_allreduce_redbcast, - smpi_coll_tuned_allreduce_mvapich2_two_level, - smpi_coll_tuned_allreduce_smp_binomial, - smpi_coll_tuned_allreduce_mvapich2_two_level, - smpi_coll_tuned_allreduce_ompi_ring_segmented, - smpi_coll_tuned_allreduce_ompi_ring_segmented + Coll_allreduce_rdb::allreduce, + Coll_allreduce_rab1::allreduce, + Coll_allreduce_redbcast::allreduce, + Coll_allreduce_mvapich2_two_level::allreduce, + Coll_allreduce_smp_binomial::allreduce, + Coll_allreduce_mvapich2_two_level::allreduce, + Coll_allreduce_ompi_ring_segmented::allreduce, + Coll_allreduce_ompi_ring_segmented::allreduce }; intel_tuning_table_element intel_allreduce_table[] = { - {1,{ + {1,{ { 2,9,{ {6,7}, {85,1}, @@ -380,13 +382,13 @@ intel_tuning_table_element intel_allreduce_table[] = -/*I_MPI_ADJUST_ALLTOALL +/*I_MPI_ADJUST_ALLTOALL -MPI_Alltoall +MPI_Alltoall -1. Bruck's algorithm -2. Isend/Irecv + waitall algorithm -3. Pair wise exchange algorithm +1. Bruck's algorithm +2. Isend/Irecv + waitall algorithm +3. Pair wise exchange algorithm 4. Plum's algorithm */ @@ -632,40 +634,40 @@ intel_tuning_table_element intel_alltoall_table[] = } } }; -int (*intel_alltoall_functions_table[])(void *sbuf, int scount, +int (*intel_alltoall_functions_table[])(void *sbuf, int scount, MPI_Datatype sdtype, - void* rbuf, int rcount, - MPI_Datatype rdtype, + void* rbuf, int rcount, + MPI_Datatype rdtype, MPI_Comm comm) ={ - smpi_coll_tuned_alltoall_bruck, - smpi_coll_tuned_alltoall_mvapich2_scatter_dest, - smpi_coll_tuned_alltoall_pair, - smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient) + Coll_alltoall_bruck::alltoall, + Coll_alltoall_mvapich2_scatter_dest::alltoall, + Coll_alltoall_pair::alltoall, + Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient) }; -/*I_MPI_ADJUST_BARRIER +/*I_MPI_ADJUST_BARRIER -MPI_Barrier +MPI_Barrier -1. Dissemination algorithm -2. Recursive doubling algorithm -3. Topology aware dissemination algorithm -4. Topology aware recursive doubling algorithm -5. Binominal gather + scatter algorithm -6. Topology aware binominal gather + scatter algorithm +1. Dissemination algorithm +2. Recursive doubling algorithm +3. Topology aware dissemination algorithm +4. Topology aware recursive doubling algorithm +5. Binominal gather + scatter algorithm +6. Topology aware binominal gather + scatter algorithm */ static int intel_barrier_gather_scatter(MPI_Comm comm){ //our default barrier performs a antibcast/bcast - smpi_mpi_barrier(comm); + Coll_barrier_default::barrier(comm); return MPI_SUCCESS; } int (*intel_barrier_functions_table[])(MPI_Comm comm) ={ - smpi_coll_tuned_barrier_ompi_basic_linear, - smpi_coll_tuned_barrier_ompi_recursivedoubling, - smpi_coll_tuned_barrier_ompi_basic_linear, - smpi_coll_tuned_barrier_ompi_recursivedoubling, + Coll_barrier_ompi_basic_linear::barrier, + Coll_barrier_ompi_recursivedoubling::barrier, + Coll_barrier_ompi_basic_linear::barrier, + Coll_barrier_ompi_recursivedoubling::barrier, intel_barrier_gather_scatter, intel_barrier_gather_scatter }; @@ -783,31 +785,31 @@ intel_tuning_table_element intel_barrier_table[] = }; -/*I_MPI_ADJUST_BCAST +/*I_MPI_ADJUST_BCAST -MPI_Bcast +MPI_Bcast -1. Binomial algorithm -2. Recursive doubling algorithm -3. Ring algorithm -4. Topology aware binomial algorithm -5. Topology aware recursive doubling algorithm -6. Topology aware ring algorithm -7. Shumilin's bcast algorithm +1. Binomial algorithm +2. Recursive doubling algorithm +3. Ring algorithm +4. Topology aware binomial algorithm +5. Topology aware recursive doubling algorithm +6. Topology aware ring algorithm +7. Shumilin's bcast algorithm */ int (*intel_bcast_functions_table[])(void *buff, int count, MPI_Datatype datatype, int root, MPI_Comm comm) ={ - smpi_coll_tuned_bcast_binomial_tree, - //smpi_coll_tuned_bcast_scatter_rdb_allgather, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_SMP_binomial, - //smpi_coll_tuned_bcast_scatter_rdb_allgather, - smpi_coll_tuned_bcast_NTSL, - smpi_coll_tuned_bcast_SMP_linear, - smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo' + Coll_bcast_binomial_tree::bcast, + //Coll_bcast_scatter_rdb_allgather::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_SMP_binomial::bcast, + //Coll_bcast_scatter_rdb_allgather::bcast, + Coll_bcast_NTSL::bcast, + Coll_bcast_SMP_linear::bcast, + Coll_bcast_mvapich2::bcast,//we don't know shumilin's algo' }; intel_tuning_table_element intel_bcast_table[] = @@ -952,15 +954,15 @@ intel_tuning_table_element intel_bcast_table[] = }; -/*I_MPI_ADJUST_REDUCE +/*I_MPI_ADJUST_REDUCE -MPI_Reduce +MPI_Reduce -1. Shumilin's algorithm -2. Binomial algorithm -3. Topology aware Shumilin's algorithm -4. Topology aware binomial algorithm -5. Rabenseifner's algorithm +1. Shumilin's algorithm +2. Binomial algorithm +3. Topology aware Shumilin's algorithm +4. Topology aware binomial algorithm +5. Rabenseifner's algorithm 6. Topology aware Rabenseifner's algorithm */ @@ -969,12 +971,12 @@ int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) ={ - smpi_coll_tuned_reduce_mvapich2, - smpi_coll_tuned_reduce_binomial, - smpi_coll_tuned_reduce_mvapich2, - smpi_coll_tuned_reduce_mvapich2_two_level, - smpi_coll_tuned_reduce_rab, - smpi_coll_tuned_reduce_rab + Coll_reduce_mvapich2::reduce, + Coll_reduce_binomial::reduce, + Coll_reduce_mvapich2::reduce, + Coll_reduce_mvapich2_two_level::reduce, + Coll_reduce_rab::reduce, + Coll_reduce_rab::reduce }; intel_tuning_table_element intel_reduce_table[] = @@ -1042,15 +1044,15 @@ intel_tuning_table_element intel_reduce_table[] = } }; -/* I_MPI_ADJUST_REDUCE_SCATTER +/* I_MPI_ADJUST_REDUCE_SCATTER -MPI_Reduce_scatter +MPI_Reduce_scatter -1. Recursive having algorithm -2. Pair wise exchange algorithm -3. Recursive doubling algorithm -4. Reduce + Scatterv algorithm -5. Topology aware Reduce + Scatterv algorithm +1. Recursive having algorithm +2. Pair wise exchange algorithm +3. Recursive doubling algorithm +4. Reduce + Scatterv algorithm +5. Topology aware Reduce + Scatterv algorithm */ static int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf, @@ -1059,7 +1061,7 @@ static int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf, MPI_Op op, MPI_Comm comm) { - smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); + Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); return MPI_SUCCESS; } @@ -1069,10 +1071,10 @@ static int intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf, MPI_Op op, MPI_Comm comm) { - if(smpi_op_is_commute(op)) - return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm); + if(op==MPI_OP_NULL || op->is_commutative()) + return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); else - return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm); + return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm); } int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf, @@ -1082,8 +1084,8 @@ int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf, MPI_Comm comm ) ={ intel_reduce_scatter_recursivehalving, - smpi_coll_tuned_reduce_scatter_mpich_pair, - smpi_coll_tuned_reduce_scatter_mpich_rdb, + Coll_reduce_scatter_mpich_pair::reduce_scatter, + Coll_reduce_scatter_mpich_rdb::reduce_scatter, intel_reduce_scatter_reduce_scatterv, intel_reduce_scatter_reduce_scatterv }; @@ -1474,27 +1476,27 @@ intel_tuning_table_element intel_reduce_scatter_table[] = } }; -/* I_MPI_ADJUST_ALLGATHER +/* I_MPI_ADJUST_ALLGATHER -MPI_Allgather +MPI_Allgather -1. Recursive doubling algorithm -2. Bruck's algorithm -3. Ring algorithm -4. Topology aware Gatherv + Bcast algorithm +1. Recursive doubling algorithm +2. Bruck's algorithm +3. Ring algorithm +4. Topology aware Gatherv + Bcast algorithm */ -int (*intel_allgather_functions_table[])(void *sbuf, int scount, +int (*intel_allgather_functions_table[])(void *sbuf, int scount, MPI_Datatype sdtype, - void* rbuf, int rcount, - MPI_Datatype rdtype, + void* rbuf, int rcount, + MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_allgather_rdb, - smpi_coll_tuned_allgather_bruck, - smpi_coll_tuned_allgather_ring, - smpi_coll_tuned_allgather_GB + Coll_allgather_rdb::allgather, + Coll_allgather_bruck::allgather, + Coll_allgather_ring::allgather, + Coll_allgather_GB::allgather }; intel_tuning_table_element intel_allgather_table[] = @@ -1643,28 +1645,28 @@ intel_tuning_table_element intel_allgather_table[] = } }; -/* I_MPI_ADJUST_ALLGATHERV +/* I_MPI_ADJUST_ALLGATHERV -MPI_Allgatherv +MPI_Allgatherv -1. Recursive doubling algorithm -2. Bruck's algorithm -3. Ring algorithm -4. Topology aware Gatherv + Bcast algorithm +1. Recursive doubling algorithm +2. Bruck's algorithm +3. Ring algorithm +4. Topology aware Gatherv + Bcast algorithm */ -int (*intel_allgatherv_functions_table[])(void *sbuf, int scount, +int (*intel_allgatherv_functions_table[])(void *sbuf, int scount, MPI_Datatype sdtype, - void* rbuf, int *rcounts, + void* rbuf, int *rcounts, int *rdispls, - MPI_Datatype rdtype, + MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_allgatherv_mpich_rdb, - smpi_coll_tuned_allgatherv_ompi_bruck, - smpi_coll_tuned_allgatherv_ring, - smpi_coll_tuned_allgatherv_GB + Coll_allgatherv_mpich_rdb::allgatherv, + Coll_allgatherv_ompi_bruck::allgatherv, + Coll_allgatherv_ring::allgatherv, + Coll_allgatherv_GB::allgatherv }; intel_tuning_table_element intel_allgatherv_table[] = @@ -1859,22 +1861,22 @@ intel_tuning_table_element intel_allgatherv_table[] = MPI_Gather -1. Binomial algorithm -2. Topology aware binomial algorithm +1. Binomial algorithm +2. Topology aware binomial algorithm 3. Shumilin's algorithm */ -int (*intel_gather_functions_table[])(void *sbuf, int scount, +int (*intel_gather_functions_table[])(void *sbuf, int scount, MPI_Datatype sdtype, - void* rbuf, int rcount, - MPI_Datatype rdtype, + void* rbuf, int rcount, + MPI_Datatype rdtype, int root, MPI_Comm comm ) ={ - smpi_coll_tuned_gather_ompi_binomial, - smpi_coll_tuned_gather_ompi_binomial, - smpi_coll_tuned_gather_mvapich2 + Coll_gather_ompi_binomial::gather, + Coll_gather_ompi_binomial::gather, + Coll_gather_mvapich2::gather }; intel_tuning_table_element intel_gather_table[] = @@ -1959,25 +1961,25 @@ intel_tuning_table_element intel_gather_table[] = }; -/* I_MPI_ADJUST_SCATTER +/* I_MPI_ADJUST_SCATTER -MPI_Scatter +MPI_Scatter -1. Binomial algorithm -2. Topology aware binomial algorithm -3. Shumilin's algorithm +1. Binomial algorithm +2. Topology aware binomial algorithm +3. Shumilin's algorithm */ -int (*intel_scatter_functions_table[])(void *sbuf, int scount, +int (*intel_scatter_functions_table[])(void *sbuf, int scount, MPI_Datatype sdtype, - void* rbuf, int rcount, - MPI_Datatype rdtype, + void* rbuf, int rcount, + MPI_Datatype rdtype, int root, MPI_Comm comm ) ={ - smpi_coll_tuned_scatter_ompi_binomial, - smpi_coll_tuned_scatter_ompi_binomial, - smpi_coll_tuned_scatter_mvapich2 + Coll_scatter_ompi_binomial::scatter, + Coll_scatter_ompi_binomial::scatter, + Coll_scatter_mvapich2::scatter }; intel_tuning_table_element intel_scatter_table[] = @@ -2134,12 +2136,12 @@ intel_tuning_table_element intel_scatter_table[] = -/* I_MPI_ADJUST_ALLTOALLV +/* I_MPI_ADJUST_ALLTOALLV -MPI_Alltoallv +MPI_Alltoallv -1. Isend/Irecv + waitall algorithm -2. Plum's algorithm +1. Isend/Irecv + waitall algorithm +2. Plum's algorithm */ @@ -2149,8 +2151,8 @@ int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps, MPI_Datatype rdtype, MPI_Comm comm ) ={ - smpi_coll_tuned_alltoallv_ompi_basic_linear, - smpi_coll_tuned_alltoallv_bruck + Coll_alltoallv_ompi_basic_linear::alltoallv, + Coll_alltoallv_bruck::alltoallv }; intel_tuning_table_element intel_alltoallv_table[] = @@ -2177,7 +2179,7 @@ intel_tuning_table_element intel_alltoallv_table[] = } }, { 2147483647,2,{ - {4,1},//0 again + {4,1},//0 again {2147483647,2} } } @@ -2210,58 +2212,58 @@ intel_tuning_table_element intel_alltoallv_table[] = }; -//These are collected from table 3.5-2 of the Intel MPI Reference Manual +//These are collected from table 3.5-2 of the Intel MPI Reference Manual + - #define SIZECOMP_reduce_scatter\ int total_message_size = 0;\ for (i = 0; i < comm_size; i++) { \ total_message_size += rcounts[i];\ }\ - size_t block_dsize = total_message_size*smpi_datatype_size(dtype);\ - + size_t block_dsize = total_message_size*dtype->size();\ + #define SIZECOMP_allreduce\ - size_t block_dsize =rcount * smpi_datatype_size(dtype); - + size_t block_dsize =rcount * dtype->size(); + #define SIZECOMP_alltoall\ - size_t block_dsize =send_count * smpi_datatype_size(send_type); + size_t block_dsize =send_count * send_type->size(); #define SIZECOMP_bcast\ - size_t block_dsize =count * smpi_datatype_size(datatype); + size_t block_dsize =count * datatype->size(); #define SIZECOMP_reduce\ - size_t block_dsize =count * smpi_datatype_size(datatype); + size_t block_dsize =count * datatype->size(); #define SIZECOMP_barrier\ size_t block_dsize = 1; #define SIZECOMP_allgather\ - size_t block_dsize =recv_count * smpi_datatype_size(recv_type); + size_t block_dsize =recv_count * recv_type->size(); #define SIZECOMP_allgatherv\ int total_message_size = 0;\ for (i = 0; i < comm_size; i++) { \ total_message_size += recv_count[i];\ }\ - size_t block_dsize = total_message_size*smpi_datatype_size(recv_type); - + size_t block_dsize = total_message_size*recv_type->size(); + #define SIZECOMP_gather\ int rank = comm->rank();\ size_t block_dsize = (send_buff == MPI_IN_PLACE || rank ==root) ?\ - recv_count * smpi_datatype_size(recv_type) :\ - send_count * smpi_datatype_size(send_type); + recv_count * recv_type->size() :\ + send_count * send_type->size(); #define SIZECOMP_scatter\ int rank = comm->rank();\ size_t block_dsize = (sendbuf == MPI_IN_PLACE || rank !=root ) ?\ - recvcount * smpi_datatype_size(recvtype) :\ - sendcount * smpi_datatype_size(sendtype); + recvcount * recvtype->size() :\ + sendcount * sendtype->size(); #define SIZECOMP_alltoallv\ size_t block_dsize = 1; - + #define IMPI_COLL_SELECT(cat, ret, args, args2)\ -ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\ +ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\ {\ int comm_size = comm->size();\ int i =0;\ @@ -2289,6 +2291,7 @@ ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\ args2);\ } + COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm)); @@ -2301,3 +2304,5 @@ COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype, COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm)); COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm)); +} +}