Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of framagit.org:simgrid/simgrid
[simgrid.git] / src / smpi / colls / smpi_intel_mpi_selector.cpp
index 574aeaa..5166533 100644 (file)
@@ -1,12 +1,12 @@
 /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
 
-/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
+/* Copyright (c) 2009-2019. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
-#include "colls_private.h"
+#include "colls_private.hpp"
 
 // This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching runs with 1,2,4,8,16 processes per node.
 
 #define INTEL_MAX_NB_NUMPROCS  12
 #define INTEL_MAX_NB_PPN  5  /* 1 2 4 8 16 ppn */
 
-typedef struct {
+struct intel_tuning_table_size_element {
   unsigned int max_size;
   int algo;
-} intel_tuning_table_size_element;
+};
 
-typedef struct {
+struct intel_tuning_table_numproc_element {
   int max_num_proc;
   int num_elems;
   intel_tuning_table_size_element elems[INTEL_MAX_NB_THRESHOLDS];
-} intel_tuning_table_numproc_element;
+};
 
-typedef struct {
+struct intel_tuning_table_element {
   int ppn;
   intel_tuning_table_numproc_element elems[INTEL_MAX_NB_NUMPROCS];
-} intel_tuning_table_element;
+};
 
 /*
 I_MPI_ADJUST_ALLREDUCE
@@ -41,31 +41,33 @@ MPI_Allreduce
 4 - Topology aware Reduce + Bcast algorithm
 5 - Binomial gather + scatter algorithm
 6 - Topology aware binomial gather + scatter algorithm
-7 - Shumilin's ring algorithm 
+7 - Shumilin's ring algorithm
 8 - Ring algorithm
 
   As Shumilin's ring algorithm is unknown, we default to the ring algorithm.
 */
 
+namespace simgrid{
+namespace smpi{
 
 int (*intel_allreduce_functions_table[])(void *sendbuf,
       void *recvbuf,
       int count,
       MPI_Datatype datatype,
       MPI_Op op, MPI_Comm comm) ={
-      smpi_coll_tuned_allreduce_rdb,
-      smpi_coll_tuned_allreduce_rab1,
-      smpi_coll_tuned_allreduce_redbcast,
-      smpi_coll_tuned_allreduce_mvapich2_two_level,
-      smpi_coll_tuned_allreduce_smp_binomial,
-      smpi_coll_tuned_allreduce_mvapich2_two_level,
-      smpi_coll_tuned_allreduce_ompi_ring_segmented,
-      smpi_coll_tuned_allreduce_ompi_ring_segmented
+      Coll_allreduce_rdb::allreduce,
+      Coll_allreduce_rab1::allreduce,
+      Coll_allreduce_redbcast::allreduce,
+      Coll_allreduce_mvapich2_two_level::allreduce,
+      Coll_allreduce_smp_binomial::allreduce,
+      Coll_allreduce_mvapich2_two_level::allreduce,
+      Coll_allreduce_ompi_ring_segmented::allreduce,
+      Coll_allreduce_ompi_ring_segmented::allreduce
 };
 
 intel_tuning_table_element intel_allreduce_table[] =
 {
-  {1,{ 
+  {1,{
     { 2,9,{
       {6,7},
       {85,1},
@@ -380,13 +382,13 @@ intel_tuning_table_element intel_allreduce_table[] =
 
 
 
-/*I_MPI_ADJUST_ALLTOALL 
+/*I_MPI_ADJUST_ALLTOALL
 
-MPI_Alltoall 
+MPI_Alltoall
 
-1. Bruck's algorithm 
-2. Isend/Irecv + waitall algorithm 
-3. Pair wise exchange algorithm 
+1. Bruck's algorithm
+2. Isend/Irecv + waitall algorithm
+3. Pair wise exchange algorithm
 4. Plum's algorithm
 
 */
@@ -632,40 +634,40 @@ intel_tuning_table_element intel_alltoall_table[] =
   }
   }
 };
-int (*intel_alltoall_functions_table[])(void *sbuf, int scount, 
+int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
                                              MPI_Datatype sdtype,
-                                             void* rbuf, int rcount, 
-                                             MPI_Datatype rdtype, 
+                                             void* rbuf, int rcount,
+                                             MPI_Datatype rdtype,
                                              MPI_Comm comm) ={
-      smpi_coll_tuned_alltoall_bruck,
-      smpi_coll_tuned_alltoall_mvapich2_scatter_dest,
-      smpi_coll_tuned_alltoall_pair,
-      smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient)
+      Coll_alltoall_bruck::alltoall,
+      Coll_alltoall_mvapich2_scatter_dest::alltoall,
+      Coll_alltoall_pair::alltoall,
+      Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient)
 };
 
-/*I_MPI_ADJUST_BARRIER 
+/*I_MPI_ADJUST_BARRIER
 
-MPI_Barrier 
+MPI_Barrier
 
-1. Dissemination algorithm 
-2. Recursive doubling algorithm 
-3. Topology aware dissemination algorithm 
-4. Topology aware recursive doubling algorithm 
-5. Binominal gather + scatter algorithm 
-6. Topology aware binominal gather + scatter algorithm 
+1. Dissemination algorithm
+2. Recursive doubling algorithm
+3. Topology aware dissemination algorithm
+4. Topology aware recursive doubling algorithm
+5. Binomial gather + scatter algorithm
+6. Topology aware binomial gather + scatter algorithm
 
 */
 static int intel_barrier_gather_scatter(MPI_Comm comm){
     //our default barrier performs a antibcast/bcast
-    smpi_mpi_barrier(comm);
+    Coll_barrier_default::barrier(comm);
     return MPI_SUCCESS;
 }
 
 int (*intel_barrier_functions_table[])(MPI_Comm comm) ={
-      smpi_coll_tuned_barrier_ompi_basic_linear,
-      smpi_coll_tuned_barrier_ompi_recursivedoubling,
-      smpi_coll_tuned_barrier_ompi_basic_linear,
-      smpi_coll_tuned_barrier_ompi_recursivedoubling,
+      Coll_barrier_ompi_basic_linear::barrier,
+      Coll_barrier_ompi_recursivedoubling::barrier,
+      Coll_barrier_ompi_basic_linear::barrier,
+      Coll_barrier_ompi_recursivedoubling::barrier,
       intel_barrier_gather_scatter,
       intel_barrier_gather_scatter
 };
@@ -783,31 +785,31 @@ intel_tuning_table_element intel_barrier_table[] =
 };
 
 
-/*I_MPI_ADJUST_BCAST 
+/*I_MPI_ADJUST_BCAST
 
-MPI_Bcast 
+MPI_Bcast
 
-1. Binomial algorithm 
-2. Recursive doubling algorithm 
-3. Ring algorithm 
-4. Topology aware binomial algorithm 
-5. Topology aware recursive doubling algorithm 
-6. Topology aware ring algorithm 
-7. Shumilin's bcast algorithm 
+1. Binomial algorithm
+2. Recursive doubling algorithm
+3. Ring algorithm
+4. Topology aware binomial algorithm
+5. Topology aware recursive doubling algorithm
+6. Topology aware ring algorithm
+7. Shumilin's bcast algorithm
 */
 
 int (*intel_bcast_functions_table[])(void *buff, int count,
                                           MPI_Datatype datatype, int root,
                                           MPI_Comm  comm) ={
-      smpi_coll_tuned_bcast_binomial_tree,
-      //smpi_coll_tuned_bcast_scatter_rdb_allgather,
-      smpi_coll_tuned_bcast_NTSL,
-      smpi_coll_tuned_bcast_NTSL,
-      smpi_coll_tuned_bcast_SMP_binomial,
-      //smpi_coll_tuned_bcast_scatter_rdb_allgather,
-      smpi_coll_tuned_bcast_NTSL,
-      smpi_coll_tuned_bcast_SMP_linear,
-      smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo'
+      Coll_bcast_binomial_tree::bcast,
+      //Coll_bcast_scatter_rdb_allgather::bcast,
+      Coll_bcast_NTSL::bcast,
+      Coll_bcast_NTSL::bcast,
+      Coll_bcast_SMP_binomial::bcast,
+      //Coll_bcast_scatter_rdb_allgather::bcast,
+      Coll_bcast_NTSL::bcast,
+      Coll_bcast_SMP_linear::bcast,
+      Coll_bcast_mvapich2::bcast,//we don't know shumilin's algo'
 };
 
 intel_tuning_table_element intel_bcast_table[] =
@@ -952,15 +954,15 @@ intel_tuning_table_element intel_bcast_table[] =
 };
 
 
-/*I_MPI_ADJUST_REDUCE 
+/*I_MPI_ADJUST_REDUCE
 
-MPI_Reduce 
+MPI_Reduce
 
-1. Shumilin's algorithm 
-2. Binomial algorithm 
-3. Topology aware Shumilin's algorithm 
-4. Topology aware binomial algorithm 
-5. Rabenseifner's algorithm 
+1. Shumilin's algorithm
+2. Binomial algorithm
+3. Topology aware Shumilin's algorithm
+4. Topology aware binomial algorithm
+5. Rabenseifner's algorithm
 6. Topology aware Rabenseifner's algorithm
 
 */
@@ -969,12 +971,12 @@ int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf,
                                             int count, MPI_Datatype  datatype,
                                             MPI_Op   op, int root,
                                             MPI_Comm   comm) ={
-      smpi_coll_tuned_reduce_mvapich2,
-      smpi_coll_tuned_reduce_binomial,
-      smpi_coll_tuned_reduce_mvapich2,
-      smpi_coll_tuned_reduce_mvapich2_two_level,
-      smpi_coll_tuned_reduce_rab,
-      smpi_coll_tuned_reduce_rab
+      Coll_reduce_mvapich2::reduce,
+      Coll_reduce_binomial::reduce,
+      Coll_reduce_mvapich2::reduce,
+      Coll_reduce_mvapich2_two_level::reduce,
+      Coll_reduce_rab::reduce,
+      Coll_reduce_rab::reduce
 };
 
 intel_tuning_table_element intel_reduce_table[] =
@@ -1042,15 +1044,15 @@ intel_tuning_table_element intel_reduce_table[] =
   }
 };
 
-/* I_MPI_ADJUST_REDUCE_SCATTER 
+/* I_MPI_ADJUST_REDUCE_SCATTER
 
-MPI_Reduce_scatter 
+MPI_Reduce_scatter
 
-1. Recursive having algorithm 
-2. Pair wise exchange algorithm 
-3. Recursive doubling algorithm 
-4. Reduce + Scatterv algorithm 
-5. Topology aware Reduce + Scatterv algorithm 
+1. Recursive halving algorithm
+2. Pair wise exchange algorithm
+3. Recursive doubling algorithm
+4. Reduce + Scatterv algorithm
+5. Topology aware Reduce + Scatterv algorithm
 
 */
 static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
@@ -1059,7 +1061,7 @@ static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
 {
-  smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+  Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
   return MPI_SUCCESS;
 }
 
@@ -1070,9 +1072,9 @@ static  int  intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf,
                                                     MPI_Comm  comm)
 {
   if(op==MPI_OP_NULL || op->is_commutative())
-    return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
+    return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
   else
-    return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
+    return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
 }
 
 int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
@@ -1082,8 +1084,8 @@ int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
                                                     MPI_Comm  comm
                                                     ) ={
       intel_reduce_scatter_recursivehalving,
-      smpi_coll_tuned_reduce_scatter_mpich_pair,
-      smpi_coll_tuned_reduce_scatter_mpich_rdb,
+      Coll_reduce_scatter_mpich_pair::reduce_scatter,
+      Coll_reduce_scatter_mpich_rdb::reduce_scatter,
       intel_reduce_scatter_reduce_scatterv,
       intel_reduce_scatter_reduce_scatterv
 };
@@ -1474,27 +1476,27 @@ intel_tuning_table_element intel_reduce_scatter_table[] =
   }
 };
 
-/* I_MPI_ADJUST_ALLGATHER 
+/* I_MPI_ADJUST_ALLGATHER
 
-MPI_Allgather 
+MPI_Allgather
 
-1. Recursive doubling algorithm 
-2. Bruck's algorithm 
-3. Ring algorithm 
-4. Topology aware Gatherv + Bcast algorithm 
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
 
 */
 
-int (*intel_allgather_functions_table[])(void *sbuf, int scount, 
+int (*intel_allgather_functions_table[])(void *sbuf, int scount,
                                               MPI_Datatype sdtype,
-                                              void* rbuf, int rcount, 
-                                              MPI_Datatype rdtype, 
+                                              void* rbuf, int rcount,
+                                              MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_allgather_rdb,
-      smpi_coll_tuned_allgather_bruck,
-      smpi_coll_tuned_allgather_ring,
-      smpi_coll_tuned_allgather_GB
+      Coll_allgather_rdb::allgather,
+      Coll_allgather_bruck::allgather,
+      Coll_allgather_ring::allgather,
+      Coll_allgather_GB::allgather
 };
 
 intel_tuning_table_element intel_allgather_table[] =
@@ -1643,28 +1645,28 @@ intel_tuning_table_element intel_allgather_table[] =
   }
 };
 
-/* I_MPI_ADJUST_ALLGATHERV 
+/* I_MPI_ADJUST_ALLGATHERV
 
-MPI_Allgatherv 
+MPI_Allgatherv
 
-1. Recursive doubling algorithm 
-2. Bruck's algorithm 
-3. Ring algorithm 
-4. Topology aware Gatherv + Bcast algorithm 
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
 
 */
 
-int (*intel_allgatherv_functions_table[])(void *sbuf, int scount, 
+int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
                                                MPI_Datatype sdtype,
-                                               void* rbuf, int *rcounts, 
+                                               void* rbuf, int *rcounts,
                                                int *rdispls,
-                                               MPI_Datatype rdtype, 
+                                               MPI_Datatype rdtype,
                                                MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_allgatherv_mpich_rdb,
-      smpi_coll_tuned_allgatherv_ompi_bruck,
-      smpi_coll_tuned_allgatherv_ring,
-      smpi_coll_tuned_allgatherv_GB
+      Coll_allgatherv_mpich_rdb::allgatherv,
+      Coll_allgatherv_ompi_bruck::allgatherv,
+      Coll_allgatherv_ring::allgatherv,
+      Coll_allgatherv_GB::allgatherv
 };
 
 intel_tuning_table_element intel_allgatherv_table[] =
@@ -1859,22 +1861,22 @@ intel_tuning_table_element intel_allgatherv_table[] =
 
 MPI_Gather
 
-1. Binomial algorithm 
-2. Topology aware binomial algorithm 
+1. Binomial algorithm
+2. Topology aware binomial algorithm
 3. Shumilin's algorithm
 
 */
 
-int (*intel_gather_functions_table[])(void *sbuf, int scount, 
+int (*intel_gather_functions_table[])(void *sbuf, int scount,
                                            MPI_Datatype sdtype,
-                                           void* rbuf, int rcount, 
-                                           MPI_Datatype rdtype, 
+                                           void* rbuf, int rcount,
+                                           MPI_Datatype rdtype,
                                            int root,
                                            MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_gather_ompi_binomial,
-      smpi_coll_tuned_gather_ompi_binomial,
-      smpi_coll_tuned_gather_mvapich2
+      Coll_gather_ompi_binomial::gather,
+      Coll_gather_ompi_binomial::gather,
+      Coll_gather_mvapich2::gather
 };
 
 intel_tuning_table_element intel_gather_table[] =
@@ -1959,25 +1961,25 @@ intel_tuning_table_element intel_gather_table[] =
 };
 
 
-/* I_MPI_ADJUST_SCATTER 
+/* I_MPI_ADJUST_SCATTER
 
-MPI_Scatter 
+MPI_Scatter
 
-1. Binomial algorithm 
-2. Topology aware binomial algorithm 
-3. Shumilin's algorithm 
+1. Binomial algorithm
+2. Topology aware binomial algorithm
+3. Shumilin's algorithm
 
 */
 
-int (*intel_scatter_functions_table[])(void *sbuf, int scount, 
+int (*intel_scatter_functions_table[])(void *sbuf, int scount,
                                             MPI_Datatype sdtype,
-                                            void* rbuf, int rcount, 
-                                            MPI_Datatype rdtype, 
+                                            void* rbuf, int rcount,
+                                            MPI_Datatype rdtype,
                                             int root, MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_scatter_ompi_binomial,
-      smpi_coll_tuned_scatter_ompi_binomial,
-      smpi_coll_tuned_scatter_mvapich2
+      Coll_scatter_ompi_binomial::scatter,
+      Coll_scatter_ompi_binomial::scatter,
+      Coll_scatter_mvapich2::scatter
 };
 
 intel_tuning_table_element intel_scatter_table[] =
@@ -2134,12 +2136,12 @@ intel_tuning_table_element intel_scatter_table[] =
 
 
 
-/* I_MPI_ADJUST_ALLTOALLV 
+/* I_MPI_ADJUST_ALLTOALLV
 
-MPI_Alltoallv 
+MPI_Alltoallv
 
-1. Isend/Irecv + waitall algorithm 
-2. Plum's algorithm 
+1. Isend/Irecv + waitall algorithm
+2. Plum's algorithm
 
 */
 
@@ -2149,8 +2151,8 @@ int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps,
                                               MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_alltoallv_ompi_basic_linear,
-      smpi_coll_tuned_alltoallv_bruck
+      Coll_alltoallv_ompi_basic_linear::alltoallv,
+      Coll_alltoallv_bruck::alltoallv
 };
 
 intel_tuning_table_element intel_alltoallv_table[] =
@@ -2177,7 +2179,7 @@ intel_tuning_table_element intel_alltoallv_table[] =
     }
     },
     { 2147483647,2,{
-      {4,1},//0 again 
+      {4,1},//0 again
       {2147483647,2}
     }
     }
@@ -2210,19 +2212,19 @@ intel_tuning_table_element intel_alltoallv_table[] =
 };
 
 
-//These are collected from table 3.5-2 of the Intel MPI Reference Manual 
+//These are collected from table 3.5-2 of the Intel MPI Reference Manual
+
 
-    
 #define SIZECOMP_reduce_scatter\
     int total_message_size = 0;\
     for (i = 0; i < comm_size; i++) { \
         total_message_size += rcounts[i];\
     }\
     size_t block_dsize = total_message_size*dtype->size();\
-    
+
 #define SIZECOMP_allreduce\
   size_t block_dsize =rcount * dtype->size();
-  
+
 #define SIZECOMP_alltoall\
   size_t block_dsize =send_count * send_type->size();
 
@@ -2244,7 +2246,7 @@ intel_tuning_table_element intel_alltoallv_table[] =
         total_message_size += recv_count[i];\
     }\
     size_t block_dsize = total_message_size*recv_type->size();
-    
+
 #define SIZECOMP_gather\
   int rank = comm->rank();\
   size_t block_dsize = (send_buff == MPI_IN_PLACE || rank ==root) ?\
@@ -2259,9 +2261,9 @@ intel_tuning_table_element intel_alltoallv_table[] =
 
 #define SIZECOMP_alltoallv\
   size_t block_dsize = 1;
-  
+
 #define IMPI_COLL_SELECT(cat, ret, args, args2)\
-ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
+ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
 {\
     int comm_size = comm->size();\
     int i =0;\
@@ -2289,6 +2291,7 @@ ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
     args2);\
 }
 
+
 COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
 COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
 COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
@@ -2301,3 +2304,5 @@ COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,
 COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
 COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm));
 
+}
+}