Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' into clean_events
[simgrid.git] / src / smpi / colls / smpi_intel_mpi_selector.cpp
index c848923..6bec031 100644 (file)
@@ -1,6 +1,6 @@
 /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
 
-/* Copyright (c) 2009-2010, 2013-2014. The SimGrid Team.
+/* Copyright (c) 2009-2010, 2013-2017. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This program is free software; you can redistribute it and/or modify it
@@ -41,31 +41,33 @@ MPI_Allreduce
 4 - Topology aware Reduce + Bcast algorithm
 5 - Binomial gather + scatter algorithm
 6 - Topology aware binominal gather + scatter algorithm
-7 - Shumilin's ring algorithm 
+7 - Shumilin's ring algorithm
 8 - Ring algorithm
 
   as Shumilin's ring algorithm is unknown, default to ring'
 */
 
+namespace simgrid{
+namespace smpi{
 
 int (*intel_allreduce_functions_table[])(void *sendbuf,
       void *recvbuf,
       int count,
       MPI_Datatype datatype,
       MPI_Op op, MPI_Comm comm) ={
-      smpi_coll_tuned_allreduce_rdb,
-      smpi_coll_tuned_allreduce_rab1,
-      smpi_coll_tuned_allreduce_redbcast,
-      smpi_coll_tuned_allreduce_mvapich2_two_level,
-      smpi_coll_tuned_allreduce_smp_binomial,
-      smpi_coll_tuned_allreduce_mvapich2_two_level,
-      smpi_coll_tuned_allreduce_ompi_ring_segmented,
-      smpi_coll_tuned_allreduce_ompi_ring_segmented
+      Coll_allreduce_rdb::allreduce,
+      Coll_allreduce_rab1::allreduce,
+      Coll_allreduce_redbcast::allreduce,
+      Coll_allreduce_mvapich2_two_level::allreduce,
+      Coll_allreduce_smp_binomial::allreduce,
+      Coll_allreduce_mvapich2_two_level::allreduce,
+      Coll_allreduce_ompi_ring_segmented::allreduce,
+      Coll_allreduce_ompi_ring_segmented::allreduce
 };
 
 intel_tuning_table_element intel_allreduce_table[] =
 {
-  {1,{ 
+  {1,{
     { 2,9,{
       {6,7},
       {85,1},
@@ -380,13 +382,13 @@ intel_tuning_table_element intel_allreduce_table[] =
 
 
 
-/*I_MPI_ADJUST_ALLTOALL 
+/*I_MPI_ADJUST_ALLTOALL
 
-MPI_Alltoall 
+MPI_Alltoall
 
-1. Bruck's algorithm 
-2. Isend/Irecv + waitall algorithm 
-3. Pair wise exchange algorithm 
+1. Bruck's algorithm
+2. Isend/Irecv + waitall algorithm
+3. Pair wise exchange algorithm
 4. Plum's algorithm
 
 */
@@ -632,40 +634,40 @@ intel_tuning_table_element intel_alltoall_table[] =
   }
   }
 };
-int (*intel_alltoall_functions_table[])(void *sbuf, int scount, 
+int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
                                              MPI_Datatype sdtype,
-                                             void* rbuf, int rcount, 
-                                             MPI_Datatype rdtype, 
+                                             void* rbuf, int rcount,
+                                             MPI_Datatype rdtype,
                                              MPI_Comm comm) ={
-      smpi_coll_tuned_alltoall_bruck,
-      smpi_coll_tuned_alltoall_mvapich2_scatter_dest,
-      smpi_coll_tuned_alltoall_pair,
-      smpi_coll_tuned_alltoall_mvapich2//Plum is proprietary ? (and super efficient)
+      Coll_alltoall_bruck::alltoall,
+      Coll_alltoall_mvapich2_scatter_dest::alltoall,
+      Coll_alltoall_pair::alltoall,
+      Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient)
 };
 
-/*I_MPI_ADJUST_BARRIER 
+/*I_MPI_ADJUST_BARRIER
 
-MPI_Barrier 
+MPI_Barrier
 
-1. Dissemination algorithm 
-2. Recursive doubling algorithm 
-3. Topology aware dissemination algorithm 
-4. Topology aware recursive doubling algorithm 
-5. Binominal gather + scatter algorithm 
-6. Topology aware binominal gather + scatter algorithm 
+1. Dissemination algorithm
+2. Recursive doubling algorithm
+3. Topology aware dissemination algorithm
+4. Topology aware recursive doubling algorithm
+5. Binomial gather + scatter algorithm
+6. Topology aware binomial gather + scatter algorithm
 
 */
 static int intel_barrier_gather_scatter(MPI_Comm comm){
     //our default barrier performs a antibcast/bcast
-    smpi_mpi_barrier(comm);
+    Coll_barrier_default::barrier(comm);
     return MPI_SUCCESS;
 }
 
 int (*intel_barrier_functions_table[])(MPI_Comm comm) ={
-      smpi_coll_tuned_barrier_ompi_basic_linear,
-      smpi_coll_tuned_barrier_ompi_recursivedoubling,
-      smpi_coll_tuned_barrier_ompi_basic_linear,
-      smpi_coll_tuned_barrier_ompi_recursivedoubling,
+      Coll_barrier_ompi_basic_linear::barrier,
+      Coll_barrier_ompi_recursivedoubling::barrier,
+      Coll_barrier_ompi_basic_linear::barrier,
+      Coll_barrier_ompi_recursivedoubling::barrier,
       intel_barrier_gather_scatter,
       intel_barrier_gather_scatter
 };
@@ -783,31 +785,31 @@ intel_tuning_table_element intel_barrier_table[] =
 };
 
 
-/*I_MPI_ADJUST_BCAST 
+/*I_MPI_ADJUST_BCAST
 
-MPI_Bcast 
+MPI_Bcast
 
-1. Binomial algorithm 
-2. Recursive doubling algorithm 
-3. Ring algorithm 
-4. Topology aware binomial algorithm 
-5. Topology aware recursive doubling algorithm 
-6. Topology aware ring algorithm 
-7. Shumilin's bcast algorithm 
+1. Binomial algorithm
+2. Recursive doubling algorithm
+3. Ring algorithm
+4. Topology aware binomial algorithm
+5. Topology aware recursive doubling algorithm
+6. Topology aware ring algorithm
+7. Shumilin's bcast algorithm
 */
 
 int (*intel_bcast_functions_table[])(void *buff, int count,
                                           MPI_Datatype datatype, int root,
                                           MPI_Comm  comm) ={
-      smpi_coll_tuned_bcast_binomial_tree,
-      //smpi_coll_tuned_bcast_scatter_rdb_allgather,
-      smpi_coll_tuned_bcast_NTSL,
-      smpi_coll_tuned_bcast_NTSL,
-      smpi_coll_tuned_bcast_SMP_binomial,
-      //smpi_coll_tuned_bcast_scatter_rdb_allgather,
-      smpi_coll_tuned_bcast_NTSL,
-      smpi_coll_tuned_bcast_SMP_linear,
-      smpi_coll_tuned_bcast_mvapich2,//we don't know shumilin's algo'
+      Coll_bcast_binomial_tree::bcast,
+      //Coll_bcast_scatter_rdb_allgather::bcast,
+      Coll_bcast_NTSL::bcast,
+      Coll_bcast_NTSL::bcast,
+      Coll_bcast_SMP_binomial::bcast,
+      //Coll_bcast_scatter_rdb_allgather::bcast,
+      Coll_bcast_NTSL::bcast,
+      Coll_bcast_SMP_linear::bcast,
+      Coll_bcast_mvapich2::bcast,//we don't know Shumilin's algorithm
 };
 
 intel_tuning_table_element intel_bcast_table[] =
@@ -952,15 +954,15 @@ intel_tuning_table_element intel_bcast_table[] =
 };
 
 
-/*I_MPI_ADJUST_REDUCE 
+/*I_MPI_ADJUST_REDUCE
 
-MPI_Reduce 
+MPI_Reduce
 
-1. Shumilin's algorithm 
-2. Binomial algorithm 
-3. Topology aware Shumilin's algorithm 
-4. Topology aware binomial algorithm 
-5. Rabenseifner's algorithm 
+1. Shumilin's algorithm
+2. Binomial algorithm
+3. Topology aware Shumilin's algorithm
+4. Topology aware binomial algorithm
+5. Rabenseifner's algorithm
 6. Topology aware Rabenseifner's algorithm
 
 */
@@ -969,12 +971,12 @@ int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf,
                                             int count, MPI_Datatype  datatype,
                                             MPI_Op   op, int root,
                                             MPI_Comm   comm) ={
-      smpi_coll_tuned_reduce_mvapich2,
-      smpi_coll_tuned_reduce_binomial,
-      smpi_coll_tuned_reduce_mvapich2,
-      smpi_coll_tuned_reduce_mvapich2_two_level,
-      smpi_coll_tuned_reduce_rab,
-      smpi_coll_tuned_reduce_rab
+      Coll_reduce_mvapich2::reduce,
+      Coll_reduce_binomial::reduce,
+      Coll_reduce_mvapich2::reduce,
+      Coll_reduce_mvapich2_two_level::reduce,
+      Coll_reduce_rab::reduce,
+      Coll_reduce_rab::reduce
 };
 
 intel_tuning_table_element intel_reduce_table[] =
@@ -1042,15 +1044,15 @@ intel_tuning_table_element intel_reduce_table[] =
   }
 };
 
-/* I_MPI_ADJUST_REDUCE_SCATTER 
+/* I_MPI_ADJUST_REDUCE_SCATTER
 
-MPI_Reduce_scatter 
+MPI_Reduce_scatter
 
-1. Recursive having algorithm 
-2. Pair wise exchange algorithm 
-3. Recursive doubling algorithm 
-4. Reduce + Scatterv algorithm 
-5. Topology aware Reduce + Scatterv algorithm 
+1. Recursive halving algorithm
+2. Pair wise exchange algorithm
+3. Recursive doubling algorithm
+4. Reduce + Scatterv algorithm
+5. Topology aware Reduce + Scatterv algorithm
 
 */
 static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
@@ -1059,7 +1061,7 @@ static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
 {
-  smpi_mpi_reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+  Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
   return MPI_SUCCESS;
 }
 
@@ -1069,10 +1071,10 @@ static  int  intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
 {
-  if(smpi_op_is_commute(op))
-    return smpi_coll_tuned_reduce_scatter_ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
+  if(op==MPI_OP_NULL || op->is_commutative())
+    return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
   else
-    return smpi_coll_tuned_reduce_scatter_mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
+    return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
 }
 
 int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
@@ -1082,8 +1084,8 @@ int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
                                                     MPI_Comm  comm
                                                     ) ={
       intel_reduce_scatter_recursivehalving,
-      smpi_coll_tuned_reduce_scatter_mpich_pair,
-      smpi_coll_tuned_reduce_scatter_mpich_rdb,
+      Coll_reduce_scatter_mpich_pair::reduce_scatter,
+      Coll_reduce_scatter_mpich_rdb::reduce_scatter,
       intel_reduce_scatter_reduce_scatterv,
       intel_reduce_scatter_reduce_scatterv
 };
@@ -1474,27 +1476,27 @@ intel_tuning_table_element intel_reduce_scatter_table[] =
   }
 };
 
-/* I_MPI_ADJUST_ALLGATHER 
+/* I_MPI_ADJUST_ALLGATHER
 
-MPI_Allgather 
+MPI_Allgather
 
-1. Recursive doubling algorithm 
-2. Bruck's algorithm 
-3. Ring algorithm 
-4. Topology aware Gatherv + Bcast algorithm 
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
 
 */
 
-int (*intel_allgather_functions_table[])(void *sbuf, int scount, 
+int (*intel_allgather_functions_table[])(void *sbuf, int scount,
                                               MPI_Datatype sdtype,
-                                              void* rbuf, int rcount, 
-                                              MPI_Datatype rdtype, 
+                                              void* rbuf, int rcount,
+                                              MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_allgather_rdb,
-      smpi_coll_tuned_allgather_bruck,
-      smpi_coll_tuned_allgather_ring,
-      smpi_coll_tuned_allgather_GB
+      Coll_allgather_rdb::allgather,
+      Coll_allgather_bruck::allgather,
+      Coll_allgather_ring::allgather,
+      Coll_allgather_GB::allgather
 };
 
 intel_tuning_table_element intel_allgather_table[] =
@@ -1643,28 +1645,28 @@ intel_tuning_table_element intel_allgather_table[] =
   }
 };
 
-/* I_MPI_ADJUST_ALLGATHERV 
+/* I_MPI_ADJUST_ALLGATHERV
 
-MPI_Allgatherv 
+MPI_Allgatherv
 
-1. Recursive doubling algorithm 
-2. Bruck's algorithm 
-3. Ring algorithm 
-4. Topology aware Gatherv + Bcast algorithm 
+1. Recursive doubling algorithm
+2. Bruck's algorithm
+3. Ring algorithm
+4. Topology aware Gatherv + Bcast algorithm
 
 */
 
-int (*intel_allgatherv_functions_table[])(void *sbuf, int scount, 
+int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
                                                MPI_Datatype sdtype,
-                                               void* rbuf, int *rcounts, 
+                                               void* rbuf, int *rcounts,
                                                int *rdispls,
-                                               MPI_Datatype rdtype, 
+                                               MPI_Datatype rdtype,
                                                MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_allgatherv_mpich_rdb,
-      smpi_coll_tuned_allgatherv_ompi_bruck,
-      smpi_coll_tuned_allgatherv_ring,
-      smpi_coll_tuned_allgatherv_GB
+      Coll_allgatherv_mpich_rdb::allgatherv,
+      Coll_allgatherv_ompi_bruck::allgatherv,
+      Coll_allgatherv_ring::allgatherv,
+      Coll_allgatherv_GB::allgatherv
 };
 
 intel_tuning_table_element intel_allgatherv_table[] =
@@ -1859,22 +1861,22 @@ intel_tuning_table_element intel_allgatherv_table[] =
 
 MPI_Gather
 
-1. Binomial algorithm 
-2. Topology aware binomial algorithm 
+1. Binomial algorithm
+2. Topology aware binomial algorithm
 3. Shumilin's algorithm
 
 */
 
-int (*intel_gather_functions_table[])(void *sbuf, int scount, 
+int (*intel_gather_functions_table[])(void *sbuf, int scount,
                                            MPI_Datatype sdtype,
-                                           void* rbuf, int rcount, 
-                                           MPI_Datatype rdtype, 
+                                           void* rbuf, int rcount,
+                                           MPI_Datatype rdtype,
                                            int root,
                                            MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_gather_ompi_binomial,
-      smpi_coll_tuned_gather_ompi_binomial,
-      smpi_coll_tuned_gather_mvapich2
+      Coll_gather_ompi_binomial::gather,
+      Coll_gather_ompi_binomial::gather,
+      Coll_gather_mvapich2::gather
 };
 
 intel_tuning_table_element intel_gather_table[] =
@@ -1959,25 +1961,25 @@ intel_tuning_table_element intel_gather_table[] =
 };
 
 
-/* I_MPI_ADJUST_SCATTER 
+/* I_MPI_ADJUST_SCATTER
 
-MPI_Scatter 
+MPI_Scatter
 
-1. Binomial algorithm 
-2. Topology aware binomial algorithm 
-3. Shumilin's algorithm 
+1. Binomial algorithm
+2. Topology aware binomial algorithm
+3. Shumilin's algorithm
 
 */
 
-int (*intel_scatter_functions_table[])(void *sbuf, int scount, 
+int (*intel_scatter_functions_table[])(void *sbuf, int scount,
                                             MPI_Datatype sdtype,
-                                            void* rbuf, int rcount, 
-                                            MPI_Datatype rdtype, 
+                                            void* rbuf, int rcount,
+                                            MPI_Datatype rdtype,
                                             int root, MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_scatter_ompi_binomial,
-      smpi_coll_tuned_scatter_ompi_binomial,
-      smpi_coll_tuned_scatter_mvapich2
+      Coll_scatter_ompi_binomial::scatter,
+      Coll_scatter_ompi_binomial::scatter,
+      Coll_scatter_mvapich2::scatter
 };
 
 intel_tuning_table_element intel_scatter_table[] =
@@ -2134,12 +2136,12 @@ intel_tuning_table_element intel_scatter_table[] =
 
 
 
-/* I_MPI_ADJUST_ALLTOALLV 
+/* I_MPI_ADJUST_ALLTOALLV
 
-MPI_Alltoallv 
+MPI_Alltoallv
 
-1. Isend/Irecv + waitall algorithm 
-2. Plum's algorithm 
+1. Isend/Irecv + waitall algorithm
+2. Plum's algorithm
 
 */
 
@@ -2149,8 +2151,8 @@ int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps,
                                               MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      smpi_coll_tuned_alltoallv_ompi_basic_linear,
-      smpi_coll_tuned_alltoallv_bruck
+      Coll_alltoallv_ompi_basic_linear::alltoallv,
+      Coll_alltoallv_bruck::alltoallv
 };
 
 intel_tuning_table_element intel_alltoallv_table[] =
@@ -2177,7 +2179,7 @@ intel_tuning_table_element intel_alltoallv_table[] =
     }
     },
     { 2147483647,2,{
-      {4,1},//0 again 
+      {4,1},//0 again
       {2147483647,2}
     }
     }
@@ -2210,70 +2212,70 @@ intel_tuning_table_element intel_alltoallv_table[] =
 };
 
 
-//These are collected from table 3.5-2 of the Intel MPI Reference Manual 
+//These are collected from table 3.5-2 of the Intel MPI Reference Manual
+
 
-    
 #define SIZECOMP_reduce_scatter\
     int total_message_size = 0;\
     for (i = 0; i < comm_size; i++) { \
         total_message_size += rcounts[i];\
     }\
-    size_t block_dsize = total_message_size*smpi_datatype_size(dtype);\
-    
+    size_t block_dsize = total_message_size*dtype->size();\
+
 #define SIZECOMP_allreduce\
-  size_t block_dsize =rcount * smpi_datatype_size(dtype);
-  
+  size_t block_dsize =rcount * dtype->size();
+
 #define SIZECOMP_alltoall\
-  size_t block_dsize =send_count * smpi_datatype_size(send_type);
+  size_t block_dsize =send_count * send_type->size();
 
 #define SIZECOMP_bcast\
-  size_t block_dsize =count * smpi_datatype_size(datatype);
+  size_t block_dsize =count * datatype->size();
 
 #define SIZECOMP_reduce\
-  size_t block_dsize =count * smpi_datatype_size(datatype);
+  size_t block_dsize =count * datatype->size();
 
 #define SIZECOMP_barrier\
   size_t block_dsize = 1;
 
 #define SIZECOMP_allgather\
-  size_t block_dsize =recv_count * smpi_datatype_size(recv_type);
+  size_t block_dsize =recv_count * recv_type->size();
 
 #define SIZECOMP_allgatherv\
     int total_message_size = 0;\
     for (i = 0; i < comm_size; i++) { \
         total_message_size += recv_count[i];\
     }\
-    size_t block_dsize = total_message_size*smpi_datatype_size(recv_type);
-    
+    size_t block_dsize = total_message_size*recv_type->size();
+
 #define SIZECOMP_gather\
-  int rank = smpi_comm_rank(comm);\
+  int rank = comm->rank();\
   size_t block_dsize = (send_buff == MPI_IN_PLACE || rank ==root) ?\
-                recv_count * smpi_datatype_size(recv_type) :\
-                send_count * smpi_datatype_size(send_type);
+                recv_count * recv_type->size() :\
+                send_count * send_type->size();
 
 #define SIZECOMP_scatter\
-  int rank = smpi_comm_rank(comm);\
+  int rank = comm->rank();\
   size_t block_dsize = (sendbuf == MPI_IN_PLACE || rank !=root ) ?\
-                recvcount * smpi_datatype_size(recvtype) :\
-                sendcount * smpi_datatype_size(sendtype);
+                recvcount * recvtype->size() :\
+                sendcount * sendtype->size();
 
 #define SIZECOMP_alltoallv\
   size_t block_dsize = 1;
-  
+
 #define IMPI_COLL_SELECT(cat, ret, args, args2)\
-ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
+ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
 {\
-    int comm_size = smpi_comm_size(comm);\
+    int comm_size = comm->size();\
     int i =0;\
     SIZECOMP_ ## cat\
     i=0;\
     int j =0, k=0;\
-    if(smpi_comm_get_leaders_comm(comm)==MPI_COMM_NULL){\
-      smpi_comm_init_smp(comm);\
+    if(comm->get_leaders_comm()==MPI_COMM_NULL){\
+      comm->init_smp();\
     }\
     int local_size=1;\
-    if (smpi_comm_is_uniform(comm)) {\
-        local_size = smpi_comm_size(smpi_comm_get_intra_comm(comm));\
+    if (comm->is_uniform()) {\
+        local_size = comm->get_intra_comm()->size();\
     }\
     while(i < INTEL_MAX_NB_PPN &&\
     local_size!=intel_ ## cat ## _table[i].ppn)\
@@ -2289,6 +2291,7 @@ ret smpi_coll_tuned_ ## cat ## _impi (COLL_UNPAREN args)\
     args2);\
 }
 
+
 COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
 COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
 COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
@@ -2301,3 +2304,5 @@ COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,
 COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
 COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm));
 
+}
+}