Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Add new entry in Release_Notes.
[simgrid.git] / src / smpi / colls / smpi_intel_mpi_selector.cpp
index 5166533..1c053c2 100644 (file)
@@ -1,6 +1,6 @@
 /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
 
-/* Copyright (c) 2009-2019. The SimGrid Team.
+/* Copyright (c) 2009-2023. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This program is free software; you can redistribute it and/or modify it
@@ -47,22 +47,21 @@ MPI_Allreduce
   as Shumilin's ring algorithm is unknown, default to ring
 */
 
-namespace simgrid{
-namespace smpi{
+namespace simgrid::smpi {
 
-int (*intel_allreduce_functions_table[])(void *sendbuf,
+int (*intel_allreduce_functions_table[])(const void *sendbuf,
       void *recvbuf,
       int count,
       MPI_Datatype datatype,
       MPI_Op op, MPI_Comm comm) ={
-      Coll_allreduce_rdb::allreduce,
-      Coll_allreduce_rab1::allreduce,
-      Coll_allreduce_redbcast::allreduce,
-      Coll_allreduce_mvapich2_two_level::allreduce,
-      Coll_allreduce_smp_binomial::allreduce,
-      Coll_allreduce_mvapich2_two_level::allreduce,
-      Coll_allreduce_ompi_ring_segmented::allreduce,
-      Coll_allreduce_ompi_ring_segmented::allreduce
+      allreduce__rdb,
+      allreduce__rab1,
+      allreduce__redbcast,
+      allreduce__mvapich2_two_level,
+      allreduce__smp_binomial,
+      allreduce__mvapich2_two_level,
+      allreduce__ompi_ring_segmented,
+      allreduce__ompi_ring_segmented
 };
 
 intel_tuning_table_element intel_allreduce_table[] =
@@ -634,15 +633,15 @@ intel_tuning_table_element intel_alltoall_table[] =
   }
   }
 };
-int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
+int (*intel_alltoall_functions_table[])(const void *sbuf, int scount,
                                              MPI_Datatype sdtype,
                                              void* rbuf, int rcount,
                                              MPI_Datatype rdtype,
                                              MPI_Comm comm) ={
-      Coll_alltoall_bruck::alltoall,
-      Coll_alltoall_mvapich2_scatter_dest::alltoall,
-      Coll_alltoall_pair::alltoall,
-      Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient)
+      alltoall__bruck,
+      alltoall__mvapich2_scatter_dest,
+      alltoall__pair,
+      alltoall__mvapich2//Plum is proprietary ? (and super efficient)
 };
 
 /*I_MPI_ADJUST_BARRIER
@@ -658,16 +657,16 @@ MPI_Barrier
 
 */
 static int intel_barrier_gather_scatter(MPI_Comm comm){
-    //our default barrier performs a antibcast/bcast
-    Coll_barrier_default::barrier(comm);
-    return MPI_SUCCESS;
+  // our default barrier performs an antibcast/bcast
+  barrier__default(comm);
+  return MPI_SUCCESS;
 }
 
 int (*intel_barrier_functions_table[])(MPI_Comm comm) ={
-      Coll_barrier_ompi_basic_linear::barrier,
-      Coll_barrier_ompi_recursivedoubling::barrier,
-      Coll_barrier_ompi_basic_linear::barrier,
-      Coll_barrier_ompi_recursivedoubling::barrier,
+      barrier__ompi_basic_linear,
+      barrier__ompi_recursivedoubling,
+      barrier__ompi_basic_linear,
+      barrier__ompi_recursivedoubling,
       intel_barrier_gather_scatter,
       intel_barrier_gather_scatter
 };
@@ -801,15 +800,15 @@ MPI_Bcast
 int (*intel_bcast_functions_table[])(void *buff, int count,
                                           MPI_Datatype datatype, int root,
                                           MPI_Comm  comm) ={
-      Coll_bcast_binomial_tree::bcast,
-      //Coll_bcast_scatter_rdb_allgather::bcast,
-      Coll_bcast_NTSL::bcast,
-      Coll_bcast_NTSL::bcast,
-      Coll_bcast_SMP_binomial::bcast,
-      //Coll_bcast_scatter_rdb_allgather::bcast,
-      Coll_bcast_NTSL::bcast,
-      Coll_bcast_SMP_linear::bcast,
-      Coll_bcast_mvapich2::bcast,//we don't know shumilin's algo'
+      bcast__binomial_tree,
+      //bcast__scatter_rdb_allgather,
+      bcast__NTSL,
+      bcast__NTSL,
+      bcast__SMP_binomial,
+      //bcast__scatter_rdb_allgather,
+      bcast__NTSL,
+      bcast__SMP_linear,
+      bcast__mvapich2,//we don't know shumilin's algo'
 };
 
 intel_tuning_table_element intel_bcast_table[] =
@@ -967,16 +966,16 @@ MPI_Reduce
 
 */
 
-int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf,
+int (*intel_reduce_functions_table[])(const void *sendbuf, void *recvbuf,
                                             int count, MPI_Datatype  datatype,
                                             MPI_Op   op, int root,
                                             MPI_Comm   comm) ={
-      Coll_reduce_mvapich2::reduce,
-      Coll_reduce_binomial::reduce,
-      Coll_reduce_mvapich2::reduce,
-      Coll_reduce_mvapich2_two_level::reduce,
-      Coll_reduce_rab::reduce,
-      Coll_reduce_rab::reduce
+      reduce__mvapich2,
+      reduce__binomial,
+      reduce__mvapich2,
+      reduce__mvapich2_two_level,
+      reduce__rab,
+      reduce__rab
 };
 
 intel_tuning_table_element intel_reduce_table[] =
@@ -1055,37 +1054,37 @@ MPI_Reduce_scatter
 5. Topology aware Reduce + Scatterv algorithm
 
 */
-static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
-                                                    int *rcounts,
+static  int intel_reduce_scatter_reduce_scatterv(const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
 {
-  Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+  reduce_scatter__default(sbuf, rbuf, rcounts,dtype, op,comm);
   return MPI_SUCCESS;
 }
 
-static  int  intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf,
-                                                    int *rcounts,
+static  int  intel_reduce_scatter_recursivehalving(const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
 {
   if(op==MPI_OP_NULL || op->is_commutative())
-    return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+    return reduce_scatter__ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
   else
-    return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+    return reduce_scatter__mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
 }
 
-int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
-                                                    int *rcounts,
+int (*intel_reduce_scatter_functions_table[])( const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm
                                                     ) ={
       intel_reduce_scatter_recursivehalving,
-      Coll_reduce_scatter_mpich_pair::reduce_scatter,
-      Coll_reduce_scatter_mpich_rdb::reduce_scatter,
+      reduce_scatter__mpich_pair,
+      reduce_scatter__mpich_rdb,
       intel_reduce_scatter_reduce_scatterv,
       intel_reduce_scatter_reduce_scatterv
 };
@@ -1487,16 +1486,16 @@ MPI_Allgather
 
 */
 
-int (*intel_allgather_functions_table[])(void *sbuf, int scount,
+int (*intel_allgather_functions_table[])(const void *sbuf, int scount,
                                               MPI_Datatype sdtype,
                                               void* rbuf, int rcount,
                                               MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      Coll_allgather_rdb::allgather,
-      Coll_allgather_bruck::allgather,
-      Coll_allgather_ring::allgather,
-      Coll_allgather_GB::allgather
+      allgather__rdb,
+      allgather__bruck,
+      allgather__ring,
+      allgather__GB
 };
 
 intel_tuning_table_element intel_allgather_table[] =
@@ -1656,17 +1655,17 @@ MPI_Allgatherv
 
 */
 
-int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
+int (*intel_allgatherv_functions_table[])(const void *sbuf, int scount,
                                                MPI_Datatype sdtype,
-                                               void* rbuf, int *rcounts,
-                                               int *rdispls,
+                                               void* rbuf, const int *rcounts,
+                                               const int *rdispls,
                                                MPI_Datatype rdtype,
                                                MPI_Comm  comm
                                                     ) ={
-      Coll_allgatherv_mpich_rdb::allgatherv,
-      Coll_allgatherv_ompi_bruck::allgatherv,
-      Coll_allgatherv_ring::allgatherv,
-      Coll_allgatherv_GB::allgatherv
+      allgatherv__mpich_rdb,
+      allgatherv__ompi_bruck,
+      allgatherv__ring,
+      allgatherv__GB
 };
 
 intel_tuning_table_element intel_allgatherv_table[] =
@@ -1867,16 +1866,16 @@ MPI_Gather
 
 */
 
-int (*intel_gather_functions_table[])(void *sbuf, int scount,
+int (*intel_gather_functions_table[])(const void *sbuf, int scount,
                                            MPI_Datatype sdtype,
                                            void* rbuf, int rcount,
                                            MPI_Datatype rdtype,
                                            int root,
                                            MPI_Comm  comm
                                                     ) ={
-      Coll_gather_ompi_binomial::gather,
-      Coll_gather_ompi_binomial::gather,
-      Coll_gather_mvapich2::gather
+      gather__ompi_binomial,
+      gather__ompi_binomial,
+      gather__mvapich2
 };
 
 intel_tuning_table_element intel_gather_table[] =
@@ -1971,15 +1970,15 @@ MPI_Scatter
 
 */
 
-int (*intel_scatter_functions_table[])(void *sbuf, int scount,
+int (*intel_scatter_functions_table[])(const void *sbuf, int scount,
                                             MPI_Datatype sdtype,
                                             void* rbuf, int rcount,
                                             MPI_Datatype rdtype,
                                             int root, MPI_Comm  comm
                                                     ) ={
-      Coll_scatter_ompi_binomial::scatter,
-      Coll_scatter_ompi_binomial::scatter,
-      Coll_scatter_mvapich2::scatter
+      scatter__ompi_binomial,
+      scatter__ompi_binomial,
+      scatter__mvapich2
 };
 
 intel_tuning_table_element intel_scatter_table[] =
@@ -2145,14 +2144,14 @@ MPI_Alltoallv
 
 */
 
-int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps,
+int (*intel_alltoallv_functions_table[])(const void *sbuf, const int *scounts, const int *sdisps,
                                               MPI_Datatype sdtype,
-                                              void *rbuf, int *rcounts, int *rdisps,
+                                              void *rbuf, const int *rcounts, const int *rdisps,
                                               MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      Coll_alltoallv_ompi_basic_linear::alltoallv,
-      Coll_alltoallv_bruck::alltoallv
+      alltoallv__ompi_basic_linear,
+      alltoallv__bruck
 };
 
 intel_tuning_table_element intel_alltoallv_table[] =
@@ -2262,47 +2261,44 @@ intel_tuning_table_element intel_alltoallv_table[] =
 #define SIZECOMP_alltoallv\
   size_t block_dsize = 1;
 
-#define IMPI_COLL_SELECT(cat, ret, args, args2)\
-ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
-{\
-    int comm_size = comm->size();\
-    int i =0;\
-    SIZECOMP_ ## cat\
-    i=0;\
-    int j =0, k=0;\
-    if(comm->get_leaders_comm()==MPI_COMM_NULL){\
-      comm->init_smp();\
-    }\
-    int local_size=1;\
-    if (comm->is_uniform()) {\
-        local_size = comm->get_intra_comm()->size();\
-    }\
-    while(i < INTEL_MAX_NB_PPN &&\
-    local_size!=intel_ ## cat ## _table[i].ppn)\
-      i++;\
-    if(i==INTEL_MAX_NB_PPN) i=0;\
-    while(comm_size>intel_ ## cat ## _table[i].elems[j].max_num_proc\
-        && j < INTEL_MAX_NB_THRESHOLDS)\
-      j++;\
-    while(block_dsize >=intel_ ## cat ## _table[i].elems[j].elems[k].max_size\
-         && k< intel_ ## cat ## _table[i].elems[j].num_elems)\
-      k++;\
-    return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].elems[k].algo-1]\
-    args2);\
-}
-
-
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHER_SIG, (send_buff,send_count,send_type,recv_buff,recv_count,recv_type,comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALL_SIG,(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_BCAST_SIG , (buf, count, datatype, root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm));
-
-}
-}
+#define IMPI_COLL_SELECT(cat, ret, args, args2) /* defines <cat>__impi(args): table-driven algorithm dispatch */       \
+  ret _XBT_CONCAT2(cat, __impi)(COLL_UNPAREN args)                                                                     \
+  {                                                                                                                    \
+    int comm_size = comm->size();                                                                                      \
+    int i         = 0;                                                                                                 \
+    _XBT_CONCAT(SIZECOMP_, cat) /* per-collective snippet; must declare block_dsize, which is used below */            \
+    i     = 0; /* reset i (presumably some SIZECOMP_ snippets use it as scratch -- not visible here) */                \
+    int j = 0, k = 0;                                                                                                  \
+    if (comm->get_leaders_comm() == MPI_COMM_NULL) {                                                                   \
+      comm->init_smp();                                                                                                \
+    }                                                                                                                  \
+    int local_size = 1; /* stays 1 unless the communicator is uniform */                                               \
+    if (comm->is_uniform()) {                                                                                          \
+      local_size = comm->get_intra_comm()->size();                                                                     \
+    }                                                                                                                  \
+    while (i < INTEL_MAX_NB_PPN && local_size != _XBT_CONCAT3(intel_, cat, _table)[i].ppn)                             \
+      i++; /* look for the table row whose ppn equals local_size */                                                    \
+    if (i == INTEL_MAX_NB_PPN)                                                                                         \
+      i = 0; /* no row matched: fall back to the first one */                                                          \
+    while (j < INTEL_MAX_NB_THRESHOLDS && comm_size > _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].max_num_proc)      \
+      j++; /* bound is tested first, so elems[j] is never read with j == INTEL_MAX_NB_THRESHOLDS in the condition */   \
+    while (k < _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].num_elems &&                                              \
+           block_dsize >= _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].elems[k].max_size)                             \
+      k++; /* same ordering: check k against num_elems before reading elems[k].max_size */                             \
+    return (_XBT_CONCAT3(intel_, cat, /* algo is 1-based, hence the "- 1" when indexing the function table */          \
+                         _functions_table)[_XBT_CONCAT3(intel_, cat, _table)[i].elems[j].elems[k].algo - 1] args2);    \
+  }
+
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHER_SIG, (send_buff,send_count,send_type,recv_buff,recv_count,recv_type,comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALL_SIG,(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BCAST_SIG , (buf, count, datatype, root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm))
+
+} // namespace simgrid::smpi