Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Add new entry in Release_Notes.
[simgrid.git] / src / smpi / colls / smpi_intel_mpi_selector.cpp
index 6bec031..1c053c2 100644 (file)
@@ -1,12 +1,12 @@
 /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
 
-/* Copyright (c) 2009-2010, 2013-2017. The SimGrid Team.
+/* Copyright (c) 2009-2023. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
-#include "colls_private.h"
+#include "colls_private.hpp"
 
 // This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching runs with 1,2,4,8,16 processes per node.
 
 #define INTEL_MAX_NB_NUMPROCS  12
 #define INTEL_MAX_NB_PPN  5  /* 1 2 4 8 16 ppn */
 
-typedef struct {
+struct intel_tuning_table_size_element {
   unsigned int max_size;
   int algo;
-} intel_tuning_table_size_element;
+};
 
-typedef struct {
+struct intel_tuning_table_numproc_element {
   int max_num_proc;
   int num_elems;
   intel_tuning_table_size_element elems[INTEL_MAX_NB_THRESHOLDS];
-} intel_tuning_table_numproc_element;
+};
 
-typedef struct {
+struct intel_tuning_table_element {
   int ppn;
   intel_tuning_table_numproc_element elems[INTEL_MAX_NB_NUMPROCS];
-} intel_tuning_table_element;
+};
 
 /*
 I_MPI_ADJUST_ALLREDUCE
@@ -47,22 +47,21 @@ MPI_Allreduce
   as Shumilin's ring algorithm is unknown, default to ring'
 */
 
-namespace simgrid{
-namespace smpi{
+namespace simgrid::smpi {
 
-int (*intel_allreduce_functions_table[])(void *sendbuf,
+int (*intel_allreduce_functions_table[])(const void *sendbuf,
       void *recvbuf,
       int count,
       MPI_Datatype datatype,
       MPI_Op op, MPI_Comm comm) ={
-      Coll_allreduce_rdb::allreduce,
-      Coll_allreduce_rab1::allreduce,
-      Coll_allreduce_redbcast::allreduce,
-      Coll_allreduce_mvapich2_two_level::allreduce,
-      Coll_allreduce_smp_binomial::allreduce,
-      Coll_allreduce_mvapich2_two_level::allreduce,
-      Coll_allreduce_ompi_ring_segmented::allreduce,
-      Coll_allreduce_ompi_ring_segmented::allreduce
+      allreduce__rdb,
+      allreduce__rab1,
+      allreduce__redbcast,
+      allreduce__mvapich2_two_level,
+      allreduce__smp_binomial,
+      allreduce__mvapich2_two_level,
+      allreduce__ompi_ring_segmented,
+      allreduce__ompi_ring_segmented
 };
 
 intel_tuning_table_element intel_allreduce_table[] =
@@ -634,15 +633,15 @@ intel_tuning_table_element intel_alltoall_table[] =
   }
   }
 };
-int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
+int (*intel_alltoall_functions_table[])(const void *sbuf, int scount,
                                              MPI_Datatype sdtype,
                                              void* rbuf, int rcount,
                                              MPI_Datatype rdtype,
                                              MPI_Comm comm) ={
-      Coll_alltoall_bruck::alltoall,
-      Coll_alltoall_mvapich2_scatter_dest::alltoall,
-      Coll_alltoall_pair::alltoall,
-      Coll_alltoall_mvapich2::alltoall//Plum is proprietary ? (and super efficient)
+      alltoall__bruck,
+      alltoall__mvapich2_scatter_dest,
+      alltoall__pair,
+      alltoall__mvapich2//Plum is proprietary? (and super efficient)
 };
 
 /*I_MPI_ADJUST_BARRIER
@@ -658,16 +657,16 @@ MPI_Barrier
 
 */
 static int intel_barrier_gather_scatter(MPI_Comm comm){
-    //our default barrier performs a antibcast/bcast
-    Coll_barrier_default::barrier(comm);
-    return MPI_SUCCESS;
+  // our default barrier performs an antibcast/bcast
+  barrier__default(comm);
+  return MPI_SUCCESS;
 }
 
 int (*intel_barrier_functions_table[])(MPI_Comm comm) ={
-      Coll_barrier_ompi_basic_linear::barrier,
-      Coll_barrier_ompi_recursivedoubling::barrier,
-      Coll_barrier_ompi_basic_linear::barrier,
-      Coll_barrier_ompi_recursivedoubling::barrier,
+      barrier__ompi_basic_linear,
+      barrier__ompi_recursivedoubling,
+      barrier__ompi_basic_linear,
+      barrier__ompi_recursivedoubling,
       intel_barrier_gather_scatter,
       intel_barrier_gather_scatter
 };
@@ -801,15 +800,15 @@ MPI_Bcast
 int (*intel_bcast_functions_table[])(void *buff, int count,
                                           MPI_Datatype datatype, int root,
                                           MPI_Comm  comm) ={
-      Coll_bcast_binomial_tree::bcast,
-      //Coll_bcast_scatter_rdb_allgather::bcast,
-      Coll_bcast_NTSL::bcast,
-      Coll_bcast_NTSL::bcast,
-      Coll_bcast_SMP_binomial::bcast,
-      //Coll_bcast_scatter_rdb_allgather::bcast,
-      Coll_bcast_NTSL::bcast,
-      Coll_bcast_SMP_linear::bcast,
-      Coll_bcast_mvapich2::bcast,//we don't know shumilin's algo'
+      bcast__binomial_tree,
+      //bcast__scatter_rdb_allgather,
+      bcast__NTSL,
+      bcast__NTSL,
+      bcast__SMP_binomial,
+      //bcast__scatter_rdb_allgather,
+      bcast__NTSL,
+      bcast__SMP_linear,
+      bcast__mvapich2,//we don't know Shumilin's algorithm
 };
 
 intel_tuning_table_element intel_bcast_table[] =
@@ -967,16 +966,16 @@ MPI_Reduce
 
 */
 
-int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf,
+int (*intel_reduce_functions_table[])(const void *sendbuf, void *recvbuf,
                                             int count, MPI_Datatype  datatype,
                                             MPI_Op   op, int root,
                                             MPI_Comm   comm) ={
-      Coll_reduce_mvapich2::reduce,
-      Coll_reduce_binomial::reduce,
-      Coll_reduce_mvapich2::reduce,
-      Coll_reduce_mvapich2_two_level::reduce,
-      Coll_reduce_rab::reduce,
-      Coll_reduce_rab::reduce
+      reduce__mvapich2,
+      reduce__binomial,
+      reduce__mvapich2,
+      reduce__mvapich2_two_level,
+      reduce__rab,
+      reduce__rab
 };
 
 intel_tuning_table_element intel_reduce_table[] =
@@ -1055,37 +1054,37 @@ MPI_Reduce_scatter
 5. Topology aware Reduce + Scatterv algorithm
 
 */
-static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
-                                                    int *rcounts,
+static  int intel_reduce_scatter_reduce_scatterv(const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
 {
-  Coll_reduce_scatter_default::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+  reduce_scatter__default(sbuf, rbuf, rcounts,dtype, op,comm);
   return MPI_SUCCESS;
 }
 
-static  int  intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf,
-                                                    int *rcounts,
+static  int  intel_reduce_scatter_recursivehalving(const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
 {
   if(op==MPI_OP_NULL || op->is_commutative())
-    return Coll_reduce_scatter_ompi_basic_recursivehalving::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+    return reduce_scatter__ompi_basic_recursivehalving(sbuf, rbuf, rcounts,dtype, op,comm);
   else
-    return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
+    return reduce_scatter__mvapich2(sbuf, rbuf, rcounts,dtype, op,comm);
 }
 
-int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
-                                                    int *rcounts,
+int (*intel_reduce_scatter_functions_table[])( const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm
                                                     ) ={
       intel_reduce_scatter_recursivehalving,
-      Coll_reduce_scatter_mpich_pair::reduce_scatter,
-      Coll_reduce_scatter_mpich_rdb::reduce_scatter,
+      reduce_scatter__mpich_pair,
+      reduce_scatter__mpich_rdb,
       intel_reduce_scatter_reduce_scatterv,
       intel_reduce_scatter_reduce_scatterv
 };
@@ -1487,16 +1486,16 @@ MPI_Allgather
 
 */
 
-int (*intel_allgather_functions_table[])(void *sbuf, int scount,
+int (*intel_allgather_functions_table[])(const void *sbuf, int scount,
                                               MPI_Datatype sdtype,
                                               void* rbuf, int rcount,
                                               MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      Coll_allgather_rdb::allgather,
-      Coll_allgather_bruck::allgather,
-      Coll_allgather_ring::allgather,
-      Coll_allgather_GB::allgather
+      allgather__rdb,
+      allgather__bruck,
+      allgather__ring,
+      allgather__GB
 };
 
 intel_tuning_table_element intel_allgather_table[] =
@@ -1656,17 +1655,17 @@ MPI_Allgatherv
 
 */
 
-int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
+int (*intel_allgatherv_functions_table[])(const void *sbuf, int scount,
                                                MPI_Datatype sdtype,
-                                               void* rbuf, int *rcounts,
-                                               int *rdispls,
+                                               void* rbuf, const int *rcounts,
+                                               const int *rdispls,
                                                MPI_Datatype rdtype,
                                                MPI_Comm  comm
                                                     ) ={
-      Coll_allgatherv_mpich_rdb::allgatherv,
-      Coll_allgatherv_ompi_bruck::allgatherv,
-      Coll_allgatherv_ring::allgatherv,
-      Coll_allgatherv_GB::allgatherv
+      allgatherv__mpich_rdb,
+      allgatherv__ompi_bruck,
+      allgatherv__ring,
+      allgatherv__GB
 };
 
 intel_tuning_table_element intel_allgatherv_table[] =
@@ -1867,16 +1866,16 @@ MPI_Gather
 
 */
 
-int (*intel_gather_functions_table[])(void *sbuf, int scount,
+int (*intel_gather_functions_table[])(const void *sbuf, int scount,
                                            MPI_Datatype sdtype,
                                            void* rbuf, int rcount,
                                            MPI_Datatype rdtype,
                                            int root,
                                            MPI_Comm  comm
                                                     ) ={
-      Coll_gather_ompi_binomial::gather,
-      Coll_gather_ompi_binomial::gather,
-      Coll_gather_mvapich2::gather
+      gather__ompi_binomial,
+      gather__ompi_binomial,
+      gather__mvapich2
 };
 
 intel_tuning_table_element intel_gather_table[] =
@@ -1971,15 +1970,15 @@ MPI_Scatter
 
 */
 
-int (*intel_scatter_functions_table[])(void *sbuf, int scount,
+int (*intel_scatter_functions_table[])(const void *sbuf, int scount,
                                             MPI_Datatype sdtype,
                                             void* rbuf, int rcount,
                                             MPI_Datatype rdtype,
                                             int root, MPI_Comm  comm
                                                     ) ={
-      Coll_scatter_ompi_binomial::scatter,
-      Coll_scatter_ompi_binomial::scatter,
-      Coll_scatter_mvapich2::scatter
+      scatter__ompi_binomial,
+      scatter__ompi_binomial,
+      scatter__mvapich2
 };
 
 intel_tuning_table_element intel_scatter_table[] =
@@ -2145,14 +2144,14 @@ MPI_Alltoallv
 
 */
 
-int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps,
+int (*intel_alltoallv_functions_table[])(const void *sbuf, const int *scounts, const int *sdisps,
                                               MPI_Datatype sdtype,
-                                              void *rbuf, int *rcounts, int *rdisps,
+                                              void *rbuf, const int *rcounts, const int *rdisps,
                                               MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
-      Coll_alltoallv_ompi_basic_linear::alltoallv,
-      Coll_alltoallv_bruck::alltoallv
+      alltoallv__ompi_basic_linear,
+      alltoallv__bruck
 };
 
 intel_tuning_table_element intel_alltoallv_table[] =
@@ -2262,47 +2261,44 @@ intel_tuning_table_element intel_alltoallv_table[] =
 #define SIZECOMP_alltoallv\
   size_t block_dsize = 1;
 
-#define IMPI_COLL_SELECT(cat, ret, args, args2)\
-ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
-{\
-    int comm_size = comm->size();\
-    int i =0;\
-    SIZECOMP_ ## cat\
-    i=0;\
-    int j =0, k=0;\
-    if(comm->get_leaders_comm()==MPI_COMM_NULL){\
-      comm->init_smp();\
-    }\
-    int local_size=1;\
-    if (comm->is_uniform()) {\
-        local_size = comm->get_intra_comm()->size();\
-    }\
-    while(i < INTEL_MAX_NB_PPN &&\
-    local_size!=intel_ ## cat ## _table[i].ppn)\
-      i++;\
-    if(i==INTEL_MAX_NB_PPN) i=0;\
-    while(comm_size>intel_ ## cat ## _table[i].elems[j].max_num_proc\
-        && j < INTEL_MAX_NB_THRESHOLDS)\
-      j++;\
-    while(block_dsize >=intel_ ## cat ## _table[i].elems[j].elems[k].max_size\
-         && k< intel_ ## cat ## _table[i].elems[j].num_elems)\
-      k++;\
-    return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].elems[k].algo-1]\
-    args2);\
-}
-
-
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHER_SIG, (send_buff,send_count,send_type,recv_buff,recv_count,recv_type,comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALL_SIG,(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_BCAST_SIG , (buf, count, datatype, root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm));
-COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm));
-
-}
-}
+#define IMPI_COLL_SELECT(cat, ret, args, args2)                                                                        \
+  ret _XBT_CONCAT2(cat, __impi)(COLL_UNPAREN args)                                                          \
+  {                                                                                                                    \
+    int comm_size = comm->size();                                                                                      \
+    int i         = 0;                                                                                                 \
+    _XBT_CONCAT(SIZECOMP_, cat)                                                                                        \
+    i     = 0;                                                                                                         \
+    int j = 0, k = 0;                                                                                                  \
+    if (comm->get_leaders_comm() == MPI_COMM_NULL) {                                                                   \
+      comm->init_smp();                                                                                                \
+    }                                                                                                                  \
+    int local_size = 1;                                                                                                \
+    if (comm->is_uniform()) {                                                                                          \
+      local_size = comm->get_intra_comm()->size();                                                                     \
+    }                                                                                                                  \
+    while (i < INTEL_MAX_NB_PPN && local_size != _XBT_CONCAT3(intel_, cat, _table)[i].ppn)                             \
+      i++;                                                                                                             \
+    if (i == INTEL_MAX_NB_PPN)                                                                                         \
+      i = 0;                                                                                                           \
+    while (comm_size > _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].max_num_proc && j < INTEL_MAX_NB_THRESHOLDS)      \
+      j++;                                                                                                             \
+    while (block_dsize >= _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].elems[k].max_size &&                           \
+           k < _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].num_elems)                                                \
+      k++;                                                                                                             \
+    return (_XBT_CONCAT3(intel_, cat,                                                                                  \
+                         _functions_table)[_XBT_CONCAT3(intel_, cat, _table)[i].elems[j].elems[k].algo - 1] args2);    \
+  }
+
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_GATHER_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_type, root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHER_SIG, (send_buff,send_count,send_type,recv_buff,recv_count,recv_type,comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALL_SIG,(send_buff, send_count, send_type, recv_buff, recv_count, recv_type,comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLTOALLV_SIG, (send_buff, send_counts, send_disps, send_type, recv_buff, recv_counts, recv_disps, recv_type, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BCAST_SIG , (buf, count, datatype, root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SIG,(buf,rbuf, count, datatype, op, root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_REDUCE_SCATTER_SIG ,(sbuf,rbuf, rcounts,dtype,op,comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_SCATTER_SIG ,(sendbuf, sendcount, sendtype,recvbuf, recvcount, recvtype,root, comm))
+COLL_APPLY(IMPI_COLL_SELECT, COLL_BARRIER_SIG,(comm))
+
+} // namespace simgrid::smpi