Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Define and use _XBT_STRINGIFY and _XBT_CONCAT{,2,3,4}.
[simgrid.git] / src / smpi / colls / smpi_intel_mpi_selector.cpp
index 6bec031..a126d72 100644 (file)
@@ -1,12 +1,12 @@
 /* selector for collective algorithms based on openmpi's default coll_tuned_decision_fixed selector */
 
-/* Copyright (c) 2009-2010, 2013-2017. The SimGrid Team.
+/* Copyright (c) 2009-2019. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
-#include "colls_private.h"
+#include "colls_private.hpp"
 
 // This selector is based on information gathered on the Stampede cluster, with Intel MPI 4.1.3.049, and from the intel reference manual. The data was gathered launching runs with 1,2,4,8,16 processes per node.
 
 #define INTEL_MAX_NB_NUMPROCS  12
 #define INTEL_MAX_NB_PPN  5  /* 1 2 4 8 16 ppn */
 
-typedef struct {
+struct intel_tuning_table_size_element { /* One message-size threshold of a tuning table.
+   * max_size: IMPI_COLL_SELECT stops at the first entry whose max_size is greater than block_dsize.
+   * algo: 1-based index into the matching intel_<cat>_functions_table (the macro subtracts 1). */
   unsigned int max_size;
   int algo;
-} intel_tuning_table_size_element;
+};
 
-typedef struct {
+struct intel_tuning_table_numproc_element { /* Size thresholds for one range of communicator sizes.
+   * max_num_proc: IMPI_COLL_SELECT advances past this entry while comm_size is greater than it.
+   * num_elems: bound the macro applies to its index into elems[] (presumably the count of filled entries).
+   * elems: the message-size thresholds for this range. */
   int max_num_proc;
   int num_elems;
   intel_tuning_table_size_element elems[INTEL_MAX_NB_THRESHOLDS];
-} intel_tuning_table_numproc_element;
+};
 
-typedef struct {
+struct intel_tuning_table_element { /* Tuning data for one processes-per-node (ppn) value.
+   * ppn: compared by IMPI_COLL_SELECT with the intra-communicator size (1 when the communicator is not uniform).
+   * elems: one entry per communicator-size range. */
   int ppn;
   intel_tuning_table_numproc_element elems[INTEL_MAX_NB_NUMPROCS];
-} intel_tuning_table_element;
+};
 
 /*
 I_MPI_ADJUST_ALLREDUCE
@@ -50,7 +50,7 @@ MPI_Allreduce
 namespace simgrid{
 namespace smpi{
 
-int (*intel_allreduce_functions_table[])(void *sendbuf,
+int (*intel_allreduce_functions_table[])(const void *sendbuf,
       void *recvbuf,
       int count,
       MPI_Datatype datatype,
@@ -634,7 +634,7 @@ intel_tuning_table_element intel_alltoall_table[] =
   }
   }
 };
-int (*intel_alltoall_functions_table[])(void *sbuf, int scount,
+int (*intel_alltoall_functions_table[])(const void *sbuf, int scount,
                                              MPI_Datatype sdtype,
                                              void* rbuf, int rcount,
                                              MPI_Datatype rdtype,
@@ -967,7 +967,7 @@ MPI_Reduce
 
 */
 
-int (*intel_reduce_functions_table[])(void *sendbuf, void *recvbuf,
+int (*intel_reduce_functions_table[])(const void *sendbuf, void *recvbuf,
                                             int count, MPI_Datatype  datatype,
                                             MPI_Op   op, int root,
                                             MPI_Comm   comm) ={
@@ -1055,8 +1055,8 @@ MPI_Reduce_scatter
 5. Topology aware Reduce + Scatterv algorithm
 
 */
-static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
-                                                    int *rcounts,
+static  int intel_reduce_scatter_reduce_scatterv(const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
@@ -1065,8 +1065,8 @@ static  int intel_reduce_scatter_reduce_scatterv(void *sbuf, void *rbuf,
   return MPI_SUCCESS;
 }
 
-static  int  intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf,
-                                                    int *rcounts,
+static  int  intel_reduce_scatter_recursivehalving(const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm)
@@ -1077,8 +1077,8 @@ static  int  intel_reduce_scatter_recursivehalving(void *sbuf, void *rbuf,
     return Coll_reduce_scatter_mvapich2::reduce_scatter(sbuf, rbuf, rcounts,dtype, op,comm);
 }
 
-int (*intel_reduce_scatter_functions_table[])( void *sbuf, void *rbuf,
-                                                    int *rcounts,
+int (*intel_reduce_scatter_functions_table[])( const void *sbuf, void *rbuf,
+                                                    const int *rcounts,
                                                     MPI_Datatype dtype,
                                                     MPI_Op  op,
                                                     MPI_Comm  comm
@@ -1487,7 +1487,7 @@ MPI_Allgather
 
 */
 
-int (*intel_allgather_functions_table[])(void *sbuf, int scount,
+int (*intel_allgather_functions_table[])(const void *sbuf, int scount,
                                               MPI_Datatype sdtype,
                                               void* rbuf, int rcount,
                                               MPI_Datatype rdtype,
@@ -1656,10 +1656,10 @@ MPI_Allgatherv
 
 */
 
-int (*intel_allgatherv_functions_table[])(void *sbuf, int scount,
+int (*intel_allgatherv_functions_table[])(const void *sbuf, int scount,
                                                MPI_Datatype sdtype,
-                                               void* rbuf, int *rcounts,
-                                               int *rdispls,
+                                               void* rbuf, const int *rcounts,
+                                               const int *rdispls,
                                                MPI_Datatype rdtype,
                                                MPI_Comm  comm
                                                     ) ={
@@ -1867,7 +1867,7 @@ MPI_Gather
 
 */
 
-int (*intel_gather_functions_table[])(void *sbuf, int scount,
+int (*intel_gather_functions_table[])(const void *sbuf, int scount,
                                            MPI_Datatype sdtype,
                                            void* rbuf, int rcount,
                                            MPI_Datatype rdtype,
@@ -1971,7 +1971,7 @@ MPI_Scatter
 
 */
 
-int (*intel_scatter_functions_table[])(void *sbuf, int scount,
+int (*intel_scatter_functions_table[])(const void *sbuf, int scount,
                                             MPI_Datatype sdtype,
                                             void* rbuf, int rcount,
                                             MPI_Datatype rdtype,
@@ -2145,9 +2145,9 @@ MPI_Alltoallv
 
 */
 
-int (*intel_alltoallv_functions_table[])(void *sbuf, int *scounts, int *sdisps,
+int (*intel_alltoallv_functions_table[])(const void *sbuf, const int *scounts, const int *sdisps,
                                               MPI_Datatype sdtype,
-                                              void *rbuf, int *rcounts, int *rdisps,
+                                              void *rbuf, const int *rcounts, const int *rdisps,
                                               MPI_Datatype rdtype,
                                               MPI_Comm  comm
                                                     ) ={
@@ -2262,35 +2262,33 @@ intel_tuning_table_element intel_alltoallv_table[] =
 #define SIZECOMP_alltoallv\
   size_t block_dsize = 1;
 
-#define IMPI_COLL_SELECT(cat, ret, args, args2)\
-ret Coll_ ## cat ## _impi:: cat (COLL_UNPAREN args)\
-{\
-    int comm_size = comm->size();\
-    int i =0;\
-    SIZECOMP_ ## cat\
-    i=0;\
-    int j =0, k=0;\
-    if(comm->get_leaders_comm()==MPI_COMM_NULL){\
-      comm->init_smp();\
-    }\
-    int local_size=1;\
-    if (comm->is_uniform()) {\
-        local_size = comm->get_intra_comm()->size();\
-    }\
-    while(i < INTEL_MAX_NB_PPN &&\
-    local_size!=intel_ ## cat ## _table[i].ppn)\
-      i++;\
-    if(i==INTEL_MAX_NB_PPN) i=0;\
-    while(comm_size>intel_ ## cat ## _table[i].elems[j].max_num_proc\
-        && j < INTEL_MAX_NB_THRESHOLDS)\
-      j++;\
-    while(block_dsize >=intel_ ## cat ## _table[i].elems[j].elems[k].max_size\
-         && k< intel_ ## cat ## _table[i].elems[j].num_elems)\
-      k++;\
-    return (intel_ ## cat ## _functions_table[intel_ ## cat ## _table[i].elems[j].elems[k].algo-1]\
-    args2);\
-}
-
+/* Defines Coll_<cat>_impi::<cat>(): selects an intel_<cat>_table row by ppn (first ppn entry if unlisted), communicator
+ * size and block_dsize (from SIZECOMP_<cat>), then calls that intel_<cat>_functions_table entry (algo is 1-based). */
+#define IMPI_COLL_SELECT(cat, ret, args, args2)                                                                        \
+  ret _XBT_CONCAT3(Coll_, cat, _impi)::cat(COLL_UNPAREN args)                                                          \
+  {                                                                                                                    \
+    int comm_size = comm->size();                                                                                      \
+    int i         = 0;                                                                                                 \
+    _XBT_CONCAT(SIZECOMP_, cat)                                                                                        \
+    i     = 0;                                                                                                         \
+    int j = 0, k = 0;                                                                                                  \
+    if (comm->get_leaders_comm() == MPI_COMM_NULL)                                                                     \
+      comm->init_smp();                                                                                                \
+    int local_size = comm->is_uniform() ? comm->get_intra_comm()->size() : 1;                                          \
+    while (i < INTEL_MAX_NB_PPN && local_size != _XBT_CONCAT3(intel_, cat, _table)[i].ppn)                             \
+      i++;                                                                                                             \
+    if (i == INTEL_MAX_NB_PPN)                                                                                         \
+      i = 0;                                                                                                           \
+    /* elems[] of a ppn entry has INTEL_MAX_NB_NUMPROCS slots: test j first and stop on the last slot. */              \
+    while (j < INTEL_MAX_NB_NUMPROCS - 1 && comm_size > _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].max_num_proc)    \
+      j++;                                                                                                             \
+    /* Only num_elems thresholds are valid: test k first and stop on the last valid one. */                            \
+    while (k < _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].num_elems - 1 &&                                          \
+           block_dsize >= _XBT_CONCAT3(intel_, cat, _table)[i].elems[j].elems[k].max_size)                             \
+      k++;                                                                                                             \
+    return (_XBT_CONCAT3(intel_, cat,                                                                                  \
+                         _functions_table)[_XBT_CONCAT3(intel_, cat, _table)[i].elems[j].elems[k].algo - 1] args2);    \
+  }
 
 COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLGATHERV_SIG, (send_buff, send_count, send_type, recv_buff, recv_count, recv_disps, recv_type, comm));
 COLL_APPLY(IMPI_COLL_SELECT, COLL_ALLREDUCE_SIG, (sbuf, rbuf, rcount, dtype, op, comm));