Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Update copyright lines.
[simgrid.git] / src / smpi / colls / reduce / reduce-mvapich-two-level.cpp
index 3c3adc1..71e9233 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017. The SimGrid Team.
+/* Copyright (c) 2013-2021. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This program is free software; you can redistribute it and/or modify it
  */
 
 #include "../colls_private.hpp"
+#include <algorithm>
+
 #define MV2_INTRA_SHMEM_REDUCE_MSG 2048
 
 #define mv2_g_shmem_coll_max_msg_size (1 << 17)
 #define SHMEM_COLL_BLOCK_SIZE (local_size * mv2_g_shmem_coll_max_msg_size)
 #define mv2_use_knomial_reduce 1
 
-#define MPIR_Reduce_inter_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce
-#define MPIR_Reduce_intra_knomial_wrapper_MV2 Coll_reduce_mvapich2_knomial::reduce
-#define MPIR_Reduce_binomial_MV2 Coll_reduce_binomial::reduce
-#define MPIR_Reduce_redscat_gather_MV2 Coll_reduce_scatter_gather::reduce
-#define MPIR_Reduce_shmem_MV2 Coll_reduce_ompi_basic_linear::reduce
+#define MPIR_Reduce_inter_knomial_wrapper_MV2 reduce__mvapich2_knomial
+#define MPIR_Reduce_intra_knomial_wrapper_MV2 reduce__mvapich2_knomial
+#define MPIR_Reduce_binomial_MV2 reduce__binomial
+#define MPIR_Reduce_redscat_gather_MV2 reduce__scatter_gather
+#define MPIR_Reduce_shmem_MV2 reduce__ompi_basic_linear
 
-extern int (*MV2_Reduce_function)( void *sendbuf,
+extern int (*MV2_Reduce_function)( const void *sendbuf,
     void *recvbuf,
     int count,
     MPI_Datatype datatype,
@@ -56,7 +58,7 @@ extern int (*MV2_Reduce_function)( void *sendbuf,
     int root,
     MPI_Comm  comm_ptr);
 
-extern int (*MV2_Reduce_intra_function)( void *sendbuf,
+extern int (*MV2_Reduce_intra_function)( const void *sendbuf,
     void *recvbuf,
     int count,
     MPI_Datatype datatype,
@@ -66,36 +68,37 @@ extern int (*MV2_Reduce_intra_function)( void *sendbuf,
 
 
 /*Fn pointers for collectives */
-static int (*reduce_fn)(void *sendbuf,
+static int (*reduce_fn)(const void *sendbuf,
                              void *recvbuf,
                              int count,
                              MPI_Datatype datatype,
                              MPI_Op op, int root, MPI_Comm  comm);
-namespace simgrid{
-namespace smpi{
-int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
-                                     void *recvbuf,
-                                     int count,
-                                     MPI_Datatype datatype,
-                                     MPI_Op op,
-                                     int root,
-                                     MPI_Comm comm)
+namespace simgrid {
+namespace smpi {
+int reduce__mvapich2_two_level( const void *sendbuf,
+                                void *recvbuf,
+                                int count,
+                                MPI_Datatype datatype,
+                                MPI_Op op,
+                                int root,
+                                MPI_Comm comm)
 {
     int mpi_errno = MPI_SUCCESS;
     int my_rank, total_size, local_rank, local_size;
     int leader_comm_rank = -1, leader_comm_size = 0;
     MPI_Comm shmem_comm, leader_comm;
     int leader_root, leader_of_root;
-    void *in_buf = NULL, *out_buf = NULL, *tmp_buf = NULL;
+    const unsigned char* in_buf = nullptr;
+    unsigned char *out_buf = nullptr, *tmp_buf = nullptr;
     MPI_Aint true_lb, true_extent, extent;
-    int is_commutative = 0, stride = 0;
+    int stride          = 0;
     int intra_node_root=0;
 
     //if not set (use of the algo directly, without mvapich2 selector)
-    if(MV2_Reduce_function==NULL)
-      MV2_Reduce_function=Coll_reduce_mpich::reduce;
-    if(MV2_Reduce_intra_function==NULL)
-      MV2_Reduce_intra_function=Coll_reduce_mpich::reduce;
+    if (MV2_Reduce_function == nullptr)
+      MV2_Reduce_function = reduce__mpich;
+    if (MV2_Reduce_intra_function == nullptr)
+      MV2_Reduce_intra_function = reduce__mpich;
 
     if(comm->get_leaders_comm()==MPI_COMM_NULL){
       comm->init_smp();
@@ -112,45 +115,43 @@ int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
     leader_of_root = comm->group()->rank(leaders_map[root]);
     leader_root = leader_comm->group()->rank(leaders_map[root]);
 
-    is_commutative= (op==MPI_OP_NULL || op->is_commutative());
+    bool is_commutative = (op == MPI_OP_NULL || op->is_commutative());
 
     datatype->extent(&true_lb,
                                        &true_extent);
     extent =datatype->get_extent();
-    stride = count * MAX(extent, true_extent);
+    stride = count * std::max(extent, true_extent);
 
     if (local_size == total_size) {
         /* First handle the case where there is only one node */
-        if (stride <= MV2_INTRA_SHMEM_REDUCE_MSG &&
-            is_commutative == 1) {
+        if (stride <= MV2_INTRA_SHMEM_REDUCE_MSG && is_commutative) {
             if (local_rank == 0 ) {
-                tmp_buf=(void *)smpi_get_tmp_sendbuffer( count *
-                                    (MAX(extent, true_extent)));
-                tmp_buf = (void *) ((char *) tmp_buf - true_lb);
+              tmp_buf = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
+              tmp_buf = tmp_buf - true_lb;
             }
 
             if (sendbuf != MPI_IN_PLACE) {
-                in_buf = (void *)sendbuf;
+              in_buf = static_cast<const unsigned char*>(sendbuf);
             } else {
-                in_buf = recvbuf;
+              in_buf = static_cast<const unsigned char*>(recvbuf);
             }
 
             if (local_rank == 0) {
                  if( my_rank != root) {
                      out_buf = tmp_buf;
                  } else {
-                     out_buf = recvbuf;
-                     if(in_buf == out_buf) {
-                        in_buf = MPI_IN_PLACE;
-                        out_buf = recvbuf;
+                   out_buf = static_cast<unsigned char*>(recvbuf);
+                   if (in_buf == out_buf) {
+                     in_buf  = static_cast<const unsigned char*>(MPI_IN_PLACE);
+                     out_buf = static_cast<unsigned char*>(recvbuf);
                      }
                  }
             } else {
-                in_buf  = (void *)sendbuf;
-                out_buf = NULL;
+              in_buf  = static_cast<const unsigned char*>(sendbuf);
+              out_buf = nullptr;
             }
 
-            if (count * (MAX(extent, true_extent)) < SHMEM_COLL_BLOCK_SIZE) {
+            if (count * (std::max(extent, true_extent)) < SHMEM_COLL_BLOCK_SIZE) {
               mpi_errno = MPIR_Reduce_shmem_MV2(in_buf, out_buf, count, datatype, op, 0, shmem_comm);
             } else {
               mpi_errno = MPIR_Reduce_intra_knomial_wrapper_MV2(in_buf, out_buf, count, datatype, op, 0, shmem_comm);
@@ -176,8 +177,8 @@ int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
                                   root, comm);
         }
         /* We are done */
-        if(tmp_buf!=NULL)
-          smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
+        if (tmp_buf != nullptr)
+          smpi_free_tmp_buffer(tmp_buf + true_lb);
         goto fn_exit;
     }
 
@@ -189,19 +190,18 @@ int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
         }
         leader_comm_size = leader_comm->size();
         leader_comm_rank = leader_comm->rank();
-        tmp_buf=(void *)smpi_get_tmp_sendbuffer(count *
-                            (MAX(extent, true_extent)));
-        tmp_buf = (void *) ((char *) tmp_buf - true_lb);
+        tmp_buf          = smpi_get_tmp_sendbuffer(count * std::max(extent, true_extent));
+        tmp_buf          = tmp_buf - true_lb;
     }
     if (sendbuf != MPI_IN_PLACE) {
-        in_buf = (void *)sendbuf;
+      in_buf = static_cast<const unsigned char*>(sendbuf);
     } else {
-        in_buf = recvbuf;
+      in_buf = static_cast<const unsigned char*>(recvbuf);
     }
     if (local_rank == 0) {
-        out_buf = tmp_buf;
+      out_buf = static_cast<unsigned char*>(tmp_buf);
     } else {
-        out_buf = NULL;
+      out_buf = nullptr;
     }
 
 
@@ -214,7 +214,7 @@ int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
          *this step*/
         if (MV2_Reduce_intra_function == & MPIR_Reduce_shmem_MV2)
         {
-          if (is_commutative == 1 && (count * (MAX(extent, true_extent)) < SHMEM_COLL_BLOCK_SIZE)) {
+          if (is_commutative && (count * (std::max(extent, true_extent)) < SHMEM_COLL_BLOCK_SIZE)) {
             mpi_errno = MV2_Reduce_intra_function(in_buf, out_buf, count, datatype, op, intra_node_root, shmem_comm);
             } else {
                     mpi_errno = MPIR_Reduce_intra_knomial_wrapper_MV2(in_buf, out_buf, count,
@@ -228,8 +228,8 @@ int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
                                       intra_node_root, shmem_comm);
         }
     } else {
-        smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
-        tmp_buf = in_buf;
+      smpi_free_tmp_buffer(tmp_buf + true_lb);
+      tmp_buf = (unsigned char*)in_buf; // xxx
     }
 
     /* Now work on the inter-leader phase. Data is in tmp_buf */
@@ -243,28 +243,26 @@ int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
                  * root of the reduce op. So, I will write the
                  * final result directly into my recvbuf */
                 if(tmp_buf != recvbuf) {
-                    in_buf = tmp_buf;
-                    out_buf = recvbuf;
+                  in_buf  = tmp_buf;
+                  out_buf = static_cast<unsigned char*>(recvbuf);
                 } else {
 
-                     in_buf = (char *)smpi_get_tmp_sendbuffer(count*
-                                       datatype->get_extent());
-                     Datatype::copy(tmp_buf, count, datatype,
-                                        in_buf, count, datatype);
-                    //in_buf = MPI_IN_PLACE;
-                    out_buf = recvbuf;
+                  unsigned char* buf = smpi_get_tmp_sendbuffer(count * datatype->get_extent());
+                  Datatype::copy(tmp_buf, count, datatype, buf, count, datatype);
+                  // in_buf = MPI_IN_PLACE;
+                  in_buf  = buf;
+                  out_buf = static_cast<unsigned char*>(recvbuf);
                 }
             } else {
-                in_buf = (char *)smpi_get_tmp_sendbuffer(count*
-                                       datatype->get_extent());
-                Datatype::copy(tmp_buf, count, datatype,
-                                        in_buf, count, datatype);
-                //in_buf = MPI_IN_PLACE;
-                out_buf = tmp_buf;
+              unsigned char* buf = smpi_get_tmp_sendbuffer(count * datatype->get_extent());
+              Datatype::copy(tmp_buf, count, datatype, buf, count, datatype);
+              // in_buf = MPI_IN_PLACE;
+              in_buf  = buf;
+              out_buf = tmp_buf;
             }
         } else {
             in_buf = tmp_buf;
-            out_buf = NULL;
+            out_buf = nullptr;
         }
 
         /* inter-leader communication  */
@@ -275,20 +273,15 @@ int Coll_reduce_mvapich2_two_level::reduce( void *sendbuf,
     }
 
     if (local_size > 1) {
-        /* Send the message to the root if the leader is not the
-         * root of the reduce operation. The reduced data is in tmp_buf */
-        if ((local_rank == 0) && (root != my_rank)
-            && (leader_root == leader_comm_rank)) {
-            Request::send(tmp_buf, count, datatype, root,
-                                     COLL_TAG_REDUCE+1, comm);
-        }
-        if ((local_rank != 0) && (root == my_rank)) {
-            Request::recv(recvbuf, count, datatype,
-                                     leader_of_root,
-                                     COLL_TAG_REDUCE+1, comm,
-                                     MPI_STATUS_IGNORE);
-        }
-      smpi_free_tmp_buffer((void *) ((char *) tmp_buf + true_lb));
+      /* Send the message to the root if the leader is not the
+       * root of the reduce operation. The reduced data is in tmp_buf */
+      if ((local_rank == 0) && (root != my_rank) && (leader_root == leader_comm_rank)) {
+        Request::send(tmp_buf, count, datatype, root, COLL_TAG_REDUCE + 1, comm);
+      }
+      if ((local_rank != 0) && (root == my_rank)) {
+        Request::recv(recvbuf, count, datatype, leader_of_root, COLL_TAG_REDUCE + 1, comm, MPI_STATUS_IGNORE);
+      }
+      smpi_free_tmp_buffer(tmp_buf + true_lb);
 
       if (leader_comm_rank == leader_root) {
         if (my_rank != root || (my_rank == root && tmp_buf == recvbuf)) {