AND (Algorithmique Numérique Distribuée)

Public GIT Repository
add mpich's allgatherv ring algo, as the one from star-mpi performs terribly
author     Augustin Degomme <degomme@idpann.imag.fr>
           Mon, 26 Aug 2013 12:28:04 +0000 (14:28 +0200)
committer  Augustin Degomme <degomme@idpann.imag.fr>
           Thu, 29 Aug 2013 12:41:11 +0000 (14:41 +0200)
buildtools/Cmake/AddTests.cmake
buildtools/Cmake/DefinePackages.cmake
src/smpi/colls/allgatherv-mpich-ring.c [new file with mode: 0644]
src/smpi/colls/colls.h

diff --git a/buildtools/Cmake/AddTests.cmake b/buildtools/Cmake/AddTests.cmake
index b276f09..eb4ea1d 100644
@@ -401,7 +401,7 @@ if(NOT enable_memcheck)
         ADD_TEST(smpi-allgather-coll-${ALLGATHER_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgather:${ALLGATHER_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgather_coll.tesh)
     ENDFOREACH()
     
-    FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb)
+    FOREACH (ALLGATHERV_COLL default GB pair ring ompi mpich ompi_neighborexchange ompi_bruck mpich_rdb mpich_ring)
         ADD_TEST(smpi-allgatherv-coll-${ALLGATHERV_COLL} ${CMAKE_BINARY_DIR}/bin/tesh ${TESH_OPTION} --cfg smpi/allgatherv:${ALLGATHERV_COLL} --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/allgatherv_coll.tesh)
     ENDFOREACH()
     
diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake
index dab01cd..a32ce0c 100644
@@ -137,6 +137,7 @@ set(SMPI_SRC
   src/smpi/colls/allgatherv-ompi-neighborexchange.c
   src/smpi/colls/allgatherv-ompi-bruck.c
   src/smpi/colls/allgatherv-mpich-rdb.c
+  src/smpi/colls/allgatherv-mpich-ring.c
   src/smpi/colls/allreduce-lr.c
   src/smpi/colls/allreduce-NTS.c
   src/smpi/colls/allreduce-rab1.c
diff --git a/src/smpi/colls/allgatherv-mpich-ring.c b/src/smpi/colls/allgatherv-mpich-ring.c
new file mode 100644
index 0000000..aeacb3f
--- /dev/null
+++ b/src/smpi/colls/allgatherv-mpich-ring.c
@@ -0,0 +1,121 @@
+#include "colls_private.h"
+
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+/*****************************************************************************
+ * Function: allgatherv_mpich_ring
+ * return: int
+ * inputs:
+ *   sendbuf: send input buffer
+ *   sendcount: number of elements to send
+ *   send_type: data type of the elements being sent
+ *   recvbuf: receive output buffer
+ *   recvcounts: number of elements to receive from each process
+ *   displs: displacement (in units of recvtype extent) of each block in recvbuf
+ *   recvtype: data type of the elements being received
+ *   comm: communicator over which the allgatherv is performed
+ ****************************************************************************/
+int
+smpi_coll_tuned_allgatherv_mpich_ring(void *sendbuf, int sendcount,
+    MPI_Datatype send_type, void *recvbuf,
+    int *recvcounts, int *displs, MPI_Datatype recvtype,
+    MPI_Comm comm)
+{
+
+  char * sbuf = NULL, * rbuf = NULL;
+  int soffset, roffset;
+  int torecv=0, tosend=0, min, rank, comm_size;
+  int sendnow, recvnow;
+  int sidx, ridx;
+  MPI_Status status;
+  MPI_Aint recvtype_extent;
+  int right, left, total_count, i;
+  rank= smpi_comm_rank(comm);
+  comm_size=smpi_comm_size(comm);
+
+  recvtype_extent= smpi_datatype_get_extent( recvtype);
+  total_count = 0;
+  for (i=0; i<comm_size; i++)
+    total_count += recvcounts[i];
+
+  if (sendbuf != MPI_IN_PLACE) {
+      /* First, load the "local" version in the recvbuf. */
+      smpi_datatype_copy(sendbuf, sendcount, send_type,
+          ((char *)recvbuf + displs[rank]*recvtype_extent),
+          recvcounts[rank], recvtype);
+  }
+
+  left  = (comm_size + rank - 1) % comm_size;
+  right = (rank + 1) % comm_size;
+
+  torecv = total_count - recvcounts[rank];
+  tosend = total_count - recvcounts[right];
+
+  min = recvcounts[0];
+  for (i = 1; i < comm_size; i++)
+    if (min > recvcounts[i])
+      min = recvcounts[i];
+  if (min * recvtype_extent < 32768)
+    min = 32768 / recvtype_extent;
+  /* Handle the case where the datatype extent is larger than
+   * the pipeline size. */
+  if (!min)
+    min = 1;
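+  /* min now acts as the pipelining chunk size in elements: the smallest
+   * per-rank count, raised to at least 32 KiB worth of data per step
+   * (e.g. 8192 elements for a 4-byte type), and to at least one element
+   * when a single element already exceeds 32 KiB. */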
+
+  sidx = rank;
+  ridx = left;
+  soffset = 0;
+  roffset = 0;
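+  /* Ring exchange: in each step a rank sends a chunk of block sidx to its
+   * right neighbor while receiving a chunk of block ridx from its left
+   * neighbor. sidx starts at our own block, ridx at the left neighbor's,
+   * and both walk backwards around the ring as blocks complete. */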
+  while (tosend || torecv) { /* While we have data to send or receive */
+      sendnow = ((recvcounts[sidx] - soffset) > min) ? min : (recvcounts[sidx] - soffset);
+      recvnow = ((recvcounts[ridx] - roffset) > min) ? min : (recvcounts[ridx] - roffset);
+      sbuf = (char *)recvbuf + ((displs[sidx] + soffset) * recvtype_extent);
+      rbuf = (char *)recvbuf + ((displs[ridx] + roffset) * recvtype_extent);
+
+      /* Protect against wrap-around of indices */
+      if (!tosend)
+        sendnow = 0;
+      if (!torecv)
+        recvnow = 0;
+
+      /* Communicate */
+      if (!sendnow && !recvnow) {
+          /* Don't do anything. This case is possible if two
+           * consecutive processes contribute 0 bytes each. */
+      }
+      else if (!sendnow) { /* If there's no data to send, just do a recv call */
+          smpi_mpi_recv(rbuf, recvnow, recvtype, left, COLL_TAG_ALLGATHERV, comm, &status);
+
+          torecv -= recvnow;
+      }
+      else if (!recvnow) { /* If there's no data to receive, just do a send call */
+          smpi_mpi_send(sbuf, sendnow, recvtype, right, COLL_TAG_ALLGATHERV, comm);
+
+          tosend -= sendnow;
+      }
+      else { /* There's data to be sent and received */
+          smpi_mpi_sendrecv(sbuf, sendnow, recvtype, right, COLL_TAG_ALLGATHERV,
+              rbuf, recvnow, recvtype, left, COLL_TAG_ALLGATHERV,
+              comm, &status);
+          tosend -= sendnow;
+          torecv -= recvnow;
+      }
+
+      soffset += sendnow;
+      roffset += recvnow;
+      if (soffset == recvcounts[sidx]) {
+          soffset = 0;
+          sidx = (sidx + comm_size - 1) % comm_size;
+      }
+      if (roffset == recvcounts[ridx]) {
+          roffset = 0;
+          ridx = (ridx + comm_size - 1) % comm_size;
+      }
+  }
+
+  return MPI_SUCCESS;
+}
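For context, here is a minimal, illustrative sketch (not part of this commit) of user code that would exercise this collective: a plain MPI_Allgatherv call with uneven per-rank counts, which SMPI can route to the new implementation through the smpi/allgatherv configuration key used by the tests above (e.g. selecting mpich_ring). The file name and launch command are assumptions, not something defined by this patch.

/* allgatherv_demo.c -- illustrative sketch only, not part of this commit.
 * Each rank contributes (rank + 1) ints; every rank gathers all blocks. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
  int rank, size, i;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int sendcount = rank + 1;                    /* uneven contributions */
  int *sendbuf = malloc(sendcount * sizeof(int));
  for (i = 0; i < sendcount; i++)
    sendbuf[i] = rank;

  int *recvcounts = malloc(size * sizeof(int));
  int *displs     = malloc(size * sizeof(int));
  int total = 0;
  for (i = 0; i < size; i++) {
    recvcounts[i] = i + 1;                     /* rank i sends i+1 elements */
    displs[i]     = total;                     /* block i starts after the previous ones */
    total        += recvcounts[i];
  }
  int *recvbuf = malloc(total * sizeof(int));

  MPI_Allgatherv(sendbuf, sendcount, MPI_INT,
                 recvbuf, recvcounts, displs, MPI_INT, MPI_COMM_WORLD);

  if (rank == 0)
    printf("gathered %d elements in total\n", total);

  free(sendbuf); free(recvbuf); free(recvcounts); free(displs);
  MPI_Finalize();
  return 0;
}

The tests above pick the algorithm with --cfg smpi/allgatherv:<name>; launching such a program under smpirun with that key set to mpich_ring would presumably exercise the code added here.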
diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h
index a2091b1..dce6456 100644
@@ -89,6 +89,7 @@ COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi_neighborexchange) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, ompi_bruck) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_rdb) COLL_sep \
+COLL_APPLY(action, COLL_ALLGATHERV_SIG, mpich_ring) COLL_sep \
 COLL_APPLY(action, COLL_ALLGATHERV_SIG, automatic)
 
 COLL_ALLGATHERVS(COLL_PROTO, COLL_NOsep)
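For reference, assuming the COLL_ALLGATHERV_SIG / COLL_PROTO machinery expands each entry into a function prototype (an assumption about this header, not shown in the diff), the new mpich_ring entry would declare roughly the signature implemented in the new file:

int smpi_coll_tuned_allgatherv_mpich_ring(void *sendbuf, int sendcount,
                                          MPI_Datatype send_type, void *recvbuf,
                                          int *recvcounts, int *displs,
                                          MPI_Datatype recvtype, MPI_Comm comm);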