AND Algorithmique Numérique Distribuée

Public GIT Repository
add scatter algos from ompi
author    degomme <degomme@debian.localdomain>
Thu, 13 Jun 2013 07:26:51 +0000 (09:26 +0200)
committer degomme <degomme@debian.localdomain>
Thu, 13 Jun 2013 07:26:51 +0000 (09:26 +0200)
buildtools/Cmake/DefinePackages.cmake
src/include/smpi/smpi_interface.h
src/simgrid/sg_config.c
src/smpi/colls/colls.h
src/smpi/colls/scatter-ompi.c [new file with mode: 0644]
src/smpi/colls/smpi_openmpi_selector.c
src/smpi/smpi_coll.c
src/smpi/smpi_global.c
src/smpi/smpi_pmpi.c

diff --git a/buildtools/Cmake/DefinePackages.cmake b/buildtools/Cmake/DefinePackages.cmake
index 4612ad1..4b5c74e 100644
@@ -197,6 +197,7 @@ set(SMPI_SRC
   src/smpi/colls/reduce-ompi.c
   src/smpi/colls/gather-ompi.c
   src/smpi/colls/reduce_scatter-ompi.c
+  src/smpi/colls/scatter-ompi.c
   )
 
 if(SMPI_F2C)
diff --git a/src/include/smpi/smpi_interface.h b/src/include/smpi/smpi_interface.h
index f90ef70..7ba0fdc 100644
@@ -99,6 +99,15 @@ XBT_PUBLIC_DATA(int (*mpi_coll_reduce_scatter_fun)
                 (void *sbuf, void *rbuf, int *rcounts,
                  MPI_Datatype dtype, MPI_Op op,MPI_Comm comm));
 
+/** \ingroup MPI scatter
+ *  \brief The list of all available scatter collectives
+ */
+XBT_PUBLIC_DATA(s_mpi_coll_description_t) mpi_coll_scatter_description[];
+XBT_PUBLIC_DATA(int (*mpi_coll_scatter_fun)
+                (void *sendbuf, int sendcount, MPI_Datatype sendtype,
+                void *recvbuf, int recvcount, MPI_Datatype recvtype,
+                int root, MPI_Comm comm));
+
 XBT_PUBLIC(void) coll_help(const char *category,
                            s_mpi_coll_description_t * table);
 XBT_PUBLIC(int) find_coll_description(s_mpi_coll_description_t * table,
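The new declarations follow the pattern already used for the other collectives in this header: a NULL-terminated table of {name, description, function} entries plus a global function pointer that gets bound to the chosen entry at start-up. A minimal, self-contained sketch of that pattern (illustrative only; the types and helpers below are made up and are not SMPI's actual API):

    #include <stdio.h>
    #include <string.h>

    /* toy stand-ins for s_mpi_coll_description_t and mpi_coll_scatter_fun */
    typedef int (*scatter_fn)(const void *sbuf, void *rbuf, int count);

    struct coll_description { const char *name; const char *descr; scatter_fn fn; };

    static int scatter_linear(const void *s, void *r, int c)   { (void)s; (void)r; (void)c; puts("linear");   return 0; }
    static int scatter_binomial(const void *s, void *r, int c) { (void)s; (void)r; (void)c; puts("binomial"); return 0; }

    static struct coll_description scatter_table[] = {
      {"linear",   "one send per rank",   scatter_linear},
      {"binomial", "binomial-tree relay", scatter_binomial},
      {NULL, NULL, NULL}                  /* the table must be NULL terminated */
    };

    static scatter_fn scatter_fun;        /* bound once, then called from the MPI wrapper */

    int main(void)
    {
      const char *wanted = "binomial";    /* in SMPI this value comes from the smpi/scatter config key */
      for (int i = 0; scatter_table[i].name; i++)
        if (!strcmp(scatter_table[i].name, wanted))
          scatter_fun = scatter_table[i].fn;
      return scatter_fun ? scatter_fun(NULL, NULL, 0) : 1;
    }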
diff --git a/src/simgrid/sg_config.c b/src/simgrid/sg_config.c
index d600f19..c29f5e8 100644
@@ -279,6 +279,9 @@ static void _sg_cfg_cb__coll_reduce(const char *name, int pos)
 static void _sg_cfg_cb__coll_reduce_scatter(const char *name, int pos){
   _sg_cfg_cb__coll("reduce_scatter", mpi_coll_reduce_scatter_description, name, pos);
 }
+static void _sg_cfg_cb__coll_scatter(const char *name, int pos){
+  _sg_cfg_cb__coll("scatter", mpi_coll_scatter_description, name, pos);
+}
 #endif
 
 /* callback of the inclusion path */
@@ -780,6 +783,11 @@ void sg_config_init(int *argc, char **argv)
                     xbt_cfgelm_string, NULL, 1, 1, &_sg_cfg_cb__coll_reduce_scatter,
                     NULL);
 
+    xbt_cfg_register(&_sg_cfg_set, "smpi/scatter",
+                    "Which collective to use for scatter",
+                    xbt_cfgelm_string, NULL, 1, 1, &_sg_cfg_cb__coll_scatter,
+                    NULL);
+
     xbt_cfg_register(&_sg_cfg_set, "smpi/allgatherv",
                     "Which collective to use for allgatherv",
                     xbt_cfgelm_string, NULL, 1, 1, &_sg_cfg_cb__coll_allgatherv,
diff --git a/src/smpi/colls/colls.h b/src/smpi/colls/colls.h
index 55be878..12627c0 100644
@@ -227,4 +227,20 @@ COLL_APPLY(action, COLL_REDUCE_SCATTER_SIG, ompi_ring)
 
 COLL_REDUCE_SCATTERS(COLL_PROTO, COLL_NOsep)
 
+
+/***********
+ * SCATTER *
+ ***********/
+#define COLL_SCATTER_SIG scatter, int, \
+                (void *sendbuf, int sendcount, MPI_Datatype sendtype,\
+                void *recvbuf, int recvcount, MPI_Datatype recvtype,\
+                int root, MPI_Comm comm)
+
+#define COLL_SCATTERS(action, COLL_sep) \
+COLL_APPLY(action, COLL_SCATTER_SIG, ompi) COLL_sep \
+COLL_APPLY(action, COLL_SCATTER_SIG, ompi_basic_linear) COLL_sep \
+COLL_APPLY(action, COLL_SCATTER_SIG, ompi_binomial) 
+
+COLL_SCATTERS(COLL_PROTO, COLL_NOsep)
+
 #endif
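For reference, COLL_SCATTERS(COLL_PROTO, COLL_NOsep) is what generates the prototypes of the implementations added in this patch. Assuming COLL_PROTO keeps the usual smpi_coll_tuned_<collective>_<variant> naming used by the other collectives in this header, the expansion should amount to roughly:

    int smpi_coll_tuned_scatter_ompi(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                     void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                     int root, MPI_Comm comm);
    int smpi_coll_tuned_scatter_ompi_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                                  int root, MPI_Comm comm);
    int smpi_coll_tuned_scatter_ompi_binomial(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                              void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                              int root, MPI_Comm comm);

These names match the functions defined in scatter-ompi.c and smpi_openmpi_selector.c below.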
diff --git a/src/smpi/colls/scatter-ompi.c b/src/smpi/colls/scatter-ompi.c
new file mode 100644
index 0000000..205d602
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "colls_private.h"
+#include "coll_tuned_topo.h"
+
+#define MCA_COLL_BASE_TAG_SCATTER 111
+
+int
+smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount,
+                                      MPI_Datatype sdtype,
+                                      void *rbuf, int rcount,
+                                      MPI_Datatype rdtype,
+                                      int root,
+                                      MPI_Comm comm
+                                      )
+{
+    int line = -1;
+    int i;
+    int rank;
+    int vrank;
+    int size;
+    int total_send = 0;
+    char *ptmp     = NULL;
+    char *tempbuf  = NULL;
+    int err;
+    ompi_coll_tree_t* bmtree;
+    MPI_Status status;
+    MPI_Aint sextent, slb, strue_lb, strue_extent; 
+    MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
+
+    size = smpi_comm_size(comm);
+    rank = smpi_comm_rank(comm);
+
+    XBT_DEBUG(
+                 "smpi_coll_tuned_scatter_ompi_binomial rank %d", rank);
+
+    /* create the binomial tree */
+    
+//    COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
+    bmtree =  ompi_coll_tuned_topo_build_in_order_bmtree( comm, root);//ompi_ data->cached_in_order_bmtree;
+
+    smpi_datatype_extent(sdtype, &slb, &sextent);
+    smpi_datatype_extent(sdtype, &strue_lb, &strue_extent);
+    smpi_datatype_extent(rdtype, &rlb, &rextent);
+    smpi_datatype_extent(rdtype, &rtrue_lb, &rtrue_extent);
+
+    vrank = (rank - root + size) % size;
+
+    if (rank == root) {
+       if (0 == root) {
+           /* root on 0, just use the send buffer */
+           ptmp = (char *) sbuf;
+           if (rbuf != MPI_IN_PLACE) {
+               /* local copy to rbuf */
+               err = smpi_datatype_copy(sbuf, scount, sdtype,
+                                     rbuf, rcount, rdtype);
+               if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+           }
+       } else {
+           /* root is not on 0, allocate temp buffer for send */
+           tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
+           if (NULL == tempbuf) {
+               err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
+           }
+
+           ptmp = tempbuf - slb;
+
+           /* and rotate data so they will eventually be in the right place */
+           err = smpi_datatype_copy((char *) sbuf + sextent*root*scount, scount*(size-root), sdtype,
+            ptmp, scount*(size-root), sdtype);
+           if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+
+           err = smpi_datatype_copy((char*)sbuf, scount*root, sdtype,
+                                                ptmp + sextent*scount*(size - root), scount*root, sdtype);
+           if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+           if (rbuf != MPI_IN_PLACE) {
+               /* local copy to rbuf */
+               err = smpi_datatype_copy(ptmp, scount, sdtype,
+                                     rbuf, rcount, rdtype);
+               if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+           }
+       }
+       total_send = scount;
+    } else if (!(vrank % 2)) {
+       /* non-root, non-leaf nodes, allocate temp buffer for recv
+        * the most we need is rcount*size/2 */
+       tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
+       if (NULL == tempbuf) {
+           err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
+       }
+
+       ptmp = tempbuf - rlb;
+
+       sdtype = rdtype;
+       scount = rcount;
+       sextent = rextent;
+       total_send = scount;
+    } else {
+       /* leaf nodes, just use rbuf */
+       ptmp = (char *) rbuf;
+    }
+
+    if (!(vrank % 2)) {
+       if (rank != root) {
+           /* recv from parent on non-root */
+           smpi_mpi_recv(ptmp, rcount*size, rdtype, bmtree->tree_prev,
+                                   MCA_COLL_BASE_TAG_SCATTER, comm, &status);
+           /* local copy to rbuf */
+           err = smpi_datatype_copy(ptmp, scount, sdtype,
+                                 rbuf, rcount, rdtype);
+       }
+       /* send to children on all non-leaf */
+       for (i = 0; i < bmtree->tree_nextsize; i++) {
+           int mycount = 0, vkid;
+           /* figure out how much data I have to send to this child */
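+           /* e.g. with size=8, root=0: the root keeps block 0 and hands 1, 2 and 4
+            * blocks to the subtrees rooted at vranks 1, 2 and 4; vrank 4 in turn
+            * keeps one block and forwards 1 and 2 blocks to vranks 5 and 6 */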
+           vkid = (bmtree->tree_next[i] - root + size) % size;
+           mycount = vkid - vrank;
+           if (mycount > (size - vkid))
+               mycount = size - vkid;
+           mycount *= scount;
+
+           smpi_mpi_send(ptmp + total_send*sextent, mycount, sdtype,
+                                   bmtree->tree_next[i],
+                                   MCA_COLL_BASE_TAG_SCATTER,
+                                    comm);
+
+           total_send += mycount;
+       }
+
+       if (NULL != tempbuf) 
+           free(tempbuf);
+    } else {
+       /* recv from parent on leaf nodes */
+       smpi_mpi_recv(ptmp, rcount, rdtype, bmtree->tree_prev,
+                               MCA_COLL_BASE_TAG_SCATTER, comm, &status);
+    }
+
+    return MPI_SUCCESS;
+
+ err_hndl:
+    if (NULL != tempbuf)
+       free(tempbuf);
+
+    XBT_DEBUG(  "%s:%4d\tError occurred %d, rank %2d",
+                __FILE__, line, err, rank);
+    return err;
+}
+
+/*
+ * Linear functions are copied from the BASIC coll module
+ * they do not segment the message and are simple implementations
+ * but for some small number of nodes and/or small data sizes they 
+ * are just as fast as tuned/tree based segmenting operations 
+ * and as such may be selected by the decision functions
+ * These are copied into this module due to the way we select modules
+ * in V1. i.e. in V2 we will handle this differently and so will not
+ * have to duplicate code.
+ * JPG following the examples from other coll_tuned implementations. Dec06.
+ */
+
+/* copied function (with appropriate renaming) starts here */
+/*
+ *     scatter_intra
+ *
+ *     Function:       - basic scatter operation
+ *     Accepts:        - same arguments as MPI_Scatter()
+ *     Returns:        - MPI_SUCCESS or error code
+ */
+int
+smpi_coll_tuned_scatter_ompi_basic_linear(void *sbuf, int scount,
+                                          MPI_Datatype sdtype,
+                                          void *rbuf, int rcount,
+                                          MPI_Datatype rdtype,
+                                          int root,
+                                          MPI_Comm comm
+                                          )
+{
+    int i, rank, size, err;
+    char *ptmp;
+    ptrdiff_t lb, incr;
+
+    /* Initialize */
+
+    rank = smpi_comm_rank(comm);
+    size = smpi_comm_size(comm);
+
+    /* If not root, receive data. */
+
+    if (rank != root) {
+        smpi_mpi_recv(rbuf, rcount, rdtype, root,
+                                MCA_COLL_BASE_TAG_SCATTER,
+                                comm, MPI_STATUS_IGNORE);
+        return MPI_SUCCESS;
+    }
+
+    /* I am the root, loop sending data. */
+
+    err = smpi_datatype_extent(sdtype, &lb, &incr);
+    if (MPI_SUCCESS != err) {
+        return MPI_ERR_OTHER;
+    }
+
+    incr *= scount;
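+    /* incr is now the distance, in bytes, between two consecutive ranks' blocks in sbuf */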
+    for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
+
+        /* simple optimization */
+
+        if (i == rank) {
+            if (MPI_IN_PLACE != rbuf) {
+                err =
+                    smpi_datatype_copy(ptmp, scount, sdtype, rbuf, rcount,
+                                    rdtype);
+            }
+        } else {
+            smpi_mpi_send(ptmp, scount, sdtype, i,
+                                    MCA_COLL_BASE_TAG_SCATTER,
+                                     comm);
+        }
+        if (MPI_SUCCESS != err) {
+            return err;
+        }
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
+}
diff --git a/src/smpi/colls/smpi_openmpi_selector.c b/src/smpi/colls/smpi_openmpi_selector.c
index 15dac04..f5a23c4 100644
@@ -568,12 +568,12 @@ int smpi_coll_tuned_gather_ompi(void *sbuf, int scount,
                                                       rbuf, rcount, rdtype, 
                                                       root, comm);
 }
-/*
+
 int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount, 
                                             MPI_Datatype sdtype,
                                             void* rbuf, int rcount, 
                                             MPI_Datatype rdtype, 
-                                            int root, MPI_Comm  comm,
+                                            int root, MPI_Comm  comm
                                             )
 {
     const size_t small_block_size = 300;
@@ -581,28 +581,27 @@ int smpi_coll_tuned_scatter_ompi(void *sbuf, int scount,
     int communicator_size, rank;
     size_t dsize, block_size;
 
-    OPAL_OUTPUT((smpi_coll_tuned_stream, 
-                 "smpi_coll_tuned_scatter_ompi"));
+    XBT_DEBUG("smpi_coll_tuned_scatter_ompi");
 
     communicator_size = smpi_comm_size(comm);
-    rank = ompi_comm_rank(comm);
+    rank = smpi_comm_rank(comm);
     // Determine block size 
     if (root == rank) {
-        ompi_datatype_type_size(sdtype, &dsize);
+        dsize=smpi_datatype_size(sdtype);
         block_size = dsize * scount;
     } else {
-        ompi_datatype_type_size(rdtype, &dsize);
+        dsize=smpi_datatype_size(rdtype);
         block_size = dsize * rcount;
     } 
 
     if ((communicator_size > small_comm_size) &&
         (block_size < small_block_size)) {
-        return smpi_coll_tuned_scatter_intra_binomial (sbuf, scount, sdtype, 
+        return smpi_coll_tuned_scatter_ompi_binomial (sbuf, scount, sdtype, 
                                                        rbuf, rcount, rdtype, 
-                                                       root, comm, module);
+                                                       root, comm);
     }
-    return smpi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype, 
+    return smpi_coll_tuned_scatter_ompi_basic_linear (sbuf, scount, sdtype, 
                                                        rbuf, rcount, rdtype, 
-                                                       root, comm, module);
-}*/
+                                                       root, comm);
+}
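The selector re-enabled above keeps Open MPI's heuristic: the binomial tree only pays off when the communicator is large and the per-rank block is small; everything else goes through the basic linear algorithm. A small self-contained sketch of that decision (small_comm_size is defined elsewhere in smpi_openmpi_selector.c and is not visible in this hunk, so the value below is only a placeholder):

    #include <stddef.h>

    /* returns 1 when the binomial scatter would be picked, 0 for basic linear */
    static int use_binomial(int communicator_size, size_t dsize, int count)
    {
      const size_t small_block_size = 300;   /* bytes, as in the hunk above */
      const int    small_comm_size  = 10;    /* placeholder, see the selector source */
      size_t block_size = dsize * (size_t)count;
      return communicator_size > small_comm_size && block_size < small_block_size;
    }

For instance, 64 MPI_INTs per rank make a 256-byte block, so a sufficiently large communicator takes the binomial tree, while 128 ints (512 bytes) or a small communicator falls back to the linear loop.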
 
 
diff --git a/src/smpi/smpi_coll.c b/src/smpi/smpi_coll.c
index 9140f3a..50ee98e 100644
@@ -55,6 +55,13 @@ COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA),
   {NULL, NULL, NULL}      /* this array must be NULL terminated */
 };
 
+s_mpi_coll_description_t mpi_coll_scatter_description[] = {
+  {"default",
+   "scatter default collective",
+   smpi_mpi_scatter},
+COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA),
+  {NULL, NULL, NULL}      /* this array must be NULL terminated */
+};
 
 s_mpi_coll_description_t mpi_coll_alltoall_description[] = {
   {"default",
@@ -153,6 +160,7 @@ int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void*, int*, int
 int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com);
 int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
 int (*mpi_coll_reduce_scatter_fun)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op  op,MPI_Comm  comm);
+int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm);
 struct s_proc_tree {
   int PROCTREE_A;
   int numChildren;
diff --git a/src/smpi/smpi_global.c b/src/smpi/smpi_global.c
index 5b05cfe..d980dfc 100644
@@ -413,6 +413,13 @@ int smpi_main(int (*realmain) (int argc, char *argv[]),int argc, char *argv[])
   mpi_coll_reduce_scatter_fun = (int (*)(void *sbuf, void *rbuf, int *rcounts,\
                     MPI_Datatype dtype,MPI_Op  op,MPI_Comm  comm))
                           mpi_coll_reduce_scatter_description[reduce_scatter_id].coll;
+
+  int scatter_id = find_coll_description(mpi_coll_scatter_description,
+                                           sg_cfg_get_string("smpi/scatter"));
+  mpi_coll_scatter_fun = (int (*)(void *sendbuf, int sendcount, MPI_Datatype sendtype,\
+                void *recvbuf, int recvcount, MPI_Datatype recvtype,\
+                int root, MPI_Comm comm))
+                          mpi_coll_scatter_description[scatter_id].coll;
   smpi_global_init();
 
   /* Clean IO before the run */
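With the lookup wired into smpi_main, the new algorithm is selected at launch like the other collectives; assuming SimGrid's usual --cfg=key:value syntax, something along the lines of

    smpirun --cfg=smpi/scatter:ompi_binomial ... ./my_app

should bind mpi_coll_scatter_fun to smpi_coll_tuned_scatter_ompi_binomial, while the "default" entry keeps the pre-existing smpi_mpi_scatter.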
diff --git a/src/smpi/smpi_pmpi.c b/src/smpi/smpi_pmpi.c
index 23621ea..32fb308 100644
@@ -1735,7 +1735,7 @@ int PMPI_Scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
              || recvtype == MPI_DATATYPE_NULL) {
     retval = MPI_ERR_TYPE;
   } else {
-    smpi_mpi_scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount,
+    mpi_coll_scatter_fun(sendbuf, sendcount, sendtype, recvbuf, recvcount,
                      recvtype, root, comm);
     retval = MPI_SUCCESS;
   }
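End to end, a plain MPI_Scatter call in user code now goes through whichever implementation the smpi/scatter key selected. A trivial test program (standard MPI, nothing SMPI-specific) that exercises the new path:

    #include <mpi.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char *argv[])
    {
      int rank, size, recv;
      int *send = NULL;

      MPI_Init(&argc, &argv);
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      MPI_Comm_size(MPI_COMM_WORLD, &size);

      if (rank == 0) {                        /* the root provides one int per rank */
        send = malloc(size * sizeof(int));
        for (int i = 0; i < size; i++)
          send[i] = i * i;
      }
      MPI_Scatter(send, 1, MPI_INT, &recv, 1, MPI_INT, 0, MPI_COMM_WORLD);
      printf("rank %d got %d\n", rank, recv);

      free(send);
      MPI_Finalize();
      return 0;
    }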