src/smpi/colls/alltoallv/alltoallv-ompi-basic-linear.cpp

   1 /* Copyright (c) 2013-2019. The SimGrid Team.
   2  * All rights reserved.                                                     */
   3
   4 /* This program is free software; you can redistribute it and/or modify it
   5  * under the terms of the license (GNU LGPL) which comes with this package. */
   6
   7 #include "../colls_private.hpp"
   8 /*
   9  * Linear functions are copied from the basic coll module.  For
  10  * some small number of nodes and/or small data sizes they are just as
  11  * fast as tuned/tree based segmenting operations and as such may be
  12  * selected by the decision functions.  These are copied into this module
  13  * due to the way we select modules in V1. i.e. in V2 we will handle this
  14  * differently and so will not have to duplicate code.
  15  * GEF Oct05 after asking Jeff.
  16  */
  17 namespace simgrid{
  18 namespace smpi{
  19 int alltoallv__ompi_basic_linear(const void *sbuf, const int *scounts, const int *sdisps,
  20                                  MPI_Datatype sdtype,
  21                                  void *rbuf, const int *rcounts, const int *rdisps,
  22                                  MPI_Datatype rdtype,
  23                                  MPI_Comm comm)
  24 {
  25     int i, size, rank;
  26     char *psnd, *prcv;
  27     int nreqs;
  28     ptrdiff_t sext, rext;
  29     MPI_Request *preq;
  30     size = comm->size();
  31     rank = comm->rank();
  32     MPI_Request* ireqs = new MPI_Request[size * 2];
  33     XBT_DEBUG(
  34                  "coll:tuned:alltoallv_intra_basic_linear rank %d", rank);
  35
  36     sext=sdtype->get_extent();
  37     rext=rdtype->get_extent();
  38
  39     /* Simple optimization - handle send to self first */
  40     psnd = ((char *) sbuf) + (sdisps[rank] * sext);
  41     prcv = ((char *) rbuf) + (rdisps[rank] * rext);
  42     if (0 != scounts[rank]) {
  43         Datatype::copy(psnd, scounts[rank], sdtype,
  44                               prcv, rcounts[rank], rdtype);
  45     }
  46
  47     /* If only one process, we're done. */
  48     if (1 == size) {
  49         return MPI_SUCCESS;
  50     }
  51
  52     /* Now, initiate all send/recv to/from others. */
  53     nreqs = 0;
  54     preq = ireqs;
  55
  56     /* Post all receives first */
  57     for (i = 0; i < size; ++i) {
  58         if (i == rank) {
  59             continue;
  60         }
  61
  62         prcv = ((char *) rbuf) + (rdisps[i] * rext);
  63
  64         *preq = Request::irecv_init(prcv, rcounts[i], rdtype,
  65                                       i, COLL_TAG_ALLTOALLV, comm
  66                                       );
  67         preq++;
  68         ++nreqs;
  69
  70     }
  71
  72     /* Now post all sends */
  73     for (i = 0; i < size; ++i) {
  74         if (i == rank) {
  75             continue;
  76         }
  77
  78         psnd = ((char *) sbuf) + (sdisps[i] * sext);
  79         *preq=Request::isend_init(psnd, scounts[i], sdtype,
  80                                       i, COLL_TAG_ALLTOALLV, comm
  81                                       );
  82         preq++;
  83         ++nreqs;
  84     }
  85
  86     /* Start your engines.  This will never return an error. */
  87     Request::startall(nreqs, ireqs);
  88
  89     /* Wait for them all.  If there's an error, note that we don't care
  90      * what the error was -- just that there *was* an error.  The PML
  91      * will finish all requests, even if one or more of them fail.
  92      * i.e., by the end of this call, all the requests are free-able.
  93      * So free them anyway -- even if there was an error, and return the
  94      * error after we free everything. */
  95     Request::waitall(nreqs, ireqs,
  96                                 MPI_STATUSES_IGNORE);
  97
  98     /* Free the requests. */
  99     for (i = 0; i < nreqs; ++i) {
 100       if(ireqs[i]!=MPI_REQUEST_NULL)
 101         Request::unref(&ireqs[i]);
 102     }
 103     delete[] ireqs;
 104
 105     return MPI_SUCCESS;
 106 }
 107 }
 108 }
 109