src/smpi/colls/alltoallv-ompi-basic-linear.c

   1
   2 #include "colls_private.h"
   /* Constant handed to the smpi_irecv_init / smpi_isend_init calls below,
    * in the argument slot right after the peer rank -- presumably the MPI
    * message tag reserved for this collective (TODO confirm against the
    * smpi_*_init prototypes). */
   3 #define MCA_COLL_BASE_TAG_ALLTOALLV 111
   4 /*
   5  * Linear functions are copied from the basic coll module.  For
   6  * some small number of nodes and/or small data sizes they are just as
   7  * fast as tuned/tree based segmenting operations and as such may be
   8  * selected by the decision functions.  These are copied into this module
   9  * due to the way we select modules in V1. i.e. in V2 we will handle this
  10  * differently and so will not have to duplicate code.
  11  * GEF Oct05 after asking Jeff.
  12  */
  13 int
  14 smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps,
  15                                             MPI_Datatype sdtype,
  16                                             void *rbuf, int *rcounts, int *rdisps,
  17                                             MPI_Datatype rdtype,
  18                                             MPI_Comm comm)
  19 {
  20     int i, size, rank;
  21     char *psnd, *prcv;
  22     int nreqs;
  23     ptrdiff_t sext, rext;
  24     MPI_Request *preq;
  25     size = smpi_comm_size(comm);
  26     rank = smpi_comm_rank(comm);
  27     MPI_Request *ireqs= xbt_malloc(sizeof(MPI_Request) * size * 2);
  28     XBT_DEBUG(
  29                  "coll:tuned:alltoallv_intra_basic_linear rank %d", rank);
  30
  31     sext=smpi_datatype_get_extent(sdtype);
  32     rext=smpi_datatype_get_extent(rdtype);
  33
  34     /* Simple optimization - handle send to self first */
  35     psnd = ((char *) sbuf) + (sdisps[rank] * sext);
  36     prcv = ((char *) rbuf) + (rdisps[rank] * rext);
  37     if (0 != scounts[rank]) {
  38         smpi_datatype_copy(psnd, scounts[rank], sdtype,
  39                               prcv, rcounts[rank], rdtype);
  40     }
  41
  42     /* If only one process, we're done. */
  43     if (1 == size) {
  44         return MPI_SUCCESS;
  45     }
  46
  47     /* Now, initiate all send/recv to/from others. */
  48     nreqs = 0;
  49     preq = ireqs;
  50
  51     /* Post all receives first */
  52     for (i = 0; i < size; ++i) {
  53         if (i == rank || 0 == rcounts[i]) {
  54             continue;
  55         }
  56
  57         prcv = ((char *) rbuf) + (rdisps[i] * rext);
  58
  59         *preq = smpi_irecv_init(prcv, rcounts[i], rdtype,
  60                                       i, MCA_COLL_BASE_TAG_ALLTOALLV, comm
  61                                       );
  62         preq++;
  63         ++nreqs;
  64
  65     }
  66
  67     /* Now post all sends */
  68     for (i = 0; i < size; ++i) {
  69         if (i == rank || 0 == scounts[i]) {
  70             continue;
  71         }
  72
  73         psnd = ((char *) sbuf) + (sdisps[i] * sext);
  74         *preq=smpi_isend_init(psnd, scounts[i], sdtype,
  75                                       i, MCA_COLL_BASE_TAG_ALLTOALLV, comm
  76                                       );
  77         preq++;
  78         ++nreqs;
  79     }
  80
  81     /* Start your engines.  This will never return an error. */
  82     smpi_mpi_startall(nreqs, ireqs);
  83
  84     /* Wait for them all.  If there's an error, note that we don't care
  85      * what the error was -- just that there *was* an error.  The PML
  86      * will finish all requests, even if one or more of them fail.
  87      * i.e., by the end of this call, all the requests are free-able.
  88      * So free them anyway -- even if there was an error, and return the
  89      * error after we free everything. */
  90     smpi_mpi_waitall(nreqs, ireqs,
  91                                 MPI_STATUSES_IGNORE);
  92
  93     /* Free the requests. */
  94     for (i = 0; i < nreqs; ++i) {
  95       if(ireqs[i]!=MPI_REQUEST_NULL)smpi_mpi_request_free(&ireqs[i]);
  96     }
  97
  98     return MPI_SUCCESS;
  99 }
 100