src/smpi/colls/allgather-NTSLR-NB.c

   1 #include "colls.h"
   2
   3 // Allgather-Non-Topoloty-Scecific-Logical-Ring algorithm
   4 int
   5 smpi_coll_tuned_allgather_NTSLR_NB(void *sbuf, int scount, MPI_Datatype stype,
   6                                    void *rbuf, int rcount, MPI_Datatype rtype,
   7                                    MPI_Comm comm)
   8 {
   9   MPI_Aint rextent, sextent;
  10   MPI_Status status, status2;
  11   int i, to, from, rank, size;
  12   int send_offset, recv_offset;
  13   int tag = 500;
  14
  15   MPI_Comm_rank(comm, &rank);
  16   MPI_Comm_size(comm, &size);
  17   MPI_Type_extent(rtype, &rextent);
  18   MPI_Type_extent(stype, &sextent);
  19   MPI_Request *rrequest_array;
  20   MPI_Request *srequest_array;
  21   rrequest_array = (MPI_Request *) malloc(size * sizeof(MPI_Request));
  22   srequest_array = (MPI_Request *) malloc(size * sizeof(MPI_Request));
  23
  24   // irregular case use default MPI fucntions
  25   if (scount * sextent != rcount * rextent)
  26     MPI_Allgather(sbuf, scount, stype, rbuf, rcount, rtype, comm);
  27
  28   // topo non-specific
  29   to = (rank + 1) % size;
  30   from = (rank + size - 1) % size;
  31
  32   //copy a single segment from sbuf to rbuf
  33   send_offset = rank * scount * sextent;
  34
  35   MPI_Sendrecv(sbuf, scount, stype, rank, tag,
  36                (char *)rbuf + send_offset, rcount, rtype, rank, tag, comm, &status);
  37
  38
  39   //start sending logical ring message
  40   int increment = scount * sextent;
  41
  42   //post all irecv first
  43   for (i = 0; i < size - 1; i++) {
  44     recv_offset = ((rank - i - 1 + size) % size) * increment;
  45     MPI_Irecv((char *)rbuf + recv_offset, rcount, rtype, from, tag + i, comm,
  46               &rrequest_array[i]);
  47   }
  48
  49
  50   for (i = 0; i < size - 1; i++) {
  51     send_offset = ((rank - i + size) % size) * increment;
  52     MPI_Isend((char *)rbuf + send_offset, scount, stype, to, tag + i, comm,
  53               &srequest_array[i]);
  54     MPI_Wait(&rrequest_array[i], &status);
  55     MPI_Wait(&srequest_array[i], &status2);
  56   }
  57
  58   free(rrequest_array);
  59   free(srequest_array);
  60
  61   return MPI_SUCCESS;
  62 }