XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
                                "Logging specific to SMPI collectives");
/*****************************************************************************

 * Function: alltoall_2dmesh_shoot

 * Inputs:
     send_buff: send input buffer
     send_count: number of elements to send
     send_type: data type of elements being sent
     recv_buff: receive output buffer
     recv_count: number of elements to receive
     recv_type: data type of elements being received

 * Descrp: Implements the all-to-all operation using the 2D mesh algorithm:
           an allgather-style exchange is performed along the x dimension
           (within each mesh row), then along the y dimension (within each
           column), and each node finally extracts the data it needs. The
           communication in each dimension follows the "simple" algorithm.

 ****************************************************************************/
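/* Illustrative example (assuming this is the factorization the check settles
   on): with num_procs = 12 arranged as an X = 3 by Y = 4 mesh, rank 6 gets
   my_row_base = (6 / 4) * 4 = 4 and my_col_base = 6 % 4 = 2, so its row
   peers are ranks 4..7 and its column peers are ranks 2, 6 and 10.
   alltoall_check_is_2dmesh() decides whether num processes can be arranged
   as such an X x Y mesh and, if so, returns the factors through *i and *j. */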
int alltoall_check_is_2dmesh(int num, int *i, int *j);
int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count,
                                    MPI_Datatype send_type,
                                    void *recv_buff, int recv_count,
                                    MPI_Datatype recv_type, MPI_Comm comm)
{
  MPI_Status *statuses, s;
  MPI_Request *reqs, *req_ptr;
  MPI_Aint extent;

  char *tmp_buff1, *tmp_buff2;
  int i, j, src, dst, rank, num_procs, count, num_reqs;
  int rows, cols, my_row, my_col, X, Y, send_offset, recv_offset;
  int two_dsize, my_row_base, my_col_base, src_row_base, block_size;
  int tag = 1, failure = 0, success = 1;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &num_procs);
  MPI_Type_extent(send_type, &extent);
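  /* The algorithm only applies when the number of processes can be arranged
     as an X x Y mesh; X and Y are the mesh dimensions used below. */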
  if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))
    return failure;

  my_row_base = (rank / Y) * Y;
  my_col_base = rank % Y;

  block_size = extent * send_count;
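  /* tmp_buff1 gathers the complete send buffers of the Y ranks of this mesh
     row (Y buffers of num_procs blocks each); tmp_buff2 stages the Y blocks
     forwarded to one column peer at a time. */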
  tmp_buff1 = (char *) malloc(block_size * num_procs * Y);
  if (!tmp_buff1)
    XBT_WARN("alltoall-2dmesh_shoot.c: cannot allocate memory");

  tmp_buff2 = (char *) malloc(block_size * Y);
  if (!tmp_buff2)
    XBT_WARN("alltoall-2dmesh_shoot.c: cannot allocate memory");
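  /* Request and status arrays for the non-blocking receives: the row
     exchange posts Y - 1 of them, the column exchange posts X - 1. */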
  num_reqs = (X > Y) ? X : Y;
  statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
  reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
  if (!statuses || !reqs)
    XBT_WARN("alltoall-2dmesh_shoot.c: cannot allocate memory");

  req_ptr = reqs;

  send_offset = recv_offset = (rank % Y) * block_size * num_procs;
  count = send_count * num_procs;
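  /* Phase 1a: post non-blocking receives for the full send buffers of the
     other ranks in this mesh row; each lands in tmp_buff1 at an offset given
     by the sender's column index. */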
  for (i = 0; i < Y; i++) {
    src = i + my_row_base;
    if (src == rank)
      continue;
    recv_offset = (src % Y) * block_size * num_procs;
    MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
              req_ptr++);
  }
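  /* Phase 1b: send this rank's entire send buffer to every other rank of the
     same mesh row, then wait for all row exchanges to complete. */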
  for (i = 0; i < Y; i++) {
    dst = i + my_row_base;
    if (dst == rank)
      continue;
    MPI_Send(send_buff, count, send_type, dst, tag, comm);
  }

  MPI_Waitall(Y - 1, reqs, statuses);
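  /* Extract into recv_buff the blocks that each rank of this mesh row
     addressed to this rank (this rank's own contribution is copied straight
     from send_buff). */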
  for (i = 0; i < Y; i++) {
    send_offset = (rank * block_size) + (i * block_size * num_procs);
    recv_offset = (my_row_base * block_size) + (i * block_size);

    if (i + my_row_base == rank)
      MPI_Sendrecv((char *) send_buff + recv_offset, send_count, send_type,
                   rank, tag, (char *) recv_buff + recv_offset, recv_count,
                   recv_type, rank, tag, comm, &s);
    else
      MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
                   rank, tag,
                   (char *) recv_buff + recv_offset, recv_count, recv_type,
                   rank, tag, comm, &s);
  }
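  /* Phase 2a: post non-blocking receives from the other ranks of this mesh
     column; each delivers the Y blocks that its whole row addressed to this
     rank, placed directly into recv_buff at the sender's row offset. */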
  req_ptr = reqs;
  for (i = 0; i < X; i++) {
    src = (i * Y + my_col_base);
    if (src == rank)
      continue;
    src_row_base = (src / Y) * Y;
    MPI_Irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
              recv_type, src, tag, comm, req_ptr++);
  }
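  /* Phase 2b: for every other rank of this mesh column, pack into tmp_buff2
     the Y blocks that this rank's row addressed to it (taken from tmp_buff1,
     or from send_buff for this rank's own data), then ship them in a single
     send. */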
  for (i = 0; i < X; i++) {
    dst = (i * Y + my_col_base);
    if (dst == rank)
      continue;

    recv_offset = 0;
    for (j = 0; j < Y; j++) {
      send_offset = (dst + j * num_procs) * block_size;

      if (j + my_row_base == rank)
        MPI_Sendrecv((char *) send_buff + dst * block_size, send_count,
                     send_type, rank, tag,
                     tmp_buff2 + recv_offset, recv_count, recv_type,
                     rank, tag, comm, &s);
      else
        MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
                     rank, tag,
                     tmp_buff2 + recv_offset, recv_count, recv_type,
                     rank, tag, comm, &s);

      recv_offset += block_size;
    }

    MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
  }
  MPI_Waitall(X - 1, reqs, statuses);