src/smpi/colls/alltoall-3dmesh.c

   1 #include "colls.h"
   2 #include <math.h>
   3
   4 /*****************************************************************************
   5
   6  * Function: alltoall_3dmesh_shoot
   7
   8  * Return: int
   9
  10  * Inputs:
  11     send_buff: send input buffer
  12     send_count: number of elements to send
  13     send_type: data type of elements being sent
  14     recv_buff: receive output buffer
  15     recv_count: number of elements to received
  16     recv_type: data type of elements being received
  17     comm: communicator
  18
  19  * Descrp: Function realizes the alltoall operation using the 3dmesh
  20            algorithm. It actually performs allgather operation in x dimension,
  21            y dimension, then in z dimension. Each node then extracts the
  22            needed data. The communication in all dimension is simple.
  23
  24  * Auther: Ahmad Faraj
  25 ****************************************************************************/
  26
  27 int alltoall_check_is_3dmesh(int num, int * i, int * j, int * k)
  28 {
  29   int x, max = num / 3;
  30   x = cbrt(num);
  31   * i = * j = * k = 0;
  32   while (x <= max)
  33     {
  34       if ((num % (x * x)) == 0)
  35         {
  36           * i = * j = x;
  37           * k = num / (x * x);
  38           return 1;
  39         }
  40       x++;
  41     }
  42   return 0;
  43 }
  44
  45 int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Datatype send_type,
  46                       void * recv_buff, int recv_count, MPI_Datatype recv_type,
  47                       MPI_Comm comm)
  48 {
  49   MPI_Request * reqs, * req_ptr;
  50   MPI_Aint extent;
  51   MPI_Status status, * statuses;
  52   int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count;
  53   int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base;
  54   int src_z_base, send_offset, recv_offset, tag = 1, failure = 0, success = 1;
  55
  56   char * tmp_buff1, * tmp_buff2;
  57
  58   MPI_Comm_rank(comm, &rank);
  59   MPI_Comm_size(comm, &num_procs);
  60   MPI_Type_extent(send_type, &extent);
  61
  62   if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z))
  63      return failure;
  64
  65   num_reqs = X;
  66   if (Y > X) num_reqs = Y;
  67   if (Z > Y) num_reqs = Z;
  68
  69   two_dsize = X * Y;
  70   my_z = rank / two_dsize;
  71
  72   my_row_base = (rank / X) * X;
  73   my_col_base = (rank % Y) + (my_z * two_dsize);
  74   my_z_base = my_z * two_dsize;
  75
  76   block_size = extent * send_count;
  77
  78   tmp_buff1 =(char *) malloc(block_size * num_procs * two_dsize);
  79   if (!tmp_buff1)
  80     {
  81       printf("alltoall-3Dmesh:97: cannot allocate memory\n");
  82       MPI_Finalize();
  83       exit(failure);
  84     }
  85
  86   tmp_buff2 =(char *) malloc(block_size *  two_dsize);
  87   if (!tmp_buff2)
  88     {
  89       printf("alltoall-3Dmesh:105: cannot allocate memory\n");
  90       MPI_Finalize();
  91       exit(failure);
  92     }
  93
  94   statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
  95   reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
  96   if (!reqs)
  97     {
  98       printf("alltoall-3Dmesh:113: cannot allocate memory\n");
  99       MPI_Finalize();
 100       exit(failure);
 101     }
 102
 103   req_ptr = reqs;
 104
 105   send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs;
 106
 107   MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
 108                tmp_buff1 + recv_offset, num_procs * recv_count,
 109                recv_type, rank, tag, comm, &status);
 110
 111   count = send_count * num_procs;
 112
 113   for (i = 0; i < Y; i++)
 114     {
 115       src = i + my_row_base;
 116       if (src == rank) continue;
 117       recv_offset = (src % two_dsize) * block_size * num_procs;
 118       MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
 119                 req_ptr++);
 120     }
 121
 122   for (i = 0; i < Y; i++)
 123     {
 124       dst = i + my_row_base;
 125       if (dst == rank) continue;
 126       MPI_Send(send_buff, count, send_type, dst, tag, comm);
 127     }
 128
 129   MPI_Waitall(Y - 1, reqs, statuses);
 130   req_ptr = reqs;
 131
 132
 133   for (i = 0; i < X; i++)
 134     {
 135       src = (i * Y + my_col_base);
 136       if (src == rank) continue;
 137
 138       src_row_base = (src / X) * X;
 139
 140       recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
 141       MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
 142                  recv_type, src, tag, comm, req_ptr++);
 143     }
 144
 145   send_offset = (my_row_base % two_dsize) * block_size * num_procs;
 146   for (i = 0; i < X; i++)
 147     {
 148       dst = (i * Y + my_col_base);
 149       if (dst == rank) continue;
 150       MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
 151                 dst, tag, comm);
 152     }
 153
 154   MPI_Waitall(X - 1, reqs, statuses);
 155   req_ptr = reqs;
 156
 157   for (i = 0; i < two_dsize; i++)
 158     {
 159       send_offset = (rank * block_size) + (i * block_size * num_procs);
 160       recv_offset = (my_z_base * block_size) + (i * block_size);
 161       MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
 162                    recv_buff + recv_offset, recv_count, recv_type, rank, tag,
 163                    comm, &status);
 164     }
 165
 166   for (i = 1; i < Z; i++)
 167     {
 168       src = (rank + i * two_dsize) % num_procs;
 169       src_z_base = (src / two_dsize) * two_dsize;
 170
 171       recv_offset = (src_z_base * block_size);
 172
 173       MPI_Irecv(recv_buff + recv_offset, recv_count * two_dsize, recv_type,
 174                  src, tag, comm, req_ptr++);
 175   }
 176
 177   for (i = 1; i < Z; i++)
 178     {
 179       dst = (rank + i * two_dsize) % num_procs;
 180
 181       recv_offset = 0;
 182       for (j = 0; j < two_dsize; j++)
 183         {
 184           send_offset = (dst + j * num_procs) * block_size;
 185           MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
 186                        rank, tag, tmp_buff2 + recv_offset, recv_count,
 187                        recv_type, rank, tag, comm, &status);
 188
 189           recv_offset += block_size;
 190         }
 191
 192       MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
 193
 194     }
 195
 196   MPI_Waitall(Z - 1, reqs, statuses);
 197
 198   free(reqs);
 199   free(statuses);
 200   free(tmp_buff1);
 201   free(tmp_buff2);
 202   return success;
 203 }