src/smpi/colls/alltoall-3dmesh.c

   1 #include "colls.h"
   2 #include <math.h>
   3
   4 /*****************************************************************************
   5
   6  * Function: alltoall_3dmesh_shoot
   7
   8  * Return: int
   9
  10  * Inputs:
  11     send_buff: send input buffer
  12     send_count: number of elements to send
  13     send_type: data type of elements being sent
  14     recv_buff: receive output buffer
  15     recv_count: number of elements to received
  16     recv_type: data type of elements being received
  17     comm: communicator
  18
  19  * Descrp: Function realizes the alltoall operation using the 3dmesh
  20            algorithm. It actually performs allgather operation in x dimension,
  21            y dimension, then in z dimension. Each node then extracts the
  22            needed data. The communication in all dimension is simple.
  23
  24  * Auther: Ahmad Faraj
  25 ****************************************************************************/
  26
  27 static int alltoall_check_is_3dmesh(int num, int *i, int *j, int *k)
  28 {
  29   int x, max = num / 3;
  30   x = cbrt(num);
  31   *i = *j = *k = 0;
  32   while (x <= max) {
  33     if ((num % (x * x)) == 0) {
  34       *i = *j = x;
  35       *k = num / (x * x);
  36       return 1;
  37     }
  38     x++;
  39   }
  40   return 0;
  41 }
  42
  43 int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count,
  44                                     MPI_Datatype send_type,
  45                                     void *recv_buff, int recv_count,
  46                                     MPI_Datatype recv_type, MPI_Comm comm)
  47 {
  48   MPI_Request *reqs, *req_ptr;
  49   MPI_Aint extent;
  50   MPI_Status status, *statuses;
  51   int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count;
  52   int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base;
  53   int src_z_base, send_offset, recv_offset, tag = 1, failure = 0, success = 1;
  54
  55   char *tmp_buff1, *tmp_buff2;
  56
  57   MPI_Comm_rank(comm, &rank);
  58   MPI_Comm_size(comm, &num_procs);
  59   MPI_Type_extent(send_type, &extent);
  60
  61   if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z))
  62     return failure;
  63
  64   num_reqs = X;
  65   if (Y > X)
  66     num_reqs = Y;
  67   if (Z > Y)
  68     num_reqs = Z;
  69
  70   two_dsize = X * Y;
  71   my_z = rank / two_dsize;
  72
  73   my_row_base = (rank / X) * X;
  74   my_col_base = (rank % Y) + (my_z * two_dsize);
  75   my_z_base = my_z * two_dsize;
  76
  77   block_size = extent * send_count;
  78
  79   tmp_buff1 = (char *) malloc(block_size * num_procs * two_dsize);
  80   if (!tmp_buff1) {
  81     printf("alltoall-3Dmesh:97: cannot allocate memory\n");
  82     MPI_Finalize();
  83     exit(failure);
  84   }
  85
  86   tmp_buff2 = (char *) malloc(block_size * two_dsize);
  87   if (!tmp_buff2) {
  88     printf("alltoall-3Dmesh:105: cannot allocate memory\n");
  89     MPI_Finalize();
  90     exit(failure);
  91   }
  92
  93   statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
  94   reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
  95   if (!reqs) {
  96     printf("alltoall-3Dmesh:113: cannot allocate memory\n");
  97     MPI_Finalize();
  98     exit(failure);
  99   }
 100
 101   req_ptr = reqs;
 102
 103   send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs;
 104
 105   MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
 106                tmp_buff1 + recv_offset, num_procs * recv_count,
 107                recv_type, rank, tag, comm, &status);
 108
 109   count = send_count * num_procs;
 110
 111   for (i = 0; i < Y; i++) {
 112     src = i + my_row_base;
 113     if (src == rank)
 114       continue;
 115     recv_offset = (src % two_dsize) * block_size * num_procs;
 116     MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
 117               req_ptr++);
 118   }
 119
 120   for (i = 0; i < Y; i++) {
 121     dst = i + my_row_base;
 122     if (dst == rank)
 123       continue;
 124     MPI_Send(send_buff, count, send_type, dst, tag, comm);
 125   }
 126
 127   MPI_Waitall(Y - 1, reqs, statuses);
 128   req_ptr = reqs;
 129
 130
 131   for (i = 0; i < X; i++) {
 132     src = (i * Y + my_col_base);
 133     if (src == rank)
 134       continue;
 135
 136     src_row_base = (src / X) * X;
 137
 138     recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
 139     MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
 140               recv_type, src, tag, comm, req_ptr++);
 141   }
 142
 143   send_offset = (my_row_base % two_dsize) * block_size * num_procs;
 144   for (i = 0; i < X; i++) {
 145     dst = (i * Y + my_col_base);
 146     if (dst == rank)
 147       continue;
 148     MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
 149              dst, tag, comm);
 150   }
 151
 152   MPI_Waitall(X - 1, reqs, statuses);
 153   req_ptr = reqs;
 154
 155   for (i = 0; i < two_dsize; i++) {
 156     send_offset = (rank * block_size) + (i * block_size * num_procs);
 157     recv_offset = (my_z_base * block_size) + (i * block_size);
 158     MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
 159                  (char *) recv_buff + recv_offset, recv_count, recv_type,
 160                  rank, tag, comm, &status);
 161   }
 162
 163   for (i = 1; i < Z; i++) {
 164     src = (rank + i * two_dsize) % num_procs;
 165     src_z_base = (src / two_dsize) * two_dsize;
 166
 167     recv_offset = (src_z_base * block_size);
 168
 169     MPI_Irecv((char *) recv_buff + recv_offset, recv_count * two_dsize,
 170               recv_type, src, tag, comm, req_ptr++);
 171   }
 172
 173   for (i = 1; i < Z; i++) {
 174     dst = (rank + i * two_dsize) % num_procs;
 175
 176     recv_offset = 0;
 177     for (j = 0; j < two_dsize; j++) {
 178       send_offset = (dst + j * num_procs) * block_size;
 179       MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
 180                    rank, tag, tmp_buff2 + recv_offset, recv_count,
 181                    recv_type, rank, tag, comm, &status);
 182
 183       recv_offset += block_size;
 184     }
 185
 186     MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
 187
 188   }
 189
 190   MPI_Waitall(Z - 1, reqs, statuses);
 191
 192   free(reqs);
 193   free(statuses);
 194   free(tmp_buff1);
 195   free(tmp_buff2);
 196   return success;
 197 }