1 #include "colls_private.h"
4 /*****************************************************************************
6 * Function: alltoall_3dmesh_shoot
11 send_buff: send input buffer
12 send_count: number of elements to send
13 send_type: data type of elements being sent
14 recv_buff: receive output buffer
15 recv_count: number of elements to be received
16 recv_type: data type of elements being received
19 * Descrp: Function realizes the alltoall operation using the 3dmesh
20 algorithm. It actually performs an allgather operation in the x dimension,
21 the y dimension, then the z dimension. Each node then extracts the
22 needed data. The communication in all dimensions is simple.
25 ****************************************************************************/
/*
 * Tests whether `num` processes can be arranged as an X*Y*Z 3D mesh and,
 * on success, writes the chosen factors through i/j/k.
 *
 * NOTE(review): only a fragment of the body is visible in this excerpt
 * (the leading numerals are the original line numbers). The variable `x`
 * is declared in an elided line — presumably a candidate mesh factor being
 * scanned; TODO confirm against the full source.
 */
27 static int alltoall_check_is_3dmesh(int num, int *i, int *j, int *k)
33   if ((num % (x * x)) == 0) {
/**
 * All-to-all exchange using the 3D-mesh algorithm described in the header
 * comment above: ranks are arranged as an X x Y x Z mesh (factorization by
 * alltoall_check_is_3dmesh) and data moves dimension by dimension — along
 * the row (Y peers), then the column (X peers), then the Z axis — after
 * which each rank extracts its slice of every sender's buffer.
 *
 * send_buff / send_count / send_type: per-destination payload description.
 * recv_buff / recv_count / recv_type: per-source payload description.
 *
 * NOTE(review): this listing is an elided excerpt (the leading numerals are
 * original line numbers). Declarations of `extent` and `two_dsize`, the
 * computation of `num_reqs`, the `req_ptr = reqs` resets between phases,
 * the self-skip guards inside the send/recv loops, and the final frees and
 * return statement are all in elided lines — the comments below describe
 * only what is visible, and flag what is assumed.
 */
43 int smpi_coll_tuned_alltoall_3dmesh(void *send_buff, int send_count,
44 MPI_Datatype send_type,
45 void *recv_buff, int recv_count,
46 MPI_Datatype recv_type, MPI_Comm comm)
48 MPI_Request *reqs, *req_ptr;
50 MPI_Status status, *statuses;
51 int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count;
52 int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base;
53 int src_z_base, send_offset, recv_offset, tag = COLL_TAG_ALLTOALL;
55 char *tmp_buff1, *tmp_buff2;
57 rank = smpi_comm_rank(comm);
58 num_procs = smpi_comm_size(comm);
59 extent = smpi_datatype_get_extent(send_type);
/* Fall back unless num_procs factors as an X*Y*Z mesh (the taken branch's
 * body is elided here). */
61 if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z))
/* Mesh coordinates of this rank. `two_dsize` is assigned in an elided line
 * — presumably X * Y (one z-plane of the mesh); TODO confirm. */
71 my_z = rank / two_dsize;
73 my_row_base = (rank / X) * X;
74 my_col_base = (rank % Y) + (my_z * two_dsize);
75 my_z_base = my_z * two_dsize;
/* Size in bytes of one rank-to-rank block. */
77 block_size = extent * send_count;
/* Scratch space: tmp_buff1 accumulates one z-plane's worth of full
 * per-rank contributions; tmp_buff2 stages packed data for the Z phase.
 * num_reqs is computed in an elided line — presumably max of the per-phase
 * request counts; TODO confirm. */
79 tmp_buff1 = (char *) xbt_malloc(block_size * num_procs * two_dsize);
80 tmp_buff2 = (char *) xbt_malloc(block_size * two_dsize);
82 statuses = (MPI_Status *) xbt_malloc(num_reqs * sizeof(MPI_Status));
83 reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request));
/* Copy this rank's own full contribution into its slot of tmp_buff1 via a
 * self sendrecv. */
87 send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs;
89 smpi_mpi_sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
90 tmp_buff1 + recv_offset, num_procs * recv_count,
91 recv_type, rank, tag, comm, &status);
93 count = send_count * num_procs;
/* Phase 1: exchange full buffers along the row (Y peers). Y - 1 requests
 * are waited below, so the i-index matching this rank is presumably skipped
 * by an elided guard inside each loop; TODO confirm. */
95 for (i = 0; i < Y; i++) {
96 src = i + my_row_base;
99 recv_offset = (src % two_dsize) * block_size * num_procs;
100 *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm);
103 for (i = 0; i < Y; i++) {
104 dst = i + my_row_base;
107 smpi_mpi_send(send_buff, count, send_type, dst, tag, comm);
110 smpi_mpi_waitall(Y - 1, reqs, statuses);
/* Phase 2: exchange whole rows along the column (X peers), so tmp_buff1
 * ends up holding the contributions of the entire z-plane. As above, the
 * self iteration is presumably skipped by an elided guard (X - 1 waits). */
114 for (i = 0; i < X; i++) {
115 src = (i * Y + my_col_base);
119 src_row_base = (src / X) * X;
121 recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
122 *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
123 recv_type, src, tag, comm);
126 send_offset = (my_row_base % two_dsize) * block_size * num_procs;
127 for (i = 0; i < X; i++) {
128 dst = (i * Y + my_col_base);
/* Destination/tag arguments of this send are on an elided continuation
 * line. */
131 smpi_mpi_send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
135 smpi_mpi_waitall(X - 1, reqs, statuses);
/* Local extraction: copy the blocks addressed to this rank from each of
 * the two_dsize plane members into recv_buff (self sendrecv per block). */
138 for (i = 0; i < two_dsize; i++) {
139 send_offset = (rank * block_size) + (i * block_size * num_procs);
140 recv_offset = (my_z_base * block_size) + (i * block_size);
141 smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
142 (char *) recv_buff + recv_offset, recv_count, recv_type,
143 rank, tag, comm, &status);
/* Phase 3 receives: each of the Z - 1 other planes delivers its two_dsize
 * blocks straight into the matching region of recv_buff. */
146 for (i = 1; i < Z; i++) {
147 src = (rank + i * two_dsize) % num_procs;
148 src_z_base = (src / two_dsize) * two_dsize;
150 recv_offset = (src_z_base * block_size);
152 *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + recv_offset, recv_count * two_dsize,
153 recv_type, src, tag, comm);
/* Phase 3 sends: for each Z peer, pack the two_dsize blocks destined for
 * `dst` out of tmp_buff1 into tmp_buff2 (via local sendrecv copies,
 * recv_offset advancing one block per copy; its reset to 0 before this
 * inner loop is presumably in an elided line — TODO confirm), then send
 * the packed buffer. */
156 for (i = 1; i < Z; i++) {
157 dst = (rank + i * two_dsize) % num_procs;
160 for (j = 0; j < two_dsize; j++) {
161 send_offset = (dst + j * num_procs) * block_size;
162 smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type,
163 rank, tag, tmp_buff2 + recv_offset, recv_count,
164 recv_type, rank, tag, comm, &status);
166 recv_offset += block_size;
169 smpi_mpi_send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
/* Wait for the Z - 1 plane receives; buffer frees and the return statement
 * follow in elided lines. */
173 smpi_mpi_waitall(Z - 1, reqs, statuses);