/*****************************************************************************

 * Function: smpi_coll_tuned_alltoall_3dmesh

 * Return: int (success = 1, failure = 0)

 * Inputs:
     send_buff: send input buffer
     send_count: number of elements to send
     send_type: data type of elements being sent
     recv_buff: receive output buffer
     recv_count: number of elements to receive
     recv_type: data type of elements being received
     comm: communicator

 * Descrp: Function realizes the alltoall operation using the 3dmesh
           algorithm. It performs an allgather operation in the x dimension,
           then in the y dimension, and finally in the z dimension. Each node
           then extracts the needed data. The communication in all dimensions
           is simple.

 ****************************************************************************/
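/* Assumed headers: cbrt() requires <math.h>, and <stdio.h>/<stdlib.h> back
   the error paths below; "mpi.h" stands in for whatever MPI header this
   source tree actually uses. */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"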
/* Succeeds (returns 1) iff num factors as X * Y * Z with X == Y, writing the
   mesh dimensions to *i, *j, *k; returns 0 otherwise. */
int alltoall_check_is_3dmesh(int num, int * i, int * j, int * k)
{
  int x, max = num / 3;
  x = (int) cbrt(num);
  *i = *j = *k = 0;
  while (x <= max) {
    if ((num % (x * x)) == 0) {
      *i = *j = x;
      *k = num / (x * x);
      return 1;
    }
    x++;
  }
  return 0;
}
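/* Example: num = 8 gives max = 8 / 3 = 2 and x = cbrt(8) = 2; since
   8 % (2 * 2) == 0, the helper reports X = Y = 2, Z = 2, a 2 x 2 x 2 mesh. */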
int smpi_coll_tuned_alltoall_3dmesh(void * send_buff, int send_count, MPI_Datatype send_type,
                                    void * recv_buff, int recv_count, MPI_Datatype recv_type,
                                    MPI_Comm comm)
{
  MPI_Request * reqs, * req_ptr;
  MPI_Aint extent;
  MPI_Status status, * statuses;
  int i, j, src, dst, rank, num_procs, num_reqs, X, Y, Z, block_size, count;
  int my_z, two_dsize, my_row_base, my_col_base, my_z_base, src_row_base;
  int src_z_base, send_offset, recv_offset, tag = 1, failure = 0, success = 1;

  char * tmp_buff1, * tmp_buff2;

  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &num_procs);
  MPI_Type_extent(send_type, &extent);  /* deprecated; modern MPI uses MPI_Type_get_extent */
  /* Bail out if num_procs does not factor as an X * Y * Z mesh. */
  if (!alltoall_check_is_3dmesh(num_procs, &X, &Y, &Z))
    return failure;

  /* One request per neighbor in the largest dimension. */
  num_reqs = X;
  if (Y > X) num_reqs = Y;
  if (Z > Y) num_reqs = Z;
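  /* Position of this rank in the mesh: my_row_base is the first rank of our
     row, my_col_base the first rank of our column within our X*Y plane, and
     my_z_base the first rank of our plane. E.g. with X = Y = Z = 2, rank 5
     lies in plane 1: my_z_base = 4, my_row_base = 4, my_col_base = 5. */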
  two_dsize = X * Y;
  my_z = rank / two_dsize;

  my_row_base = (rank / X) * X;
  my_col_base = (rank % Y) + (my_z * two_dsize);
  my_z_base = my_z * two_dsize;

  /* Size in bytes of one rank-to-rank block. */
  block_size = extent * send_count;
  tmp_buff1 = (char *) malloc(block_size * num_procs * two_dsize);
  if (!tmp_buff1) {
    printf("alltoall-3Dmesh:97: cannot allocate memory\n");
    MPI_Finalize();
    exit(failure);
  }
  tmp_buff2 = (char *) malloc(block_size * two_dsize);
  if (!tmp_buff2) {
    printf("alltoall-3Dmesh:105: cannot allocate memory\n");
    MPI_Finalize();
    exit(failure);
  }
  statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
  reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
  if (!reqs || !statuses) {
    printf("alltoall-3Dmesh:113: cannot allocate memory\n");
    MPI_Finalize();
    exit(failure);
  }

  req_ptr = reqs;
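  /* Phase 1: allgather in the x dimension. Every rank first copies its own
     send buffer into tmp_buff1 (self-sendrecv below), then exchanges whole
     send buffers with the other ranks of its row, so tmp_buff1 ends up
     holding the send buffers of the entire row. */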
  send_offset = recv_offset = (rank % two_dsize) * block_size * num_procs;

  MPI_Sendrecv(send_buff, send_count * num_procs, send_type, rank, tag,
               tmp_buff1 + recv_offset, num_procs * recv_count,
               recv_type, rank, tag, comm, &status);

  count = send_count * num_procs;
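  /* Post one receive per row neighbor; each neighbor's full send buffer
     lands at its row-relative offset in tmp_buff1. */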
  for (i = 0; i < Y; i++) {
    src = i + my_row_base;
    if (src == rank) continue;
    recv_offset = (src % two_dsize) * block_size * num_procs;
    MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
              req_ptr++);
  }
  for (i = 0; i < Y; i++) {
    dst = i + my_row_base;
    if (dst == rank) continue;
    MPI_Send(send_buff, count, send_type, dst, tag, comm);
  }

  MPI_Waitall(Y - 1, reqs, statuses);
  req_ptr = reqs;
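  /* Phase 2: allgather in the y dimension. Each rank forwards the Y send
     buffers gathered by its row to the other ranks of its column; afterwards
     tmp_buff1 holds the send buffers of every rank in our X*Y plane. */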
  for (i = 0; i < X; i++) {
    src = (i * Y + my_col_base);
    if (src == rank) continue;
    src_row_base = (src / X) * X;
    recv_offset = (src_row_base % two_dsize) * block_size * num_procs;
    MPI_Irecv(tmp_buff1 + recv_offset, recv_count * num_procs * Y,
              recv_type, src, tag, comm, req_ptr++);
  }
  send_offset = (my_row_base % two_dsize) * block_size * num_procs;
  for (i = 0; i < X; i++) {
    dst = (i * Y + my_col_base);
    if (dst == rank) continue;
    MPI_Send(tmp_buff1 + send_offset, send_count * num_procs * Y, send_type,
             dst, tag, comm);
  }

  MPI_Waitall(X - 1, reqs, statuses);
  req_ptr = reqs;
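  /* Extract the local result: from each of the two_dsize send buffers
     gathered in our plane, copy the block addressed to this rank into
     recv_buff. */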
  for (i = 0; i < two_dsize; i++) {
    send_offset = (rank * block_size) + (i * block_size * num_procs);
    recv_offset = (my_z_base * block_size) + (i * block_size);
    MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type, rank, tag,
                 (char *) recv_buff + recv_offset, recv_count, recv_type,
                 rank, tag, comm, &status);
  }
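  /* Phase 3: exchange in the z dimension. Receive, from our counterpart rank
     in each other plane, the two_dsize blocks that plane holds for us. */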
  for (i = 1; i < Z; i++) {
    src = (rank + i * two_dsize) % num_procs;
    src_z_base = (src / two_dsize) * two_dsize;
    recv_offset = (src_z_base * block_size);
    MPI_Irecv((char *) recv_buff + recv_offset, recv_count * two_dsize,
              recv_type, src, tag, comm, req_ptr++);
  }
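  /* For each destination in another plane, pack into tmp_buff2 the two_dsize
     blocks our plane holds for dst (one per source rank in the plane, copied
     via self-sendrecv), then ship the packed buffer in a single send. */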
  for (i = 1; i < Z; i++) {
    dst = (rank + i * two_dsize) % num_procs;
    recv_offset = 0;
    for (j = 0; j < two_dsize; j++) {
      send_offset = (dst + j * num_procs) * block_size;
      MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
                   rank, tag, tmp_buff2 + recv_offset, recv_count,
                   recv_type, rank, tag, comm, &status);
      recv_offset += block_size;
    }
    MPI_Send(tmp_buff2, send_count * two_dsize, send_type, dst, tag, comm);
  }

  MPI_Waitall(Z - 1, reqs, statuses);

  free(reqs);
  free(statuses);
  free(tmp_buff2);
  free(tmp_buff1);
  return success;
}
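/* Usage sketch (hypothetical caller, not part of this file): the routine
   only handles process counts that factor as X * Y * Z with X == Y, so a
   caller would typically fall back to a plain MPI_Alltoall when it reports
   failure (0):

     if (!smpi_coll_tuned_alltoall_3dmesh(sbuf, scount, stype,
                                          rbuf, rcount, rtype, comm))
       MPI_Alltoall(sbuf, scount, stype, rbuf, rcount, rtype, comm);
*/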