1 #include "colls_private.h"
4 /*****************************************************************************
6 * Function: alltoall_2dmesh_shoot
11 send_buff: send input buffer
12 send_count: number of elements to send
13 send_type: data type of elements being sent
14 recv_buff: receive output buffer
15 recv_count: number of elements to received
16 recv_type: data type of elements being received
19 * Descrp: Function realizes the alltoall operation using the 2dmesh
20 algorithm. It actually performs allgather operation in x dimension
21 then in the y dimension. Each node then extracts the needed data.
22 The communication in each dimension follows "simple."
26 ****************************************************************************/
27 static int alltoall_check_is_2dmesh(int num, int *i, int *j)
50 int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count,
51 MPI_Datatype send_type,
52 void *recv_buff, int recv_count,
53 MPI_Datatype recv_type, MPI_Comm comm)
55 MPI_Status *statuses, s;
56 MPI_Request *reqs, *req_ptr;;
59 char *tmp_buff1, *tmp_buff2;
60 int i, j, src, dst, rank, num_procs, count, num_reqs;
61 int X, Y, send_offset, recv_offset;
62 int my_row_base, my_col_base, src_row_base, block_size;
63 int tag = COLL_TAG_ALLTOALL;
65 rank = smpi_comm_rank(comm);
66 num_procs = smpi_comm_size(comm);
67 extent = smpi_datatype_get_extent(send_type);
69 if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))
72 my_row_base = (rank / Y) * Y;
73 my_col_base = rank % Y;
75 block_size = extent * send_count;
77 tmp_buff1 = (char *) xbt_malloc(block_size * num_procs * Y);
78 tmp_buff2 = (char *) xbt_malloc(block_size * Y);
84 statuses = (MPI_Status *) xbt_malloc(num_reqs * sizeof(MPI_Status));
85 reqs = (MPI_Request *) xbt_malloc(num_reqs * sizeof(MPI_Request));
89 send_offset = recv_offset = (rank % Y) * block_size * num_procs;
91 count = send_count * num_procs;
93 for (i = 0; i < Y; i++) {
94 src = i + my_row_base;
98 recv_offset = (src % Y) * block_size * num_procs;
99 *(req_ptr++) = smpi_mpi_irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm);
102 for (i = 0; i < Y; i++) {
103 dst = i + my_row_base;
106 smpi_mpi_send(send_buff, count, send_type, dst, tag, comm);
109 smpi_mpi_waitall(Y - 1, reqs, statuses);
112 for (i = 0; i < Y; i++) {
113 send_offset = (rank * block_size) + (i * block_size * num_procs);
114 recv_offset = (my_row_base * block_size) + (i * block_size);
116 if (i + my_row_base == rank)
117 smpi_mpi_sendrecv((char *) send_buff + recv_offset, send_count, send_type,
119 (char *) recv_buff + recv_offset, recv_count, recv_type,
120 rank, tag, comm, &s);
123 smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type,
125 (char *) recv_buff + recv_offset, recv_count, recv_type,
126 rank, tag, comm, &s);
130 for (i = 0; i < X; i++) {
131 src = (i * Y + my_col_base);
134 src_row_base = (src / Y) * Y;
136 *(req_ptr++) = smpi_mpi_irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
137 recv_type, src, tag, comm);
140 for (i = 0; i < X; i++) {
141 dst = (i * Y + my_col_base);
146 for (j = 0; j < Y; j++) {
147 send_offset = (dst + j * num_procs) * block_size;
149 if (j + my_row_base == rank)
150 smpi_mpi_sendrecv((char *) send_buff + dst * block_size, send_count,
151 send_type, rank, tag, tmp_buff2 + recv_offset, recv_count,
152 recv_type, rank, tag, comm, &s);
154 smpi_mpi_sendrecv(tmp_buff1 + send_offset, send_count, send_type,
156 tmp_buff2 + recv_offset, recv_count, recv_type,
157 rank, tag, comm, &s);
159 recv_offset += block_size;
162 smpi_mpi_send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
164 smpi_mpi_waitall(X - 1, reqs, statuses);