/* Declare the log category used by SMPI collective algorithms in this file
 * (a child of the "smpi" category, selectable at runtime via XBT logging). */
4 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
5 "Logging specific to SMPI collectives");
7 /*****************************************************************************
9 * Function: alltoall_2dmesh_shoot
14 send_buff: send input buffer
15 send_count: number of elements to send
16 send_type: data type of elements being sent
17 recv_buff: receive output buffer
18 recv_count: number of elements to be received
19 recv_type: data type of elements being received
22 * Description: Function realizes the alltoall operation using the 2dmesh
23 algorithm. It actually performs allgather operation in x dimension
24 then in the y dimension. Each node then extracts the needed data.
25 The communication in each dimension follows "simple."
29 ****************************************************************************/
/* Test whether `num` processes can be arranged as a 2D mesh, writing the
 * mesh dimensions through *i (X) and *j (Y).
 * NOTE(review): body is elided from this chunk; from the call site
 * (`if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))`) it returns non-zero
 * on success and fills *i/*j — confirm against the full implementation. */
30 static int alltoall_check_is_2dmesh(int num, int *i, int *j)
/* All-to-all over a virtual X x Y process mesh (see file-header comment):
 *   phase 1 — allgather-style exchange along the row (x) dimension,
 *   phase 2 — local extraction of the blocks this rank owns,
 *   phase 3 — exchange along the column (y) dimension.
 *
 * send_buff/send_count/send_type : description of outgoing data per peer
 * recv_buff/recv_count/recv_type : description of incoming data per peer
 * comm                           : communicator; its size must factor as X*Y
 *
 * NOTE(review): this chunk is elided (non-contiguous original line numbers);
 * error-return paths, skip-self guards in the loops, the declarations of
 * `extent`/`num_reqs`, and the cleanup/free/return tail are not visible.
 * Comments below describe only the visible statements. */
53 int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count,
54 MPI_Datatype send_type,
55 void *recv_buff, int recv_count,
56 MPI_Datatype recv_type, MPI_Comm comm)
58 MPI_Status *statuses, s;
/* NOTE(review): stray second semicolon below — harmless but should be removed. */
59 MPI_Request *reqs, *req_ptr;;
62 char *tmp_buff1, *tmp_buff2;
63 int i, j, src, dst, rank, num_procs, count, num_reqs;
64 int X, Y, send_offset, recv_offset;
65 int my_row_base, my_col_base, src_row_base, block_size;
66 int tag = 1, failure = 0, success = 1;
68 MPI_Comm_rank(comm, &rank);
69 MPI_Comm_size(comm, &num_procs);
/* `extent` is declared in an elided line (presumably MPI_Aint — confirm). */
70 MPI_Type_extent(send_type, &extent);
/* Factor num_procs into an X x Y mesh; the failure branch is elided. */
72 if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))
/* First rank of this rank's row, and this rank's column index. */
75 my_row_base = (rank / Y) * Y;
76 my_col_base = rank % Y;
/* Size in bytes of one per-peer data block. */
78 block_size = extent * send_count;
/* tmp_buff1 holds a full row's worth of gathered blocks. */
80 tmp_buff1 = (char *) malloc(block_size * num_procs * Y);
/* NOTE(review): this OOM path logs at DEBUG while the two below use WARN —
 * inconsistent severity for the same failure; also the hard-coded ":88:" line
 * numbers in all three messages are stale. Fix when editing the full file. */
82 XBT_DEBUG("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
/* tmp_buff2 is the per-destination packing buffer for the column phase. */
87 tmp_buff2 = (char *) malloc(block_size * Y);
89 XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
/* num_reqs is computed in elided lines (enough for the larger of the phases). */
100 statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
101 reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
103 XBT_WARN("alltoall-2dmesh_shoot.c:88: cannot allocate memory");
/* Phase 1: row-dimension exchange. Each message carries this rank's entire
 * send buffer (count = send_count * num_procs). */
110 send_offset = recv_offset = (rank % Y) * block_size * num_procs;
112 count = send_count * num_procs;
/* Post receives from row peers; the self iteration is skipped in an elided
 * guard (only Y-1 requests are waited on at the Waitall below). */
114 for (i = 0; i < Y; i++) {
115 src = i + my_row_base;
119 recv_offset = (src % Y) * block_size * num_procs;
120 MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
/* Send the full send buffer to each row peer (self skipped in elided guard). */
124 for (i = 0; i < Y; i++) {
125 dst = i + my_row_base;
128 MPI_Send(send_buff, count, send_type, dst, tag, comm);
131 MPI_Waitall(Y - 1, reqs, statuses);
/* Phase 2: extract the blocks destined for this rank into recv_buff.
 * The self-addressed Sendrecv acts as a local copy. */
134 for (i = 0; i < Y; i++) {
135 send_offset = (rank * block_size) + (i * block_size * num_procs);
136 recv_offset = (my_row_base * block_size) + (i * block_size);
138 if (i + my_row_base == rank)
139 MPI_Sendrecv((char *) send_buff + recv_offset, send_count, send_type,
141 (char *) recv_buff + recv_offset, recv_count, recv_type,
142 rank, tag, comm, &s);
/* else branch: copy the gathered block out of tmp_buff1 (elided `else`). */
145 MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
147 (char *) recv_buff + recv_offset, recv_count, recv_type,
148 rank, tag, comm, &s);
/* Phase 3: column-dimension exchange. Post receives from column peers;
 * each delivers Y blocks straight into the peer's row region of recv_buff. */
152 for (i = 0; i < X; i++) {
153 src = (i * Y + my_col_base);
156 src_row_base = (src / Y) * Y;
158 MPI_Irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
159 recv_type, src, tag, comm, req_ptr++);
/* Pack tmp_buff2 with the Y blocks owed to each column peer, then send. */
162 for (i = 0; i < X; i++) {
163 dst = (i * Y + my_col_base);
/* recv_offset is reset to 0 in an elided line before this packing loop —
 * it is advanced block by block below. TODO confirm against the full file. */
168 for (j = 0; j < Y; j++) {
169 send_offset = (dst + j * num_procs) * block_size;
171 if (j + my_row_base == rank)
172 MPI_Sendrecv((char *) send_buff + dst * block_size, send_count,
173 send_type, rank, tag, tmp_buff2 + recv_offset, recv_count,
174 recv_type, rank, tag, comm, &s);
176 MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
178 tmp_buff2 + recv_offset, recv_count, recv_type,
179 rank, tag, comm, &s);
181 recv_offset += block_size;
184 MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
186 MPI_Waitall(X - 1, reqs, statuses);
/* NOTE(review): the tail (free(tmp_buff1/tmp_buff2/statuses/reqs) and the
 * `return success;`) is elided — verify the buffers are freed on all paths,
 * including the early OOM returns, in the full file. */