XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_colls, smpi,
                                "Logging specific to SMPI collectives");
/*****************************************************************************

 * Function: alltoall_2dmesh_shoot

 * Inputs:
     send_buff: send input buffer
     send_count: number of elements to send
     send_type: data type of elements being sent
     recv_buff: receive output buffer
     recv_count: number of elements to receive
     recv_type: data type of elements being received

 * Descrp: Implements the all-to-all operation using the 2D mesh algorithm:
           an allgather-style exchange is performed along the x dimension
           (within each mesh row), then along the y dimension (within each
           column), and each node finally extracts the data it needs. The
           communication in each dimension follows the "simple" algorithm.

 ****************************************************************************/
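/* Illustrative example (assuming this is the factorization the check settles
   on): with num_procs = 12 arranged as an X = 3 by Y = 4 mesh, rank 6 gets
   my_row_base = (6 / 4) * 4 = 4 and my_col_base = 6 % 4 = 2, so its row
   peers are ranks 4..7 and its column peers are ranks 2, 6 and 10.
   alltoall_check_is_2dmesh() decides whether num processes can be arranged
   as such an X x Y mesh and, if so, returns the factors through *i and *j. */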
int alltoall_check_is_2dmesh(int num, int *i, int *j);
int smpi_coll_tuned_alltoall_2dmesh(void *send_buff, int send_count,
                                    MPI_Datatype send_type,
                                    void *recv_buff, int recv_count,
                                    MPI_Datatype recv_type, MPI_Comm comm)
{
  MPI_Status *statuses, s;
  MPI_Request *reqs, *req_ptr;
  MPI_Aint extent;

  char *tmp_buff1, *tmp_buff2;
  int i, j, src, dst, rank, num_procs, count, num_reqs;
  int rows, cols, my_row, my_col, X, Y, send_offset, recv_offset;
  int two_dsize, my_row_base, my_col_base, src_row_base, block_size;
  int tag = 1, failure = 0, success = 1;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &num_procs);
  MPI_Type_extent(send_type, &extent);
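  /* The algorithm only applies when the number of processes can be arranged
     as an X x Y mesh; X and Y are the mesh dimensions used below. */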
  if (!alltoall_check_is_2dmesh(num_procs, &X, &Y))
    return failure;

  my_row_base = (rank / Y) * Y;
  my_col_base = rank % Y;

  block_size = extent * send_count;
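  /* tmp_buff1 gathers the complete send buffers of the Y ranks of this mesh
     row (Y buffers of num_procs blocks each); tmp_buff2 stages the Y blocks
     forwarded to one column peer at a time. */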
  tmp_buff1 = (char *) malloc(block_size * num_procs * Y);
  if (!tmp_buff1)
    XBT_WARN("alltoall-2dmesh_shoot.c: cannot allocate memory");

  tmp_buff2 = (char *) malloc(block_size * Y);
  if (!tmp_buff2)
    XBT_WARN("alltoall-2dmesh_shoot.c: cannot allocate memory");
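  /* Request and status arrays for the non-blocking receives: the row
     exchange posts Y - 1 of them, the column exchange posts X - 1. */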
  num_reqs = (X > Y) ? X : Y;
  statuses = (MPI_Status *) malloc(num_reqs * sizeof(MPI_Status));
  reqs = (MPI_Request *) malloc(num_reqs * sizeof(MPI_Request));
  if (!statuses || !reqs)
    XBT_WARN("alltoall-2dmesh_shoot.c: cannot allocate memory");

  req_ptr = reqs;

  send_offset = recv_offset = (rank % Y) * block_size * num_procs;
  count = send_count * num_procs;
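  /* Phase 1a: post non-blocking receives for the full send buffers of the
     other ranks in this mesh row; each lands in tmp_buff1 at an offset given
     by the sender's column index. */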
  for (i = 0; i < Y; i++) {
    src = i + my_row_base;
    if (src == rank)
      continue;
    recv_offset = (src % Y) * block_size * num_procs;
    MPI_Irecv(tmp_buff1 + recv_offset, count, recv_type, src, tag, comm,
              req_ptr++);
  }
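  /* Phase 1b: send this rank's entire send buffer to every other rank of the
     same mesh row, then wait for all row exchanges to complete. */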
  for (i = 0; i < Y; i++) {
    dst = i + my_row_base;
    if (dst == rank)
      continue;
    MPI_Send(send_buff, count, send_type, dst, tag, comm);
  }

  MPI_Waitall(Y - 1, reqs, statuses);
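  /* Extract into recv_buff the blocks that each rank of this mesh row
     addressed to this rank (this rank's own contribution is copied straight
     from send_buff). */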
  for (i = 0; i < Y; i++) {
    send_offset = (rank * block_size) + (i * block_size * num_procs);
    recv_offset = (my_row_base * block_size) + (i * block_size);

    if (i + my_row_base == rank)
      MPI_Sendrecv((char *) send_buff + recv_offset, send_count, send_type,
                   rank, tag, (char *) recv_buff + recv_offset, recv_count,
                   recv_type, rank, tag, comm, &s);
    else
      MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
                   rank, tag,
                   (char *) recv_buff + recv_offset, recv_count, recv_type,
                   rank, tag, comm, &s);
  }
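  /* Phase 2a: post non-blocking receives from the other ranks of this mesh
     column; each delivers the Y blocks that its whole row addressed to this
     rank, placed directly into recv_buff at the sender's row offset. */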
  req_ptr = reqs;
  for (i = 0; i < X; i++) {
    src = (i * Y + my_col_base);
    if (src == rank)
      continue;
    src_row_base = (src / Y) * Y;
    MPI_Irecv((char *) recv_buff + src_row_base * block_size, recv_count * Y,
              recv_type, src, tag, comm, req_ptr++);
  }
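  /* Phase 2b: for every other rank of this mesh column, pack into tmp_buff2
     the Y blocks that this rank's row addressed to it (taken from tmp_buff1,
     or from send_buff for this rank's own data), then ship them in a single
     send. */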
  for (i = 0; i < X; i++) {
    dst = (i * Y + my_col_base);
    if (dst == rank)
      continue;

    recv_offset = 0;
    for (j = 0; j < Y; j++) {
      send_offset = (dst + j * num_procs) * block_size;

      if (j + my_row_base == rank)
        MPI_Sendrecv((char *) send_buff + dst * block_size, send_count,
                     send_type, rank, tag,
                     tmp_buff2 + recv_offset, recv_count, recv_type,
                     rank, tag, comm, &s);
      else
        MPI_Sendrecv(tmp_buff1 + send_offset, send_count, send_type,
                     rank, tag,
                     tmp_buff2 + recv_offset, recv_count, recv_type,
                     rank, tag, comm, &s);

      recv_offset += block_size;
    }

    MPI_Send(tmp_buff2, send_count * Y, send_type, dst, tag, comm);
  }
  MPI_Waitall(X - 1, reqs, statuses);