/* Asynchronous parts of the basic collective algorithms, meant to be used both for the naive default implementation, but also for non blocking collectives */

/* Copyright (c) 2009-2019. The SimGrid Team. All rights reserved. */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */
#include "colls_private.hpp"
#include "src/smpi/include/smpi_actor.hpp"
15 int Colls::ibarrier(MPI_Comm comm, MPI_Request* request)
18 int size = comm->size();
19 int rank = comm->rank();
20 MPI_Request* requests;
21 (*request) = new Request( nullptr, 0, MPI_BYTE,
22 rank,rank, COLL_TAG_BARRIER, comm, MPI_REQ_PERSISTENT);
24 requests = new MPI_Request[2];
25 requests[0] = Request::isend (nullptr, 0, MPI_BYTE, 0,
28 requests[1] = Request::irecv (nullptr, 0, MPI_BYTE, 0,
31 (*request)->set_nbc_requests(requests, 2);
34 requests = new MPI_Request[(size-1)*2];
35 for (i = 1; i < 2*size-1; i+=2) {
36 requests[i-1] = Request::irecv(nullptr, 0, MPI_BYTE, MPI_ANY_SOURCE,
37 COLL_TAG_BARRIER, comm
39 requests[i] = Request::isend(nullptr, 0, MPI_BYTE, (i+1)/2,
44 (*request)->set_nbc_requests(requests, 2*(size-1));
49 int Colls::ibcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request)
52 int size = comm->size();
53 int rank = comm->rank();
54 MPI_Request* requests;
55 (*request) = new Request( nullptr, 0, MPI_BYTE,
56 rank,rank, COLL_TAG_BARRIER, comm, MPI_REQ_PERSISTENT);
58 requests = new MPI_Request[1];
59 requests[0] = Request::irecv (buf, count, datatype, root,
62 (*request)->set_nbc_requests(requests, 1);
65 requests = new MPI_Request[size-1];
67 for (i = 0; i < size; i++) {
69 requests[n] = Request::isend(buf, count, datatype, i,
76 (*request)->set_nbc_requests(requests, size-1);
81 int Colls::iallgather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
82 void *recvbuf,int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request)
85 const int system_tag = COLL_TAG_ALLGATHER;
88 MPI_Request *requests;
90 int rank = comm->rank();
91 int size = comm->size();
92 (*request) = new Request( nullptr, 0, MPI_BYTE,
93 rank,rank, COLL_TAG_BARRIER, comm, MPI_REQ_PERSISTENT);
94 // FIXME: check for errors
95 recvtype->extent(&lb, &recvext);
96 // Local copy from self
97 Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char *>(recvbuf) + rank * recvcount * recvext, recvcount,
99 // Send/Recv buffers to/from others;
100 requests = new MPI_Request[2 * (size - 1)];
102 for (int other = 0; other < size; other++) {
104 requests[index] = Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag,comm);
106 requests[index] = Request::irecv_init(static_cast<char *>(recvbuf) + other * recvcount * recvext, recvcount, recvtype,
107 other, system_tag, comm);
111 Request::startall(2 * (size - 1), requests);
112 (*request)->set_nbc_requests(requests, 2 * (size - 1));
116 int Colls::iscatter(void *sendbuf, int sendcount, MPI_Datatype sendtype,
117 void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request* request)
119 const int system_tag = COLL_TAG_SCATTER;
121 MPI_Aint sendext = 0;
122 MPI_Request *requests;
124 int rank = comm->rank();
125 int size = comm->size();
126 (*request) = new Request( nullptr, 0, MPI_BYTE,
127 rank,rank, COLL_TAG_BARRIER, comm, MPI_REQ_PERSISTENT);
129 requests = new MPI_Request[1];
130 // Recv buffer from root
131 requests[0] = Request::irecv(recvbuf, recvcount, recvtype, root, system_tag, comm);
132 (*request)->set_nbc_requests(requests, 1);
134 sendtype->extent(&lb, &sendext);
135 // Local copy from root
136 if(recvbuf!=MPI_IN_PLACE){
137 Datatype::copy(static_cast<char *>(sendbuf) + root * sendcount * sendext,
138 sendcount, sendtype, recvbuf, recvcount, recvtype);
140 // Send buffers to receivers
141 requests = new MPI_Request[size - 1];
143 for(int dst = 0; dst < size; dst++) {
145 requests[index] = Request::isend_init(static_cast<char *>(sendbuf) + dst * sendcount * sendext, sendcount, sendtype,
146 dst, system_tag, comm);
150 // Wait for completion of isend's.
151 Request::startall(size - 1, requests);
152 (*request)->set_nbc_requests(requests, size - 1);
157 int Colls::iallgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf,
158 int *recvcounts, int *displs, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request)
160 const int system_tag = COLL_TAG_ALLGATHERV;
162 MPI_Aint recvext = 0;
164 int rank = comm->rank();
165 int size = comm->size();
166 (*request) = new Request( nullptr, 0, MPI_BYTE,
167 rank,rank, COLL_TAG_BARRIER, comm, MPI_REQ_PERSISTENT);
168 recvtype->extent(&lb, &recvext);
169 // Local copy from self
170 Datatype::copy(sendbuf, sendcount, sendtype,
171 static_cast<char *>(recvbuf) + displs[rank] * recvext,recvcounts[rank], recvtype);
172 // Send buffers to others;
173 MPI_Request *requests = new MPI_Request[2 * (size - 1)];
175 for (int other = 0; other < size; other++) {
178 Request::isend_init(sendbuf, sendcount, sendtype, other, system_tag, comm);
180 requests[index] = Request::irecv_init(static_cast<char *>(recvbuf) + displs[other] * recvext, recvcounts[other],
181 recvtype, other, system_tag, comm);
185 // Wait for completion of all comms.
186 Request::startall(2 * (size - 1), requests);
187 (*request)->set_nbc_requests(requests, 2 * (size - 1));
191 int Colls::ialltoall( void *sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request* request){
192 int system_tag = COLL_TAG_ALLTOALL;
195 MPI_Aint lb = 0, sendext = 0, recvext = 0;
196 MPI_Request *requests;
199 int rank = comm->rank();
200 int size = comm->size();
201 (*request) = new Request( nullptr, 0, MPI_BYTE,
202 rank,rank, COLL_TAG_ALLTOALL, comm, MPI_REQ_PERSISTENT);
203 sendtype->extent(&lb, &sendext);
204 recvtype->extent(&lb, &recvext);
205 /* simple optimization */
206 int err = Datatype::copy(static_cast<char *>(sendbuf) + rank * sendcount * sendext, sendcount, sendtype,
207 static_cast<char *>(recvbuf) + rank * recvcount * recvext, recvcount, recvtype);
208 if (err == MPI_SUCCESS && size > 1) {
209 /* Initiate all send/recv to/from others. */
210 requests = new MPI_Request[2 * (size - 1)];
211 /* Post all receives first -- a simple optimization */
213 for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) {
214 requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + i * recvcount * recvext, recvcount,
215 recvtype, i, system_tag, comm);
218 /* Now post all sends in reverse order
219 * - We would like to minimize the search time through message queue
220 * when messages actually arrive in the order in which they were posted.
221 * TODO: check the previous assertion
223 for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) {
224 requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + i * sendcount * sendext, sendcount,
225 sendtype, i, system_tag, comm);
228 /* Wait for them all. */
229 Request::startall(count, requests);
230 (*request)->set_nbc_requests(requests, count);
235 int Colls::ialltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype,
236 void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request){
237 const int system_tag = COLL_TAG_ALLTOALLV;
239 MPI_Aint sendext = 0;
240 MPI_Aint recvext = 0;
241 MPI_Request *requests;
244 int rank = comm->rank();
245 int size = comm->size();
246 (*request) = new Request( nullptr, 0, MPI_BYTE,
247 rank,rank, COLL_TAG_ALLTOALLV, comm, MPI_REQ_PERSISTENT);
248 sendtype->extent(&lb, &sendext);
249 recvtype->extent(&lb, &recvext);
250 /* Local copy from self */
251 int err = Datatype::copy(static_cast<char *>(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype,
252 static_cast<char *>(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype);
253 if (err == MPI_SUCCESS && size > 1) {
254 /* Initiate all send/recv to/from others. */
255 requests = new MPI_Request[2 * (size - 1)];
257 /* Create all receives that will be posted first */
258 for (int i = 0; i < size; ++i) {
260 requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + recvdisps[i] * recvext,
261 recvcounts[i], recvtype, i, system_tag, comm);
264 XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]);
267 /* Now create all sends */
268 for (int i = 0; i < size; ++i) {
270 requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + senddisps[i] * sendext,
271 sendcounts[i], sendtype, i, system_tag, comm);
274 XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]);
277 /* Wait for them all. */
278 Request::startall(count, requests);
279 (*request)->set_nbc_requests(requests, count);
284 int Colls::ialltoallw(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype* sendtypes,
285 void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype* recvtypes, MPI_Comm comm, MPI_Request *request){
286 const int system_tag = COLL_TAG_ALLTOALLV;
287 MPI_Request *requests;
290 int rank = comm->rank();
291 int size = comm->size();
292 (*request) = new Request( nullptr, 0, MPI_BYTE,
293 rank,rank, COLL_TAG_ALLTOALLV, comm, MPI_REQ_PERSISTENT);
294 /* Local copy from self */
295 int err = (sendcounts[rank]>0 && recvcounts[rank]) ? Datatype::copy(static_cast<char *>(sendbuf) + senddisps[rank], sendcounts[rank], sendtypes[rank],
296 static_cast<char *>(recvbuf) + recvdisps[rank], recvcounts[rank], recvtypes[rank]): MPI_SUCCESS;
297 if (err == MPI_SUCCESS && size > 1) {
298 /* Initiate all send/recv to/from others. */
299 requests = new MPI_Request[2 * (size - 1)];
301 /* Create all receives that will be posted first */
302 for (int i = 0; i < size; ++i) {
304 requests[count] = Request::irecv_init(static_cast<char *>(recvbuf) + recvdisps[i],
305 recvcounts[i], recvtypes[i], i, system_tag, comm);
308 XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]);
311 /* Now create all sends */
312 for (int i = 0; i < size; ++i) {
314 requests[count] = Request::isend_init(static_cast<char *>(sendbuf) + senddisps[i] ,
315 sendcounts[i], sendtypes[i], i, system_tag, comm);
318 XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]);
321 /* Wait for them all. */
322 Request::startall(count, requests);
323 (*request)->set_nbc_requests(requests, count);
328 int Colls::igather(void *sendbuf, int sendcount, MPI_Datatype sendtype,
329 void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)
331 const int system_tag = COLL_TAG_GATHER;
333 MPI_Aint recvext = 0;
334 MPI_Request *requests;
336 int rank = comm->rank();
337 int size = comm->size();
338 (*request) = new Request( nullptr, 0, MPI_BYTE,
339 rank,rank, COLL_TAG_GATHER, comm, MPI_REQ_PERSISTENT);
341 // Send buffer to root
342 requests = new MPI_Request[1];
343 requests[0]=Request::isend(sendbuf, sendcount, sendtype, root, system_tag, comm);
344 (*request)->set_nbc_requests(requests, 1);
346 recvtype->extent(&lb, &recvext);
347 // Local copy from root
348 Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char*>(recvbuf) + root * recvcount * recvext,
349 recvcount, recvtype);
350 // Receive buffers from senders
351 requests = new MPI_Request[size - 1];
353 for (int src = 0; src < size; src++) {
355 requests[index] = Request::irecv_init(static_cast<char*>(recvbuf) + src * recvcount * recvext, recvcount, recvtype,
356 src, system_tag, comm);
360 // Wait for completion of irecv's.
361 Request::startall(size - 1, requests);
362 (*request)->set_nbc_requests(requests, size - 1);
367 int Colls::igatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs,
368 MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)
370 int system_tag = COLL_TAG_GATHERV;
372 MPI_Aint recvext = 0;
373 MPI_Request *requests;
375 int rank = comm->rank();
376 int size = comm->size();
377 (*request) = new Request( nullptr, 0, MPI_BYTE,
378 rank,rank, COLL_TAG_GATHERV, comm, MPI_REQ_PERSISTENT);
380 // Send buffer to root
381 requests = new MPI_Request[1];
382 requests[0]=Request::isend(sendbuf, sendcount, sendtype, root, system_tag, comm);
383 (*request)->set_nbc_requests(requests, 1);
385 recvtype->extent(&lb, &recvext);
386 // Local copy from root
387 Datatype::copy(sendbuf, sendcount, sendtype, static_cast<char*>(recvbuf) + displs[root] * recvext,
388 recvcounts[root], recvtype);
389 // Receive buffers from senders
390 requests = new MPI_Request[size - 1];
392 for (int src = 0; src < size; src++) {
394 requests[index] = Request::irecv_init(static_cast<char*>(recvbuf) + displs[src] * recvext,
395 recvcounts[src], recvtype, src, system_tag, comm);
399 // Wait for completion of irecv's.
400 Request::startall(size - 1, requests);
401 (*request)->set_nbc_requests(requests, size - 1);
405 int Colls::iscatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount,
406 MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)
408 int system_tag = COLL_TAG_SCATTERV;
410 MPI_Aint sendext = 0;
411 MPI_Request* requests;
413 int rank = comm->rank();
414 int size = comm->size();
415 (*request) = new Request( nullptr, 0, MPI_BYTE,
416 rank,rank, COLL_TAG_SCATTERV, comm, MPI_REQ_PERSISTENT);
418 // Recv buffer from root
419 requests = new MPI_Request[1];
420 requests[0]=Request::irecv(recvbuf, recvcount, recvtype, root, system_tag, comm);
421 (*request)->set_nbc_requests(requests, 1);
423 sendtype->extent(&lb, &sendext);
424 // Local copy from root
425 if(recvbuf!=MPI_IN_PLACE){
426 Datatype::copy(static_cast<char *>(sendbuf) + displs[root] * sendext, sendcounts[root],
427 sendtype, recvbuf, recvcount, recvtype);
429 // Send buffers to receivers
430 MPI_Request *requests = new MPI_Request[size - 1];
432 for (int dst = 0; dst < size; dst++) {
434 requests[index] = Request::isend_init(static_cast<char *>(sendbuf) + displs[dst] * sendext, sendcounts[dst],
435 sendtype, dst, system_tag, comm);
439 // Wait for completion of isend's.
440 Request::startall(size - 1, requests);
441 (*request)->set_nbc_requests(requests, size - 1);
446 int Colls::ireduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root,
447 MPI_Comm comm, MPI_Request* request)
449 const int system_tag = COLL_TAG_REDUCE;
451 MPI_Aint dataext = 0;
452 MPI_Request* requests;
454 char* sendtmpbuf = static_cast<char *>(sendbuf);
456 int rank = comm->rank();
457 int size = comm->size();
462 if( sendbuf == MPI_IN_PLACE ) {
463 sendtmpbuf = static_cast<char *>(smpi_get_tmp_sendbuffer(count*datatype->get_extent()));
464 Datatype::copy(recvbuf, count, datatype,sendtmpbuf, count, datatype);
468 (*request) = new Request( recvbuf, count, datatype,
469 rank,rank, COLL_TAG_REDUCE, comm, MPI_REQ_PERSISTENT, op);
472 (*request) = new Request( nullptr, count, datatype,
473 rank,rank, COLL_TAG_REDUCE, comm, MPI_REQ_PERSISTENT);
476 // Send buffer to root
477 requests = new MPI_Request[1];
478 requests[0]=Request::isend(sendtmpbuf, count, datatype, root, system_tag, comm);
479 (*request)->set_nbc_requests(requests, 1);
481 datatype->extent(&lb, &dataext);
482 // Local copy from root
483 if (sendtmpbuf != nullptr && recvbuf != nullptr)
484 Datatype::copy(sendtmpbuf, count, datatype, recvbuf, count, datatype);
485 // Receive buffers from senders
486 MPI_Request *requests = new MPI_Request[size - 1];
488 for (int src = 0; src < size; src++) {
491 Request::irecv_init(smpi_get_tmp_sendbuffer(count * dataext), count, datatype, src, system_tag, comm);
495 // Wait for completion of irecv's.
496 Request::startall(size - 1, requests);
497 (*request)->set_nbc_requests(requests, size - 1);
499 if( sendbuf == MPI_IN_PLACE ) {
500 smpi_free_tmp_buffer(sendtmpbuf);