2 #include "colls_private.h"
3 #define MCA_COLL_BASE_TAG_ALLTOALLV 111
5 * Linear functions are copied from the basic coll module. For
6 * some small number of nodes and/or small data sizes they are just as
7 * fast as tuned/tree based segmenting operations and as such may be
8 * selected by the decision functions. These are copied into this module
9 * due to the way we select modules in V1; in V2 we will handle this
10 * differently and so will not have to duplicate code.
11 * GEF Oct05 after asking Jeff.
14 smpi_coll_tuned_alltoallv_ompi_basic_linear(void *sbuf, int *scounts, int *sdisps,
16 void *rbuf, int *rcounts, int *rdisps,
25 size = smpi_comm_size(comm);
26 rank = smpi_comm_rank(comm);
27 MPI_Request *ireqs= xbt_malloc(sizeof(MPI_Request) * size * 2);
29 "coll:tuned:alltoallv_intra_basic_linear rank %d", rank);
31 sext=smpi_datatype_get_extent(sdtype);
32 rext=smpi_datatype_get_extent(rdtype);
34 /* Simple optimization - handle send to self first */
35 psnd = ((char *) sbuf) + (sdisps[rank] * sext);
36 prcv = ((char *) rbuf) + (rdisps[rank] * rext);
37 if (0 != scounts[rank]) {
38 smpi_datatype_copy(psnd, scounts[rank], sdtype,
39 prcv, rcounts[rank], rdtype);
42 /* If only one process, we're done. */
47 /* Now, initiate all send/recv to/from others. */
51 /* Post all receives first */
52 for (i = 0; i < size; ++i) {
53 if (i == rank || 0 == rcounts[i]) {
57 prcv = ((char *) rbuf) + (rdisps[i] * rext);
59 *preq = smpi_irecv_init(prcv, rcounts[i], rdtype,
60 i, MCA_COLL_BASE_TAG_ALLTOALLV, comm
67 /* Now post all sends */
68 for (i = 0; i < size; ++i) {
69 if (i == rank || 0 == scounts[i]) {
73 psnd = ((char *) sbuf) + (sdisps[i] * sext);
74 *preq=smpi_isend_init(psnd, scounts[i], sdtype,
75 i, MCA_COLL_BASE_TAG_ALLTOALLV, comm
81 /* Start your engines. This will never return an error. */
82 smpi_mpi_startall(nreqs, ireqs);
84 /* Wait for them all. If there's an error, note that we don't care
85 * what the error was -- just that there *was* an error. The PML
86 * will finish all requests, even if one or more of them fail.
87 * i.e., by the end of this call, all the requests are free-able.
88 * So free them anyway -- even if there was an error, and return the
89 * error after we free everything. */
90 smpi_mpi_waitall(nreqs, ireqs,
93 /* Free the requests. */
94 for (i = 0; i < nreqs; ++i) {
95 if(ireqs[i]!=MPI_REQUEST_NULL)smpi_mpi_request_free(&ireqs[i]);