X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/8f40f9f58e06254d6616be4ce9c5db867546c6ad..b7ed19dfcc221d7b3eca182abb5c4a3946671172:/src/smpi/smpi_coll.cpp diff --git a/src/smpi/smpi_coll.cpp b/src/smpi/smpi_coll.cpp index 1df9861dc0..37a3612e43 100644 --- a/src/smpi/smpi_coll.cpp +++ b/src/smpi/smpi_coll.cpp @@ -17,119 +17,59 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI (coll)"); s_mpi_coll_description_t mpi_coll_gather_description[] = { - {"default", "gather default collective", (void*)smpi_mpi_gather}, - COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {NULL, NULL, NULL} /* this array must be NULL terminated */ + COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; - s_mpi_coll_description_t mpi_coll_allgather_description[] = { - {"default", - "allgather default collective", - (void*)smpi_mpi_allgather}, -COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ + COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; -s_mpi_coll_description_t mpi_coll_allgatherv_description[] = { - {"default", - "allgatherv default collective", - (void*)smpi_mpi_allgatherv}, -COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ +s_mpi_coll_description_t mpi_coll_allgatherv_description[] = { COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), + {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_allreduce_description[] = { - {"default", - "allreduce default collective", - (void*)smpi_mpi_allreduce}, -COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ +s_mpi_coll_description_t mpi_coll_allreduce_description[] ={ COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), + {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = { - {"default", - "reduce_scatter default collective", - (void*)smpi_mpi_reduce_scatter}, -COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ +s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), + {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_scatter_description[] = { - {"default", - "scatter default collective", - (void*)smpi_mpi_scatter}, -COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ -}; +s_mpi_coll_description_t mpi_coll_scatter_description[] ={COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_barrier_description[] = { - {"default", - "barrier default collective", - (void*)smpi_mpi_barrier}, -COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ -}; -s_mpi_coll_description_t mpi_coll_alltoall_description[] = { - {"default", - "Ompi alltoall default collective", - (void*)smpi_coll_tuned_alltoall_ompi2}, -COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), - {"bruck", - "Alltoall Bruck (SG) collective", - (void*)smpi_coll_tuned_alltoall_bruck}, - {"basic_linear", - "Alltoall basic linear (SG) collective", - (void*)smpi_coll_tuned_alltoall_basic_linear}, - {NULL, NULL, NULL} /* this array must be NULL terminated */ -}; +s_mpi_coll_description_t mpi_coll_barrier_description[] ={COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_alltoallv_description[] = { - {"default", - "Ompi alltoallv default collective", - (void*)smpi_coll_basic_alltoallv}, -COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ -}; +s_mpi_coll_description_t mpi_coll_alltoall_description[] = {COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; -s_mpi_coll_description_t mpi_coll_bcast_description[] = { - {"default", - "bcast default collective ", - (void*)smpi_mpi_bcast}, -COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ +s_mpi_coll_description_t mpi_coll_alltoallv_description[] = {COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), + {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ }; -s_mpi_coll_description_t mpi_coll_reduce_description[] = { - {"default", - "reduce default collective", - (void*)smpi_mpi_reduce}, -COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), - {NULL, NULL, NULL} /* this array must be NULL terminated */ -}; +s_mpi_coll_description_t mpi_coll_bcast_description[] = {COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; + +s_mpi_coll_description_t mpi_coll_reduce_description[] = {COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; /** Displays the long description of all registered models, and quit */ void coll_help(const char *category, s_mpi_coll_description_t * table) { - int i; - printf("Long description of the %s models accepted by this simulator:\n", - category); - for (i = 0; table[i].name; i++) + printf("Long description of the %s models accepted by this simulator:\n", category); + for (int i = 0; table[i].name; i++) printf(" %s: %s\n", table[i].name, table[i].description); } -int find_coll_description(s_mpi_coll_description_t * table, - char *name, const char *desc) +int find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc) { - int i; - char *name_list = NULL; + char *name_list = nullptr; int selector_on=0; - if(name==NULL){//no argument provided, use active selector's algorithm - name=(char*)sg_cfg_get_string("smpi/coll_selector"); + if (name==nullptr || name[0] == '\0') { + //no argument provided, use active selector's algorithm + name=static_cast(xbt_cfg_get_string("smpi/coll-selector")); selector_on=1; } - for (i = 0; table[i].name; i++) + for (int i = 0; table[i].name; i++) if (!strcmp(name, table[i].name)) { if (strcmp(table[i].name,"default")) XBT_INFO("Switch to algorithm %s for collective %s",table[i].name,desc); @@ -138,255 +78,346 @@ int find_coll_description(s_mpi_coll_description_t * table, if(selector_on){ // collective seems not handled by the active selector, try with default one - name=(char*)"default"; - for (i = 0; table[i].name; i++) - if (!strcmp(name, table[i].name)) { + for (int i = 0; table[i].name; i++) + if (!strcmp("default", table[i].name)) { return i; } } if (!table[0].name) xbt_die("No collective is valid for '%s'! This is a bug.",name); name_list = xbt_strdup(table[0].name); - for (i = 1; table[i].name; i++) { - name_list = static_cast(xbt_realloc(name_list, - strlen(name_list) + strlen(table[i].name) + 3)); - strcat(name_list, ", "); - strcat(name_list, table[i].name); + for (int i = 1; table[i].name; i++) { + name_list = static_cast(xbt_realloc(name_list, strlen(name_list) + strlen(table[i].name) + 3)); + strncat(name_list, ", ",2); + strncat(name_list, table[i].name, strlen(table[i].name)); } xbt_die("Collective '%s' is invalid! Valid collectives are: %s.", name, name_list); return -1; } -int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); -int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allgatherv_fun)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); -int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); -int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -int (*mpi_coll_reduce_scatter_fun)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); -int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); -int (*mpi_coll_barrier_fun)(MPI_Comm comm); - - -int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, - MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, - MPI_Comm comm) +void (*smpi_coll_cleanup_callback)(); + +namespace simgrid{ +namespace smpi{ + +int (*Colls::gather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); +int (*Colls::allgather)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +int (*Colls::allgatherv)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +int (*Colls::allreduce)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); +int (*Colls::alltoall)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); +int (*Colls::alltoallv)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); +int (*Colls::bcast)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); +int (*Colls::reduce)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +int (*Colls::reduce_scatter)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); +int (*Colls::scatter)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); +int (*Colls::barrier)(MPI_Comm comm); + + +#define COLL_SETTER(cat, ret, args, args2)\ +void Colls::set_##cat (const char * name){\ + int id = find_coll_description(mpi_coll_## cat ##_description,\ + name,#cat);\ + cat = reinterpret_cast\ + (mpi_coll_## cat ##_description[id].coll);\ +} + +COLL_APPLY(COLL_SETTER,COLL_GATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHERV_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLREDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BARRIER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BCAST_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALL_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALLV_SIG,""); + + +void Colls::set_collectives(){ + const char* selector_name = static_cast(xbt_cfg_get_string("smpi/coll-selector")); + if (selector_name==nullptr || selector_name[0] == '\0') + selector_name = "default"; + + const char* name = xbt_cfg_get_string("smpi/gather"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_gather(name); + + name = xbt_cfg_get_string("smpi/allgather"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allgather(name); + + name = xbt_cfg_get_string("smpi/allgatherv"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allgatherv(name); + + name = xbt_cfg_get_string("smpi/allreduce"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_allreduce(name); + + name = xbt_cfg_get_string("smpi/alltoall"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_alltoall(name); + + name = xbt_cfg_get_string("smpi/alltoallv"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_alltoallv(name); + + name = xbt_cfg_get_string("smpi/reduce"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_reduce(name); + + name = xbt_cfg_get_string("smpi/reduce-scatter"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_reduce_scatter(name); + + name = xbt_cfg_get_string("smpi/scatter"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_scatter(name); + + name = xbt_cfg_get_string("smpi/bcast"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_bcast(name); + + name = xbt_cfg_get_string("smpi/barrier"); + if (name==nullptr || name[0] == '\0') + name = selector_name; + + set_barrier(name); +} + + +int Colls::gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, + MPI_Datatype recvtype, int root, MPI_Comm comm) { - int size, sendsize; - size = smpi_comm_size(comm); - sendsize = smpi_datatype_size(sendtype) * sendcount; - if (sendsize < 200 && size > 12) { - return - smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm); - } else if (sendsize < 3000) { - return - smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount, - sendtype, recvbuf, - recvcount, recvtype, comm); + int system_tag = COLL_TAG_GATHERV; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int rank = comm->rank(); + int size = comm->size(); + if (rank != root) { + // Send buffer to root + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); } else { - return - smpi_coll_tuned_alltoall_ring(sendbuf, sendcount, sendtype, - recvbuf, recvcount, recvtype, - comm); + recvtype->extent(&lb, &recvext); + // Local copy from root + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + displs[root] * recvext, + recvcounts[root], recvtype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if(src != root) { + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[src] * recvext, + recvcounts[src], recvtype, src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int src = 0; src < size-1; src++) { + Request::unref(&requests[src]); + } + xbt_free(requests); } + return MPI_SUCCESS; } -/** - * Alltoall Bruck - * - * Openmpi calls this routine when the message size sent to each rank < 2000 bytes and size < 12 - * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not - * less... - **/ -int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, - MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, - MPI_Comm comm) + +int Colls::scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm) { - int system_tag = 777; - int i, rank, size, err, count; - MPI_Aint lb; + int system_tag = COLL_TAG_SCATTERV; + MPI_Aint lb = 0; MPI_Aint sendext = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - - // FIXME: check implementation - rank = smpi_comm_rank(comm); - size = smpi_comm_size(comm); - XBT_DEBUG("<%d> algorithm alltoall_bruck() called.", rank); - smpi_datatype_extent(sendtype, &lb, &sendext); - smpi_datatype_extent(recvtype, &lb, &recvext); - /* Local copy from self */ - err = - smpi_datatype_copy((char *)sendbuf + rank * sendcount * sendext, - sendcount, sendtype, - (char *)recvbuf + rank * recvcount * recvext, - recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. */ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i == rank) { - XBT_DEBUG("<%d> skip request creation [src = %d, recvcount = %d]", - rank, i, recvcount); - continue; - } - requests[count] = - smpi_irecv_init((char *)recvbuf + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; + + int rank = comm->rank(); + int size = comm->size(); + if(rank != root) { + // Recv buffer from root + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + } else { + sendtype->extent(&lb, &sendext); + // Local copy from root + if(recvbuf!=MPI_IN_PLACE){ + Datatype::copy(static_cast(sendbuf) + displs[root] * sendext, sendcounts[root], + sendtype, recvbuf, recvcount, recvtype); } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i == rank) { - XBT_DEBUG("<%d> skip request creation [dst = %d, sendcount = %d]", - rank, i, sendcount); - continue; + // Send buffers to receivers + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int dst = 0; dst < size; dst++) { + if (dst != root) { + requests[index] = Request::isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], + sendtype, dst, system_tag, comm); + index++; } - requests[count] = - smpi_isend_init((char *)sendbuf + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; } - /* Wait for them all. */ - smpi_mpi_startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - smpi_mpi_waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) smpi_mpi_request_free(&requests[i]); + // Wait for completion of isend's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int dst = 0; dst < size-1; dst++) { + Request::unref(&requests[dst]); } xbt_free(requests); } return MPI_SUCCESS; } -/** - * Alltoall basic_linear (STARMPI:alltoall-simple) - **/ -int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, - MPI_Datatype sendtype, - void *recvbuf, int recvcount, - MPI_Datatype recvtype, - MPI_Comm comm) + +int Colls::scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 888; - int i, rank, size, err, count; - MPI_Aint lb = 0, sendext = 0, recvext = 0; - MPI_Request *requests; - - /* Initialize. */ - rank = smpi_comm_rank(comm); - size = smpi_comm_size(comm); - XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank); - smpi_datatype_extent(sendtype, &lb, &sendext); - smpi_datatype_extent(recvtype, &lb, &recvext); - /* simple optimization */ - err = smpi_datatype_copy((char *)sendbuf + rank * sendcount * sendext, - sendcount, sendtype, - (char *)recvbuf + rank * recvcount * recvext, - recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. */ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - /* Post all receives first -- a simple optimization */ - count = 0; - for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) { - requests[count] = - smpi_irecv_init((char *)recvbuf + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; - } - /* Now post all sends in reverse order - * - We would like to minimize the search time through message queue - * when messages actually arrive in the order in which they were posted. - * TODO: check the previous assertion - */ - for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) { - requests[count] = - smpi_isend_init((char *)sendbuf + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; + + int rank = comm->rank(); + int size = comm->size(); + + datatype->extent(&lb, &dataext); + + // Local copy from self + Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype); + + // Send/Recv buffers to/from others; + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; + } + if(index < rank) { + // #Request is below rank: it's a irecv + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); + } } - /* Wait for them all. */ - smpi_mpi_startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - smpi_mpi_waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) smpi_mpi_request_free(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank) { + if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; } -int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, - int *senddisps, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, - int *recvdisps, MPI_Datatype recvtype, - MPI_Comm comm) +int Colls::exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 889; - int i, rank, size, err, count; - MPI_Aint lb = 0, sendext = 0, recvext = 0; - MPI_Request *requests; - - /* Initialize. */ - rank = smpi_comm_rank(comm); - size = smpi_comm_size(comm); - XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank); - smpi_datatype_extent(sendtype, &lb, &sendext); - smpi_datatype_extent(recvtype, &lb, &recvext); - /* Local copy from self */ - err = - smpi_datatype_copy((char *)sendbuf + senddisps[rank] * sendext, - sendcounts[rank], sendtype, - (char *)recvbuf + recvdisps[rank] * recvext, - recvcounts[rank], recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. */ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i == rank || recvcounts[i] == 0) { - XBT_DEBUG - ("<%d> skip request creation [src = %d, recvcounts[src] = %d]", - rank, i, recvcounts[i]); - continue; + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; + int recvbuf_is_empty=1; + int rank = comm->rank(); + int size = comm->size(); + + datatype->extent(&lb, &dataext); + + // Send/Recv buffers to/from others; + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; } - requests[count] = - smpi_irecv_init((char *)recvbuf + recvdisps[i] * recvext, - recvcounts[i], recvtype, i, system_tag, comm); - count++; - } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i == rank || sendcounts[i] == 0) { - XBT_DEBUG - ("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", - rank, i, sendcounts[i]); - continue; + if(index < rank) { + if(recvbuf_is_empty){ + Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty=0; + } else + // #Request is below rank: it's a irecv + if(op!=MPI_OP_NULL) op->apply( tmpbufs[index], recvbuf, &count, datatype); } - requests[count] = - smpi_isend_init((char *)sendbuf + senddisps[i] * sendext, - sendcounts[i], sendtype, i, system_tag, comm); - count++; } - /* Wait for them all. */ - smpi_mpi_startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - smpi_mpi_waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) smpi_mpi_request_free(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank) { + if (recvbuf_is_empty) { + Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty = 0; + } else + if(op!=MPI_OP_NULL) op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; +} + +} } + + + + +