X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/004b932fe967a47a2ded3795af9dc069c3de9671..57970de8f4fd548711e2cea76502ba692595fa4a:/src/smpi/smpi_coll.cpp

diff --git a/src/smpi/smpi_coll.cpp b/src/smpi/smpi_coll.cpp
index 960af87bcd..326f46202e 100644
--- a/src/smpi/smpi_coll.cpp
+++ b/src/smpi/smpi_coll.cpp
@@ -1,87 +1,77 @@
 /* smpi_coll.c -- various optimized routing for collectives */
-/* Copyright (c) 2009-2015. The SimGrid Team.
+/* Copyright (c) 2009-2017. The SimGrid Team.
  * All rights reserved. */
 
 /* This program is free software; you can redistribute it and/or modify it
  * under the terms of the license (GNU LGPL) which comes with this package. */
 
-#include
-#include
-#include
-
-#include "private.h"
-#include "colls/colls.h"
-#include "simgrid/sg_config.h"
+#include "src/smpi/private.h"
+#include "src/smpi/smpi_coll.hpp"
+#include "src/smpi/smpi_comm.hpp"
+#include "src/smpi/smpi_datatype.hpp"
+#include "src/smpi/smpi_op.hpp"
+#include "src/smpi/smpi_request.hpp"
 
 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_coll, smpi, "Logging specific to SMPI (coll)");
 
-s_mpi_coll_description_t mpi_coll_gather_description[] = {
-    {"default", "gather default collective", reinterpret_cast(&smpi_mpi_gather)},
-    COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */
-};
-
-s_mpi_coll_description_t mpi_coll_allgather_description[] = { {"default", "allgather default collective",
-    reinterpret_cast(&smpi_mpi_allgather)}, COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
-
-s_mpi_coll_description_t mpi_coll_allgatherv_description[] = {{"default", "allgatherv default collective",
-    reinterpret_cast(&smpi_mpi_allgatherv)}, COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA),
-    {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */
-};
-
-s_mpi_coll_description_t mpi_coll_allreduce_description[] = { {"default", "allreduce default collective",
-    reinterpret_cast(&smpi_mpi_allreduce)}, COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA),
-    {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */
-};
-
-s_mpi_coll_description_t mpi_coll_reduce_scatter_description[] = {{"default", "reduce_scatter default collective",
-    reinterpret_cast(&smpi_mpi_reduce_scatter)}, COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA),
-    {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */
-};
-
-s_mpi_coll_description_t mpi_coll_scatter_description[] = { {"default", "scatter default collective",
-    reinterpret_cast(&smpi_mpi_scatter)}, COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
-
-s_mpi_coll_description_t mpi_coll_barrier_description[] = { {"default", "barrier default collective",
-    reinterpret_cast(&smpi_mpi_barrier)}, COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}};
-
-s_mpi_coll_description_t mpi_coll_alltoall_description[] = { {"default", "Ompi alltoall default collective",
-    reinterpret_cast(&smpi_coll_tuned_alltoall_ompi2)}, COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA),
-    {"bruck", "Alltoall Bruck (SG) collective",
-    reinterpret_cast(&smpi_coll_tuned_alltoall_bruck)},
-    {"basic_linear", "Alltoall basic linear (SG) collective",
-    reinterpret_cast(&smpi_coll_tuned_alltoall_basic_linear)}, {nullptr, nullptr, nullptr}};
-
-s_mpi_coll_description_t mpi_coll_alltoallv_description[] = { {"default", "Ompi alltoallv default collective",
-    reinterpret_cast(&smpi_coll_basic_alltoallv)}, COLL_ALLTOALLVS(COLL_DESCRIPTION,
COLL_COMMA), - {nullptr, nullptr, nullptr} /* this array must be nullptr terminated */ -}; +#define COLL_SETTER(cat, ret, args, args2)\ +int (*Colls::cat ) args;\ +void Colls::set_##cat (const char * name){\ + int id = find_coll_description(mpi_coll_## cat ##_description,\ + name,#cat);\ + cat = reinterpret_cast\ + (mpi_coll_## cat ##_description[id].coll);\ + if (cat == nullptr)\ + xbt_die("Collective "#cat" set to nullptr!");\ +} -s_mpi_coll_description_t mpi_coll_bcast_description[] = { {"default", "bcast default collective ", - reinterpret_cast(&smpi_mpi_bcast)}, COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr}}; +#define SET_COLL(coll)\ + name = xbt_cfg_get_string("smpi/"#coll);\ + if (name==nullptr || name[0] == '\0')\ + name = selector_name;\ + set_##coll(name); -s_mpi_coll_description_t mpi_coll_reduce_description[] = { {"default", "reduce default collective", - reinterpret_cast(&smpi_mpi_reduce)}, COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +namespace simgrid{ +namespace smpi{ +void (*Colls::smpi_coll_cleanup_callback)(); +/* these arrays must be nullptr terminated */ +s_mpi_coll_description_t Colls::mpi_coll_gather_description[] = { + COLL_GATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_allgather_description[] = { + COLL_ALLGATHERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_allgatherv_description[] = { + COLL_ALLGATHERVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_allreduce_description[] ={ + COLL_ALLREDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_reduce_scatter_description[] = { + COLL_REDUCE_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_scatter_description[] ={ + COLL_SCATTERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_barrier_description[] ={ + COLL_BARRIERS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_alltoall_description[] = { + COLL_ALLTOALLS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_alltoallv_description[] = { + COLL_ALLTOALLVS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_bcast_description[] = { + COLL_BCASTS(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; +s_mpi_coll_description_t Colls::mpi_coll_reduce_description[] = { + COLL_REDUCES(COLL_DESCRIPTION, COLL_COMMA), {nullptr, nullptr, nullptr} }; /** Displays the long description of all registered models, and quit */ -void coll_help(const char *category, s_mpi_coll_description_t * table) +void Colls::coll_help(const char *category, s_mpi_coll_description_t * table) { - printf("Long description of the %s models accepted by this simulator:\n", category); + XBT_WARN("Long description of the %s models accepted by this simulator:\n", category); for (int i = 0; table[i].name; i++) - printf(" %s: %s\n", table[i].name, table[i].description); + XBT_WARN(" %s: %s\n", table[i].name, table[i].description); } -int find_coll_description(s_mpi_coll_description_t * table, char *name, const char *desc) +int Colls::find_coll_description(s_mpi_coll_description_t * table, const char *name, const char *desc) { char *name_list = 
nullptr; - int selector_on=0; - if (name==nullptr || name[0] == '\0') { - //no argument provided, use active selector's algorithm - name=static_cast(xbt_cfg_get_string("smpi/coll-selector")); - selector_on=1; - } for (int i = 0; table[i].name; i++) if (!strcmp(name, table[i].name)) { if (strcmp(table[i].name,"default")) @@ -89,13 +79,6 @@ int find_coll_description(s_mpi_coll_description_t * table, char *name, const ch return i; } - if(selector_on){ - // collective seems not handled by the active selector, try with default one - for (int i = 0; table[i].name; i++) - if (!strcmp("default", table[i].name)) { - return i; - } - } if (!table[0].name) xbt_die("No collective is valid for '%s'! This is a bug.",name); name_list = xbt_strdup(table[0].name); @@ -108,204 +91,256 @@ int find_coll_description(s_mpi_coll_description_t * table, char *name, const ch return -1; } -int (*mpi_coll_gather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, int root, MPI_Comm); -int (*mpi_coll_allgather_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allgatherv_fun)(void *, int, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_allreduce_fun)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm); -int (*mpi_coll_alltoall_fun)(void *, int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm); -int (*mpi_coll_alltoallv_fun)(void *, int*, int*, MPI_Datatype, void*, int*, int*, MPI_Datatype, MPI_Comm); -int (*mpi_coll_bcast_fun)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com); -int (*mpi_coll_reduce_fun)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); -int (*mpi_coll_reduce_scatter_fun)(void *sbuf, void *rbuf, int *rcounts,MPI_Datatype dtype,MPI_Op op,MPI_Comm comm); -int (*mpi_coll_scatter_fun)(void *sendbuf, int sendcount, MPI_Datatype sendtype,void *recvbuf, int recvcount, MPI_Datatype recvtype,int root, MPI_Comm comm); -int (*mpi_coll_barrier_fun)(MPI_Comm comm); -void (*smpi_coll_cleanup_callback)(); - - -int smpi_coll_tuned_alltoall_ompi2(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + + +COLL_APPLY(COLL_SETTER,COLL_GATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLGATHERV_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLREDUCE_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_REDUCE_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_SCATTER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BARRIER_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_BCAST_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALL_SIG,""); +COLL_APPLY(COLL_SETTER,COLL_ALLTOALLV_SIG,""); + + +void Colls::set_collectives(){ + const char* selector_name = static_cast(xbt_cfg_get_string("smpi/coll-selector")); + if (selector_name==nullptr || selector_name[0] == '\0') + selector_name = "default"; + + const char* name; + + SET_COLL(gather); + SET_COLL(allgather); + SET_COLL(allgatherv); + SET_COLL(allreduce); + SET_COLL(alltoall); + SET_COLL(alltoallv); + SET_COLL(reduce); + SET_COLL(reduce_scatter); + SET_COLL(scatter); + SET_COLL(bcast); + SET_COLL(barrier); +} + + +//Implementations of the single algorith collectives + +int Colls::gatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, + MPI_Datatype recvtype, int root, MPI_Comm comm) { + int system_tag = COLL_TAG_GATHERV; + MPI_Aint lb = 0; + MPI_Aint recvext = 0; + + int 
rank = comm->rank(); int size = comm->size(); - int sendsize = smpi_datatype_size(sendtype) * sendcount; - if (sendsize < 200 && size > 12) { - return smpi_coll_tuned_alltoall_bruck(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); - } else if (sendsize < 3000) { - return smpi_coll_tuned_alltoall_basic_linear(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + if (rank != root) { + // Send buffer to root + Request::send(sendbuf, sendcount, sendtype, root, system_tag, comm); } else { - return smpi_coll_tuned_alltoall_ring(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); + recvtype->extent(&lb, &recvext); + // Local copy from root + Datatype::copy(sendbuf, sendcount, sendtype, static_cast(recvbuf) + displs[root] * recvext, + recvcounts[root], recvtype); + // Receive buffers from senders + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int src = 0; src < size; src++) { + if(src != root) { + requests[index] = Request::irecv_init(static_cast(recvbuf) + displs[src] * recvext, + recvcounts[src], recvtype, src, system_tag, comm); + index++; + } + } + // Wait for completion of irecv's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int src = 0; src < size-1; src++) { + Request::unref(&requests[src]); + } + xbt_free(requests); } + return MPI_SUCCESS; } -/** - * Alltoall Bruck - * - * Openmpi calls this routine when the message size sent to each rank < 2000 bytes and size < 12 - * FIXME: uh, check smpi_pmpi again, but this routine is called for > 12, not less... - **/ -int smpi_coll_tuned_alltoall_bruck(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + +int Colls::scatterv(void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, int root, MPI_Comm comm) { - int system_tag = 777; - int i; - int count; - MPI_Aint lb; + int system_tag = COLL_TAG_SCATTERV; + MPI_Aint lb = 0; MPI_Aint sendext = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - // FIXME: check implementation int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm alltoall_bruck() called.", rank); - smpi_datatype_extent(sendtype, &lb, &sendext); - smpi_datatype_extent(recvtype, &lb, &recvext); - /* Local copy from self */ - int err = smpi_datatype_copy(static_cast(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, - static_cast(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. 
*/ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i != rank) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [src = %d, recvcount = %d]", rank, i, recvcount); - } + if(rank != root) { + // Recv buffer from root + Request::recv(recvbuf, recvcount, recvtype, root, system_tag, comm, MPI_STATUS_IGNORE); + } else { + sendtype->extent(&lb, &sendext); + // Local copy from root + if(recvbuf!=MPI_IN_PLACE){ + Datatype::copy(static_cast(sendbuf) + displs[root] * sendext, sendcounts[root], + sendtype, recvbuf, recvcount, recvtype); } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i != rank) { - requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [dst = %d, sendcount = %d]", rank, i, sendcount); + // Send buffers to receivers + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + int index = 0; + for (int dst = 0; dst < size; dst++) { + if (dst != root) { + requests[index] = Request::isend_init(static_cast(sendbuf) + displs[dst] * sendext, sendcounts[dst], + sendtype, dst, system_tag, comm); + index++; } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unuse(&requests[i]); + // Wait for completion of isend's. + Request::startall(size - 1, requests); + Request::waitall(size - 1, requests, MPI_STATUS_IGNORE); + for (int dst = 0; dst < size-1; dst++) { + Request::unref(&requests[dst]); } xbt_free(requests); } return MPI_SUCCESS; } -/** - * Alltoall basic_linear (STARMPI:alltoall-simple) - **/ -int smpi_coll_tuned_alltoall_basic_linear(void *sendbuf, int sendcount, MPI_Datatype sendtype, - void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + +int Colls::scan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 888; - int i; - int count; - MPI_Aint lb = 0, sendext = 0, recvext = 0; - MPI_Request *requests; + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; - /* Initialize. */ int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm alltoall_basic_linear() called.", rank); - smpi_datatype_extent(sendtype, &lb, &sendext); - smpi_datatype_extent(recvtype, &lb, &recvext); - /* simple optimization */ - int err = smpi_datatype_copy(static_cast(sendbuf) + rank * sendcount * sendext, sendcount, sendtype, - static_cast(recvbuf) + rank * recvcount * recvext, recvcount, recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. */ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - /* Post all receives first -- a simple optimization */ - count = 0; - for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + i * recvcount * recvext, recvcount, - recvtype, i, system_tag, comm); - count++; - } - /* Now post all sends in reverse order - * - We would like to minimize the search time through message queue - * when messages actually arrive in the order in which they were posted. 
- * TODO: check the previous assertion - */ - for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size) { - requests[count] = Request::isend_init(static_cast(sendbuf) + i * sendcount * sendext, sendcount, - sendtype, i, system_tag, comm); - count++; + + datatype->extent(&lb, &dataext); + + // Local copy from self + Datatype::copy(sendbuf, count, datatype, recvbuf, count, datatype); + + // Send/Recv buffers to/from others + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; + } + if(index < rank) { + // #Request is below rank: it's a irecv + op->apply( tmpbufs[index], recvbuf, &count, datatype); + } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unuse(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank && op!=MPI_OP_NULL) { + op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; } -int smpi_coll_basic_alltoallv(void *sendbuf, int *sendcounts, int *senddisps, MPI_Datatype sendtype, - void *recvbuf, int *recvcounts, int *recvdisps, MPI_Datatype recvtype, MPI_Comm comm) +int Colls::exscan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { - int system_tag = 889; - int i; - int count; - MPI_Aint lb = 0; - MPI_Aint sendext = 0; - MPI_Aint recvext = 0; - MPI_Request *requests; - - /* Initialize. */ + int system_tag = -888; + MPI_Aint lb = 0; + MPI_Aint dataext = 0; + int recvbuf_is_empty=1; int rank = comm->rank(); int size = comm->size(); - XBT_DEBUG("<%d> algorithm basic_alltoallv() called.", rank); - smpi_datatype_extent(sendtype, &lb, &sendext); - smpi_datatype_extent(recvtype, &lb, &recvext); - /* Local copy from self */ - int err = smpi_datatype_copy(static_cast(sendbuf) + senddisps[rank] * sendext, sendcounts[rank], sendtype, - static_cast(recvbuf) + recvdisps[rank] * recvext, recvcounts[rank], recvtype); - if (err == MPI_SUCCESS && size > 1) { - /* Initiate all send/recv to/from others. 
*/ - requests = xbt_new(MPI_Request, 2 * (size - 1)); - count = 0; - /* Create all receives that will be posted first */ - for (i = 0; i < size; ++i) { - if (i != rank && recvcounts[i] != 0) { - requests[count] = Request::irecv_init(static_cast(recvbuf) + recvdisps[i] * recvext, - recvcounts[i], recvtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [src = %d, recvcounts[src] = %d]", rank, i, recvcounts[i]); + + datatype->extent(&lb, &dataext); + + // Send/Recv buffers to/from others + MPI_Request *requests = xbt_new(MPI_Request, size - 1); + void **tmpbufs = xbt_new(void *, rank); + int index = 0; + for (int other = 0; other < rank; other++) { + tmpbufs[index] = smpi_get_tmp_sendbuffer(count * dataext); + requests[index] = Request::irecv_init(tmpbufs[index], count, datatype, other, system_tag, comm); + index++; + } + for (int other = rank + 1; other < size; other++) { + requests[index] = Request::isend_init(sendbuf, count, datatype, other, system_tag, comm); + index++; + } + // Wait for completion of all comms. + Request::startall(size - 1, requests); + + if(op != MPI_OP_NULL && op->is_commutative()){ + for (int other = 0; other < size - 1; other++) { + index = Request::waitany(size - 1, requests, MPI_STATUS_IGNORE); + if(index == MPI_UNDEFINED) { + break; } - } - /* Now create all sends */ - for (i = 0; i < size; ++i) { - if (i != rank && sendcounts[i] != 0) { - requests[count] = Request::isend_init(static_cast(sendbuf) + senddisps[i] * sendext, - sendcounts[i], sendtype, i, system_tag, comm); - count++; - }else{ - XBT_DEBUG("<%d> skip request creation [dst = %d, sendcounts[dst] = %d]", rank, i, sendcounts[i]); + if(index < rank) { + if(recvbuf_is_empty){ + Datatype::copy(tmpbufs[index], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty=0; + } else + // #Request is below rank: it's a irecv + op->apply( tmpbufs[index], recvbuf, &count, datatype); } } - /* Wait for them all. */ - Request::startall(count, requests); - XBT_DEBUG("<%d> wait for %d requests", rank, count); - Request::waitall(count, requests, MPI_STATUS_IGNORE); - for(i = 0; i < count; i++) { - if(requests[i]!=MPI_REQUEST_NULL) - Request::unuse(&requests[i]); + }else{ + //non commutative case, wait in order + for (int other = 0; other < size - 1; other++) { + Request::wait(&(requests[other]), MPI_STATUS_IGNORE); + if(index < rank) { + if (recvbuf_is_empty) { + Datatype::copy(tmpbufs[other], count, datatype, recvbuf, count, datatype); + recvbuf_is_empty = 0; + } else + if(op!=MPI_OP_NULL) + op->apply( tmpbufs[other], recvbuf, &count, datatype); + } } - xbt_free(requests); } - return err; + for(index = 0; index < rank; index++) { + smpi_free_tmp_buffer(tmpbufs[index]); + } + for(index = 0; index < size-1; index++) { + Request::unref(&requests[index]); + } + xbt_free(tmpbufs); + xbt_free(requests); + return MPI_SUCCESS; +} + } +} + + + + +
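For readers of the patch: the new Colls::scan and Colls::exscan above implement the usual MPI prefix-reduction semantics, where rank r obtains the reduction of the contributions of ranks 0..r (inclusive scan) or 0..r-1 (exclusive scan, undefined on rank 0). The following is a minimal sketch using only the public MPI API, not SMPI internals; it assumes a working MPI/SMPI toolchain (e.g. smpicxx + smpirun) and the variable names are purely illustrative.

#include <mpi.h>
#include <cstdio>

int main(int argc, char* argv[])
{
  MPI_Init(&argc, &argv);

  int rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Each rank contributes one integer; MPI_SUM turns the scan into a prefix sum.
  int mine      = rank + 1;
  int inclusive = 0;
  int exclusive = 0;

  // Inclusive scan: rank r receives 1 + 2 + ... + (r+1).
  MPI_Scan(&mine, &inclusive, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  // Exclusive scan: rank r > 0 receives 1 + 2 + ... + r; rank 0's output is undefined.
  MPI_Exscan(&mine, &exclusive, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  if (rank == 0)
    std::printf("rank %d: inclusive=%d\n", rank, inclusive);
  else
    std::printf("rank %d: inclusive=%d exclusive=%d\n", rank, inclusive, exclusive);

  MPI_Finalize();
  return 0;
}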