X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/a92d7b716f51a53dea7f59db8524d4add713b910..933849e1079bf5389d6a38447d49cbe5cc65101d:/src/smpi/colls/scatter/scatter-ompi.cpp

diff --git a/src/smpi/colls/scatter/scatter-ompi.cpp b/src/smpi/colls/scatter/scatter-ompi.cpp
index 9d14907810..6bc77d60a9 100644
--- a/src/smpi/colls/scatter/scatter-ompi.cpp
+++ b/src/smpi/colls/scatter/scatter-ompi.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017. The SimGrid Team.
+/* Copyright (c) 2013-2022. The SimGrid Team.
  * All rights reserved.                                                     */
 
 /* This program is free software; you can redistribute it and/or modify it
@@ -19,15 +19,14 @@
  * Additional copyrights may follow
  */
 
-
-#include "../colls_private.h"
-#include "../coll_tuned_topo.h"
+#include "../coll_tuned_topo.hpp"
+#include "../colls_private.hpp"
 
 namespace simgrid{
 namespace smpi{
 
-int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount,
-                                        MPI_Datatype rdtype, int root, MPI_Comm comm)
+int scatter__ompi_binomial(const void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount,
+                           MPI_Datatype rdtype, int root, MPI_Comm comm)
 {
     int line = -1;
     int i;
@@ -35,12 +34,13 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
     int vrank;
     int size;
     int total_send = 0;
-    char *ptmp = NULL;
-    char *tempbuf = NULL;
+    unsigned char* ptmp    = nullptr;
+    unsigned char* tempbuf = nullptr;
+    const unsigned char* cptmp; // const ptmp
     int err;
     ompi_coll_tree_t* bmtree;
     MPI_Status status;
-    MPI_Aint sextent, slb, strue_lb, strue_extent; 
+    MPI_Aint sextent, slb, strue_lb, strue_extent;
     MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
 
     size = comm->size();
@@ -50,7 +50,7 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
                 "Coll_scatter_ompi_binomial::scatter rank %d", rank);
 
     /* create the binomial tree */
-    
+
 //    COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
     bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( comm, root);//ompi_ data->cached_in_order_bmtree;
 
@@ -64,7 +64,8 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
     if (rank == root) {
       if (0 == root) {
         /* root on 0, just use the send buffer */
-        ptmp = (char*)sbuf;
+        ptmp  = nullptr; // unused
+        cptmp = static_cast<const unsigned char*>(sbuf);
         if (rbuf != MPI_IN_PLACE) {
           /* local copy to rbuf */
           err = Datatype::copy(sbuf, scount, sdtype, rbuf, rcount, rdtype);
@@ -75,14 +76,15 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
         }
       } else {
         /* root is not on 0, allocate temp buffer for send */
-        tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent);
-        if (NULL == tempbuf) {
+        tempbuf = smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent);
+        if (nullptr == tempbuf) {
           err  = MPI_ERR_OTHER;
           line = __LINE__;
           goto err_hndl;
         }
 
-        ptmp = tempbuf - slb;
+        ptmp  = tempbuf - slb;
+        cptmp = ptmp;
 
         /* and rotate data so they will eventually in the right place */
         err = Datatype::copy((char*)sbuf + sextent * root * scount, scount * (size - root), sdtype, ptmp,
@@ -112,14 +114,15 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
     } else if (not(vrank % 2)) {
       /* non-root, non-leaf nodes, allocate temp buffer for recv
        * the most we need is rcount*size/2 */
-      tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent);
-      if (NULL == tempbuf) {
+      tempbuf = smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent);
+      if (nullptr == tempbuf) {
         err  = MPI_ERR_OTHER;
         line = __LINE__;
         goto err_hndl;
       }
 
-      ptmp = tempbuf - rlb;
+      ptmp  = tempbuf - rlb;
+      cptmp = ptmp;
 
       sdtype = rdtype;
       scount = rcount;
@@ -127,7 +130,8 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
       total_send = scount;
     } else {
       /* leaf nodes, just use rbuf */
-      ptmp = (char*)rbuf;
+      ptmp  = static_cast<unsigned char*>(rbuf);
+      cptmp = ptmp;
     }
 
     if (not(vrank % 2)) {
@@ -147,7 +151,7 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
             mycount = size - vkid;
           mycount *= scount;
 
-          Request::send(ptmp + total_send * sextent, mycount, sdtype, bmtree->tree_next[i], COLL_TAG_SCATTER, comm);
+          Request::send(cptmp + total_send * sextent, mycount, sdtype, bmtree->tree_next[i], COLL_TAG_SCATTER, comm);
 
           total_send += mycount;
         }
@@ -157,16 +161,14 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
       Request::recv(ptmp, rcount, rdtype, bmtree->tree_prev, COLL_TAG_SCATTER, comm, &status);
     }
 
-    if (NULL != tempbuf)
-      smpi_free_tmp_buffer(tempbuf);
+    smpi_free_tmp_buffer(tempbuf);
 
     // not FIXME : store the tree, as done in ompi, instead of calculating it each time ?
-    xbt_free(bmtree);
+    ompi_coll_tuned_topo_destroy_tree(&bmtree);
 
     return MPI_SUCCESS;
 
 err_hndl:
-    if (NULL != tempbuf)
-      free(tempbuf);
+    smpi_free_tmp_buffer(tempbuf);
     XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank);
     return err;
@@ -175,8 +177,8 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
 /*
  * Linear functions are copied from the BASIC coll module
  * they do not segment the message and are simple implementations
- * but for some small number of nodes and/or small data sizes they 
- * are just as fast as tuned/tree based segmenting operations 
+ * but for some small number of nodes and/or small data sizes they
+ * are just as fast as tuned/tree based segmenting operations
  * and as such may be selected by the decision functions
  * These are copied into this module due to the way we select modules
  * in V1. i.e. in V2 we will handle this differently and so will not
@@ -192,8 +194,8 @@ int Coll_scatter_ompi_binomial::scatter(void* sbuf, int scount, MPI_Datatype sdt
  *  Accepts:  - same arguments as MPI_Scatter()
  *  Returns:  - MPI_SUCCESS or error code
  */
-int Coll_scatter_ompi_basic_linear::scatter(void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount,
-                                            MPI_Datatype rdtype, int root, MPI_Comm comm)
+int scatter__ompi_basic_linear(const void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount,
+                               MPI_Datatype rdtype, int root, MPI_Comm comm)
 {
     int i, rank, size, err;
     char *ptmp;
@@ -246,5 +248,79 @@ int Coll_scatter_ompi_basic_linear::scatter(void* sbuf, int scount, MPI_Datatype
     return MPI_SUCCESS;
 }
 
+/*
+ * Use isends for distributing the data with periodic sync by blocking send.
+ * Blocking send acts like a local resources flush, because it ensures
+ * progression until the message is sent/(copied to some sort of transmit buffer).
+ */
+int scatter__ompi_linear_nb(const void *sbuf, int scount,
+                            MPI_Datatype sdtype,
+                            void *rbuf, int rcount,
+                            MPI_Datatype rdtype,
+                            int root,
+                            MPI_Comm comm)
+{
+    int i, rank, size, err, line, nreqs;
+    ptrdiff_t incr;
+    char *ptmp;
+    MPI_Request *reqs = nullptr;
+    MPI_Request *preq = nullptr;
+
+    rank = comm->rank();
+    size = comm->size();
+
+    /* If not root, receive data. */
+    if (rank != root) {
+        Request::recv(rbuf, rcount, rdtype, root,
+                      COLL_TAG_SCATTER,
+                      comm, MPI_STATUS_IGNORE);
+        return MPI_SUCCESS;
+    }
+
+    nreqs = size - 1; /* no send for myself */
+
+    reqs = new MPI_Request[nreqs];
+    if (NULL == reqs) {
+        err = MPI_ERR_OTHER;
+        line = __LINE__; goto err_hndl;
+    }
+
+    incr = sdtype->get_extent();
+    incr *= scount;
+
+    /* I am the root, loop sending data. */
+    for (i = 0, ptmp = (char *)sbuf, preq = reqs; i < size; ++i, ptmp += incr) {
+        /* simple optimization */
+        if (i == rank) {
+            if (MPI_IN_PLACE != rbuf) {
+                err = Datatype::copy(ptmp, scount, sdtype, rbuf, rcount,
+                                     rdtype);
+            }
+        } else {
+            *preq = Request::isend(ptmp, scount, sdtype, i,
+                                   COLL_TAG_SCATTER, comm);
+            preq++;
+        }
+        if (MPI_SUCCESS != err) {
+            line = __LINE__; goto err_hndl;
+        }
+    }
+
+    err = Request::waitall(preq - reqs, reqs, MPI_STATUSES_IGNORE);
+    if (MPI_SUCCESS != err) {
+        line = __LINE__; goto err_hndl;
+    }
+
+    return MPI_SUCCESS;
+
+err_hndl:
+    if (NULL != reqs) {
+        delete[] reqs;
+    }
+    XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank);
+    (void)line;  /* silence compiler warning */
+    return err;
+}
+
 }
 }
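
The three functions above (scatter__ompi_binomial, scatter__ompi_basic_linear, scatter__ompi_linear_nb) are entries of SMPI's collective-algorithm selector, so they can be exercised from an ordinary MPI program run under smpirun. Below is a minimal sketch of such a program; the "smpi/scatter" configuration key and the algorithm names "ompi_binomial" / "ompi_basic_linear" / "ompi_linear_nb" are assumed to follow the selector naming derived from the function suffixes, and platform.xml / hostfile.txt stand in for the usual smpirun platform and host files.

/* scatter_demo.c -- small MPI_Scatter call to drive one of the algorithms above.
 * Build with smpicc and run, for instance:
 *   smpirun --cfg=smpi/scatter:ompi_binomial -np 8 \
 *           -platform platform.xml -hostfile hostfile.txt ./scatter_demo
 * (config key, algorithm names and input files are assumptions, see above) */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[])
{
  MPI_Init(&argc, &argv);

  int rank;
  int size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int chunk = 4; /* elements delivered to each rank */
  int* sendbuf    = NULL;
  if (rank == 0) { /* only the root needs the full send buffer */
    sendbuf = malloc(sizeof(int) * chunk * size);
    for (int i = 0; i < chunk * size; i++)
      sendbuf[i] = i;
  }

  int recvbuf[4];
  MPI_Scatter(sendbuf, chunk, MPI_INT, recvbuf, chunk, MPI_INT, 0, MPI_COMM_WORLD);
  printf("rank %d received [%d..%d]\n", rank, recvbuf[0], recvbuf[chunk - 1]);

  free(sendbuf);
  MPI_Finalize();
  return 0;
}

Swapping ompi_binomial for ompi_linear_nb in the --cfg flag switches the root from the tree-based schedule to the isend/waitall loop added in this diff.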