X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/40616078da72e823931c1fb884949054699ec39d..13fb4af932a02ea0bd4293d1e55ac071de326f80:/src/smpi/colls/gather/gather-ompi.cpp diff --git a/src/smpi/colls/gather/gather-ompi.cpp b/src/smpi/colls/gather/gather-ompi.cpp index e5133fd925..6190e56669 100644 --- a/src/smpi/colls/gather/gather-ompi.cpp +++ b/src/smpi/colls/gather/gather-ompi.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2013-2014. The SimGrid Team. +/* Copyright (c) 2013-2017. The SimGrid Team. * All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it @@ -22,15 +22,11 @@ #include "../colls_private.h" #include "../coll_tuned_topo.h" -/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain, - * gather_intra_pipeline, segmentation? */ -int -smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, - MPI_Datatype sdtype, - void *rbuf, int rcount, - MPI_Datatype rdtype, - int root, - MPI_Comm comm) +namespace simgrid{ +namespace smpi{ + +int Coll_gather_ompi_binomial::gather(void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, + MPI_Datatype rdtype, int root, MPI_Comm comm) { int line = -1; int i; @@ -43,15 +39,14 @@ smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, int err; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, slb, strue_lb, strue_extent; + MPI_Aint sextent, slb, strue_lb, strue_extent; MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; size = comm->size(); rank = comm->rank(); - XBT_DEBUG( - "smpi_coll_tuned_gather_ompi_binomial rank %d", rank); + XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d", rank); /* create the binomial tree */ // COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root ); @@ -66,140 +61,149 @@ smpi_coll_tuned_gather_ompi_binomial(void *sbuf, int scount, if (rank == root) { rdtype->extent(&rlb, &rextent); rdtype->extent(&rtrue_lb, &rtrue_extent); - if (0 == root){ - /* root on 0, just use the recv buffer */ - ptmp = (char *) rbuf; - if (sbuf != MPI_IN_PLACE) { - err = Datatype::copy(sbuf, scount, sdtype, - ptmp, rcount, rdtype); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - } - } else { - /* root is not on 0, allocate temp buffer for recv, - * rotate data at the end */ - tempbuf = (char *) smpi_get_tmp_recvbuffer(rtrue_extent + (rcount*size - 1) * rextent); - if (NULL == tempbuf) { - err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl; - } - - ptmp = tempbuf - rlb; - if (sbuf != MPI_IN_PLACE) { - /* copy from sbuf to temp buffer */ - err = Datatype::copy(sbuf, scount, sdtype, - ptmp, rcount, rdtype); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - } else { - /* copy from rbuf to temp buffer */ - err = Datatype::copy((char *) rbuf + rank*rextent*rcount, rcount, rdtype, ptmp, rcount, rdtype ); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - } - } - total_recv = rcount; + if (0 == root) { + /* root on 0, just use the recv buffer */ + ptmp = (char*)rbuf; + if (sbuf != MPI_IN_PLACE) { + err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } + } + } else { + /* root is not on 0, allocate temp buffer for recv, + * rotate data at the end */ + tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent); + if (NULL == tempbuf) { + err = MPI_ERR_OTHER; + line = __LINE__; + goto err_hndl; + } + + ptmp = tempbuf - rlb; + if (sbuf != MPI_IN_PLACE) { + /* copy from sbuf to temp buffer */ + err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } + } else { + /* copy from rbuf to temp buffer */ + err = Datatype::copy((char*)rbuf + rank * rextent * rcount, rcount, rdtype, ptmp, rcount, rdtype); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } + } + } + total_recv = rcount; } else if (!(vrank % 2)) { - /* other non-leaf nodes, allocate temp buffer for data received from - * children, the most we need is half of the total data elements due - * to the property of binimoal tree */ - tempbuf = (char *) smpi_get_tmp_sendbuffer(strue_extent + (scount*size - 1) * sextent); - if (NULL == tempbuf) { - err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl; - } - - ptmp = tempbuf - slb; - /* local copy to tempbuf */ - err = Datatype::copy(sbuf, scount, sdtype, - ptmp, scount, sdtype); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - - /* use sdtype,scount as rdtype,rdcount since they are ignored on - * non-root procs */ - rdtype = sdtype; - rcount = scount; - rextent = sextent; - total_recv = rcount; + /* other non-leaf nodes, allocate temp buffer for data received from + * children, the most we need is half of the total data elements due + * to the property of binimoal tree */ + tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent); + if (NULL == tempbuf) { + err = MPI_ERR_OTHER; + line = __LINE__; + goto err_hndl; + } + + ptmp = tempbuf - slb; + /* local copy to tempbuf */ + err = Datatype::copy(sbuf, scount, sdtype, ptmp, scount, sdtype); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } + + /* use sdtype,scount as rdtype,rdcount since they are ignored on + * non-root procs */ + rdtype = sdtype; + rcount = scount; + rextent = sextent; + total_recv = rcount; } else { - /* leaf nodes, no temp buffer needed, use sdtype,scount as - * rdtype,rdcount since they are ignored on non-root procs */ - ptmp = (char *) sbuf; - total_recv = scount; + /* leaf nodes, no temp buffer needed, use sdtype,scount as + * rdtype,rdcount since they are ignored on non-root procs */ + ptmp = (char*)sbuf; + total_recv = scount; } if (!(vrank % 2)) { - /* all non-leaf nodes recv from children */ - for (i = 0; i < bmtree->tree_nextsize; i++) { - int mycount = 0, vkid; - /* figure out how much data I have to send to this child */ - vkid = (bmtree->tree_next[i] - root + size) % size; - mycount = vkid - vrank; - if (mycount > (size - vkid)) - mycount = size - vkid; - mycount *= rcount; - - XBT_DEBUG( - "smpi_coll_tuned_gather_ompi_binomial rank %d recv %d mycount = %d", - rank, bmtree->tree_next[i], mycount); - - Request::recv(ptmp + total_recv*rextent, mycount, rdtype, - bmtree->tree_next[i], COLL_TAG_GATHER, - comm, &status); - - total_recv += mycount; - } + /* all non-leaf nodes recv from children */ + for (i = 0; i < bmtree->tree_nextsize; i++) { + int mycount = 0, vkid; + /* figure out how much data I have to send to this child */ + vkid = (bmtree->tree_next[i] - root + size) % size; + mycount = vkid - vrank; + if (mycount > (size - vkid)) + mycount = size - vkid; + mycount *= rcount; + + XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d recv %d mycount = %d", rank, bmtree->tree_next[i], + mycount); + + Request::recv(ptmp + total_recv * rextent, mycount, rdtype, bmtree->tree_next[i], COLL_TAG_GATHER, comm, + &status); + + total_recv += mycount; + } } if (rank != root) { - /* all nodes except root send to parents */ - XBT_DEBUG( - "smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n", - rank, bmtree->tree_prev, total_recv); - - Request::send(ptmp, total_recv, sdtype, - bmtree->tree_prev, - COLL_TAG_GATHER, - comm); + /* all nodes except root send to parents */ + XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n", rank, bmtree->tree_prev, total_recv); + + Request::send(ptmp, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm); } if (rank == root) { - if (root != 0) { - /* rotate received data on root if root != 0 */ - err = Datatype::copy(ptmp, rcount*(size - root), rdtype, - (char *) rbuf + rextent*root*rcount, rcount*(size - root), rdtype ); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - + if (root != 0) { + /* rotate received data on root if root != 0 */ + err = Datatype::copy(ptmp, rcount * (size - root), rdtype, (char*)rbuf + rextent * root * rcount, + rcount * (size - root), rdtype); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } - err = Datatype::copy( ptmp + rextent*rcount*(size-root), rcount*root,rdtype, - (char *) rbuf,rcount*root,rdtype); - if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } + err = Datatype::copy(ptmp + rextent * rcount * (size - root), rcount * root, rdtype, (char*)rbuf, rcount * root, + rdtype); + if (MPI_SUCCESS != err) { + line = __LINE__; + goto err_hndl; + } - smpi_free_tmp_buffer(tempbuf); - } + smpi_free_tmp_buffer(tempbuf); + } } else if (!(vrank % 2)) { - /* other non-leaf nodes */ - smpi_free_tmp_buffer(tempbuf); + /* other non-leaf nodes */ + smpi_free_tmp_buffer(tempbuf); } xbt_free(bmtree); return MPI_SUCCESS; err_hndl: if (NULL != tempbuf) - smpi_free_tmp_buffer(tempbuf); + smpi_free_tmp_buffer(tempbuf); - XBT_DEBUG( "%s:%4d\tError occurred %d, rank %2d", - __FILE__, line, err, rank); + XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank); return err; } /* - * gather_intra_linear_sync + * gather_intra_linear_sync * - * Function: - synchronized gather operation with - * Accepts: - same arguments as MPI_Gather(), first segment size - * Returns: - MPI_SUCCESS or error code + * Function: - synchronized gather operation with + * Accepts: - same arguments as MPI_Gather(), first segment size + * Returns: - MPI_SUCCESS or error code */ -int -smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, +int Coll_gather_ompi_linear_sync::gather(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf, int rcount, MPI_Datatype rdtype, - int root, + int root, MPI_Comm comm) { int i; @@ -213,7 +217,7 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, int first_segment_size=0; size = comm->size(); rank = comm->rank(); - + size_t dsize, block_size; if (rank == root) { dsize= rdtype->size(); @@ -222,77 +226,72 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, dsize=sdtype->size(); block_size = dsize * scount; } - + if (block_size > 92160){ first_segment_size = 32768; }else{ first_segment_size = 1024; } - XBT_DEBUG( - "smpi_coll_tuned_gather_ompi_linear_sync rank %d, segment %d", rank, first_segment_size); + XBT_DEBUG("smpi_coll_tuned_gather_ompi_linear_sync rank %d, segment %d", rank, first_segment_size); - if (rank != root) { - /* Non-root processes: - - receive zero byte message from the root, - - send the first segment of the data synchronously, - - send the second segment of the data. - */ - - typelng= sdtype->size(); - sdtype->extent(&lb, &extent); - first_segment_count = scount; - COLL_TUNED_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng, - first_segment_count ); + if (rank != root) { + /* Non-root processes: + - receive zero byte message from the root, + - send the first segment of the data synchronously, + - send the second segment of the data. + */ - Request::recv(sbuf, 0, MPI_BYTE, root, - COLL_TAG_GATHER, - comm, MPI_STATUS_IGNORE); + typelng = sdtype->size(); + sdtype->extent(&lb, &extent); + first_segment_count = scount; + COLL_TUNED_COMPUTED_SEGCOUNT((size_t)first_segment_size, typelng, first_segment_count); - Request::send(sbuf, first_segment_count, sdtype, root, - COLL_TAG_GATHER, - comm); + Request::recv(sbuf, 0, MPI_BYTE, root, COLL_TAG_GATHER, comm, MPI_STATUS_IGNORE); - Request::send((char*)sbuf + extent * first_segment_count, - (scount - first_segment_count), sdtype, - root, COLL_TAG_GATHER, - comm); + Request::send(sbuf, first_segment_count, sdtype, root, COLL_TAG_GATHER, comm); + + Request::send((char*)sbuf + extent * first_segment_count, (scount - first_segment_count), sdtype, root, + COLL_TAG_GATHER, comm); } else { - /* Root process, - - For every non-root node: - - post irecv for the first segment of the message - - send zero byte message to signal node to send the message - - post irecv for the second segment of the message - - wait for the first segment to complete - - Copy local data if necessary - - Waitall for all the second segments to complete. - */ - char *ptmp; - MPI_Request *reqs = NULL, first_segment_req; - reqs = (MPI_Request *) calloc(size, sizeof(MPI_Request )); - if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; } - + /* Root process, + - For every non-root node: + - post irecv for the first segment of the message + - send zero byte message to signal node to send the message + - post irecv for the second segment of the message + - wait for the first segment to complete + - Copy local data if necessary + - Waitall for all the second segments to complete. +*/ + char* ptmp; + MPI_Request *reqs = NULL, first_segment_req; + reqs = (MPI_Request*)calloc(size, sizeof(MPI_Request)); + if (NULL == reqs) { + ret = -1; + line = __LINE__; + goto error_hndl; } + typelng=rdtype->size(); rdtype->extent(&lb, &extent); first_segment_count = rcount; - COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng, + COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng, first_segment_count ); for (i = 0; i < size; ++i) { - if (i == rank) { + if (i == rank) { /* skip myself */ - reqs[i] = MPI_REQUEST_NULL; - continue; - } + reqs[i] = MPI_REQUEST_NULL; + continue; + } /* irecv for the first segment from i */ ptmp = (char*)rbuf + i * rcount * extent; first_segment_req = Request::irecv(ptmp, first_segment_count, rdtype, i, COLL_TAG_GATHER, comm ); - + /* send sync message */ Request::send(rbuf, 0, MPI_BYTE, i, COLL_TAG_GATHER, @@ -300,7 +299,7 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, /* irecv for the second segment */ ptmp = (char*)rbuf + (i * rcount + first_segment_count) * extent; - reqs[i]=Request::irecv(ptmp, (rcount - first_segment_count), + reqs[i]=Request::irecv(ptmp, (rcount - first_segment_count), rdtype, i, COLL_TAG_GATHER, comm ); @@ -311,11 +310,11 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, /* copy local data if necessary */ if (MPI_IN_PLACE != sbuf) { ret = Datatype::copy(sbuf, scount, sdtype, - (char*)rbuf + rank * rcount * extent, + (char*)rbuf + rank * rcount * extent, rcount, rdtype); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } - + /* wait all second segments to complete */ ret = Request::waitall(size, reqs, MPI_STATUSES_IGNORE); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -327,8 +326,8 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, return MPI_SUCCESS; error_hndl: - XBT_DEBUG( - "ERROR_HNDL: node %d file %s line %d error %d\n", + XBT_DEBUG( + "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret ); return ret; } @@ -336,8 +335,8 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as tuned/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not @@ -347,19 +346,14 @@ smpi_coll_tuned_gather_ompi_linear_sync(void *sbuf, int scount, /* copied function (with appropriate renaming) starts here */ /* - * gather_intra + * gather_intra * - * Function: - basic gather operation - * Accepts: - same arguments as MPI_Gather() - * Returns: - MPI_SUCCESS or error code + * Function: - basic gather operation + * Accepts: - same arguments as MPI_Gather() + * Returns: - MPI_SUCCESS or error code */ -int -smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, - MPI_Datatype sdtype, - void *rbuf, int rcount, - MPI_Datatype rdtype, - int root, - MPI_Comm comm) +int Coll_gather_ompi_basic_linear::gather(void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount, + MPI_Datatype rdtype, int root, MPI_Comm comm) { int i; int err; @@ -374,8 +368,7 @@ smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, rank = comm->rank(); /* Everyone but root sends data and returns. */ - XBT_DEBUG( - "ompi_coll_tuned_gather_intra_basic_linear rank %d", rank); + XBT_DEBUG("ompi_coll_tuned_gather_intra_basic_linear rank %d", rank); if (rank != root) { Request::send(sbuf, scount, sdtype, root, @@ -411,3 +404,6 @@ smpi_coll_tuned_gather_ompi_basic_linear(void *sbuf, int scount, return MPI_SUCCESS; } + +} +}