X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/a2f1b23687f04169144f4ffb4f20dc4fc5c28395..ec3e4ee5f1a7ffeb96e044057809944f364014e6:/src/smpi/colls/allreduce-lr.c diff --git a/src/smpi/colls/allreduce-lr.c b/src/smpi/colls/allreduce-lr.c index d4bf82aae8..9bcf75c1fb 100644 --- a/src/smpi/colls/allreduce-lr.c +++ b/src/smpi/colls/allreduce-lr.c @@ -1,4 +1,10 @@ -#include "colls.h" +/* Copyright (c) 2013-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +#include "colls_private.h" /* IMPLEMENTED BY PITCH PATARASUK Non-topoloty-specific all-reduce operation designed bandwidth optimally @@ -17,32 +23,34 @@ int smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm) { - int tag = 5000; + int tag = COLL_TAG_ALLREDUCE; MPI_Status status; int rank, i, size, count; int send_offset, recv_offset; int remainder, remainder_flag, remainder_offset; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = smpi_comm_rank(comm); + size = smpi_comm_size(comm); /* make it compatible with all data type */ MPI_Aint extent; - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); /* when communication size is smaller than number of process (not support) */ if (rcount < size) { - return MPI_Allreduce(sbuf, rbuf, rcount, dtype, op, comm); + XBT_WARN("MPI_allreduce_lr use default MPI_allreduce."); + smpi_mpi_allreduce(sbuf, rbuf, rcount, dtype, op, comm); + return MPI_SUCCESS; } /* when communication size is not divisible by number of process: call the native implementation for the remain chunk at the end of the operation */ - else if (rcount % size != 0) { + if (rcount % size != 0) { remainder = rcount % size; remainder_flag = 1; remainder_offset = (rcount / size) * size * extent; } else { - remainder_flag = remainder_offset = 0; + remainder = remainder_flag = remainder_offset = 0; } /* size of each point-to-point communication is equal to the size of the whole message @@ -59,7 +67,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, // copy partial data send_offset = ((rank - 1 + size) % size) * count * extent; recv_offset = ((rank - 1 + size) % size) * count * extent; - MPI_Sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1, + smpi_mpi_sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1, (char *) rbuf + recv_offset, count, dtype, rank, tag - 1, comm, &status); @@ -68,12 +76,12 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, send_offset = ((rank - 1 - i + 2 * size) % size) * count * extent; recv_offset = ((rank - 2 - i + 2 * size) % size) * count * extent; // recv_offset = ((rank-i+2*size)%size)*count*extent; - MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), + smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), tag + i, (char *) rbuf + recv_offset, count, dtype, ((rank + size - 1) % size), tag + i, comm, &status); // compute result to rbuf+recv_offset - star_reduction(op, (char *) sbuf + recv_offset, (char *) rbuf + recv_offset, + smpi_op_apply(op, (char *) sbuf + recv_offset, (char *) rbuf + recv_offset, &count, &dtype); } @@ -81,7 +89,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, for (i = 0; i < (size - 1); i++) { send_offset = ((rank - i + 2 * size) % size) * count * extent; recv_offset = ((rank - 1 - i + 2 * size) % size) * count * extent; - MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), + smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size), tag + i, (char *) rbuf + recv_offset, count, dtype, ((rank + size - 1) % size), tag + i, comm, &status); } @@ -89,7 +97,7 @@ smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount, /* when communication size is not divisible by number of process: call the native implementation for the remain chunk at the end of the operation */ if (remainder_flag) { - return MPI_Allreduce((char *) sbuf + remainder_offset, + return mpi_coll_allreduce_fun((char *) sbuf + remainder_offset, (char *) rbuf + remainder_offset, remainder, dtype, op, comm); }