X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/1d18e615eaa617d3354bc22dfbe711d34be9f902..70a1c67dc21179a44b0b317a1ea4823b63b7c666:/src/smpi/colls/allreduce-rab1.c diff --git a/src/smpi/colls/allreduce-rab1.c b/src/smpi/colls/allreduce-rab1.c index ae72f7491a..0a04e54b38 100644 --- a/src/smpi/colls/allreduce-rab1.c +++ b/src/smpi/colls/allreduce-rab1.c @@ -1,3 +1,9 @@ +/* Copyright (c) 2013-2014. The SimGrid Team. + * All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + #include "colls_private.h" //#include @@ -8,7 +14,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, { MPI_Status status; MPI_Aint extent; - int tag = 4321, rank, nprocs, send_size, newcnt, share; + int tag = COLL_TAG_ALLREDUCE, rank, nprocs, send_size, newcnt, share; int pof2 = 1, mask, send_idx, recv_idx, dst, send_cnt, recv_cnt; void *recv, *tmp_buf; @@ -16,6 +22,9 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, rank = smpi_comm_rank(comm); nprocs = smpi_comm_size(comm); + if((nprocs&(nprocs-1))) + THROWF(arg_error,0, "allreduce rab1 algorithm can't be used with non power of two number of processes ! "); + extent = smpi_datatype_get_extent(dtype); pof2 = 1; @@ -23,7 +32,6 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, pof2 <<= 1; pof2 >>= 1; - mask = 1; send_idx = recv_idx = 0; // uneven count @@ -31,8 +39,8 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; newcnt = send_size * nprocs; - recv = (void *) xbt_malloc(extent * newcnt); - tmp_buf = (void *) xbt_malloc(extent * newcnt); + recv = (void *) smpi_get_tmp_recvbuffer(extent * newcnt); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * newcnt); memcpy(recv, sbuff, extent * count); @@ -62,13 +70,13 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); - free(recv); - free(tmp_buf); + smpi_free_tmp_buffer(recv); + smpi_free_tmp_buffer(tmp_buf); } else { - tmp_buf = (void *) xbt_malloc(extent * count); + tmp_buf = (void *) smpi_get_tmp_sendbuffer(extent * count); memcpy(rbuff, sbuff, count * extent); mask = pof2 / 2; share = count / pof2; @@ -94,7 +102,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent); mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); - free(tmp_buf); + smpi_free_tmp_buffer(tmp_buf); } return MPI_SUCCESS;