X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/a2f1b23687f04169144f4ffb4f20dc4fc5c28395..cb05f84d25f0add3d49c67083991f650bc6c28b1:/src/smpi/colls/allreduce-rab1.c diff --git a/src/smpi/colls/allreduce-rab1.c b/src/smpi/colls/allreduce-rab1.c index 0dc64c15f5..66862da91b 100644 --- a/src/smpi/colls/allreduce-rab1.c +++ b/src/smpi/colls/allreduce-rab1.c @@ -1,4 +1,4 @@ -#include "colls.h" +#include "colls_private.h" //#include // NP pow of 2 for now @@ -8,15 +8,15 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, { MPI_Status status; MPI_Aint extent; - int tag = 4321, rank, nprocs, send_size, newcnt, share; + int tag = COLL_TAG_ALLREDUCE, rank, nprocs, send_size, newcnt, share; int pof2 = 1, mask, send_idx, recv_idx, dst, send_cnt, recv_cnt; void *recv, *tmp_buf; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &nprocs); + rank = smpi_comm_rank(comm); + nprocs = smpi_comm_size(comm); - MPI_Type_extent(dtype, &extent); + extent = smpi_datatype_get_extent(dtype); pof2 = 1; while (pof2 <= nprocs) @@ -31,8 +31,8 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, send_size = (count + nprocs) / nprocs; newcnt = send_size * nprocs; - recv = (void *) malloc(extent * newcnt); - tmp_buf = (void *) malloc(extent * newcnt); + recv = (void *) xbt_malloc(extent * newcnt); + tmp_buf = (void *) xbt_malloc(extent * newcnt); memcpy(recv, sbuff, extent * count); @@ -47,10 +47,10 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, else recv_idx = send_idx + (mask * share); - MPI_Sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag, + smpi_mpi_sendrecv((char *) recv + send_idx * extent, send_cnt, dtype, dst, tag, tmp_buf, recv_cnt, dtype, dst, tag, comm, &status); - star_reduction(op, tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt, + smpi_op_apply(op, tmp_buf, (char *) recv + recv_idx * extent, &recv_cnt, &dtype); // update send_idx for next iteration @@ -59,7 +59,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) recv + recv_idx * extent, recv_cnt * extent); - MPI_Allgather(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); + mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, recv, recv_cnt, dtype, comm); memcpy(rbuff, recv, count * extent); free(recv); @@ -68,7 +68,7 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } else { - tmp_buf = (void *) malloc(extent * count); + tmp_buf = (void *) xbt_malloc(extent * count); memcpy(rbuff, sbuff, count * extent); mask = pof2 / 2; share = count / pof2; @@ -81,10 +81,10 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, else recv_idx = send_idx + (mask * share); - MPI_Sendrecv((char *) rbuff + send_idx * extent, send_cnt, dtype, dst, + smpi_mpi_sendrecv((char *) rbuff + send_idx * extent, send_cnt, dtype, dst, tag, tmp_buf, recv_cnt, dtype, dst, tag, comm, &status); - star_reduction(op, tmp_buf, (char *) rbuff + recv_idx * extent, &recv_cnt, + smpi_op_apply(op, tmp_buf, (char *) rbuff + recv_idx * extent, &recv_cnt, &dtype); // update send_idx for next iteration @@ -93,9 +93,9 @@ int smpi_coll_tuned_allreduce_rab1(void *sbuff, void *rbuff, } memcpy(tmp_buf, (char *) rbuff + recv_idx * extent, recv_cnt * extent); - MPI_Allgather(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); + mpi_coll_allgather_fun(tmp_buf, recv_cnt, dtype, rbuff, recv_cnt, dtype, comm); free(tmp_buf); } - return 0; + return MPI_SUCCESS; }