-#include "colls.h"
+/* Copyright (c) 2013-2014. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "colls_private.h"
/* IMPLEMENTED BY PITCH PATARASUK
Non-topology-specific all-reduce operation designed to be bandwidth optimal
smpi_coll_tuned_allreduce_lr(void *sbuf, void *rbuf, int rcount,
MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
{
- int tag = 5000;
+ int tag = COLL_TAG_ALLREDUCE;
MPI_Status status;
int rank, i, size, count;
int send_offset, recv_offset;
int remainder, remainder_flag, remainder_offset;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_size(MPI_COMM_WORLD, &size);
+ rank = smpi_comm_rank(comm);
+ size = smpi_comm_size(comm);
/* make it compatible with all data type */
MPI_Aint extent;
- MPI_Type_extent(dtype, &extent);
+ extent = smpi_datatype_get_extent(dtype);
/* when the communication size is smaller than the number of processes (not supported) */
if (rcount < size) {
- return MPI_Allreduce(sbuf, rbuf, rcount, dtype, op, comm);
+ XBT_WARN("MPI_allreduce_lr use default MPI_allreduce.");
+ smpi_mpi_allreduce(sbuf, rbuf, rcount, dtype, op, comm);
+ return MPI_SUCCESS;
}
/* when the communication size is not divisible by the number of processes:
call the native implementation for the remaining chunk at the end of the operation */
- else if (rcount % size != 0) {
+ if (rcount % size != 0) {
remainder = rcount % size;
remainder_flag = 1;
remainder_offset = (rcount / size) * size * extent;
} else {
- remainder_flag = remainder_offset = 0;
+ remainder = remainder_flag = remainder_offset = 0;
}
/* size of each point-to-point communication is equal to the size of the whole message
// copy partial data
send_offset = ((rank - 1 + size) % size) * count * extent;
recv_offset = ((rank - 1 + size) % size) * count * extent;
- MPI_Sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1,
+ smpi_mpi_sendrecv((char *) sbuf + send_offset, count, dtype, rank, tag - 1,
(char *) rbuf + recv_offset, count, dtype, rank, tag - 1, comm,
&status);
send_offset = ((rank - 1 - i + 2 * size) % size) * count * extent;
recv_offset = ((rank - 2 - i + 2 * size) % size) * count * extent;
// recv_offset = ((rank-i+2*size)%size)*count*extent;
- MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
tag + i, (char *) rbuf + recv_offset, count, dtype,
((rank + size - 1) % size), tag + i, comm, &status);
// compute result to rbuf+recv_offset
- star_reduction(op, (char *) sbuf + recv_offset, (char *) rbuf + recv_offset,
+ smpi_op_apply(op, (char *) sbuf + recv_offset, (char *) rbuf + recv_offset,
&count, &dtype);
}
for (i = 0; i < (size - 1); i++) {
send_offset = ((rank - i + 2 * size) % size) * count * extent;
recv_offset = ((rank - 1 - i + 2 * size) % size) * count * extent;
- MPI_Sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
+ smpi_mpi_sendrecv((char *) rbuf + send_offset, count, dtype, ((rank + 1) % size),
tag + i, (char *) rbuf + recv_offset, count, dtype,
((rank + size - 1) % size), tag + i, comm, &status);
}
/* when the communication size is not divisible by the number of processes:
call the native implementation for the remaining chunk at the end of the operation */
if (remainder_flag) {
- return MPI_Allreduce((char *) sbuf + remainder_offset,
+ return mpi_coll_allreduce_fun((char *) sbuf + remainder_offset,
(char *) rbuf + remainder_offset, remainder, dtype, op,
comm);
}