From 9c9f17b4ac25fe6fcf026724a7d3cc4978b775ed Mon Sep 17 00:00:00 2001 From: degomme Date: Tue, 21 Mar 2017 13:28:13 +0100 Subject: [PATCH] Fix stupid tag collision bug. RMA tag is used to order consecutive MPI_Accumulate calls. The 454th message's tag was causing a collision with the BARRIER tag, causing the wrong message to be used. So switch to a decrement instead, and start from a value below all of the ones we use (users can't use <0 values, it's only for internals) --- src/smpi/private.h | 3 ++- src/smpi/smpi_global.cpp | 1 + src/smpi/smpi_op.cpp | 1 + src/smpi/smpi_request.cpp | 2 +- src/smpi/smpi_win.cpp | 7 ++++--- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/smpi/private.h b/src/smpi/private.h index 8d23af3f4d..2ddc806da3 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -62,7 +62,8 @@ enum smpi_process_state{ #define COLL_TAG_GATHERV -2223 #define COLL_TAG_BCAST -3334 #define COLL_TAG_ALLREDUCE -4445 -#define SMPI_RMA_TAG -1234 +//SMPI_RMA_TAG has to be the smallest one, as it will be decremented for accumulate ordering. +#define SMPI_RMA_TAG -6666 extern XBT_PRIVATE MPI_Comm MPI_COMM_UNINITIALIZED; diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index 8459d4e9ee..db497bfdbf 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -127,6 +127,7 @@ void smpi_comm_copy_buffer_callback(smx_activity_t synchro, void *buff, size_t b (static_cast((static_cast(comm->dst_proc->data)->data))->index())); } + XBT_DEBUG("Copying %zu bytes from %p to %p", buff_size, tmpbuff,comm->dst_buff); memcpy(comm->dst_buff, tmpbuff, buff_size); if (comm->detached) { // if this is a detached send, the source buffer was duplicated by SMPI diff --git a/src/smpi/smpi_op.cpp b/src/smpi/smpi_op.cpp index 22ac499b53..96dd94142a 100644 --- a/src/smpi/smpi_op.cpp +++ b/src/smpi/smpi_op.cpp @@ -241,6 +241,7 @@ void Op::apply(void *invec, void *inoutvec, int *len, MPI_Datatype datatype) if(! 
is_fortran_op_) this->func_(invec, inoutvec, len, &datatype); else{ + XBT_DEBUG("Applying operation of length %d from %p and from/to %p", *len, invec, inoutvec); int tmp = datatype->c2f(); /* Unfortunately, the C and Fortran version of the MPI standard do not agree on the type here, thus the reinterpret_cast. */ diff --git a/src/smpi/smpi_request.cpp b/src/smpi/smpi_request.cpp index 397a8cc640..aa1bf22d07 100644 --- a/src/smpi/smpi_request.cpp +++ b/src/smpi/smpi_request.cpp @@ -876,7 +876,7 @@ int Request::waitany(int count, MPI_Request requests[], MPI_Status * status) static int sort_accumulates(MPI_Request a, MPI_Request b) { - return (a->tag() < b->tag()); + return (a->tag() > b->tag()); } int Request::waitall(int count, MPI_Request requests[], MPI_Status status[]) diff --git a/src/smpi/smpi_win.cpp b/src/smpi/smpi_win.cpp index b522dcf06b..5f819980b1 100644 --- a/src/smpi/smpi_win.cpp +++ b/src/smpi/smpi_win.cpp @@ -273,14 +273,15 @@ int Win::accumulate( void *origin_addr, int origin_count, MPI_Datatype origin_da void* recv_addr = static_cast(static_cast(recv_win->base_) + target_disp * recv_win->disp_unit_); XBT_DEBUG("Entering MPI_Accumulate to %d", target_rank); - //As the tag will be used for ordering of the operations, add count to it + //As the tag will be used for ordering of the operations, subtract count from it (to avoid collisions with other SMPI tags, SMPI_RMA_TAG is set below all the other ones we use) //prepare send_request + MPI_Request sreq = Request::rma_send_init(origin_addr, origin_count, origin_datatype, - smpi_process()->index(), comm_->group()->index(target_rank), SMPI_RMA_TAG+3+count_, comm_, op); + smpi_process()->index(), comm_->group()->index(target_rank), SMPI_RMA_TAG-3-count_, comm_, op); //prepare receiver request MPI_Request rreq = Request::rma_recv_init(recv_addr, target_count, target_datatype, - smpi_process()->index(), comm_->group()->index(target_rank), SMPI_RMA_TAG+3+count_, recv_win->comm_, op); + smpi_process()->index(), 
comm_->group()->index(target_rank), SMPI_RMA_TAG-3-count_, recv_win->comm_, op); count_++; //push request to receiver's win -- 2.20.1