X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/ed0097ccbbed6f0fa5efe5fc563d7d4ea5a617b3..36fa571a13985879dc627c70ecc2340af606aa42:/src/smpi/smpi_rma.cpp diff --git a/src/smpi/smpi_rma.cpp b/src/smpi/smpi_rma.cpp index c15a1aa5d6..30278c8acc 100644 --- a/src/smpi/smpi_rma.cpp +++ b/src/smpi/smpi_rma.cpp @@ -1,4 +1,3 @@ - /* Copyright (c) 2007-2015. The SimGrid Team. * All rights reserved. */ @@ -10,10 +9,6 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_rma, smpi, "Logging specific to SMPI (RMA operations)"); -#define RMA_TAG -1234 - -xbt_bar_t creation_bar = nullptr; - typedef struct s_smpi_mpi_win{ void* base; MPI_Aint size; @@ -22,11 +17,13 @@ typedef struct s_smpi_mpi_win{ MPI_Info info; int assert; std::vector *requests; - xbt_bar_t bar; + xbt_mutex_t mut; + msg_bar_t bar; MPI_Win* connected_wins; char* name; int opened; MPI_Group group; + int count; //for ordering the accs } s_smpi_mpi_win_t; @@ -50,16 +47,17 @@ MPI_Win smpi_mpi_win_create( void *base, MPI_Aint size, int disp_unit, MPI_Info win->opened = 0; win->group = MPI_GROUP_NULL; win->requests = new std::vector(); + win->mut=xbt_mutex_init(); win->connected_wins = xbt_new0(MPI_Win, comm_size); win->connected_wins[rank] = win; - + win->count = 0; if(rank==0){ - win->bar=xbt_barrier_init(comm_size); + win->bar = MSG_barrier_init(comm_size); } mpi_coll_allgather_fun(&(win->connected_wins[rank]), sizeof(MPI_Win), MPI_BYTE, win->connected_wins, sizeof(MPI_Win), MPI_BYTE, comm); - mpi_coll_bcast_fun( &(win->bar), sizeof(xbt_bar_t), MPI_BYTE, 0, comm); + mpi_coll_bcast_fun(&(win->bar), sizeof(msg_bar_t), MPI_BYTE, 0, comm); mpi_coll_barrier_fun(comm); @@ -68,8 +66,10 @@ MPI_Win smpi_mpi_win_create( void *base, MPI_Aint size, int disp_unit, MPI_Info int smpi_mpi_win_free( MPI_Win* win){ //As per the standard, perform a barrier to ensure every async comm is finished - xbt_barrier_wait((*win)->bar); + MSG_barrier_wait((*win)->bar); + xbt_mutex_acquire((*win)->mut); delete (*win)->requests; + xbt_mutex_release((*win)->mut); xbt_free((*win)->connected_wins); if ((*win)->name != nullptr){ xbt_free((*win)->name); @@ -81,7 +81,8 @@ int smpi_mpi_win_free( MPI_Win* win){ mpi_coll_barrier_fun((*win)->comm); int rank=smpi_comm_rank((*win)->comm); if(rank == 0) - xbt_barrier_destroy((*win)->bar); + MSG_barrier_destroy((*win)->bar); + xbt_mutex_destroy((*win)->mut); xbt_free(*win); *win = MPI_WIN_NULL; return MPI_SUCCESS; @@ -100,6 +101,8 @@ void smpi_mpi_win_get_name(MPI_Win win, char* name, int* length){ void smpi_mpi_win_get_group(MPI_Win win, MPI_Group* group){ if(win->comm != MPI_COMM_NULL){ *group = smpi_comm_group(win->comm); + } else { + *group = MPI_GROUP_NULL; } } @@ -107,28 +110,39 @@ void smpi_mpi_win_set_name(MPI_Win win, char* name){ win->name = xbt_strdup(name); } -int smpi_mpi_win_fence( int assert, MPI_Win win){ +int smpi_mpi_win_fence(int assert, MPI_Win win) +{ XBT_DEBUG("Entering fence"); - if(win->opened==0) + if (win->opened == 0) win->opened=1; - if(assert != MPI_MODE_NOPRECEDE){ - xbt_barrier_wait(win->bar); - + if (assert != MPI_MODE_NOPRECEDE) { + // This is not the first fence => finalize what came before + MSG_barrier_wait(win->bar); + xbt_mutex_acquire(win->mut); + // This (simulated) mutex ensures that no process pushes to the vector of requests during the waitall. + // Without this, the vector could get redimensionned when another process pushes. + // This would result in the array used by smpi_mpi_waitall() to be invalidated. + // Another solution would be to copy the data and cleanup the vector *before* smpi_mpi_waitall std::vector *reqs = win->requests; int size = static_cast(reqs->size()); // start all requests that have been prepared by another process - for(auto req: *reqs){ - if (req->flags & PREPARED) - smpi_mpi_start(req); - } + if (size > 0) { + for (const auto& req : *reqs) { + if (req && (req->flags & PREPARED)) + smpi_mpi_start(req); + } + + MPI_Request* treqs = &(*reqs)[0]; - MPI_Request* treqs = &(*reqs)[0]; - smpi_mpi_waitall(size,treqs,MPI_STATUSES_IGNORE); + smpi_mpi_waitall(size, treqs, MPI_STATUSES_IGNORE); + } + win->count=0; + xbt_mutex_release(win->mut); } win->assert = assert; - xbt_barrier_wait(win->bar); - XBT_DEBUG("Leaving fence "); + MSG_barrier_wait(win->bar); + XBT_DEBUG("Leaving fence"); return MPI_SUCCESS; } @@ -147,20 +161,23 @@ int smpi_mpi_put( void *origin_addr, int origin_count, MPI_Datatype origin_datat if(target_rank != smpi_comm_rank(win->comm)){ //prepare send_request MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, smpi_process_index(), - smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, win->comm, MPI_OP_NULL); + smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+1, win->comm, MPI_OP_NULL); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, smpi_process_index(), - smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, recv_win->comm, MPI_OP_NULL); + smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+1, recv_win->comm, MPI_OP_NULL); //push request to receiver's win + xbt_mutex_acquire(recv_win->mut); recv_win->requests->push_back(rreq); - + xbt_mutex_release(recv_win->mut); //start send smpi_mpi_start(sreq); //push request to sender's win + xbt_mutex_acquire(win->mut); win->requests->push_back(sreq); + xbt_mutex_release(win->mut); }else{ smpi_datatype_copy(origin_addr, origin_count, origin_datatype, recv_addr, target_count, target_datatype); } @@ -182,25 +199,27 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat if(target_rank != smpi_comm_rank(win->comm)){ //prepare send_request MPI_Request sreq = smpi_rma_send_init(send_addr, target_count, target_datatype, - smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, send_win->comm, + smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), SMPI_RMA_TAG+2, send_win->comm, MPI_OP_NULL); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(origin_addr, origin_count, origin_datatype, - smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, win->comm, + smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), SMPI_RMA_TAG+2, win->comm, MPI_OP_NULL); //start the send, with another process than us as sender. smpi_mpi_start(sreq); - //push request to receiver's win + xbt_mutex_acquire(send_win->mut); send_win->requests->push_back(sreq); + xbt_mutex_release(send_win->mut); //start recv smpi_mpi_start(rreq); - //push request to sender's win + xbt_mutex_acquire(win->mut); win->requests->push_back(rreq); + xbt_mutex_release(win->mut); }else{ smpi_datatype_copy(send_addr, target_count, target_datatype, origin_addr, origin_count, origin_datatype); } @@ -220,21 +239,27 @@ int smpi_mpi_accumulate( void *origin_addr, int origin_count, MPI_Datatype origi void* recv_addr = static_cast(static_cast(recv_win->base) + target_disp * recv_win->disp_unit); XBT_DEBUG("Entering MPI_Accumulate to %d", target_rank); - + //As the tag will be used for ordering of the operations, add count to it //prepare send_request MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, win->comm, op); + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+3+win->count, win->comm, op); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, recv_win->comm, op); + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+3+win->count, recv_win->comm, op); + + win->count++; //push request to receiver's win + xbt_mutex_acquire(recv_win->mut); recv_win->requests->push_back(rreq); + xbt_mutex_release(recv_win->mut); //start send smpi_mpi_start(sreq); //push request to sender's win + xbt_mutex_acquire(win->mut); win->requests->push_back(sreq); + xbt_mutex_release(win->mut); return MPI_SUCCESS; } @@ -259,8 +284,8 @@ int smpi_mpi_win_start(MPI_Group group, int assert, MPI_Win win){ while(j!=size){ int src=smpi_group_index(group,j); - if(src!=smpi_process_index()){ - reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,RMA_TAG+4, MPI_COMM_WORLD); + if(src!=smpi_process_index()&& src!=MPI_UNDEFINED){ + reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,SMPI_RMA_TAG+4, MPI_COMM_WORLD); i++; } j++; @@ -286,8 +311,8 @@ int smpi_mpi_win_post(MPI_Group group, int assert, MPI_Win win){ while(j!=size){ int dst=smpi_group_index(group,j); - if(dst!=smpi_process_index()){ - reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, RMA_TAG+4, MPI_COMM_WORLD); + if(dst!=smpi_process_index() && dst!=MPI_UNDEFINED){ + reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+4, MPI_COMM_WORLD); i++; } j++; @@ -317,8 +342,8 @@ int smpi_mpi_win_complete(MPI_Win win){ while(j!=size){ int dst=smpi_group_index(win->group,j); - if(dst!=smpi_process_index()){ - reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, RMA_TAG+5, MPI_COMM_WORLD); + if(dst!=smpi_process_index() && dst!=MPI_UNDEFINED){ + reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+5, MPI_COMM_WORLD); i++; } j++; @@ -334,20 +359,24 @@ int smpi_mpi_win_complete(MPI_Win win){ xbt_free(reqs); //now we can finish RMA calls - + xbt_mutex_acquire(win->mut); std::vector *reqqs = win->requests; size = static_cast(reqqs->size()); XBT_DEBUG("Win_complete - Finishing %d RMA calls", size); - // start all requests that have been prepared by another process - for (auto req: *reqqs){ - if (req->flags & PREPARED) - smpi_mpi_start(req); + if (size > 0) { + // start all requests that have been prepared by another process + for (const auto& req : *reqqs) { + if (req && (req->flags & PREPARED)) + smpi_mpi_start(req); + } + + MPI_Request* treqs = &(*reqqs)[0]; + smpi_mpi_waitall(size, treqs, MPI_STATUSES_IGNORE); + reqqs->clear(); } + xbt_mutex_release(win->mut); - MPI_Request* treqs = &(*reqqs)[0]; - smpi_mpi_waitall(size,treqs,MPI_STATUSES_IGNORE); - delete reqqs; smpi_group_unuse(win->group); win->opened--; //we're closed for business ! return MPI_SUCCESS; @@ -362,8 +391,8 @@ int smpi_mpi_win_wait(MPI_Win win){ while(j!=size){ int src=smpi_group_index(win->group,j); - if(src!=smpi_process_index()){ - reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,RMA_TAG+5, MPI_COMM_WORLD); + if(src!=smpi_process_index() && src!=MPI_UNDEFINED){ + reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,SMPI_RMA_TAG+5, MPI_COMM_WORLD); i++; } j++; @@ -376,21 +405,24 @@ int smpi_mpi_win_wait(MPI_Win win){ smpi_mpi_request_free(&reqs[i]); } xbt_free(reqs); - + xbt_mutex_acquire(win->mut); std::vector *reqqs = win->requests; size = static_cast(reqqs->size()); - XBT_DEBUG("Win_complete - Finishing %d RMA calls", size); + XBT_DEBUG("Win_wait - Finishing %d RMA calls", size); + if (size > 0) { + // start all requests that have been prepared by another process + for (const auto& req : *reqqs) { + if (req && (req->flags & PREPARED)) + smpi_mpi_start(req); + } - // start all requests that have been prepared by another process - for(auto req: *reqqs){ - if (req->flags & PREPARED) - smpi_mpi_start(req); + MPI_Request* treqs = &(*reqqs)[0]; + smpi_mpi_waitall(size, treqs, MPI_STATUSES_IGNORE); + reqqs->clear(); } + xbt_mutex_release(win->mut); - MPI_Request* treqs = &(*reqqs)[0]; - smpi_mpi_waitall(size,treqs,MPI_STATUSES_IGNORE); - delete reqqs; smpi_group_unuse(win->group); win->opened--; //we're opened for business ! return MPI_SUCCESS;