From: degomme Date: Thu, 3 Nov 2016 11:01:15 +0000 (+0100) Subject: SMPI tracing : match links with sender/receiver + tag, as in MPI, as messages could... X-Git-Tag: v3_14~240 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/973a7c0c1df933e59baa7a039e51c8741d837b9d SMPI tracing : match links with sender/receiver + tag, as in MPI, as messages could be mixed up --- diff --git a/src/smpi/instr_smpi.cpp b/src/smpi/instr_smpi.cpp index 73ee64b77a..0fe4dc842e 100644 --- a/src/smpi/instr_smpi.cpp +++ b/src/smpi/instr_smpi.cpp @@ -93,13 +93,13 @@ XBT_PRIVATE char *smpi_container(int rank, char *container, int n) return container; } -static char *TRACE_smpi_get_key(int src, int dst, char *key, int n, int send); +static char *TRACE_smpi_get_key(int src, int dst, int tag, char *key, int n, int send); -static char *TRACE_smpi_put_key(int src, int dst, char *key, int n, int send) +static char *TRACE_smpi_put_key(int src, int dst, int tag, char *key, int n, int send) { //get the dynar for src#dst char aux[INSTR_DEFAULT_STR_SIZE]; - snprintf(aux, INSTR_DEFAULT_STR_SIZE, "%d#%d#%d", src, dst, send); + snprintf(aux, INSTR_DEFAULT_STR_SIZE, "%d#%d#%d#%d", src, dst, tag, send); xbt_dynar_t d = static_cast<xbt_dynar_t>(xbt_dict_get_or_null(keys, aux)); if (d == nullptr) { @@ -110,7 +110,7 @@ static char *TRACE_smpi_put_key(int src, int dst, char *key, int n, int send) //generate the key static unsigned long long counter = 0; counter++; - snprintf(key, n, "%d_%d_%llu", src, dst, counter); + snprintf(key, n, "%d_%d_%d_%llu", src, dst, tag, counter); //push it char *a = static_cast<char*> (xbt_strdup(key)); @@ -119,15 +119,15 @@ static char *TRACE_smpi_put_key(int src, int dst, char *key, int n, int send) return key; } -static char *TRACE_smpi_get_key(int src, int dst, char *key, int n, int send) +static char *TRACE_smpi_get_key(int src, int dst, int tag, char *key, int n, int send) { char aux[INSTR_DEFAULT_STR_SIZE]; - snprintf(aux, INSTR_DEFAULT_STR_SIZE, "%d#%d#%d", 
src, dst, send==1?0:1); + snprintf(aux, INSTR_DEFAULT_STR_SIZE, "%d#%d#%d#%d", src, dst, tag, send==1?0:1); xbt_dynar_t d = static_cast<xbt_dynar_t>(xbt_dict_get_or_null(keys, aux)); // first posted if(xbt_dynar_is_empty(d)){ - TRACE_smpi_put_key(src, dst, key, n, send); + TRACE_smpi_put_key(src, dst, tag, key, n, send); return key; } @@ -402,34 +402,34 @@ void TRACE_smpi_ptp_out(int rank, int src, int dst, const char *operation) new_pajePopState (SIMIX_get_clock(), container, type); } -void TRACE_smpi_send(int rank, int src, int dst, int size) +void TRACE_smpi_send(int rank, int src, int dst, int tag, int size) { if (!TRACE_smpi_is_enabled()) return; char key[INSTR_DEFAULT_STR_SIZE] = {0}; - TRACE_smpi_get_key(src, dst, key, INSTR_DEFAULT_STR_SIZE,1); + TRACE_smpi_get_key(src, dst, tag, key, INSTR_DEFAULT_STR_SIZE,1); char str[INSTR_DEFAULT_STR_SIZE]; smpi_container(src, str, INSTR_DEFAULT_STR_SIZE); container_t container = PJ_container_get (str); type_t type = PJ_type_get ("MPI_LINK", PJ_type_get_root()); - XBT_DEBUG("Send tracing from %d to %d, with key %s", src, dst, key); + XBT_DEBUG("Send tracing from %d to %d, tag %d, with key %s", src, dst, tag, key); new_pajeStartLinkWithSize (SIMIX_get_clock(), PJ_container_get_root(), type, container, "PTP", key, size); } -void TRACE_smpi_recv(int rank, int src, int dst) +void TRACE_smpi_recv(int rank, int src, int dst, int tag) { if (!TRACE_smpi_is_enabled()) return; char key[INSTR_DEFAULT_STR_SIZE] = {0}; - TRACE_smpi_get_key(src, dst, key, INSTR_DEFAULT_STR_SIZE,0); + TRACE_smpi_get_key(src, dst, tag, key, INSTR_DEFAULT_STR_SIZE,0); char str[INSTR_DEFAULT_STR_SIZE]; smpi_container(dst, str, INSTR_DEFAULT_STR_SIZE); container_t container = PJ_container_get (str); type_t type = PJ_type_get ("MPI_LINK", PJ_type_get_root()); - XBT_DEBUG("Recv tracing from %d to %d, with key %s", src, dst, key); + XBT_DEBUG("Recv tracing from %d to %d, tag %d, with key %s", src, dst, tag, key); new_pajeEndLink (SIMIX_get_clock(), 
PJ_container_get_root(), type, container, "PTP", key); } diff --git a/src/smpi/private.h b/src/smpi/private.h index 5ad242b8c2..ac7ec2e671 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -77,6 +77,7 @@ typedef struct s_smpi_mpi_datatype{ #define COLL_TAG_GATHERV -2223 #define COLL_TAG_BCAST -3334 #define COLL_TAG_ALLREDUCE -4445 +#define SMPI_RMA_TAG -1234 #define MPI_COMM_UNINITIALIZED ((MPI_Comm)-1) @@ -703,8 +704,8 @@ XBT_PRIVATE void TRACE_smpi_alloc(); XBT_PRIVATE void TRACE_smpi_release(); XBT_PRIVATE void TRACE_smpi_ptp_in(int rank, int src, int dst, const char *operation, instr_extra_data extra); XBT_PRIVATE void TRACE_smpi_ptp_out(int rank, int src, int dst, const char *operation); -XBT_PRIVATE void TRACE_smpi_send(int rank, int src, int dst, int size); -XBT_PRIVATE void TRACE_smpi_recv(int rank, int src, int dst); +XBT_PRIVATE void TRACE_smpi_send(int rank, int src, int dst, int tag, int size); +XBT_PRIVATE void TRACE_smpi_recv(int rank, int src, int dst, int tag); XBT_PRIVATE void TRACE_smpi_init(int rank); XBT_PRIVATE void TRACE_smpi_finalize(int rank); XBT_PRIVATE char *smpi_container(int rank, char *container, int n); diff --git a/src/smpi/smpi_base.cpp b/src/smpi/smpi_base.cpp index 4960bd4089..453d854592 100644 --- a/src/smpi/smpi_base.cpp +++ b/src/smpi/smpi_base.cpp @@ -343,7 +343,7 @@ void smpi_mpi_start(MPI_Request request) int rank = request->src; if (TRACE_smpi_view_internals()) { - TRACE_smpi_send(rank, rank, receiver,request->size); + TRACE_smpi_send(rank, rank, receiver, request->tag, request->size); } print_request("New send", request); @@ -645,7 +645,7 @@ static void finish_wait(MPI_Request * request, MPI_Status * status) if (TRACE_smpi_view_internals() && ((req->flags & RECV) != 0)){ int rank = smpi_process_index(); int src_traced = (req->src == MPI_ANY_SOURCE ? 
req->real_src : req->src); - TRACE_smpi_recv(rank, src_traced, rank); + TRACE_smpi_recv(rank, src_traced, rank,req->tag); } if(req->detached_sender != nullptr){ diff --git a/src/smpi/smpi_pmpi.cpp b/src/smpi/smpi_pmpi.cpp index 704aaad589..5c614b9aa6 100644 --- a/src/smpi/smpi_pmpi.cpp +++ b/src/smpi/smpi_pmpi.cpp @@ -1047,7 +1047,7 @@ int PMPI_Isend(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MP dt_size_send = smpi_datatype_size(datatype); extra->send_size = count*dt_size_send; TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); - TRACE_smpi_send(rank, rank, dst_traced, count*smpi_datatype_size(datatype)); + TRACE_smpi_send(rank, rank, dst_traced, tag, count*smpi_datatype_size(datatype)); *request = smpi_mpi_isend(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; @@ -1097,7 +1097,7 @@ int PMPI_Issend(void* buf, int count, MPI_Datatype datatype, int dst, int tag, M dt_size_send = smpi_datatype_size(datatype); extra->send_size = count*dt_size_send; TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); - TRACE_smpi_send(rank, rank, dst_traced, count*smpi_datatype_size(datatype)); + TRACE_smpi_send(rank, rank, dst_traced, tag, count*smpi_datatype_size(datatype)); *request = smpi_mpi_issend(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; @@ -1153,7 +1153,7 @@ int PMPI_Recv(void *buf, int count, MPI_Datatype datatype, int src, int tag, MPI if(status!=MPI_STATUS_IGNORE){ src_traced = smpi_group_index(smpi_comm_group(comm), status->MPI_SOURCE); if (!TRACE_smpi_view_internals()) { - TRACE_smpi_recv(rank, src_traced, rank); + TRACE_smpi_recv(rank, src_traced, rank, tag); } } TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__); @@ -1197,7 +1197,7 @@ int PMPI_Send(void *buf, int count, MPI_Datatype datatype, int dst, int tag, MPI extra->send_size = count*dt_size_send; TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); if (!TRACE_smpi_view_internals()) { - TRACE_smpi_send(rank, rank, 
dst_traced,count*smpi_datatype_size(datatype)); + TRACE_smpi_send(rank, rank, dst_traced, tag,count*smpi_datatype_size(datatype)); } smpi_mpi_send(buf, count, datatype, dst, tag, comm); @@ -1242,7 +1242,7 @@ int PMPI_Ssend(void* buf, int count, MPI_Datatype datatype, int dst, int tag, MP } extra->send_size = count*dt_size_send; TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); - TRACE_smpi_send(rank, rank, dst_traced,count*smpi_datatype_size(datatype)); + TRACE_smpi_send(rank, rank, dst_traced, tag,count*smpi_datatype_size(datatype)); smpi_mpi_ssend(buf, count, datatype, dst, tag, comm); retval = MPI_SUCCESS; @@ -1300,14 +1300,14 @@ int PMPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, int dst, extra->recv_size = recvcount*dt_size_recv; TRACE_smpi_ptp_in(rank, src_traced, dst_traced, __FUNCTION__, extra); - TRACE_smpi_send(rank, rank, dst_traced,sendcount*smpi_datatype_size(sendtype)); + TRACE_smpi_send(rank, rank, dst_traced, sendtag,sendcount*smpi_datatype_size(sendtype)); smpi_mpi_sendrecv(sendbuf, sendcount, sendtype, dst, sendtag, recvbuf, recvcount, recvtype, src, recvtag, comm, status); retval = MPI_SUCCESS; TRACE_smpi_ptp_out(rank, src_traced, dst_traced, __FUNCTION__); - TRACE_smpi_recv(rank, src_traced, rank); + TRACE_smpi_recv(rank, src_traced, rank, recvtag); } smpi_bench_begin(); @@ -1451,6 +1451,7 @@ int PMPI_Wait(MPI_Request * request, MPI_Status * status) int src_traced = (*request)->src; int dst_traced = (*request)->dst; + int tag_traced= (*request)->tag; MPI_Comm comm = (*request)->comm; int is_wait_for_receive = (*request)->recv; instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1); @@ -1467,7 +1468,7 @@ int PMPI_Wait(MPI_Request * request, MPI_Status * status) src_traced = (status!=MPI_STATUS_IGNORE) ? 
smpi_group_rank(smpi_comm_group(comm), status->MPI_SOURCE) : src_traced; - TRACE_smpi_recv(rank, src_traced, dst_traced); + TRACE_smpi_recv(rank, src_traced, dst_traced, tag_traced); } } @@ -1483,12 +1484,13 @@ int PMPI_Waitany(int count, MPI_Request requests[], int *index, MPI_Status * sta smpi_bench_end(); //save requests information for tracing int i; - int *srcs = nullptr, *dsts = nullptr, *recvs = nullptr; + int *srcs = nullptr, *dsts = nullptr, *recvs = nullptr, *tags = nullptr; MPI_Comm* comms = nullptr; if(count>0){ srcs = xbt_new0(int, count); dsts = xbt_new0(int, count); recvs = xbt_new0(int, count); + tags = xbt_new0(int, count); comms = xbt_new0(MPI_Comm, count); } for (i = 0; i < count; i++) { @@ -1497,6 +1499,7 @@ int PMPI_Waitany(int count, MPI_Request requests[], int *index, MPI_Status * sta srcs[i] = req->src; dsts[i] = req->dst; recvs[i] = req->recv; + tags[i] = req->tag; comms[i] = req->comm; } } @@ -1517,13 +1520,14 @@ int PMPI_Waitany(int count, MPI_Request requests[], int *index, MPI_Status * sta if(srcs[*index]==MPI_ANY_SOURCE) src_traced = (status!=MPI_STATUSES_IGNORE) ? 
smpi_group_rank(smpi_comm_group(comms[*index]), status->MPI_SOURCE) : srcs[*index]; - TRACE_smpi_recv(rank_traced, src_traced, dst_traced); + TRACE_smpi_recv(rank_traced, src_traced, dst_traced, tags[*index]); } TRACE_smpi_ptp_out(rank_traced, src_traced, dst_traced, __FUNCTION__); } xbt_free(srcs); xbt_free(dsts); xbt_free(recvs); + xbt_free(tags); xbt_free(comms); @@ -1539,6 +1543,7 @@ int PMPI_Waitall(int count, MPI_Request requests[], MPI_Status status[]) int *srcs = xbt_new0(int, count); int *dsts = xbt_new0(int, count); int *recvs = xbt_new0(int, count); + int *tags = xbt_new0(int, count); int *valid = xbt_new0(int, count); MPI_Comm *comms = xbt_new0(MPI_Comm, count); @@ -1548,6 +1553,7 @@ int PMPI_Waitall(int count, MPI_Request requests[], MPI_Status status[]) srcs[i] = req->src; dsts[i] = req->dst; recvs[i] = req->recv; + tags[i] = req->tag; comms[i] = req->comm; valid[i]=1;; }else{ @@ -1572,7 +1578,7 @@ int PMPI_Waitall(int count, MPI_Request requests[], MPI_Status status[]) if(src_traced==MPI_ANY_SOURCE) src_traced = (status!=MPI_STATUSES_IGNORE) ? 
smpi_group_rank(smpi_comm_group(comms[i]), status[i].MPI_SOURCE) : srcs[i]; - TRACE_smpi_recv(rank_traced, src_traced, dst_traced); + TRACE_smpi_recv(rank_traced, src_traced, dst_traced,tags[i]); } } } @@ -1581,6 +1587,7 @@ int PMPI_Waitall(int count, MPI_Request requests[], MPI_Status status[]) xbt_free(dsts); xbt_free(recvs); xbt_free(valid); + xbt_free(tags); xbt_free(comms); smpi_bench_begin(); @@ -2772,7 +2779,7 @@ int PMPI_Put( void *origin_addr, int origin_count, MPI_Datatype origin_datatype, smpi_mpi_win_get_group(win, &group); int dst_traced = smpi_group_index(group, target_rank); TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, nullptr); - TRACE_smpi_send(rank, rank, dst_traced, origin_count*smpi_datatype_size(origin_datatype)); + TRACE_smpi_send(rank, rank, dst_traced, SMPI_RMA_TAG, origin_count*smpi_datatype_size(origin_datatype)); retval = smpi_mpi_put( origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win); diff --git a/src/smpi/smpi_replay.cpp b/src/smpi/smpi_replay.cpp index 4e8afaa184..7b7a5dc300 100644 --- a/src/smpi/smpi_replay.cpp +++ b/src/smpi/smpi_replay.cpp @@ -239,7 +239,7 @@ static void action_send(const char *const *action) extra->datatype1 = encode_datatype(MPI_CURRENT_TYPE, nullptr); TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); if (!TRACE_smpi_view_internals()) { - TRACE_smpi_send(rank, rank, dst_traced, size*smpi_datatype_size(MPI_CURRENT_TYPE)); + TRACE_smpi_send(rank, rank, dst_traced, 0, size*smpi_datatype_size(MPI_CURRENT_TYPE)); } smpi_mpi_send(nullptr, size, MPI_CURRENT_TYPE, to , 0, MPI_COMM_WORLD); @@ -272,7 +272,7 @@ static void action_Isend(const char *const *action) extra->datatype1 = encode_datatype(MPI_CURRENT_TYPE, nullptr); TRACE_smpi_ptp_in(rank, rank, dst_traced, __FUNCTION__, extra); if (!TRACE_smpi_view_internals()) { - TRACE_smpi_send(rank, rank, dst_traced, size*smpi_datatype_size(MPI_CURRENT_TYPE)); + TRACE_smpi_send(rank, rank, 
dst_traced, 0, size*smpi_datatype_size(MPI_CURRENT_TYPE)); } request = smpi_mpi_isend(nullptr, size, MPI_CURRENT_TYPE, to, 0,MPI_COMM_WORLD); @@ -318,7 +318,7 @@ static void action_recv(const char *const *action) { TRACE_smpi_ptp_out(rank, src_traced, rank, __FUNCTION__); if (!TRACE_smpi_view_internals()) { - TRACE_smpi_recv(rank, src_traced, rank); + TRACE_smpi_recv(rank, src_traced, rank, 0); } log_timed_action (action, clock); @@ -419,7 +419,7 @@ static void action_wait(const char *const *action){ TRACE_smpi_ptp_out(rank, src_traced, dst_traced, __FUNCTION__); if (is_wait_for_receive) - TRACE_smpi_recv(rank, src_traced, dst_traced); + TRACE_smpi_recv(rank, src_traced, dst_traced, 0); log_timed_action (action, clock); } @@ -451,7 +451,7 @@ static void action_waitall(const char *const *action){ for (i=0; icomm)){ //prepare send_request MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, smpi_process_index(), - smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, win->comm, MPI_OP_NULL); + smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+1, win->comm, MPI_OP_NULL); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, smpi_process_index(), - smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+1, recv_win->comm, MPI_OP_NULL); + smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+1, recv_win->comm, MPI_OP_NULL); //push request to receiver's win recv_win->requests->push_back(rreq); @@ -181,12 +179,12 @@ int smpi_mpi_get( void *origin_addr, int origin_count, MPI_Datatype origin_datat if(target_rank != smpi_comm_rank(win->comm)){ //prepare send_request MPI_Request sreq = smpi_rma_send_init(send_addr, target_count, target_datatype, - smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, send_win->comm, + smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), 
SMPI_RMA_TAG+2, send_win->comm, MPI_OP_NULL); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(origin_addr, origin_count, origin_datatype, - smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), RMA_TAG+2, win->comm, + smpi_group_index(smpi_comm_group(win->comm),target_rank), smpi_process_index(), SMPI_RMA_TAG+2, win->comm, MPI_OP_NULL); //start the send, with another process than us as sender. @@ -222,11 +220,11 @@ int smpi_mpi_accumulate( void *origin_addr, int origin_count, MPI_Datatype origi //prepare send_request MPI_Request sreq = smpi_rma_send_init(origin_addr, origin_count, origin_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, win->comm, op); + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+3, win->comm, op); //prepare receiver request MPI_Request rreq = smpi_rma_recv_init(recv_addr, target_count, target_datatype, - smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), RMA_TAG+3, recv_win->comm, op); + smpi_process_index(), smpi_group_index(smpi_comm_group(win->comm),target_rank), SMPI_RMA_TAG+3, recv_win->comm, op); //push request to receiver's win recv_win->requests->push_back(rreq); //start send @@ -259,7 +257,7 @@ int smpi_mpi_win_start(MPI_Group group, int assert, MPI_Win win){ while(j!=size){ int src=smpi_group_index(group,j); if(src!=smpi_process_index()){ - reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,RMA_TAG+4, MPI_COMM_WORLD); + reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,SMPI_RMA_TAG+4, MPI_COMM_WORLD); i++; } j++; @@ -286,7 +284,7 @@ int smpi_mpi_win_post(MPI_Group group, int assert, MPI_Win win){ while(j!=size){ int dst=smpi_group_index(group,j); if(dst!=smpi_process_index()){ - reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, RMA_TAG+4, MPI_COMM_WORLD); + reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+4, MPI_COMM_WORLD); i++; } j++; @@ 
-317,7 +315,7 @@ int smpi_mpi_win_complete(MPI_Win win){ while(j!=size){ int dst=smpi_group_index(win->group,j); if(dst!=smpi_process_index()){ - reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, RMA_TAG+5, MPI_COMM_WORLD); + reqs[i]=smpi_mpi_send_init(nullptr, 0, MPI_CHAR, dst, SMPI_RMA_TAG+5, MPI_COMM_WORLD); i++; } j++; @@ -362,7 +360,7 @@ int smpi_mpi_win_wait(MPI_Win win){ while(j!=size){ int src=smpi_group_index(win->group,j); if(src!=smpi_process_index()){ - reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,RMA_TAG+5, MPI_COMM_WORLD); + reqs[i]=smpi_irecv_init(nullptr, 0, MPI_CHAR, src,SMPI_RMA_TAG+5, MPI_COMM_WORLD); i++; } j++;