From 393f9552714504181bfb1e16633ac2bf06c92c09 Mon Sep 17 00:00:00 2001 From: degomme Date: Wed, 20 Apr 2016 23:45:18 +0200 Subject: [PATCH] Attempt to improve lifecycle of MPI_Comm and MPI_Group. Maybe this will remove some leaks. Maybe everything will crash and burn. --- src/smpi/smpi_comm.cpp | 18 ++++++++++-------- src/smpi/smpi_global.cpp | 8 ++++---- src/smpi/smpi_group.cpp | 9 +++++---- src/smpi/smpi_pmpi.cpp | 6 ++++-- src/smpi/smpi_topo.cpp | 2 +- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/smpi/smpi_comm.cpp b/src/smpi/smpi_comm.cpp index a4f04683bb..3143742772 100644 --- a/src/smpi/smpi_comm.cpp +++ b/src/smpi/smpi_comm.cpp @@ -63,7 +63,6 @@ MPI_Comm smpi_comm_new(MPI_Group group, MPI_Topology topo) comm = xbt_new(s_smpi_mpi_communicator_t, 1); comm->group = group; - smpi_group_use(comm->group); comm->refcount=1; comm->topoType = MPI_INVALID_TOPO; comm->topo = topo; @@ -81,7 +80,6 @@ void smpi_comm_destroy(MPI_Comm comm) { if (comm == MPI_COMM_UNINITIALIZED) comm = smpi_process_comm_world(); - smpi_group_unuse(comm->group); smpi_topo_destroy(comm->topo); // there's no use count on topos smpi_comm_unuse(comm); } @@ -90,7 +88,8 @@ int smpi_comm_dup(MPI_Comm comm, MPI_Comm* newcomm){ if(smpi_privatize_global_variables){ //we need to switch as the called function may silently touch global variables smpi_switch_data_segment(smpi_process_index()); } - (*newcomm) = smpi_comm_new(smpi_comm_group(comm), smpi_comm_topo(comm)); + MPI_Group cp=smpi_group_copy(smpi_comm_group(comm)); + (*newcomm) = smpi_comm_new(cp, smpi_comm_topo(comm)); int ret = MPI_SUCCESS; //todo: faire en sorte que ça fonctionne avec un communicator dupliqué (refaire un init_smp ?) @@ -231,6 +230,7 @@ MPI_Comm smpi_comm_split(MPI_Comm comm, int color, int key) int* recvbuf; int* rankmap; MPI_Group group, group_root, group_out; + MPI_Group* group_snd; MPI_Request* requests; group_root = group_out = NULL; @@ -250,6 +250,7 @@ MPI_Comm smpi_comm_split(MPI_Comm comm, int color, int key) xbt_free(sendbuf); /* Do the actual job */ if(rank == 0) { + group_snd = xbt_new(MPI_Group, size); rankmap = xbt_new(int, 2 * size); for(i = 0; i < size; i++) { if(recvbuf[2 * i] == MPI_UNDEFINED) { @@ -275,7 +276,6 @@ MPI_Comm smpi_comm_split(MPI_Comm comm, int color, int key) group_root = group_out; /* Save root's group */ } for(j = 0; j < count; j++) { - //increment refcounter in order to avoid freeing the group too quick before copy index = smpi_group_index(group, rankmap[2 * j]); smpi_group_set_mapping(group_out, index, j); } @@ -283,7 +283,8 @@ MPI_Comm smpi_comm_split(MPI_Comm comm, int color, int key) reqs = 0; for(j = 0; j < count; j++) { if(rankmap[2 * j] != 0) { - requests[reqs] = smpi_isend_init(&group_out, 1, MPI_PTR, rankmap[2 * j], system_tag, comm); + group_snd[reqs]=smpi_group_copy(group_out); + requests[reqs] = smpi_isend_init(&(group_snd[reqs]), 1, MPI_PTR, rankmap[2 * j], system_tag, comm); reqs++; } } @@ -292,13 +293,12 @@ MPI_Comm smpi_comm_split(MPI_Comm comm, int color, int key) xbt_free(requests); } xbt_free(recvbuf); + xbt_free(rankmap); + xbt_free(group_snd); group_out = group_root; /* exit with root's group */ } else { if(color != MPI_UNDEFINED) { smpi_mpi_recv(&group_out, 1, MPI_PTR, 0, system_tag, comm, MPI_STATUS_IGNORE); - if(group_out){ - group_out=smpi_group_copy(group_out); - } } /* otherwise, exit with group_out == NULL */ } return group_out ? smpi_comm_new(group_out, NULL) : MPI_COMM_NULL; @@ -307,6 +307,7 @@ MPI_Comm smpi_comm_split(MPI_Comm comm, int color, int key) void smpi_comm_use(MPI_Comm comm){ if (comm == MPI_COMM_UNINITIALIZED) comm = smpi_process_comm_world(); + smpi_group_use(comm->group); comm->refcount++; } @@ -314,6 +315,7 @@ void smpi_comm_unuse(MPI_Comm comm){ if (comm == MPI_COMM_UNINITIALIZED) comm = smpi_process_comm_world(); comm->refcount--; + smpi_group_unuse(comm->group); if(comm->refcount==0){ if(comm->intra_comm != MPI_COMM_NULL) smpi_comm_unuse(comm->intra_comm); diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index cf08616b00..c2f843f0f9 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -448,19 +448,15 @@ void smpi_global_destroy(void) smpi_bench_destroy(); if (MPI_COMM_WORLD != MPI_COMM_UNINITIALIZED){ while (smpi_group_unuse(smpi_comm_group(MPI_COMM_WORLD)) > 0); - xbt_free(MPI_COMM_WORLD); xbt_barrier_destroy(process_data[0]->finalization_barrier); }else{ smpi_deployment_cleanup_instances(); } - MPI_COMM_WORLD = MPI_COMM_NULL; for (i = 0; i < count; i++) { if(process_data[i]->comm_self!=MPI_COMM_NULL){ - smpi_group_unuse(smpi_comm_group(process_data[i]->comm_self)); smpi_comm_destroy(process_data[i]->comm_self); } if(process_data[i]->comm_intra!=MPI_COMM_NULL){ - smpi_group_unuse(smpi_comm_group(process_data[i]->comm_intra)); smpi_comm_destroy(process_data[i]->comm_intra); } xbt_os_timer_free(process_data[i]->timer); @@ -472,6 +468,10 @@ void smpi_global_destroy(void) xbt_free(process_data); process_data = NULL; + if (MPI_COMM_WORLD != MPI_COMM_UNINITIALIZED) + xbt_free(MPI_COMM_WORLD); + MPI_COMM_WORLD = MPI_COMM_NULL; + xbt_free(index_to_process_data); if(smpi_privatize_global_variables) smpi_destroy_global_memory_segments(); diff --git a/src/smpi/smpi_group.cpp b/src/smpi/smpi_group.cpp index 247c68bfc7..ea72ca5d0e 100644 --- a/src/smpi/smpi_group.cpp +++ b/src/smpi/smpi_group.cpp @@ -49,8 +49,8 @@ MPI_Group smpi_group_copy(MPI_Group origin) xbt_dict_cursor_t cursor = NULL; int i; - if(origin!= smpi_comm_group(MPI_COMM_WORLD) && origin != MPI_GROUP_NULL - && origin != smpi_comm_group(MPI_COMM_SELF) && origin != MPI_GROUP_EMPTY) + if(origin != MPI_GROUP_NULL + && origin != MPI_GROUP_EMPTY) { group = xbt_new(s_smpi_mpi_group_t, 1); group->size = origin->size; @@ -62,7 +62,9 @@ MPI_Group smpi_group_copy(MPI_Group origin) } xbt_dict_foreach(origin->index_to_rank_map, cursor, key, ptr_rank) { - xbt_dict_set(group->index_to_rank_map, key, ptr_rank, NULL); + int * cp = (int*)xbt_malloc(sizeof(int)); + *cp=*(int*)ptr_rank; + xbt_dict_set(group->index_to_rank_map, key, cp, NULL); } } @@ -73,7 +75,6 @@ void smpi_group_destroy(MPI_Group group) { if(group!= smpi_comm_group(MPI_COMM_WORLD) && group != MPI_GROUP_NULL - && group != smpi_comm_group(MPI_COMM_SELF) && group != MPI_GROUP_EMPTY) smpi_group_unuse(group); } diff --git a/src/smpi/smpi_pmpi.cpp b/src/smpi/smpi_pmpi.cpp index cd4c0a2d56..00e2fce12a 100644 --- a/src/smpi/smpi_pmpi.cpp +++ b/src/smpi/smpi_pmpi.cpp @@ -718,7 +718,7 @@ int PMPI_Comm_group(MPI_Comm comm, MPI_Group * group) } else { *group = smpi_comm_group(comm); if(*group!= smpi_comm_group(MPI_COMM_WORLD) && *group != MPI_GROUP_NULL - && *group != smpi_comm_group(MPI_COMM_SELF) && *group != MPI_GROUP_EMPTY) + && *group != MPI_GROUP_EMPTY) smpi_group_use(*group); retval = MPI_SUCCESS; } @@ -757,6 +757,8 @@ int PMPI_Comm_dup(MPI_Comm comm, MPI_Comm * newcomm) retval = MPI_ERR_ARG; } else { retval = smpi_comm_dup(comm, newcomm); + if(retval==MPI_SUCCESS) + smpi_group_use(smpi_comm_group(*newcomm)); } return retval; } @@ -775,7 +777,7 @@ int PMPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm * newcomm) *newcomm= MPI_COMM_NULL; retval = MPI_SUCCESS; }else{ - + smpi_group_use(group); *newcomm = smpi_comm_new(group, NULL); retval = MPI_SUCCESS; } diff --git a/src/smpi/smpi_topo.cpp b/src/smpi/smpi_topo.cpp index 2f22566b87..16b8cfa1c3 100644 --- a/src/smpi/smpi_topo.cpp +++ b/src/smpi/smpi_topo.cpp @@ -150,7 +150,7 @@ int smpi_mpi_cart_create(MPI_Comm comm_old, int ndims, int dims[], int periods[] } else { if (rank == 0) { newCart = smpi_cart_topo_create(ndims); - *comm_cart = smpi_comm_new(smpi_comm_group(MPI_COMM_SELF), newCart); + *comm_cart = smpi_comm_new(smpi_group_copy(smpi_comm_group(MPI_COMM_SELF)), newCart); } else { *comm_cart = MPI_COMM_NULL; } -- 2.20.1