From: Augustin Degomme Date: Tue, 6 May 2014 13:41:53 +0000 (+0200) Subject: Use the xbt barrier to finalize smpi processes cleanly. X-Git-Tag: v3_11~94^2~13 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/dd51a081b7de36a60cd56540dc1002b1cf75b0e5 Use the xbt barrier to finalize smpi processes cleanly. This removes the MPI_Barrier that was used before, which might have caused problems in some cases --- diff --git a/src/smpi/private.h b/src/smpi/private.h index 2c53164f74..aec9e4b49b 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -10,6 +10,7 @@ #include "internal_config.h" #include "xbt.h" #include "xbt/xbt_os_time.h" +#include "xbt/synchro_core.h" #include "simgrid/simix.h" #include "smpi/smpi_interface.h" #include "smpi/smpi.h" @@ -143,7 +144,8 @@ double smpi_process_simulated_elapsed(void); void smpi_process_set_sampling(int s); int smpi_process_get_sampling(void); -MPI_Comm* smpi_deployment_register_process(const char* instance_id, int rank, int index); +void smpi_deployment_register_process(const char* instance_id, int rank, int index, MPI_Comm**, xbt_bar_t*); +void smpi_deployment_cleanup_instances(void); void smpi_comm_copy_buffer_callback(smx_action_t comm, void *buff, size_t buff_size); diff --git a/src/smpi/smpi_deployment.c b/src/smpi/smpi_deployment.c index 42f679f482..83714ad7b1 100644 --- a/src/smpi/smpi_deployment.c +++ b/src/smpi/smpi_deployment.c @@ -6,6 +6,7 @@ #include "private.h" #include "xbt/sysdep.h" +#include "xbt/synchro_core.h" #include "xbt/log.h" #include "xbt/dict.h" @@ -19,6 +20,7 @@ typedef struct s_smpi_mpi_instance{ int present_processes; int index; MPI_Comm comm_world; + xbt_bar_t finalization_barrier; } s_smpi_mpi_instance_t; @@ -42,6 +44,8 @@ void SMPI_app_instance_register(const char *name, xbt_main_func_t code, int num_ instance->present_processes = 0; instance->index = process_count; instance->comm_world = MPI_COMM_NULL; + instance->finalization_barrier=xbt_barrier_init(num_processes); + process_count+=num_processes; if(!smpi_instances){ @@ -54,11 +58,13 @@ void SMPI_app_instance_register(const char *name, xbt_main_func_t code, int num_ //get the index of the process in the process_data array -MPI_Comm* smpi_deployment_register_process(const char* instance_id, int rank, int index){ +void smpi_deployment_register_process(const char* instance_id, int rank, int index,MPI_Comm** comm, xbt_bar_t* bar){ if(!smpi_instances){//no instance registered, we probably used smpirun. - index_to_process_data[index]=index; - return NULL; + index_to_process_data[index]=index; + *bar = NULL; + *comm = NULL; + return; } s_smpi_mpi_instance_t* instance = xbt_dict_get_or_null(smpi_instances, instance_id); @@ -72,5 +78,19 @@ MPI_Comm* smpi_deployment_register_process(const char* instance_id, int rank, in instance->present_processes++; index_to_process_data[index]=instance->index+rank; smpi_group_set_mapping(smpi_comm_group(instance->comm_world), index, rank); - return & instance->comm_world; + *bar = instance->finalization_barrier; + *comm = &instance->comm_world; + return; +} + +void smpi_deployment_cleanup_instances(){ + xbt_dict_cursor_t cursor = NULL; + s_smpi_mpi_instance_t* instance = NULL; + char *name = NULL; + xbt_dict_foreach((xbt_dict_t) smpi_instances, cursor, name, instance) { + while (smpi_group_unuse(smpi_comm_group(instance->comm_world)) > 0); + xbt_free(instance->comm_world); + xbt_barrier_destroy(instance->finalization_barrier); + } } + diff --git a/src/smpi/smpi_global.c b/src/smpi/smpi_global.c index 1ac052ffae..fbf18b4e80 100644 --- a/src/smpi/smpi_global.c +++ b/src/smpi/smpi_global.c @@ -34,6 +34,7 @@ typedef struct s_smpi_process_data { char state; int sampling; /* inside an SMPI_SAMPLE_ block? */ char* instance_id; + xbt_bar_t finalization_barrier; } s_smpi_process_data_t; static smpi_process_data_t *process_data = NULL; @@ -84,9 +85,12 @@ void smpi_process_init(int *argc, char ***argv) if(!index_to_process_data){ index_to_process_data=(int*)xbt_malloc(SIMIX_process_count()*sizeof(int)); } - MPI_Comm* temp_comm_world = smpi_deployment_register_process(instance_id, rank, index); + MPI_Comm* temp_comm_world; + xbt_bar_t temp_bar; + smpi_deployment_register_process(instance_id, rank, index, &temp_comm_world ,&temp_bar); data = smpi_process_remote_data(index); data->comm_world = temp_comm_world; + if(temp_bar != NULL) data->finalization_barrier = temp_bar; data->index = index; data->instance_id = instance_id; simcall_process_set_data(proc, data); @@ -128,44 +132,10 @@ void smpi_process_destroy(void) */ void smpi_process_finalize(void) { - if(MC_is_active()){ + int index = smpi_process_index(); // wait for all pending asynchronous comms to finish - while (SIMIX_process_has_pending_comms(SIMIX_process_self())) { - simcall_process_sleep(0.01); - } - }else{ - int i; - int size = smpi_comm_size(MPI_COMM_WORLD); - int rank = smpi_comm_rank(MPI_COMM_WORLD); - /* All non-root send & receive zero-length message. */ - if (rank > 0) { - smpi_mpi_ssend (NULL, 0, MPI_BYTE, 0, - COLL_TAG_BARRIER, - MPI_COMM_WORLD); - smpi_mpi_recv (NULL, 0, MPI_BYTE, 0, - COLL_TAG_BARRIER, - MPI_COMM_WORLD, MPI_STATUS_IGNORE); - } - /* The root collects and broadcasts the messages. */ - else { - MPI_Request* requests; - requests = (MPI_Request*)malloc( size * sizeof(MPI_Request) ); - for (i = 1; i < size; ++i) { - requests[i] = smpi_mpi_irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, - COLL_TAG_BARRIER, MPI_COMM_WORLD - ); - } - smpi_mpi_waitall( size-1, requests+1, MPI_STATUSES_IGNORE ); - for (i = 1; i < size; ++i) { - requests[i] = smpi_mpi_issend(NULL, 0, MPI_BYTE, i, - COLL_TAG_BARRIER, - MPI_COMM_WORLD - ); - } - smpi_mpi_waitall( size-1, requests+1, MPI_STATUSES_IGNORE ); - free( requests ); - } - } + xbt_barrier_wait(process_data[index_to_process_data[index]]->finalization_barrier); + } /** @@ -441,6 +411,7 @@ void smpi_global_init(void) process_data[i]->comm_world = NULL; process_data[i]->state = SMPI_UNINITIALIZED; process_data[i]->sampling = 0; + process_data[i]->finalization_barrier = NULL; } //if the process was launched through smpirun script //we generate a global mpi_comm_world @@ -449,9 +420,12 @@ void smpi_global_init(void) if(smpirun){ group = smpi_group_new(process_count); MPI_COMM_WORLD = smpi_comm_new(group, NULL); + xbt_bar_t bar=xbt_barrier_init(process_count); + MPI_UNIVERSE_SIZE = smpi_comm_size(MPI_COMM_WORLD); for (i = 0; i < process_count; i++) { smpi_group_set_mapping(group, i, i); + process_data[i]->finalization_barrier = bar; } } } @@ -465,6 +439,9 @@ void smpi_global_destroy(void) if (MPI_COMM_WORLD != MPI_COMM_UNINITIALIZED){ while (smpi_group_unuse(smpi_comm_group(MPI_COMM_WORLD)) > 0); xbt_free(MPI_COMM_WORLD); + xbt_barrier_destroy(process_data[0]->finalization_barrier); + }else{ + smpi_deployment_cleanup_instances(); } MPI_COMM_WORLD = MPI_COMM_NULL; for (i = 0; i < count; i++) {