X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/598215cad967693167027b2bc7404110098f9e91..ff5aac78d1cecdc46c5f1430acbd007f16a3bfd9:/src/smpi/smpi_global.c diff --git a/src/smpi/smpi_global.c b/src/smpi/smpi_global.c index f1710f3cbd..bada714bdd 100644 --- a/src/smpi/smpi_global.c +++ b/src/smpi/smpi_global.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2007-2014. The SimGrid Team. +/* Copyright (c) 2007-2015. The SimGrid Team. * All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it @@ -7,10 +7,12 @@ #include "private.h" #include "smpi_mpi_dt_private.h" #include "mc/mc.h" +#include "mc/mc_record.h" #include "xbt/replay.h" #include "surf/surf.h" #include "simix/smx_private.h" #include "simgrid/sg_config.h" +#include "mc/mc_replay.h" #include /* DBL_MAX */ #include @@ -26,6 +28,7 @@ typedef struct s_smpi_process_data { char ***argv; smx_rdv_t mailbox; smx_rdv_t mailbox_small; + xbt_mutex_t mailboxes_mutex; xbt_os_timer_t timer; MPI_Comm comm_self; MPI_Comm comm_intra; @@ -35,16 +38,17 @@ typedef struct s_smpi_process_data { char state; int sampling; /* inside an SMPI_SAMPLE_ block? */ char* instance_id; + int replaying; /* is the process replaying a trace */ xbt_bar_t finalization_barrier; } s_smpi_process_data_t; static smpi_process_data_t *process_data = NULL; int process_count = 0; +int smpi_universe_size = 0; int* index_to_process_data = NULL; - - +extern double smpi_total_benched_time; +xbt_os_timer_t global_timer; MPI_Comm MPI_COMM_WORLD = MPI_COMM_UNINITIALIZED; -int MPI_UNIVERSE_SIZE; MPI_Errhandler *MPI_ERRORS_RETURN = NULL; MPI_Errhandler *MPI_ERRORS_ARE_FATAL = NULL; @@ -78,10 +82,10 @@ void smpi_process_init(int *argc, char ***argv) proc->context->cleanup_func=SIMIX_process_cleanup; char* instance_id = (*argv)[1]; int rank = atoi((*argv)[2]); - index = SIMIX_process_get_PID(proc) -1; + index = smpi_process_index_of_smx_process(proc); if(!index_to_process_data){ - index_to_process_data=(int*)xbt_malloc(SIMIX_process_count()*sizeof(int)); + index_to_process_data=(int*)xbt_malloc(SIMIX_process_count()*sizeof(int)); } MPI_Comm* temp_comm_world; xbt_bar_t temp_bar; @@ -91,12 +95,12 @@ void smpi_process_init(int *argc, char ***argv) if(temp_bar != NULL) data->finalization_barrier = temp_bar; data->index = index; data->instance_id = instance_id; + data->replaying = 0; xbt_free(simcall_process_get_data(proc)); simcall_process_set_data(proc, data); if (*argc > 3) { free((*argv)[1]); - free((*argv)[2]); - memmove(&(*argv)[1], &(*argv)[3], sizeof(char *) * (*argc - 3)); + memmove(&(*argv)[0], &(*argv)[2], sizeof(char *) * (*argc - 2)); (*argv)[(*argc) - 1] = NULL; (*argv)[(*argc) - 2] = NULL; } @@ -105,11 +109,10 @@ void smpi_process_init(int *argc, char ***argv) data->argv = argv; // set the process attached to the mailbox simcall_rdv_set_receiver(data->mailbox_small, proc); - XBT_DEBUG("<%d> New process in the game: %p", index, proc); if(smpi_privatize_global_variables){ - switch_data_segment(index); + smpi_switch_data_segment(index); } } @@ -121,7 +124,7 @@ void smpi_process_destroy(void) { int index = smpi_process_index(); if(smpi_privatize_global_variables){ - switch_data_segment(index); + smpi_switch_data_segment(index); } process_data[index_to_process_data[index]]->state = SMPI_FINALIZED; XBT_DEBUG("<%d> Process left the game", index); @@ -134,7 +137,7 @@ void smpi_process_finalize(void) { // This leads to an explosion of the search graph // which cannot be reduced: - if(MC_is_active()) + if(MC_is_active() || MC_record_replay_is_active()) return; int index = smpi_process_index(); @@ -170,10 +173,23 @@ int smpi_process_initialized(void) void smpi_process_mark_as_initialized(void) { int index = smpi_process_index(); - if ((index != MPI_UNDEFINED) && (!process_data[index_to_process_data[index]]->state != SMPI_FINALIZED)) + if ((index != MPI_UNDEFINED) && (process_data[index_to_process_data[index]]->state != SMPI_FINALIZED)) process_data[index_to_process_data[index]]->state = SMPI_INITIALIZED; } +void smpi_process_set_replaying(int value){ + int index = smpi_process_index(); + if ((index != MPI_UNDEFINED) && (process_data[index_to_process_data[index]]->state != SMPI_FINALIZED)) + process_data[index_to_process_data[index]]->replaying = value; +} + +int smpi_process_get_replaying(){ + int index = smpi_process_index(); + if (index != MPI_UNDEFINED) + return process_data[index_to_process_data[index]]->replaying; + else return _xbt_replay_is_active(); +} + int smpi_global_size(void) { @@ -221,11 +237,6 @@ int smpi_process_index(void) return data ? data->index : MPI_UNDEFINED; } -int smpi_process_index_of_smx_process(smx_process_t process) { - smpi_process_data_t data = SIMIX_process_get_data(process); - return data ? data->index : MPI_UNDEFINED; -} - MPI_Comm smpi_process_comm_world(void) { smpi_process_data_t data = smpi_process_data(); @@ -245,6 +256,12 @@ smx_rdv_t smpi_process_mailbox_small(void) return data->mailbox_small; } +xbt_mutex_t smpi_process_mailboxes_mutex(void) +{ + smpi_process_data_t data = smpi_process_data(); + return data->mailboxes_mutex; +} + smx_rdv_t smpi_process_remote_mailbox(int index) { smpi_process_data_t data = smpi_process_remote_data(index); @@ -258,6 +275,12 @@ smx_rdv_t smpi_process_remote_mailbox_small(int index) return data->mailbox_small; } +xbt_mutex_t smpi_process_remote_mailboxes_mutex(int index) +{ + smpi_process_data_t data = smpi_process_remote_data(index); + return data->mailboxes_mutex; +} + xbt_os_timer_t smpi_process_timer(void) { smpi_process_data_t data = smpi_process_data(); @@ -315,36 +338,35 @@ int smpi_process_get_sampling(void) void print_request(const char *message, MPI_Request request) { - XBT_DEBUG + XBT_VERB ("%s request %p [buf = %p, size = %zu, src = %d, dst = %d, tag = %d, flags = %x]", message, request, request->buf, request->size, request->src, request->dst, request->tag, request->flags); } -void smpi_comm_copy_buffer_callback(smx_action_t comm, +void smpi_comm_copy_buffer_callback(smx_synchro_t comm, void *buff, size_t buff_size) { XBT_DEBUG("Copy the data over"); - if(_xbt_replay_is_active()) return; void* tmpbuff=buff; if((smpi_privatize_global_variables) - && ((char*)buff >= start_data_exe) - && ((char*)buff < start_data_exe + size_data_exe ) + && ((char*)buff >= smpi_start_data_exe) + && ((char*)buff < smpi_start_data_exe + smpi_size_data_exe ) ){ XBT_DEBUG("Privatization : We are copying from a zone inside global memory... Saving data to temp buffer !"); - switch_data_segment(((smpi_process_data_t)SIMIX_process_get_data(comm->comm.src_proc))->index); + smpi_switch_data_segment(((smpi_process_data_t)SIMIX_process_get_data(comm->comm.src_proc))->index); tmpbuff = (void*)xbt_malloc(buff_size); memcpy(tmpbuff, buff, buff_size); } if((smpi_privatize_global_variables) - && ((char*)comm->comm.dst_buff >= start_data_exe) - && ((char*)comm->comm.dst_buff < start_data_exe + size_data_exe ) + && ((char*)comm->comm.dst_buff >= smpi_start_data_exe) + && ((char*)comm->comm.dst_buff < smpi_start_data_exe + smpi_size_data_exe ) ){ XBT_DEBUG("Privatization : We are copying to a zone inside global memory - Switch data segment"); - switch_data_segment(((smpi_process_data_t)SIMIX_process_get_data(comm->comm.dst_proc))->index); + smpi_switch_data_segment(((smpi_process_data_t)SIMIX_process_get_data(comm->comm.dst_proc))->index); } @@ -364,6 +386,13 @@ void smpi_comm_copy_buffer_callback(smx_action_t comm, } + +void smpi_comm_null_copy_buffer_callback(smx_synchro_t comm, + void *buff, size_t buff_size) +{ + return; +} + static void smpi_check_options(){ //check correctness of MPI parameters @@ -389,28 +418,33 @@ void smpi_global_init(void) char name[MAILBOX_NAME_MAXLEN]; int smpirun=0; - + if (!MC_is_active()) { + global_timer = xbt_os_timer_new(); + xbt_os_walltimer_start(global_timer); + } if (process_count == 0){ process_count = SIMIX_process_count(); smpirun=1; } + smpi_universe_size = process_count; process_data = xbt_new0(smpi_process_data_t, process_count); for (i = 0; i < process_count; i++) { process_data[i] = xbt_new(s_smpi_process_data_t, 1); //process_data[i]->index = i; - process_data[i]->argc = NULL; - process_data[i]->argv = NULL; + process_data[i]->argc = NULL; + process_data[i]->argv = NULL; process_data[i]->mailbox = simcall_rdv_create(get_mailbox_name(name, i)); process_data[i]->mailbox_small = simcall_rdv_create(get_mailbox_name_small(name, i)); - process_data[i]->timer = xbt_os_timer_new(); + process_data[i]->mailboxes_mutex = xbt_mutex_init(); + process_data[i]->timer = xbt_os_timer_new(); if (MC_is_active()) MC_ignore_heap(process_data[i]->timer, xbt_os_timer_size()); - process_data[i]->comm_self = MPI_COMM_NULL; - process_data[i]->comm_intra = MPI_COMM_NULL; - process_data[i]->comm_world = NULL; - process_data[i]->state = SMPI_UNINITIALIZED; - process_data[i]->sampling = 0; + process_data[i]->comm_self = MPI_COMM_NULL; + process_data[i]->comm_intra = MPI_COMM_NULL; + process_data[i]->comm_world = NULL; + process_data[i]->state = SMPI_UNINITIALIZED; + process_data[i]->sampling = 0; process_data[i]->finalization_barrier = NULL; } //if the process was launched through smpirun script @@ -420,9 +454,9 @@ void smpi_global_init(void) if(smpirun){ group = smpi_group_new(process_count); MPI_COMM_WORLD = smpi_comm_new(group, NULL); + MPI_Attr_put(MPI_COMM_WORLD, MPI_UNIVERSE_SIZE, (void *)(MPI_Aint)process_count); xbt_bar_t bar=xbt_barrier_init(process_count); - MPI_UNIVERSE_SIZE = smpi_comm_size(MPI_COMM_WORLD); for (i = 0; i < process_count; i++) { smpi_group_set_mapping(group, i, i); process_data[i]->finalization_barrier = bar; @@ -456,6 +490,7 @@ void smpi_global_destroy(void) xbt_os_timer_free(process_data[i]->timer); simcall_rdv_destroy(process_data[i]->mailbox); simcall_rdv_destroy(process_data[i]->mailbox_small); + xbt_mutex_destroy(process_data[i]->mailboxes_mutex); xbt_free(process_data[i]); } xbt_free(process_data); @@ -495,9 +530,7 @@ static void smpi_init_logs(){ function: xbt_log_appender_file.c depends on it DO NOT connect this in XBT or so, or it will be useless to xbt_log_appender_file.c */ -#ifdef HAVE_TRACING XBT_LOG_CONNECT(instr_smpi); -#endif XBT_LOG_CONNECT(smpi_base); XBT_LOG_CONNECT(smpi_bench); XBT_LOG_CONNECT(smpi_coll); @@ -517,51 +550,51 @@ static void smpi_init_logs(){ static void smpi_init_options(){ int gather_id = find_coll_description(mpi_coll_gather_description, - sg_cfg_get_string("smpi/gather")); + sg_cfg_get_string("smpi/gather"),"gather"); mpi_coll_gather_fun = (int (*)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, int, MPI_Comm)) mpi_coll_gather_description[gather_id].coll; int allgather_id = find_coll_description(mpi_coll_allgather_description, - sg_cfg_get_string("smpi/allgather")); + sg_cfg_get_string("smpi/allgather"),"allgather"); mpi_coll_allgather_fun = (int (*)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)) mpi_coll_allgather_description[allgather_id].coll; int allgatherv_id = find_coll_description(mpi_coll_allgatherv_description, - sg_cfg_get_string("smpi/allgatherv")); + sg_cfg_get_string("smpi/allgatherv"),"allgatherv"); mpi_coll_allgatherv_fun = (int (*)(void *, int, MPI_Datatype, void *, int *, int *, MPI_Datatype, MPI_Comm)) mpi_coll_allgatherv_description[allgatherv_id].coll; int allreduce_id = find_coll_description(mpi_coll_allreduce_description, - sg_cfg_get_string("smpi/allreduce")); + sg_cfg_get_string("smpi/allreduce"),"allreduce"); mpi_coll_allreduce_fun = (int (*)(void *sbuf, void *rbuf, int rcount, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)) mpi_coll_allreduce_description[allreduce_id].coll; int alltoall_id = find_coll_description(mpi_coll_alltoall_description, - sg_cfg_get_string("smpi/alltoall")); + sg_cfg_get_string("smpi/alltoall"),"alltoall"); mpi_coll_alltoall_fun = (int (*)(void *, int, MPI_Datatype, void *, int, MPI_Datatype, MPI_Comm)) mpi_coll_alltoall_description[alltoall_id].coll; int alltoallv_id = find_coll_description(mpi_coll_alltoallv_description, - sg_cfg_get_string("smpi/alltoallv")); + sg_cfg_get_string("smpi/alltoallv"),"alltoallv"); mpi_coll_alltoallv_fun = (int (*)(void *, int *, int *, MPI_Datatype, void *, int *, int *, MPI_Datatype, MPI_Comm)) mpi_coll_alltoallv_description[alltoallv_id].coll; int bcast_id = find_coll_description(mpi_coll_bcast_description, - sg_cfg_get_string("smpi/bcast")); + sg_cfg_get_string("smpi/bcast"),"bcast"); mpi_coll_bcast_fun = (int (*)(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm com)) mpi_coll_bcast_description[bcast_id].coll; int reduce_id = find_coll_description(mpi_coll_reduce_description, - sg_cfg_get_string("smpi/reduce")); + sg_cfg_get_string("smpi/reduce"),"reduce"); mpi_coll_reduce_fun = (int (*)(void *buf, void *rbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)) @@ -569,14 +602,14 @@ static void smpi_init_options(){ int reduce_scatter_id = find_coll_description(mpi_coll_reduce_scatter_description, - sg_cfg_get_string("smpi/reduce_scatter")); + sg_cfg_get_string("smpi/reduce_scatter"),"reduce_scatter"); mpi_coll_reduce_scatter_fun = (int (*)(void *sbuf, void *rbuf, int *rcounts, MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)) mpi_coll_reduce_scatter_description[reduce_scatter_id].coll; int scatter_id = find_coll_description(mpi_coll_scatter_description, - sg_cfg_get_string("smpi/scatter")); + sg_cfg_get_string("smpi/scatter"),"scatter"); mpi_coll_scatter_fun = (int (*)(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, @@ -584,7 +617,7 @@ static void smpi_init_options(){ mpi_coll_scatter_description[scatter_id].coll; int barrier_id = find_coll_description(mpi_coll_barrier_description, - sg_cfg_get_string("smpi/barrier")); + sg_cfg_get_string("smpi/barrier"),"barrier"); mpi_coll_barrier_fun = (int (*)(MPI_Comm comm)) mpi_coll_barrier_description[barrier_id].coll; @@ -608,12 +641,10 @@ int smpi_main(int (*realmain) (int argc, char *argv[]), int argc, char *argv[]) smpi_init_logs(); -#ifdef HAVE_TRACING TRACE_global_init(&argc, argv); TRACE_add_start_function(TRACE_smpi_alloc); TRACE_add_end_function(TRACE_smpi_release); -#endif SIMIX_global_init(&argc, argv); @@ -636,19 +667,29 @@ int smpi_main(int (*realmain) (int argc, char *argv[]), int argc, char *argv[]) fflush(stdout); fflush(stderr); - if (MC_is_active()) - MC_do_the_modelcheck_for_real(); - else + if (MC_is_active()) { + MC_run(); + } else { + SIMIX_run(); - if (sg_cfg_get_boolean("smpi/display_timing")) - XBT_INFO("Simulation time: %g seconds.", SIMIX_get_clock()); + xbt_os_walltimer_stop(global_timer); + if (sg_cfg_get_boolean("smpi/display_timing")){ + double global_time = xbt_os_timer_elapsed(global_timer); + XBT_INFO("Simulated time: %g seconds. \n\n" + "The simulation took %g seconds (after parsing and platform setup)\n" + "%g seconds were actual computation of the application" + , SIMIX_get_clock(), global_time , smpi_total_benched_time); + + if (smpi_total_benched_time/global_time>=0.75) + XBT_INFO("More than 75%% of the time was spent inside the application code.\n" + "You may want to use sampling functions or trace replay to reduce this."); + } + } smpi_global_destroy(); -#ifdef HAVE_TRACING TRACE_end(); -#endif return 0; } @@ -660,11 +701,8 @@ void SMPI_init(){ smpi_init_options(); smpi_global_init(); smpi_check_options(); -#ifdef HAVE_TRACING - if (TRACE_is_enabled() && TRACE_is_configured()) { + if (TRACE_is_enabled() && TRACE_is_configured()) TRACE_smpi_alloc(); - } -#endif if(smpi_privatize_global_variables) smpi_initialize_global_memory_segments(); }