From 37f5b9a91a456e44ca9a312f84201b3c562e90c6 Mon Sep 17 00:00:00 2001 From: Henri Casanova Date: Thu, 8 May 2014 10:10:44 -1000 Subject: [PATCH 1/1] Rewrote smpi_sleep() and smpi_usleep() so that: 1. the code is not duplicated but shared in a static private function 2. sleeping is done via an actual simcall_process_sleep rather than a bogus computation Augmented SMPI tracing to allow tracing of sleep events --- src/instr/instr_TI_trace.c | 3 +++ src/instr/instr_config.c | 19 ++++++++++++++++ src/instr/instr_private.h | 3 +++ src/smpi/instr_smpi.c | 46 ++++++++++++++++++++++++++++++++++++++ src/smpi/private.h | 3 +++ src/smpi/smpi_bench.c | 42 ++++++++++++++++------------------ 6 files changed, 93 insertions(+), 23 deletions(-) diff --git a/src/instr/instr_TI_trace.c b/src/instr/instr_TI_trace.c index 7c32c7de9f..ef558321b1 100644 --- a/src/instr/instr_TI_trace.c +++ b/src/instr/instr_TI_trace.c @@ -212,6 +212,9 @@ void print_TIPushState(paje_event_t event) case TRACING_COMPUTING: fprintf(trace_file, "%s compute %f\n", process_id, extra->comp_size); break; + case TRACING_SLEEPING: + fprintf(trace_file, "%s sleep %f\n", process_id, extra->sleep_duration); + break; case TRACING_GATHERV: // rank gatherv send_size [recvcounts] root (sendtype) (recvtype) fprintf(trace_file, "%s gatherv %d ", process_id, extra->send_size); for (i = 0; i < extra->num_processes; i++) diff --git a/src/instr/instr_config.c b/src/instr/instr_config.c index e01762519b..ffc5da464f 100644 --- a/src/instr/instr_config.c +++ b/src/instr/instr_config.c @@ -19,6 +19,7 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY (instr_config, instr, "Configuration"); #define OPT_TRACING_SMPI "tracing/smpi" #define OPT_TRACING_SMPI_GROUP "tracing/smpi/group" #define OPT_TRACING_SMPI_COMPUTING "tracing/smpi/computing" +#define OPT_TRACING_SMPI_SLEEPING "tracing/smpi/sleeping" #define OPT_TRACING_SMPI_INTERNALS "tracing/smpi/internals" #define OPT_TRACING_DISPLAY_SIZES "tracing/smpi/display_sizes" #define OPT_TRACING_FORMAT "tracing/smpi/format" @@ -45,6 +46,7 @@ static int trace_platform_topology; static int trace_smpi_enabled; static int trace_smpi_grouped; static int trace_smpi_computing; +static int trace_smpi_sleeping; static int trace_view_internals; static int trace_categorized; static int trace_uncategorized; @@ -71,6 +73,7 @@ static void TRACE_getopts(void) trace_smpi_enabled = xbt_cfg_get_boolean(_sg_cfg_set, OPT_TRACING_SMPI); trace_smpi_grouped = xbt_cfg_get_boolean(_sg_cfg_set, OPT_TRACING_SMPI_GROUP); trace_smpi_computing = xbt_cfg_get_boolean(_sg_cfg_set, OPT_TRACING_SMPI_COMPUTING); + trace_smpi_sleeping = xbt_cfg_get_boolean(_sg_cfg_set, OPT_TRACING_SMPI_SLEEPING); trace_view_internals = xbt_cfg_get_boolean(_sg_cfg_set, OPT_TRACING_SMPI_INTERNALS); trace_categorized = xbt_cfg_get_boolean(_sg_cfg_set, OPT_TRACING_CATEGORIZED); trace_uncategorized = xbt_cfg_get_boolean(_sg_cfg_set, OPT_TRACING_UNCATEGORIZED); @@ -259,6 +262,11 @@ int TRACE_smpi_is_computing(void) return trace_smpi_computing; } +int TRACE_smpi_is_sleeping(void) +{ + return trace_smpi_sleeping; +} + int TRACE_smpi_view_internals(void) { return trace_view_internals; @@ -388,6 +396,13 @@ void TRACE_global_init(int *argc, char **argv) xbt_cfgelm_boolean, 1, 1, NULL, NULL); xbt_cfg_setdefault_boolean(_sg_cfg_set, OPT_TRACING_SMPI_COMPUTING, "no"); +/* smpi sleeping */ + xbt_cfg_register(&_sg_cfg_set, OPT_TRACING_SMPI_SLEEPING, + "Generate states for timing out of SMPI parts of the application", + xbt_cfgelm_boolean, 1, 1, NULL, NULL); + xbt_cfg_setdefault_boolean(_sg_cfg_set, OPT_TRACING_SMPI_SLEEPING, "no"); + + /* smpi internals */ xbt_cfg_register(&_sg_cfg_set, OPT_TRACING_SMPI_INTERNALS, "View internal messages sent by Collective communications in SMPI", @@ -553,6 +568,10 @@ void TRACE_help (int detailed) " This option aims at tracing computations in the application, outside SMPI\n" " to allow further study of simulated or real computation time", detailed); + print_line (OPT_TRACING_SMPI_SLEEPING, "Generates a \" Sleeping \" State", + " This option aims at tracing sleeps in the application, outside SMPI\n" + " to allow further study of simulated or real sleep time", + detailed); print_line (OPT_TRACING_SMPI_INTERNALS, "Generates tracing events corresponding", " to point-to-point messages sent by collective communications", detailed); diff --git a/src/instr/instr_private.h b/src/instr/instr_private.h index 80ba7b9a9c..dd531af82e 100644 --- a/src/instr/instr_private.h +++ b/src/instr/instr_private.h @@ -297,6 +297,7 @@ void instr_resume_tracing (void); XBT_PUBLIC(int) TRACE_smpi_is_enabled(void); XBT_PUBLIC(int) TRACE_smpi_is_grouped(void); XBT_PUBLIC(int) TRACE_smpi_is_computing(void); +XBT_PUBLIC(int) TRACE_smpi_is_sleeping(void); XBT_PUBLIC(int) TRACE_smpi_view_internals(void); /* from resource_utilization.c */ @@ -439,6 +440,7 @@ typedef enum{ TRACING_ALLGATHERV, TRACING_REDUCE_SCATTER, TRACING_COMPUTING, + TRACING_SLEEPING, TRACING_SCAN, TRACING_EXSCAN } e_caller_type ; @@ -450,6 +452,7 @@ typedef struct s_instr_extra_data { int send_size; int recv_size; double comp_size; + double sleep_duration; int src; int dst; int root; diff --git a/src/smpi/instr_smpi.c b/src/smpi/instr_smpi.c index 6910331990..6e048ba96e 100644 --- a/src/smpi/instr_smpi.c +++ b/src/smpi/instr_smpi.c @@ -43,7 +43,10 @@ static const char *smpi_colors[] ={ "exscan", "1 0.54 0.25", "scatterv", "0.52 0 0.52", "scatter", "1 0.74 0.54", + "computing", "0 1 1", + "sleeping", "0 0.5 0.5", + "init", "0 1 0", "finalize", "0 1 0", NULL, NULL, @@ -282,6 +285,49 @@ void TRACE_smpi_computing_out(int rank) new_pajePopState (SIMIX_get_clock(), container, type); } +void TRACE_smpi_sleeping_init(int rank) +{ + //first use, initialize the color in the trace + //TODO : check with lucas and Pierre how to generalize this approach + //to avoid unnecessary access to the color array + if (!TRACE_smpi_is_enabled() || !TRACE_smpi_is_sleeping()) return; + + char str[INSTR_DEFAULT_STR_SIZE]; + smpi_container(rank, str, INSTR_DEFAULT_STR_SIZE); + container_t container = PJ_container_get (str); + type_t type = PJ_type_get ("MPI_STATE", container->type); + const char *color = instr_find_color ("sleeping"); + val_t value = PJ_value_get_or_new ("sleeping", color, type); + new_pajePushState (SIMIX_get_clock(), container, type, value); +} + +void TRACE_smpi_sleeping_in(int rank, instr_extra_data extra) +{ + //do not forget to set the color first, otherwise this will explode + if (!TRACE_smpi_is_enabled()|| !TRACE_smpi_is_sleeping()) { + cleanup_extra_data(extra); + return; + } + + char str[INSTR_DEFAULT_STR_SIZE]; + smpi_container(rank, str, INSTR_DEFAULT_STR_SIZE); + container_t container = PJ_container_get (str); + type_t type = PJ_type_get ("MPI_STATE", container->type); + val_t value = PJ_value_get_or_new ("sleeping", NULL, type); + new_pajePushStateWithExtra (SIMIX_get_clock(), container, type, value, (void*)extra); +} + +void TRACE_smpi_sleeping_out(int rank) +{ + if (!TRACE_smpi_is_enabled()|| !TRACE_smpi_is_sleeping()) return; + char str[INSTR_DEFAULT_STR_SIZE]; + smpi_container(rank, str, INSTR_DEFAULT_STR_SIZE); + container_t container = PJ_container_get (str); + type_t type = PJ_type_get ("MPI_STATE", container->type); + new_pajePopState (SIMIX_get_clock(), container, type); +} + + void TRACE_smpi_testing_in(int rank, instr_extra_data extra) { //do not forget to set the color first, otherwise this will explode diff --git a/src/smpi/private.h b/src/smpi/private.h index 09e66faf84..24f56708ca 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -602,6 +602,9 @@ void TRACE_smpi_collective_out(int rank, int root, const char *operation); void TRACE_smpi_computing_init(int rank); void TRACE_smpi_computing_out(int rank); void TRACE_smpi_computing_in(int rank, instr_extra_data extra); +void TRACE_smpi_sleeping_init(int rank); +void TRACE_smpi_sleeping_out(int rank); +void TRACE_smpi_sleeping_in(int rank, instr_extra_data extra); void TRACE_smpi_testing_out(int rank); void TRACE_smpi_testing_in(int rank, instr_extra_data extra); void TRACE_smpi_alloc(void); diff --git a/src/smpi/smpi_bench.c b/src/smpi/smpi_bench.c index 28ef158e06..b97f86e4e8 100644 --- a/src/smpi/smpi_bench.c +++ b/src/smpi/smpi_bench.c @@ -209,40 +209,36 @@ void smpi_bench_end(void) smpi_execute(xbt_os_timer_elapsed(timer)); } -unsigned int smpi_sleep(unsigned int secs) +/* Private sleep function used by smpi_sleep() and smpi_usleep() */ +static unsigned int private_sleep(double secs) { - smx_action_t action; - smpi_bench_end(); - double flops = (double) secs*simcall_host_get_speed(SIMIX_host_self()); - XBT_DEBUG("Sleep for: %f flops", flops); - action = simcall_host_execute("computation", SIMIX_host_self(), flops, 1, 0, 0); + XBT_DEBUG("Sleep for: %lf secs", secs); #ifdef HAVE_TRACING - simcall_set_category (action, TRACE_internal_smpi_get_category()); - #endif - simcall_host_execution_wait(action); + int rank = smpi_comm_rank(MPI_COMM_WORLD); + instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1); + extra->type=TRACING_SLEEPING; + extra->sleep_duration=secs; + TRACE_smpi_sleeping_in(rank, extra); +#endif + simcall_process_sleep(secs); +#ifdef HAVE_TRACING + TRACE_smpi_sleeping_out(rank); +#endif smpi_bench_begin(); return 0; } -int smpi_usleep(useconds_t usecs) +unsigned int smpi_sleep(unsigned int secs) { - smx_action_t action; - - smpi_bench_end(); - - double flops = (double) (usecs/1000000.0)*simcall_host_get_speed(SIMIX_host_self()); - XBT_DEBUG("Sleep for: %f flops", flops); - action = simcall_host_execute("computation", SIMIX_host_self(), flops, 1, 0, 0); - #ifdef HAVE_TRACING - simcall_set_category (action, TRACE_internal_smpi_get_category()); - #endif - simcall_host_execution_wait(action); + return private_sleep((double)secs); +} - smpi_bench_begin(); - return 0; +int smpi_usleep(useconds_t usecs) +{ + return (int)private_sleep((double)usecs / 1000000.0); } -- 2.20.1