From 2565e83d042a5f01c7f131c4af108028a5c4ce2b Mon Sep 17 00:00:00 2001 From: degomme Date: Wed, 20 Feb 2019 11:54:34 +0100 Subject: [PATCH] Change the syntax of SMPI_SAMPLE_* macros. To handle the end of the computing loop when sampling is still on, we need to wrap it. This replaces this syntax for(k = 1; k <= np; k++) { SMPI_SAMPLE_GLOBAL(0.25 * np, 0.03) { where the macro had no idea of the loop conditions by this SMPI_SAMPLE_GLOBAL(k = 1, k <= np, k++, 0.25 * np, 0.03, {...} ) syntax, where the body of the loop is itself a parameter. This is not perfect, and may change soon. But now we can gracefully exit the sampling whenever the outer loop exits. --- examples/smpi/NAS/ep.c | 4 ++-- include/smpi/smpi.h | 27 ++++++++++++++++------ src/smpi/internals/smpi_bench.cpp | 23 ++++++++++++++++++ teshsuite/smpi/macro-sample/macro-sample.c | 14 ++++------- 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/examples/smpi/NAS/ep.c b/examples/smpi/NAS/ep.c index a7b8d50fae..75a35c339b 100644 --- a/examples/smpi/NAS/ep.c +++ b/examples/smpi/NAS/ep.c @@ -135,7 +135,7 @@ int main(int argc, char **argv) { k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1; int stop = FALSE; - for(k = 1; k <= np; k++) { SMPI_SAMPLE_GLOBAL(0.25 * np, 0.03) { + SMPI_SAMPLE_GLOBAL(k = 1, k <= np, k++, 0.25 * np, 0.03, stop = FALSE; kk = k_offset + k ; t1 = s; @@ -181,7 +181,7 @@ int main(int argc, char **argv) { } } timer_stop(2); - } } + ) TRACE_smpi_set_category ("finalize"); diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index 8112b73476..593e46fcfb 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -955,7 +955,7 @@ XBT_PUBLIC unsigned long long smpi_rastro_timestamp(); XBT_PUBLIC void smpi_sample_1(int global, const char* file, int line, int iters, double threshold); XBT_PUBLIC int smpi_sample_2(int global, const char* file, int line); XBT_PUBLIC void smpi_sample_3(int global, const char* file, int line); - +XBT_PUBLIC void smpi_sample_exit(int global, 
const char* file, int line); /** * Need a public setter for SMPI copy_callback function, so users can define * their own while still using default SIMIX_copy_callback for S4U copies. @@ -973,11 +973,24 @@ XBT_PUBLIC void smpi_trace_set_call_location_(const char* file, int* line); /** Fortran binding + -fsecond-underscore **/ XBT_PUBLIC void smpi_trace_set_call_location__(const char* file, int* line); -#define SMPI_SAMPLE_LOOP(global, iters, thres) \ - for (smpi_sample_1(global, __FILE__, __LINE__, iters, thres); smpi_sample_2(global, __FILE__, __LINE__); \ - smpi_sample_3(global, __FILE__, __LINE__)) -#define SMPI_SAMPLE_LOCAL(iters, thres) SMPI_SAMPLE_LOOP(0, iters, thres) -#define SMPI_SAMPLE_GLOBAL(iters, thres) SMPI_SAMPLE_LOOP(1, iters, thres) +#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres, loop_body) \ + {\ + loop_init;\ + for (;;){\ + if(!(loop_end)) {\ + smpi_sample_exit(global, __FILE__, __LINE__);\ + break;\ + }\ + for (smpi_sample_1(global, __FILE__, __LINE__, iters, thres); smpi_sample_2(global, __FILE__, __LINE__); \ + smpi_sample_3(global, __FILE__, __LINE__)){\ + loop_body\ + }\ + loop_iter;\ + }\ + } + +#define SMPI_SAMPLE_LOCAL(loop_init, loop_end, loop_iter, iters, thres, loop_body) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 0, iters, thres, loop_body) +#define SMPI_SAMPLE_GLOBAL(loop_init, loop_end, loop_iter,iters, thres, loop_body) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 1, iters, thres, loop_body) #define SMPI_SAMPLE_DELAY(duration) for(smpi_execute(duration); 0; ) #define SMPI_SAMPLE_FLOPS(flops) for(smpi_execute_flops(flops); 0; ) @@ -1090,4 +1103,4 @@ std::vector> merge_private_blocks(const std::vectorsampling()){ + SampleLocation loc(global, file, line); + + XBT_DEBUG("sample exit %s", loc.c_str()); + auto sample = samples.find(loc); + if (sample == samples.end()) + xbt_die("Y U NO use SMPI_SAMPLE_* macros? 
Stop messing directly with smpi_sample_* functions!"); + LocalData& data = sample->second; + + if (smpi_process()->sampling()){//end of loop, but still sampling needed + double sleep = data.mean; + if (data.sum != 0.0){ //we finished benching, sum is unnecessary after the first injection, we can reset it. + sleep = data.sum; + data.sum = 0.0; + } + smpi_process()->set_sampling(0); + smpi_execute(sleep); + smpi_bench_begin(); + } + } +} + smpi_trace_call_location_t* smpi_trace_get_call_location() { return smpi_process()->call_location(); diff --git a/teshsuite/smpi/macro-sample/macro-sample.c b/teshsuite/smpi/macro-sample/macro-sample.c index 29c3fd5917..7309af15b9 100644 --- a/teshsuite/smpi/macro-sample/macro-sample.c +++ b/teshsuite/smpi/macro-sample/macro-sample.c @@ -32,9 +32,8 @@ int main(int argc, char *argv[]) MPI_Comm_size(MPI_COMM_WORLD, &n); MPI_Comm_rank(MPI_COMM_WORLD, &rank); double d = 2.0; - for (int i = 0; i < 5; i++) { /* I want no more than n + 1 benchs (thres < 0) */ - SMPI_SAMPLE_GLOBAL(n + 1, -1) { + SMPI_SAMPLE_GLOBAL(int i = 0, i < 5, i++, n + 1, -1, if (verbose) fprintf(stderr, "(%12.6f) [rank:%d]", MPI_Wtime(), rank); else @@ -42,14 +41,10 @@ int main(int argc, char *argv[]) fprintf(stderr, " Run the first computation. It's globally benched, " "and I want no more than %d benchmarks (thres<0)\n", n + 1); d = compute(2.0); - } - } + ) n = 0; - for (int i = 0; i < 5; i++) { - /* I want the standard error to go below 0.1 second. - * Two tests at least will be run (count is not > 0) */ - SMPI_SAMPLE_LOCAL(0, 0.1) { + SMPI_SAMPLE_LOCAL (int i = 0, i < 5, i++,0, 0.1, if (verbose || n < 2) { n++; if (verbose) @@ -61,8 +56,7 @@ int main(int argc, char *argv[]) "standard error to go below 0.1 second (count is not >0)\n", rank); } d = compute(d); - } - } + ) if (verbose) fprintf(stderr, "(%12.6f) [rank:%d] The result of the computation is: %f\n", MPI_Wtime(), rank, d); -- 2.20.1