From: degomme Date: Tue, 26 Feb 2019 10:58:12 +0000 (+0100) Subject: Yet again change the way SMPI_SAMPLE_MACRO works X-Git-Tag: v3_22~236 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/68948e0bf68283fd55c3697ba839a78f12424f36 Yet again change the way SMPI_SAMPLE_MACRO works There is no need anymore to have the body of the loop inside the macro. Kudos to Arnaud L for the ideas. now usage is just turning for(x;y;z) into SMPI_SAMPLE_GLOBAL(x,y,z,iters,thres) Time injection will be done only once for all the section, speeding up the simulation. TODO : iterator value will not be correct at the end of the loop for now. Not important if iterator is local to the loop, but may be annoying if not. --- diff --git a/examples/smpi/NAS/ep.c b/examples/smpi/NAS/ep.c index 75a35c339b..329b6157a7 100644 --- a/examples/smpi/NAS/ep.c +++ b/examples/smpi/NAS/ep.c @@ -135,7 +135,7 @@ int main(int argc, char **argv) { k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1; int stop = FALSE; - SMPI_SAMPLE_GLOBAL(k = 1, k <= np, k++, 0.25 * np, 0.03, + SMPI_SAMPLE_GLOBAL(k = 1, k <= np, k++, 0.25 * np, 0.03){ stop = FALSE; kk = k_offset + k ; t1 = s; @@ -181,7 +181,7 @@ int main(int argc, char **argv) { } } timer_stop(2); - ) + } TRACE_smpi_set_category ("finalize"); diff --git a/include/smpi/smpi.h b/include/smpi/smpi.h index 593e46fcfb..d3f2086336 100644 --- a/include/smpi/smpi.h +++ b/include/smpi/smpi.h @@ -953,9 +953,9 @@ XBT_ATTRIB_DEPRECATED_v324("Please use sg_host_get_consumed_energy(sg_host_self( XBT_PUBLIC unsigned long long smpi_rastro_resolution(); XBT_PUBLIC unsigned long long smpi_rastro_timestamp(); XBT_PUBLIC void smpi_sample_1(int global, const char* file, int line, int iters, double threshold); -XBT_PUBLIC int smpi_sample_2(int global, const char* file, int line); +XBT_PUBLIC int smpi_sample_2(int global, const char* file, int line, int iter_count); XBT_PUBLIC void smpi_sample_3(int global, const char* file, int line); -XBT_PUBLIC void smpi_sample_exit(int global, const char* file, int line); +XBT_PUBLIC int smpi_sample_exit(int global, const char* file, int line, int iter_count); /** * Need a public setter for SMPI copy_callback function, so users can define * their own while still using default SIMIX_copy_callback for S4U copies. @@ -973,25 +973,21 @@ XBT_PUBLIC void smpi_trace_set_call_location_(const char* file, int* line); /** Fortran binding + -fsecond-underscore **/ XBT_PUBLIC void smpi_trace_set_call_location__(const char* file, int* line); -#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres, loop_body) \ - {\ - loop_init;\ - for (;;){\ - if(!(loop_end)) {\ - smpi_sample_exit(global, __FILE__, __LINE__);\ - break;\ - }\ - for (smpi_sample_1(global, __FILE__, __LINE__, iters, thres); smpi_sample_2(global, __FILE__, __LINE__); \ - smpi_sample_3(global, __FILE__, __LINE__)){\ - loop_body\ - }\ - loop_iter;\ - }\ - } - -#define SMPI_SAMPLE_LOCAL(loop_init, loop_end, loop_iter, iters, thres, loop_body) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 0, iters, thres, loop_body) -#define SMPI_SAMPLE_GLOBAL(loop_init, loop_end, loop_iter,iters, thres, loop_body) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 1, iters, thres, loop_body) - +#define SMPI_ITER_NAME1(line) iter_count##line +#define SMPI_ITER_NAME(line) SMPI_ITER_NAME1(line) +#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres)\ + int SMPI_ITER_NAME(__LINE__)=0;\ + {loop_init;\ + while(loop_end){\ + SMPI_ITER_NAME(__LINE__)++;\ + loop_iter;\ + }} \ + for(loop_init; \ + loop_end ? (smpi_sample_1(global, __FILE__, __LINE__, iters, thres), (smpi_sample_2(global, __FILE__, __LINE__, SMPI_ITER_NAME(__LINE__)))) :\ + smpi_sample_exit(global, __FILE__, __LINE__, SMPI_ITER_NAME(__LINE__));\ + smpi_sample_3(global, __FILE__, __LINE__),loop_iter) +#define SMPI_SAMPLE_LOCAL(loop_init, loop_end, loop_iter, iters, thres) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 0, iters, thres) +#define SMPI_SAMPLE_GLOBAL(loop_init, loop_end, loop_iter,iters, thres) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 1, iters, thres) #define SMPI_SAMPLE_DELAY(duration) for(smpi_execute(duration); 0; ) #define SMPI_SAMPLE_FLOPS(flops) for(smpi_execute_flops(flops); 0; ) diff --git a/src/smpi/internals/smpi_bench.cpp b/src/smpi/internals/smpi_bench.cpp index aa676ec268..b987d8544c 100644 --- a/src/smpi/internals/smpi_bench.cpp +++ b/src/smpi/internals/smpi_bench.cpp @@ -373,12 +373,11 @@ void smpi_sample_1(int global, const char *file, int line, int iters, double thr } } -int smpi_sample_2(int global, const char *file, int line) +int smpi_sample_2(int global, const char *file, int line, int iter_count) { SampleLocation loc(global, file, line); - int res; - XBT_DEBUG("sample2 %s", loc.c_str()); + XBT_DEBUG("sample2 %s %d", loc.c_str(), iter_count); auto sample = samples.find(loc); if (sample == samples.end()) xbt_die("Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!"); @@ -389,31 +388,25 @@ int smpi_sample_2(int global, const char *file, int line) XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f; total:%f", data.count, data.iters, data.relstderr, data.threshold, data.mean, data.sum); smpi_bench_begin(); - res = 1; } else { // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just //ran one bench and need to bail out now that our job is done). Just sleep instead if (not data.need_more_benchs()){ XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderrset_sampling(0); - smpi_execute(sleep); + smpi_execute(data.mean*iter_count); smpi_bench_begin(); + return 0; } else { XBT_DEBUG("Skipping - Benchmark already performed - accumulating time"); xbt_os_threadtimer_start(smpi_process()->timer()); } - res = 0; // prepare to capture future, unrelated computations } - - return res; + return 1; } void smpi_sample_3(int global, const char *file, int line) @@ -448,7 +441,7 @@ void smpi_sample_3(int global, const char *file, int line) data.benching = false; } -void smpi_sample_exit(int global, const char *file, int line){ +int smpi_sample_exit(int global, const char *file, int line, int iter_count){ if (smpi_process()->sampling()){ SampleLocation loc(global, file, line); @@ -459,16 +452,12 @@ void smpi_sample_exit(int global, const char *file, int line){ LocalData& data = sample->second; if (smpi_process()->sampling()){//end of loop, but still sampling needed - double sleep = data.mean; - if (data.sum != 0.0){ //we finished benching, sum is unecessary after the first injection, we can reset it. - sleep = data.sum; - data.sum = 0.0; - } smpi_process()->set_sampling(0); - smpi_execute(sleep); + smpi_execute(data.mean*iter_count); smpi_bench_begin(); } } + return 0; } smpi_trace_call_location_t* smpi_trace_get_call_location() diff --git a/teshsuite/smpi/macro-sample/macro-sample.c b/teshsuite/smpi/macro-sample/macro-sample.c index 7309af15b9..5696f75f89 100644 --- a/teshsuite/smpi/macro-sample/macro-sample.c +++ b/teshsuite/smpi/macro-sample/macro-sample.c @@ -33,7 +33,7 @@ int main(int argc, char *argv[]) MPI_Comm_rank(MPI_COMM_WORLD, &rank); double d = 2.0; /* I want no more than n + 1 benchs (thres < 0) */ - SMPI_SAMPLE_GLOBAL(int i = 0, i < 5, i++, n + 1, -1, + SMPI_SAMPLE_GLOBAL(int i = 0, i < 5, i++, n + 1, -1){ if (verbose) fprintf(stderr, "(%12.6f) [rank:%d]", MPI_Wtime(), rank); else @@ -41,10 +41,10 @@ int main(int argc, char *argv[]) fprintf(stderr, " Run the first computation. It's globally benched, " "and I want no more than %d benchmarks (thres<0)\n", n + 1); d = compute(2.0); - ) + } n = 0; - SMPI_SAMPLE_LOCAL (int i = 0, i < 5, i++,0, 0.1, + SMPI_SAMPLE_LOCAL (int i = 0, i < 5, i++,0, 0.1){ if (verbose || n < 2) { n++; if (verbose) @@ -56,7 +56,7 @@ int main(int argc, char *argv[]) "standard error to go below 0.1 second (count is not >0)\n", rank); } d = compute(d); - ) + } if (verbose) fprintf(stderr, "(%12.6f) [rank:%d] The result of the computation is: %f\n", MPI_Wtime(), rank, d);