There is no longer any need to put the body of the loop inside the macro. Kudos to Arnaud L for the ideas.
Usage now simply consists of turning
for(x;y;z) into SMPI_SAMPLE_GLOBAL(x,y,z,iters,thres)
Time injection will be done only once for the whole section, speeding up the simulation.
TODO: the iterator value will not be correct at the end of the loop for now.
This does not matter if the iterator is local to the loop, but may be annoying otherwise.
k_offset = no_large_nodes*(np+1) + (node-no_large_nodes)*np -1;
int stop = FALSE;
- SMPI_SAMPLE_GLOBAL(k = 1, k <= np, k++, 0.25 * np, 0.03,
+ SMPI_SAMPLE_GLOBAL(k = 1, k <= np, k++, 0.25 * np, 0.03){
stop = FALSE;
kk = k_offset + k ;
t1 = s;
}
}
timer_stop(2);
- )
+ }
TRACE_smpi_set_category ("finalize");
XBT_PUBLIC unsigned long long smpi_rastro_resolution();
XBT_PUBLIC unsigned long long smpi_rastro_timestamp();
XBT_PUBLIC void smpi_sample_1(int global, const char* file, int line, int iters, double threshold);
-XBT_PUBLIC int smpi_sample_2(int global, const char* file, int line);
+XBT_PUBLIC int smpi_sample_2(int global, const char* file, int line, int iter_count);
XBT_PUBLIC void smpi_sample_3(int global, const char* file, int line);
-XBT_PUBLIC void smpi_sample_exit(int global, const char* file, int line);
+XBT_PUBLIC int smpi_sample_exit(int global, const char* file, int line, int iter_count);
/**
* Need a public setter for SMPI copy_callback function, so users can define
* their own while still using default SIMIX_copy_callback for S4U copies.
/** Fortran binding + -fsecond-underscore **/
XBT_PUBLIC void smpi_trace_set_call_location__(const char* file, int* line);
-#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres, loop_body) \
- {\
- loop_init;\
- for (;;){\
- if(!(loop_end)) {\
- smpi_sample_exit(global, __FILE__, __LINE__);\
- break;\
- }\
- for (smpi_sample_1(global, __FILE__, __LINE__, iters, thres); smpi_sample_2(global, __FILE__, __LINE__); \
- smpi_sample_3(global, __FILE__, __LINE__)){\
- loop_body\
- }\
- loop_iter;\
- }\
- }
-
-#define SMPI_SAMPLE_LOCAL(loop_init, loop_end, loop_iter, iters, thres, loop_body) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 0, iters, thres, loop_body)
-#define SMPI_SAMPLE_GLOBAL(loop_init, loop_end, loop_iter,iters, thres, loop_body) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 1, iters, thres, loop_body)
-
+#define SMPI_ITER_NAME1(line) iter_count##line
+#define SMPI_ITER_NAME(line) SMPI_ITER_NAME1(line)
+#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres)\
+ int SMPI_ITER_NAME(__LINE__)=0;\
+ {loop_init;\
+ while(loop_end){\
+ SMPI_ITER_NAME(__LINE__)++;\
+ loop_iter;\
+ }} \
+ for(loop_init; \
+ loop_end ? (smpi_sample_1(global, __FILE__, __LINE__, iters, thres), (smpi_sample_2(global, __FILE__, __LINE__, SMPI_ITER_NAME(__LINE__)))) :\
+ smpi_sample_exit(global, __FILE__, __LINE__, SMPI_ITER_NAME(__LINE__));\
+ smpi_sample_3(global, __FILE__, __LINE__),loop_iter)
+#define SMPI_SAMPLE_LOCAL(loop_init, loop_end, loop_iter, iters, thres) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 0, iters, thres)
+#define SMPI_SAMPLE_GLOBAL(loop_init, loop_end, loop_iter,iters, thres) SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 1, iters, thres)
#define SMPI_SAMPLE_DELAY(duration) for(smpi_execute(duration); 0; )
#define SMPI_SAMPLE_FLOPS(flops) for(smpi_execute_flops(flops); 0; )
}
}
-int smpi_sample_2(int global, const char *file, int line)
+int smpi_sample_2(int global, const char *file, int line, int iter_count)
{
SampleLocation loc(global, file, line);
- int res;
- XBT_DEBUG("sample2 %s", loc.c_str());
+ XBT_DEBUG("sample2 %s %d", loc.c_str(), iter_count);
auto sample = samples.find(loc);
if (sample == samples.end())
xbt_die("Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f; total:%f",
data.count, data.iters, data.relstderr, data.threshold, data.mean, data.sum);
smpi_bench_begin();
- res = 1;
} else {
// Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just
//ran one bench and need to bail out now that our job is done). Just sleep instead
if (not data.need_more_benchs()){
XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). "
- "Mean is %f %s",
- data.count, data.iters, data.relstderr, data.threshold, data.mean,
- (data.sum != 0.0 ? "Applying it count times because we finished benching count iterations": ""));
- double sleep = data.mean;
- if (data.sum != 0.0){ //we finished benching, sum is unecessary after the first injection, we can reset it.
- sleep = data.sum;
- data.sum = 0.0;
- }
+ "Mean is %f, will be injected %d times",
+ data.count, data.iters, data.relstderr, data.threshold, data.mean, iter_count);
+
+ //we ended benchmarking, let's inject all the time, now, and fast forward out of the loop.
smpi_process()->set_sampling(0);
- smpi_execute(sleep);
+ smpi_execute(data.mean*iter_count);
smpi_bench_begin();
+ return 0;
} else {
XBT_DEBUG("Skipping - Benchmark already performed - accumulating time");
xbt_os_threadtimer_start(smpi_process()->timer());
}
- res = 0; // prepare to capture future, unrelated computations
}
-
- return res;
+ return 1;
}
void smpi_sample_3(int global, const char *file, int line)
data.benching = false;
}
-void smpi_sample_exit(int global, const char *file, int line){
+int smpi_sample_exit(int global, const char *file, int line, int iter_count){
if (smpi_process()->sampling()){
SampleLocation loc(global, file, line);
LocalData& data = sample->second;
if (smpi_process()->sampling()){//end of loop, but still sampling needed
- double sleep = data.mean;
- if (data.sum != 0.0){ //we finished benching, sum is unecessary after the first injection, we can reset it.
- sleep = data.sum;
- data.sum = 0.0;
- }
smpi_process()->set_sampling(0);
- smpi_execute(sleep);
+ smpi_execute(data.mean*iter_count);
smpi_bench_begin();
}
}
+ return 0;
}
smpi_trace_call_location_t* smpi_trace_get_call_location()
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
double d = 2.0;
/* I want no more than n + 1 benchs (thres < 0) */
- SMPI_SAMPLE_GLOBAL(int i = 0, i < 5, i++, n + 1, -1,
+ SMPI_SAMPLE_GLOBAL(int i = 0, i < 5, i++, n + 1, -1){
if (verbose)
fprintf(stderr, "(%12.6f) [rank:%d]", MPI_Wtime(), rank);
else
fprintf(stderr, " Run the first computation. It's globally benched, "
"and I want no more than %d benchmarks (thres<0)\n", n + 1);
d = compute(2.0);
- )
+ }
n = 0;
- SMPI_SAMPLE_LOCAL (int i = 0, i < 5, i++,0, 0.1,
+ SMPI_SAMPLE_LOCAL (int i = 0, i < 5, i++,0, 0.1){
if (verbose || n < 2) {
n++;
if (verbose)
"standard error to go below 0.1 second (count is not >0)\n", rank);
}
d = compute(d);
- )
+ }
if (verbose)
fprintf(stderr, "(%12.6f) [rank:%d] The result of the computation is: %f\n", MPI_Wtime(), rank, d);