XBT_PUBLIC void smpi_sample_1(int global, const char* file, int line, int iters, double threshold);
XBT_PUBLIC int smpi_sample_2(int global, const char* file, int line);
XBT_PUBLIC void smpi_sample_3(int global, const char* file, int line);
-
+XBT_PUBLIC void smpi_sample_exit(int global, const char* file, int line);
/**
* Need a public setter for SMPI copy_callback function, so users can define
* their own while still using default SIMIX_copy_callback for S4U copies.
/** Fortran binding + -fsecond-underscore **/
XBT_PUBLIC void smpi_trace_set_call_location__(const char* file, int* line);
-#define SMPI_SAMPLE_LOOP(global, iters, thres) \
- for (smpi_sample_1(global, __FILE__, __LINE__, iters, thres); smpi_sample_2(global, __FILE__, __LINE__); \
- smpi_sample_3(global, __FILE__, __LINE__))
-#define SMPI_SAMPLE_LOCAL(iters, thres) SMPI_SAMPLE_LOOP(0, iters, thres)
-#define SMPI_SAMPLE_GLOBAL(iters, thres) SMPI_SAMPLE_LOOP(1, iters, thres)
+/**
+ * Run a user loop whose body is benchmarked through the smpi_sample_* functions.
+ *
+ * Expands to a brace-enclosed block (so no trailing semicolon is needed and any
+ * variable declared in loop_init is scoped to that block):
+ *  - loop_init is executed once;
+ *  - before each iteration loop_end is evaluated; when it is false,
+ *    smpi_sample_exit() is called and the loop terminates;
+ *  - otherwise the body is placed inside a
+ *    for (smpi_sample_1(...); smpi_sample_2(...); smpi_sample_3(...)) loop,
+ *    after which loop_iter is executed.
+ *
+ * The body is taken as the trailing variadic argument so that it may contain
+ * top-level commas (e.g. "int a = 0, b = 1;"). Note that a break or continue
+ * written directly in the body applies to the inner smpi_sample_* for loop, not
+ * to the user loop described by loop_init / loop_end / loop_iter.
+ *
+ * @param loop_init initialisation statement of the user loop (without the ';')
+ * @param loop_end  continuation condition of the user loop
+ * @param loop_iter increment expression of the user loop
+ * @param global    forwarded as first argument to the smpi_sample_* functions
+ * @param iters     forwarded to smpi_sample_1()
+ * @param thres     forwarded to smpi_sample_1()
+ * @param ...       the statements forming the loop body
+ */
+#define SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, global, iters, thres, ...) \
+  {\
+    loop_init;\
+    for (;;) {\
+      if (!(loop_end)) {\
+        smpi_sample_exit(global, __FILE__, __LINE__);\
+        break;\
+      }\
+      for (smpi_sample_1(global, __FILE__, __LINE__, iters, thres); smpi_sample_2(global, __FILE__, __LINE__); \
+           smpi_sample_3(global, __FILE__, __LINE__)) {\
+        __VA_ARGS__\
+      }\
+      loop_iter;\
+    }\
+  }
+
+/** Same as SMPI_SAMPLE_LOOP with global set to 0. */
+#define SMPI_SAMPLE_LOCAL(loop_init, loop_end, loop_iter, iters, thres, ...) \
+  SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 0, iters, thres, __VA_ARGS__)
+/** Same as SMPI_SAMPLE_LOOP with global set to 1. */
+#define SMPI_SAMPLE_GLOBAL(loop_init, loop_end, loop_iter, iters, thres, ...) \
+  SMPI_SAMPLE_LOOP(loop_init, loop_end, loop_iter, 1, iters, thres, __VA_ARGS__)
/**
 * Both macros expand to a for statement whose init clause performs a single
 * call (smpi_execute() or smpi_execute_flops()) and whose condition is the
 * constant 0: the statement or block written after the macro is therefore
 * compiled but never executed.
 */
#define SMPI_SAMPLE_DELAY(duration) for(smpi_execute(duration); 0; )
#define SMPI_SAMPLE_FLOPS(flops) for(smpi_execute_flops(flops); 0; )
#endif
-#endif
+#endif
data.benching = false;
}
+/**
+ * End-of-loop hook invoked by SMPI_SAMPLE_LOOP once the user loop condition is false.
+ *
+ * If the calling process is not flagged as sampling, this is a no-op. Otherwise the
+ * sample record for (global, file, line) is looked up (xbt_die() is called when none
+ * exists), the sampling flag is cleared, the pending duration is handed to
+ * smpi_execute() and smpi_bench_begin() is called.
+ *
+ * @param global 0 when reached through SMPI_SAMPLE_LOCAL, 1 through SMPI_SAMPLE_GLOBAL
+ * @param file   source file of the macro invocation (__FILE__)
+ * @param line   source line of the macro invocation (__LINE__)
+ */
+void smpi_sample_exit(int global, const char *file, int line){
+  // Nothing to do when the loop ended while the process was not sampling.
+  if (!smpi_process()->sampling())
+    return;
+
+  SampleLocation loc(global, file, line);
+
+  XBT_DEBUG("sample exit %s", loc.c_str());
+  auto sample = samples.find(loc);
+  if (sample == samples.end())
+    xbt_die("Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
+  LocalData& data = sample->second;
+
+  // End of loop, but sampling is still needed: use the mean unless a sum is pending.
+  double sleep = data.mean;
+  if (data.sum != 0.0) { // we finished benching; sum is unnecessary after the first injection, so reset it
+    sleep = data.sum;
+    data.sum = 0.0;
+  }
+  smpi_process()->set_sampling(0);
+  smpi_execute(sleep);
+  smpi_bench_begin();
+}
+
smpi_trace_call_location_t* smpi_trace_get_call_location()
{
return smpi_process()->call_location();
MPI_Comm_size(MPI_COMM_WORLD, &n);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
double d = 2.0;
- for (int i = 0; i < 5; i++) {
/* I want no more than n + 1 benchs (thres < 0) */
- SMPI_SAMPLE_GLOBAL(n + 1, -1) {
+ SMPI_SAMPLE_GLOBAL(int i = 0, i < 5, i++, n + 1, -1,
if (verbose)
fprintf(stderr, "(%12.6f) [rank:%d]", MPI_Wtime(), rank);
else
fprintf(stderr, " Run the first computation. It's globally benched, "
"and I want no more than %d benchmarks (thres<0)\n", n + 1);
d = compute(2.0);
- }
- }
+ )
n = 0;
- for (int i = 0; i < 5; i++) {
- /* I want the standard error to go below 0.1 second.
- * Two tests at least will be run (count is not > 0) */
- SMPI_SAMPLE_LOCAL(0, 0.1) {
+ SMPI_SAMPLE_LOCAL (int i = 0, i < 5, i++,0, 0.1,
if (verbose || n < 2) {
n++;
if (verbose)
"standard error to go below 0.1 second (count is not >0)\n", rank);
}
d = compute(d);
- }
- }
+ )
if (verbose)
fprintf(stderr, "(%12.6f) [rank:%d] The result of the computation is: %f\n", MPI_Wtime(), rank, d);