1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
13 #include "simgrid/sg_config.h"
19 #include <sys/types.h>
22 #include <math.h> // sqrt
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
28 "Logging specific to SMPI (benchmarking)");
30 /* Shared allocations are handled through shared memory segments.
31 * Associated data and metadata are used as follows:
34 * `allocs' dict ---- -.
35 * ---------- shared_data_t shared_metadata_t / | | |
36 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
37 * | ---------- | fd of <name> | | | size of mmap | --| | | |
38 * | | count (2) | |-- | data | \ | | |
39 * `----------------- | <name> | | ----------------- ---- |
40 * -------------------- | ^ |
42 * | | `allocs_metadata' dict |
43 * | | ---------------------- |
44 * | `-- | <addr of mmap #1> |<-'
45 * | .-- | <addr of mmap #2> |<-.
46 * | | ---------------------- |
52 * | shared_metadata_t / | |
53 * | ----------------- | | |
54 * | | size of mmap | --| | |
56 * ----------------- | | |
/* Size of the buffers that receive a "%p"-formatted address (see the
 * snprintf(loc, PTR_STRLEN, "%p", ...) calls); those strings are the keys of
 * `allocs_metadata'.
 * NOTE(review): presumably "0x" + two hex digits per pointer byte + NUL; the
 * output of "%p" is implementation-defined, so confirm this is large enough
 * on every supported platform. */
61 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
/* Module-wide dictionaries. Each one is created by the first function that
 * finds it NULL and all four are released by smpi_bench_destroy(). */
63 xbt_dict_t allocs = NULL; /* hashed allocation site -> shared_data_t; allocated on first use */
64 xbt_dict_t allocs_metadata = NULL; /* "%p" of a mapping -> shared_metadata_t; allocated on first use */
65 xbt_dict_t samples = NULL; /* sample location -> local_data_t; allocated on first use */
66 xbt_dict_t calls = NULL; /* "func:input" -> caller-supplied data; allocated on first use */
67 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* smpi_execute() ignores measured durations below this threshold. */
69 double smpi_cpu_threshold;
/* Factor applied by smpi_execute() to turn a measured duration into flops. */
70 double smpi_running_power;
/* Return the current size, in bytes, of the object referred to by `fd', as
 * reported by fstat(). Terminates through xbt_die() when fstat() fails. */
static size_t shm_size(int fd)
{
  struct stat info;
  int rc = fstat(fd, &info);

  if (rc < 0) {
    xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
  }
  return (size_t) info.st_size;
}
/* Map `size' bytes of the object behind `fd' as shared, readable and writable
 * memory, and record the mapping in `allocs_metadata' under its
 * "%p"-formatted address. The object is first grown with ftruncate() when
 * shm_size() reports fewer than `size' bytes. Growing or mapping failures
 * terminate through xbt_die().
 * NOTE(review): `data' is not used on any line shown here; presumably it is
 * stored in the new shared_metadata_t next to the size -- confirm. */
93 static void* shm_map(int fd, size_t size, shared_data_t* data) {
96 shared_metadata_t* meta;
/* Only ever grow the backing object, never shrink it. */
98 if(size > shm_size(fd)) {
99 if(ftruncate(fd, (off_t)size) < 0) {
100 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
104 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
105 if(mem == MAP_FAILED) {
106 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* The metadata dictionary is created lazily; xbt_free is its value destructor. */
108 if(!allocs_metadata) {
109 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
/* The textual address of the mapping is the lookup key used by smpi_shared_free(). */
111 snprintf(loc, PTR_STRLEN, "%p", mem);
112 meta = xbt_new(shared_metadata_t, 1);
115 xbt_dict_set(allocs_metadata, loc, meta, NULL);
116 XBT_DEBUG("MMAP %zu to %p", size, mem);
121 void smpi_bench_destroy(void)
123 xbt_dict_free(&allocs);
124 xbt_dict_free(&allocs_metadata);
125 xbt_dict_free(&samples);
126 xbt_dict_free(&calls);
129 XBT_PUBLIC(void) smpi_execute_flops_(double *flops);
130 void smpi_execute_flops_(double *flops)
132 smpi_execute_flops(*flops);
135 XBT_PUBLIC(void) smpi_execute_(double *duration);
136 void smpi_execute_(double *duration)
138 smpi_execute(*duration);
/* Simulate `flops' floating-point operations on the host of the calling
 * process: start an execution named "computation" on SIMIX_host_self() and
 * block until it completes.
 * NOTE(review): the declarations of `host' and `action' are not visible
 * here, and the simcall_set_category() call is presumably guarded by a
 * tracing #ifdef -- confirm. */
141 void smpi_execute_flops(double flops) {
144 host = SIMIX_host_self();
145 XBT_DEBUG("Handle real computation time: %g flops", flops);
146 action = simcall_host_execute("computation", host, flops, 1);
/* Attach the execution to the tracing category used internally by SMPI. */
148 simcall_set_category (action, TRACE_internal_smpi_get_category());
150 simcall_host_execution_wait(action);
/* Account for `duration' seconds of computation measured on the real machine.
 * When the duration reaches smpi_cpu_threshold it is converted to flops
 * (duration * smpi_running_power) and replayed with smpi_execute_flops(),
 * bracketed by TRACE_smpi_computing_in()/TRACE_smpi_computing_out() for the
 * calling rank. The last debug message is the one for durations below the
 * threshold.
 * NOTE(review): the `else' separating the two cases and any tracing #ifdef
 * are not visible here -- confirm. */
153 void smpi_execute(double duration)
155 if (duration >= smpi_cpu_threshold) {
156 XBT_DEBUG("Sleep for %g to handle real computation time", duration);
157 double flops = duration * smpi_running_power;
159 int rank = smpi_process_index();
/* Zero-initialized record describing this computation for the tracing layer. */
160 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
161 extra->type=TRACING_COMPUTING;
162 extra->comp_size=flops;
163 TRACE_smpi_computing_in(rank, extra);
165 smpi_execute_flops(flops);
168 TRACE_smpi_computing_out(rank);
172 XBT_DEBUG("Real computation took %g while option smpi/cpu_threshold is set to %g => ignore it",
173 duration, smpi_cpu_threshold);
177 void smpi_bench_begin(void)
179 xbt_os_threadtimer_start(smpi_process_timer());
180 smpi_current_rank = smpi_process_index();
183 void smpi_bench_end(void)
185 xbt_os_timer_t timer = smpi_process_timer();
186 xbt_os_threadtimer_stop(timer);
187 if (smpi_process_get_sampling()) {
188 XBT_CRITICAL("Cannot do recursive benchmarks.");
189 XBT_CRITICAL("Are you trying to make a call to MPI within a SMPI_SAMPLE_ block?");
190 xbt_backtrace_display_current();
191 xbt_die("Aborting.");
193 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulated sleep of `secs' seconds, implemented as a computation: the
 * duration is converted into the amount of flops the current host executes
 * in that time (secs * host speed), and an execution named "computation" of
 * that size is started on the current host and waited for.
 * NOTE(review): the declaration of `action', the return statement and any
 * bench begin/end bracketing are not visible here -- confirm. */
196 unsigned int smpi_sleep(unsigned int secs)
202 double flops = (double) secs*simcall_host_get_speed(SIMIX_host_self());
203 XBT_DEBUG("Sleep for: %f flops", flops);
204 action = simcall_host_execute("computation", SIMIX_host_self(), flops, 1);
/* Attach the execution to the tracing category used internally by SMPI. */
206 simcall_set_category (action, TRACE_internal_smpi_get_category());
208 simcall_host_execution_wait(action);
/* Fill `tv' from the simulated clock (SIMIX_get_clock()) instead of the wall
 * clock: tv_sec receives the whole seconds and tv_usec the fractional part
 * scaled to microseconds.
 * NOTE(review): the two tv_usec assignments differ only in the cast type;
 * presumably exactly one is compiled, selected by a preprocessor conditional
 * that is not visible here -- confirm. The return value is not visible
 * either. */
214 int smpi_gettimeofday(struct timeval *tv)
218 now = SIMIX_get_clock();
220 tv->tv_sec = (time_t)now;
222 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
224 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Precision value defined elsewhere; its inverse is the timestamp resolution. */
231 extern double sg_maxmin_precision;
/* Return the number of timestamp units per simulated second, computed as
 * 1/sg_maxmin_precision truncated to an integer. */
232 unsigned long long smpi_rastro_resolution (void)
235 double resolution = (1/sg_maxmin_precision);
237 return (unsigned long long)resolution;
/* Return the simulated clock as an integer count of 1/resolution units,
 * where resolution is smpi_rastro_resolution(). Whole seconds and the
 * fractional part are scaled separately and then added. */
240 unsigned long long smpi_rastro_timestamp (void)
243 double now = SIMIX_get_clock();
/* Whole seconds of the simulated clock. */
245 unsigned long long sec = (unsigned long long)now;
/* Fractional part of the clock, scaled to the resolution and truncated. */
246 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
248 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
251 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Per-location statistics of a sampled block; records of this shape are the
 * values stored in the `samples' dictionary. */
253 double threshold; /* maximal relative stderr requested; only checked when positive */
254 double relstderr; /* relative standard error observed so far (recomputed by smpi_sample_3) */
255 double mean; /* mean of benched times; smpi_sample_2 replays it with smpi_execute() when not benching */
256 double sum; /* sum of benched times (to compute the mean and stderr) */
257 double sum_pow2; /* sum of the squares of the benched times (to compute the stderr) */
258 int iters; /* amount of requested iterations */
259 int count; /* amount of iterations done so far */
260 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Build the `samples' key of a sampled block. A global sample is identified
 * by "file:line" only, so all processes share one record; otherwise the
 * index of the calling process is appended ("file:line:index") and each
 * process gets its own record. The string is produced by bprintf().
 * NOTE(review): presumably heap-allocated and owned by the caller -- confirm. */
static char *sample_location(int global, const char *file, int line)
{
  char *key;

  if (!global) {
    key = bprintf("%s:%d:%d", file, line, smpi_process_index());
  } else {
    key = bprintf("%s:%d", file, line);
  }
  return key;
}
/* Decide whether a sampled block has been benchmarked enough. The starting
 * answer is "count has reached iters"; when a positive threshold was
 * requested, the answer is additionally forced to 0 while the observed
 * relative stderr is above that threshold. The decision is logged.
 * NOTE(review): the condition guarding the first `res = 0' ("not enough
 * data") and the final return are not visible here -- presumably a minimum
 * sample count and `return res' -- confirm. */
270 static int sample_enough_benchs(local_data_t *data) {
271 int res = data->count >= data->iters;
272 if (data->threshold>0.0) {
274 res = 0; // not enough data
275 if (data->relstderr > data->threshold)
276 res = 0; // stderr too high yet
278 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
279 (res?"enough benchs":"need more data"),
280 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* First step of a sampled block (the assertion messages in smpi_sample_2/3
 * name the SMPI_SAMPLE_* macros as the intended callers). It closes the
 * benchmark of the preceding computation, flags the process as sampling,
 * and finds or creates the statistics record for this location.
 *   global, file, line: identify the block (see sample_location()).
 *   iters:     requested number of benchmark iterations.
 *   threshold: requested maximal relative stderr.
 * At least one of `iters' and `threshold' must be positive when the record
 * is created.
 * NOTE(review): the branch structure (record missing vs. already known) and
 * the remaining field initializations are not visible here -- confirm. */
284 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
286 char *loc = sample_location(global, file, line);
289 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
290 smpi_process_set_sampling(1);
/* The records stored in `samples' are released with free(). */
293 samples = xbt_dict_new_homogeneous(free);
295 data = xbt_dict_get_or_null(samples, loc);
297 xbt_assert(threshold>0 || iters>0,
298 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
299 data = (local_data_t *) xbt_new(local_data_t, 1);
302 data->sum_pow2 = 0.0;
304 data->threshold = threshold;
305 data->benching = 1; // If we have no data, we need at least one
307 xbt_dict_set(samples, loc, data, NULL);
308 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
/* A known location must be re-entered with the settings it was created with. */
310 if (data->iters != iters || data->threshold != threshold) {
311 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
312 loc, data->iters, data->threshold, iters,threshold);
316 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
317 data->benching = !sample_enough_benchs(data);
318 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Second step of a sampled block: decide whether the body must be run (and
 * timed) once more. When the record says benchmarking is still needed, only
 * a debug message is visible here. Otherwise the recorded mean is replayed
 * with smpi_execute(), the sampling flag of the process is cleared and the
 * result is set to 0.
 * `samples' must already exist, and xbt_dict_get() is used for the lookup,
 * so the location must already have a record.
 * NOTE(review): the non-zero result of the benchmarking branch, the restart
 * of the timer and the return statement are not visible here -- confirm. */
323 int smpi_sample_2(int global, const char *file, int line)
325 char *loc = sample_location(global, file, line);
329 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
330 data = xbt_dict_get(samples, loc);
331 XBT_DEBUG("sample2 %s",loc);
334 if (data->benching==1) {
335 // we need to run a new bench
336 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
337 data->count, data->iters, data->relstderr, data->threshold, data->mean);
340 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
341 // Just sleep instead
342 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
343 data->count, data->iters, data->relstderr, data->threshold, data->mean);
344 smpi_execute(data->mean);
345 smpi_process_set_sampling(0);
346 res = 0; // prepare to capture future, unrelated computations
/* Third step of a sampled block, run after the body: stop the process
 * timer, fold the elapsed time into the record and recompute the mean and
 * the relative standard error of the mean.
 * `samples' must already exist, and xbt_dict_get() is used for the lookup,
 * so the location must already have a record.
 * NOTE(review): what happens when data->benching is 0, the updates of
 * data->count and data->sum, and the final reset of data->benching are not
 * visible here -- confirm. */
353 void smpi_sample_3(int global, const char *file, int line)
355 char *loc = sample_location(global, file, line);
358 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
359 data = xbt_dict_get(samples, loc);
360 XBT_DEBUG("sample3 %s",loc);
363 if (data->benching==0) {
367 // ok, benchmarking this loop is over
368 xbt_os_threadtimer_stop(smpi_process_timer());
373 sample = xbt_os_timer_elapsed(smpi_process_timer());
375 data->sum_pow2 += sample * sample;
376 n = (double)data->count;
377 data->mean = data->sum / n;
/* relstderr = sqrt((E[x^2] - mean^2) / n) / mean.
 * NOTE(review): a zero mean makes this division non-finite, and rounding can
 * make the sqrt() argument slightly negative -- confirm this is acceptable. */
378 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
379 if (!sample_enough_benchs(data)) {
380 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
382 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
383 data->mean, data->relstderr, sample);
385 // That's enough for now, prevent sample_2 to run the same code over and over
/* Value destructor of the `allocs' dictionary (it is the callback passed to
 * xbt_dict_new_homogeneous() in smpi_shared_malloc()); `p' is the stored
 * shared_data_t.
 * NOTE(review): the statements that release the record are not visible
 * here. */
390 static void smpi_shared_alloc_free(void *p)
392 shared_data_t *data = p;
/* Replace `loc' by a short, fixed-length name suitable for shm_open():
 * a leading '/' followed by 28 characters encoding the 40 hexadecimal digits
 * of the digest of the original string, NUL-terminated (30 bytes in all).
 *
 * `loc' must be heap-allocated: it is resized with xbt_realloc() and the
 * (possibly moved) buffer is returned; the caller keeps ownership.
 *
 * Each group of 6 hex digits (24 bits) becomes 4 symbols of 6 bits each,
 * taken from the URL-safe base64 alphabet. The last group only holds 4
 * digits, so its first symbol is always 'A'. Every digest bit ends up in
 * exactly one symbol, so distinct digests always yield distinct names.
 *
 * Fixes over the previous version:
 *  - the alphabet had 'Z' in place of 'W' and 'z' in place of 'w', so two
 *    different 6-bit values were written as the same character;
 *  - the shift was `18 - 3 * j', which extracted overlapping windows and
 *    never looked at the low 9 bits of each group.
 * Both defects threw away part of the digest and made name collisions more
 * likely than necessary.
 *
 * NOTE(review): the declarations, the digest call (xbt_sha), the leading
 * '/', the terminator and the return were not visible in the reviewed
 * listing and are reconstructed here -- confirm against the full file. */
static char *smpi_shared_alloc_hash(char *loc)
{
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
  char hash[42] = { 0 };  /* 40 hex digits + padding read by the last group */
  char s[7];
  unsigned long val;
  int i, j;

  xbt_sha(loc, hash);
  hash[41] = '\0';
  s[6] = '\0';
  loc = xbt_realloc(loc, 30);
  loc[0] = '/';
  for (i = 0; i < 40; i += 6) {
    /* Parse the next (up to) 6 hex digits as one 24-bit value. */
    memcpy(s, hash + i, 6);
    val = strtoul(s, NULL, 16);
    /* Emit it as four non-overlapping 6-bit symbols, most significant first. */
    for (j = 0; j < 4; j++) {
      unsigned char x = (val >> (18 - 6 * j)) & 0x3f;
      loc[1 + 4 * i / 6 + j] = alphabet[x];
    }
  }
  loc[29] = '\0';
  return loc;
}
/* Allocate `size' bytes for the allocation site `file':`line'.
 * When the "smpi/use_shared_malloc" option is set, the site (prefixed with
 * the pid) is hashed into a shared-memory name. On the first request for
 * that name a segment is created with shm_open(O_CREAT | O_EXCL), mapped
 * with shm_map(), unlinked right away and recorded in `allocs'. Later
 * requests for the same name map the already recorded descriptor again.
 * When the option is not set, plain xbt_malloc() is used.
 * A failing shm_open() is fatal (xbt_die); a failing shm_unlink() only
 * produces a warning.
 * NOTE(review): the branch structure, the initialization of the new
 * shared_data_t, the reference counting and the return statement are not
 * visible here -- confirm. */
422 void *smpi_shared_malloc(size_t size, const char *file, int line)
425 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
426 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
429 loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
432 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
434 data = xbt_dict_get_or_null(allocs, loc);
// O_EXCL: creation fails if a segment with this name already exists.
436 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL,
437 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
441 xbt_die("Please cleanup /dev/shm/%s", loc);
443 xbt_die("An unhandled error occured while opening %s. shm_open: %s", loc, strerror(errno));
446 data = xbt_new(shared_data_t, 1);
450 mem = shm_map(fd, size, data);
// The name is removed immediately; the segment stays reachable through fd.
451 if (shm_unlink(loc) < 0) {
452 XBT_WARN("Could not early unlink %s. shm_unlink: %s", loc, strerror(errno));
454 xbt_dict_set(allocs, loc, data, NULL);
455 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
// Known site: map the recorded descriptor once more.
458 mem = shm_map(data->fd, size, data);
461 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
463 mem = xbt_malloc(size);
464 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
/* Release a block obtained from smpi_shared_malloc().
 * When the "smpi/use_shared_malloc" option is set, the mapping is looked up
 * in `allocs_metadata' by its "%p"-formatted address and unmapped with
 * munmap() using the recorded size. Once the count of the owning
 * shared_data_t is <= 0, its entry is removed from `allocs'. Every failed
 * precondition (no dictionary, unknown pointer, missing link to the shared
 * data, failing munmap) only produces a warning.
 * When the option is not set, only a "Classic free" debug message is
 * visible here.
 * NOTE(review): the early returns, the retrieval of `data' from `meta', the
 * decrement of data->count and the actual free of the classic path are not
 * visible here -- confirm. */
469 void smpi_shared_free(void *ptr)
471 char loc[PTR_STRLEN];
472 shared_metadata_t* meta;
474 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
477 XBT_WARN("Cannot free: nothing was allocated");
480 if(!allocs_metadata) {
481 XBT_WARN("Cannot free: no metadata was allocated");
/* Same key format as the one used by shm_map() when registering the mapping. */
483 snprintf(loc, PTR_STRLEN, "%p", ptr);
484 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
486 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
491 XBT_WARN("Cannot free: something is broken in the metadata link");
494 if(munmap(ptr, meta->size) < 0) {
495 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
498 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
/* Last user gone: drop the entry from `allocs'. */
499 if (data->count <= 0) {
501 xbt_dict_remove(allocs, data->loc);
502 XBT_DEBUG("Shared free - with removal - of %p", ptr);
505 XBT_DEBUG("Classic free of %p", ptr);
/* Tell whether a value has been recorded for the pair (`func', `input') in
 * the `calls' dictionary; the lookup key is "func:input". The dictionary is
 * created on first use, without a value destructor.
 * NOTE(review): the exception handling around xbt_dict_get(), the release of
 * `loc' and the return statement are not visible here; presumably a
 * not_found_error means "unknown" and any other exception is propagated --
 * confirm. */
511 int smpi_shared_known_call(const char* func, const char* input)
513 char* loc = bprintf("%s:%s", func, input);
518 calls = xbt_dict_new_homogeneous(NULL);
521 xbt_dict_get(calls, loc); /* Succeed or throw */
528 if (ex.category != not_found_error)
/* Fetch the value recorded for the pair (`func', `input') in the `calls'
 * dictionary; the lookup key is "func:input". The dictionary is created on
 * first use, without a value destructor. The lookup uses xbt_dict_get(), the
 * same call smpi_shared_known_call() documents as "Succeed or throw".
 * NOTE(review): the release of `loc' and the return statement are not
 * visible here -- confirm. */
535 void* smpi_shared_get_call(const char* func, const char* input) {
536 char* loc = bprintf("%s:%s", func, input);
540 calls = xbt_dict_new_homogeneous(NULL);
542 data = xbt_dict_get(calls, loc);
547 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
548 char* loc = bprintf("%s:%s", func, input);
551 calls = xbt_dict_new_homogeneous(NULL);
553 xbt_dict_set(calls, loc, data, NULL);