1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
13 #include "simgrid/sg_config.h"
19 #include <sys/types.h>
22 #include <math.h> // sqrt
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
28 "Logging specific to SMPI (benchmarking)");
30 /* Shared allocations are handled through shared memory segments.
31 * Associated data and metadata are used as follows:
34 * `allocs' dict ---- -.
35 * ---------- shared_data_t shared_metadata_t / | | |
36 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
37 * | ---------- | fd of <name> | | | size of mmap | --| | | |
38 * | | count (2) | |-- | data | \ | | |
39 * `----------------- | <name> | | ----------------- ---- |
40 * -------------------- | ^ |
42 * | | `allocs_metadata' dict |
43 * | | ---------------------- |
44 * | `-- | <addr of mmap #1> |<-'
45 * | .-- | <addr of mmap #2> |<-.
46 * | | ---------------------- |
52 * | shared_metadata_t / | |
53 * | ----------------- | | |
54 * | | size of mmap | --| | |
56 * ----------------- | | |
/* Size of the character buffers that receive a pointer printed with "%p"
 * (see the snprintf(loc, PTR_STRLEN, "%p", ...) calls in this file):
 * 2 characters + 2 characters per byte of a void* + 1 character.
 * Presumably "0x" + two hex digits per byte + the terminating NUL; the exact
 * "%p" output format is implementation-defined -- TODO confirm it always fits. */
61 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
/* File-level state. The four dictionaries start out NULL, are created by the
 * code that first needs them, and are all released by smpi_bench_destroy(). */
/* Keyed by the hashed allocation-site name; values are shared_data_t records
 * stored by smpi_shared_malloc(). */
63 xbt_dict_t allocs = NULL; /* Allocated on first use */
/* Keyed by the "%p" text of a mapped address; values are shared_metadata_t
 * records stored by shm_map() and looked up by smpi_shared_free(). */
64 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
/* Keyed by the string built by sample_location(); values are local_data_t
 * records stored by smpi_sample_1(). */
65 xbt_dict_t samples = NULL; /* Allocated on first use */
/* Keyed by "<func>:<input>" strings (see the smpi_shared_*_call functions). */
66 xbt_dict_t calls = NULL; /* Allocated on first use */
/* Thread-local (__thread); assigned from smpi_process_index() in
 * smpi_bench_begin(). */
67 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* Return the size of the object referred to by file descriptor `fd', taken
 * from the st_size field filled in by fstat().
 * A failing fstat() is reported through xbt_die() together with the errno
 * message (xbt_die() is presumably fatal -- defined elsewhere). */
80 static size_t shm_size(int fd) {
83 if(fstat(fd, &st) < 0) {
84 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
86 return (size_t)st.st_size;
/* Map `size' bytes of the object behind `fd' (from offset 0, readable,
 * writable, MAP_SHARED) and register the mapping in `allocs_metadata'.
 *
 * - If the object is currently smaller than `size' (per shm_size()), it is
 *   first grown with ftruncate().
 * - The new metadata record is stored under the key obtained by printing the
 *   mapped address with "%p".
 * - Failures of ftruncate() and mmap() are reported through xbt_die().
 *
 * NOTE(review): the local declarations, the initialisation of the fields of
 * `meta', the use made of `data', and the return statement are not visible in
 * this excerpt; callers assign the result to their `mem' pointer. */
90 static void* shm_map(int fd, size_t size, shared_data_t* data) {
93 shared_metadata_t* meta;
/* Make sure the backing object is large enough before mapping it */
95 if(size > shm_size(fd)) {
96 if(ftruncate(fd, (off_t)size) < 0) {
97 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
101 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
102 if(mem == MAP_FAILED) {
103 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* Create the metadata dictionary on first use; xbt_free is handed to the
 * constructor (presumably as the destructor applied to stored values) */
105 if(!allocs_metadata) {
106 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
/* The textual form of the mapped address is the dictionary key */
108 snprintf(loc, PTR_STRLEN, "%p", mem);
109 meta = xbt_new(shared_metadata_t, 1);
112 xbt_dict_set(allocs_metadata, loc, meta, NULL);
113 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Release the four file-level dictionaries (allocs, allocs_metadata, samples
 * and calls) by passing the address of each one to xbt_dict_free(). */
118 void smpi_bench_destroy(void)
120 xbt_dict_free(&allocs);
121 xbt_dict_free(&allocs_metadata);
122 xbt_dict_free(&samples);
123 xbt_dict_free(&calls);
/* Run a simulated computation of `flops' on the calling host.
 *
 * An execution named "computation" is created on the host returned by
 * SIMIX_host_self() via simcall_host_execute(), tagged with the category
 * returned by TRACE_internal_smpi_get_category(), and then handed to
 * simcall_host_execution_wait().
 *
 * NOTE(review): the meaning of the last argument (1) of simcall_host_execute()
 * is not established by this file -- presumably a priority; confirm against
 * the simcall API. The declarations of `host' and `action' are not visible in
 * this excerpt. */
126 void smpi_execute_flops(double flops) {
129 host = SIMIX_host_self();
130 XBT_DEBUG("Handle real computation time: %f flops", flops);
131 action = simcall_host_execute("computation", host, flops, 1);
133 simcall_set_category (action, TRACE_internal_smpi_get_category());
135 simcall_host_execution_wait(action);
/* Account for a measured computation time `duration' in the simulation.
 *
 * When `duration' reaches the "smpi/cpu_threshold" configuration value, it is
 * converted to an amount of flops by multiplying it with "smpi/running_power"
 * and executed through smpi_execute_flops(), bracketed by
 * TRACE_smpi_computing_in()/TRACE_smpi_computing_out() for the current rank.
 * The final debug message ("=> ignore it") belongs to the case where the
 * duration is below the threshold.
 *
 * NOTE(review): the `else' line and any conditional-compilation lines around
 * the tracing calls are not visible in this excerpt. */
138 static void smpi_execute(double duration)
140 /* FIXME: a global variable would be less expensive to consult than a call to sg_cfg_get_double() right on the critical path */
141 if (duration >= sg_cfg_get_double("smpi/cpu_threshold")) {
142 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
143 double flops = duration *
144 sg_cfg_get_double("smpi/running_power");
/* Describe the computation (type and size in flops) for the tracing layer */
146 int rank = smpi_process_index();
147 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
148 extra->type=TRACING_COMPUTING;
149 extra->comp_size=flops;
150 TRACE_smpi_computing_in(rank, extra);
152 smpi_execute_flops(flops);
155 TRACE_smpi_computing_out(rank);
159 XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
160 duration, sg_cfg_get_double("smpi/cpu_threshold"));
/* Start measuring: (re)starts the timer returned by smpi_process_timer() with
 * xbt_os_threadtimer_start() and records smpi_process_index() in the
 * thread-local smpi_current_rank. Paired with smpi_bench_end(). */
164 void smpi_bench_begin(void)
166 xbt_os_threadtimer_start(smpi_process_timer());
167 smpi_current_rank = smpi_process_index();
/* Stop measuring: stops the timer returned by smpi_process_timer() and passes
 * the elapsed time it reports to smpi_execute(), which decides whether and how
 * to inject it into the simulation. Paired with smpi_bench_begin(). */
170 void smpi_bench_end(void)
172 xbt_os_timer_t timer = smpi_process_timer();
174 xbt_os_threadtimer_stop(timer);
175 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulated replacement for a sleep of `secs' seconds.
 *
 * The duration is turned into an amount of flops by multiplying it with the
 * speed of the current host (simcall_host_get_speed()); an execution named
 * "computation" of that size is then created on the current host, tagged with
 * the SMPI tracing category, and waited for.
 *
 * NOTE(review): the declaration of `action' and the returned value are not
 * visible in this excerpt. */
178 unsigned int smpi_sleep(unsigned int secs)
184 double flops = (double) secs*simcall_host_get_speed(SIMIX_host_self());
185 XBT_DEBUG("Sleep for: %f flops", flops);
186 action = simcall_host_execute("computation", SIMIX_host_self(), flops, 1);
188 simcall_set_category (action, TRACE_internal_smpi_get_category());
190 simcall_host_execution_wait(action);
/* Fill `tv' from the value returned by SIMIX_get_clock().
 *
 * tv_sec receives the integral part of the clock value and tv_usec the
 * remaining fraction multiplied by 1e6.
 *
 * NOTE(review): two assignments to tv_usec appear below, differing only in the
 * cast type (useconds_t vs suseconds_t); presumably they are alternatives
 * selected by a preprocessor conditional that is not visible in this excerpt.
 * The declaration of `now' and the returned value are not visible either. */
196 int smpi_gettimeofday(struct timeval *tv)
200 now = SIMIX_get_clock();
202 tv->tv_sec = (time_t)now;
204 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
206 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Defined in another translation unit. */
213 extern double sg_maxmin_precision;
/* Return the inverse of sg_maxmin_precision, truncated to an unsigned long
 * long. smpi_rastro_timestamp() uses this value as the number of timestamp
 * units per unit of clock time. */
214 unsigned long long smpi_rastro_resolution (void)
217 double resolution = (1/sg_maxmin_precision);
219 return (unsigned long long)resolution;
/* Return the value of SIMIX_get_clock() expressed as an integer number of
 * units of 1/smpi_rastro_resolution(): the integral part of the clock times
 * the resolution, plus the fractional part times the resolution (truncated
 * when stored in `pre'). */
222 unsigned long long smpi_rastro_timestamp (void)
225 double now = SIMIX_get_clock();
227 unsigned long long sec = (unsigned long long)now;
228 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
230 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
233 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Fields of the per-location sampling record that the smpi_sample_* functions
 * below access through local_data_t pointers.
 * NOTE(review): the opening and closing lines of the type definition are not
 * visible in this excerpt. */
235 int iters; /* amount of requested iterations */
236 int count; /* amount of iterations done so far */
237 double threshold; /* maximal stderr requested (if positive) */
238 double relstderr; /* observed stderr so far */
239 double mean; /* mean of benched times, to be used if the block is disabled */
240 double sum; /* sum of benched times (to compute the mean and stderr) */
241 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
242 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Counter incremented by smpi_sample_1() and decremented by smpi_sample_2()
 * on the path that ends sampling; non-zero while a sampled block is active. */
245 int smpi_sample_is_running = 0;
/* Build the dictionary key identifying a sampled block: either "<file>:<line>"
 * or "<file>:<line>:<process index>", formatted with bprintf().
 * NOTE(review): the test on `global' choosing between the two forms is not
 * visible in this excerpt; presumably the shorter, process-independent key is
 * used when `global' is non-zero. */
247 static char *sample_location(int global, const char *file, int line) {
249 return bprintf("%s:%d", file, line);
251 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Decide whether the sampled block described by `data' has been benchmarked
 * enough.
 *
 * `res' starts as (count >= iters). When a positive threshold is set, `res' is
 * cleared as long as the observed relative standard error exceeds that
 * threshold; a second check commented "not enough data" clears it too (its
 * condition is not visible in this excerpt). The verdict and the current
 * statistics are logged at debug level.
 *
 * NOTE(review): the return statement is not visible here; callers use the
 * result as a boolean (e.g. `!sample_enough_benchs(data)'). */
254 static int sample_enough_benchs(local_data_t *data) {
255 int res = data->count >= data->iters;
256 if (data->threshold>0.0) {
258 res = 0; // not enough data
259 if (data->relstderr > data->threshold)
260 res = 0; // stderr too high yet
262 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
263 (res?"enough benchs":"need more data"),
264 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* Entry of a sampled block located at `file':`line'.
 *
 * global:    forwarded to sample_location() to build the dictionary key.
 * iters:     requested number of benchmark iterations.
 * threshold: requested maximal relative standard error.
 *
 * The time measured since the last smpi_bench_begin() is first accounted for
 * with smpi_bench_end(), and smpi_sample_is_running is incremented. The record
 * for this location is then looked up in `samples':
 *  - no record yet: at least one of `iters' and `threshold' must be positive
 *    (asserted); a fresh record is allocated, initialised with benching = 1,
 *    and stored under the location key;
 *  - existing record: a mismatch between the stored and the requested
 *    iters/threshold is reported with XBT_ERROR, and `benching' is recomputed
 *    as the negation of sample_enough_benchs().
 *
 * NOTE(review): the branch structure (if/else lines), the remaining field
 * initialisations and what happens to `loc' afterwards are not visible in
 * this excerpt. */
268 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
270 char *loc = sample_location(global, file, line);
273 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
274 smpi_sample_is_running++;
/* `samples' is created with free() as its constructor argument (presumably the
 * destructor applied to stored records) */
277 samples = xbt_dict_new_homogeneous(free);
279 data = xbt_dict_get_or_null(samples, loc);
281 xbt_assert(threshold>0 || iters>0,
282 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
283 data = (local_data_t *) xbt_new(local_data_t, 1);
286 data->sum_pow2 = 0.0;
288 data->threshold = threshold;
289 data->benching = 1; // If we have no data, we need at least one
291 xbt_dict_set(samples, loc, data, NULL);
292 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
294 if (data->iters != iters || data->threshold != threshold) {
295 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
296 loc, data->iters, data->threshold, iters,threshold);
300 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
301 data->benching = !sample_enough_benchs(data);
302 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Loop test of a sampled block located at `file':`line'.
 *
 * The record stored by smpi_sample_1() is fetched from `samples' (asserting
 * that the dictionary exists). Two cases follow:
 *  - benching == 1: one more real benchmark run is needed; the current
 *    statistics are logged;
 *  - otherwise: no (further) benchmark is run; the stored mean is injected
 *    into the simulation with smpi_execute(), smpi_sample_is_running is
 *    decremented and measurement of the following code is restarted with
 *    smpi_bench_begin().
 *
 * NOTE(review): the `else' line, the handling of the timer in the benchmark
 * case, the release of `loc' and the returned values are not visible in this
 * excerpt; presumably non-zero is returned when the block must be executed. */
307 int smpi_sample_2(int global, const char *file, int line)
309 char *loc = sample_location(global, file, line);
312 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
313 data = xbt_dict_get(samples, loc);
314 XBT_DEBUG("sample2 %s",loc);
317 if (data->benching==1) {
318 // we need to run a new bench
319 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
320 data->count, data->iters, data->relstderr, data->threshold, data->mean);
324 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
325 // Just sleep instead
326 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
327 data->count, data->iters, data->relstderr, data->threshold, data->mean);
328 smpi_execute(data->mean);
330 smpi_sample_is_running--;
331 smpi_bench_begin(); // prepare to capture future, unrelated computations
/* End of one iteration of a sampled block located at `file':`line'.
 *
 * The record for the location is fetched from `samples'. The process timer is
 * stopped and its elapsed time becomes the new `sample', from which the
 * statistics are refreshed:
 *   mean      = sum / n
 *   relstderr = sqrt((sum_pow2 / n - mean^2) / n) / mean
 * with n = count. While sample_enough_benchs() still answers "no", `mean' is
 * overwritten with the latest sample so that smpi_sample_2() simulates exactly
 * the time of this iteration rather than the historical mean.
 *
 * NOTE(review): relstderr divides by data->mean and the mean by n; nothing
 * visible here guards against a zero mean or a zero count.
 * NOTE(review): the body of the `benching==0' test, the updates of
 * data->count and data->sum, the final change to data->benching and the
 * release of `loc' are not visible in this excerpt. */
337 void smpi_sample_3(int global, const char *file, int line)
339 char *loc = sample_location(global, file, line);
342 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
343 data = xbt_dict_get(samples, loc);
344 XBT_DEBUG("sample3 %s",loc);
347 if (data->benching==0) {
351 // ok, benchmarking this loop is over
352 xbt_os_threadtimer_stop(smpi_process_timer());
357 sample = xbt_os_timer_elapsed(smpi_process_timer());
359 data->sum_pow2 += sample * sample;
360 n = (double)data->count;
361 data->mean = data->sum / n;
362 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
363 if (!sample_enough_benchs(data)) {
364 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
366 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
367 data->mean, data->relstderr, sample);
369 // That's enough for now, prevent sample_2 to run the same code over and over
/* Callback handed to xbt_dict_new_homogeneous() when the `allocs' dictionary
 * is created (see smpi_shared_malloc()). It receives a stored value as an
 * untyped pointer and views it as a shared_data_t record.
 * NOTE(review): the cleanup performed on the record is not visible in this
 * excerpt. */
374 static void smpi_shared_alloc_free(void *p)
376 shared_data_t *data = p;
/* Turn the allocation-site string `loc' into a short name made of characters
 * from a 64-symbol alphabet.
 *
 * `loc' is resized to 30 bytes with xbt_realloc(). A hexadecimal digest held
 * in `hash' is then consumed 6 hex digits (24 bits) at a time; each group is
 * parsed with strtoul() and emits 4 characters, written from index 1 of `loc'
 * onwards (indices 1..28 over the 7 groups).
 *
 * NOTE(review): the alphabet literal contains 'Z' and 'z' twice and lacks 'W'
 * and 'w' ("...UVZXYZ" / "...uvzxyz"), so two distinct 6-bit values map to the
 * same character.
 * NOTE(review): the shift `18 - 3 * j' selects overlapping 6-bit windows
 * (shifts 18, 15, 12, 9) and never uses the low 9 bits of each group; a plain
 * split of 24 bits into four 6-bit digits would shift by `18 - 6 * j'.
 * Together these make the produced names less distinctive than the digest.
 * NOTE(review): the last group (i == 36) copies hash[36..41]; whether `hash'
 * is large enough cannot be checked here because its declaration, the digest
 * computation, the writes to loc[0] and to the terminator, and the return
 * statement are not visible in this excerpt. */
381 static char *smpi_shared_alloc_hash(char *loc)
391 loc = xbt_realloc(loc, 30);
393 for (i = 0; i < 40; i += 6) { /* base64 encode */
394 memcpy(s, hash + i, 6);
395 val = strtoul(s, NULL, 16);
396 for (j = 0; j < 4; j++) {
397 unsigned char x = (val >> (18 - 3 * j)) & 0x3f;
398 loc[1 + 4 * i / 6 + j] =
399 "ABCDEFGHIJKLMNOPQRSTUVZXYZabcdefghijklmnopqrstuvzxyz0123456789-_"[x];
/* Allocate `size' bytes on behalf of the call site `file':`line'.
 *
 * With the "smpi/use_shared_malloc" option enabled:
 *  - the call site is identified by a key built from the process id, `file'
 *    and `line', then shortened by smpi_shared_alloc_hash();
 *  - when `allocs' holds no record for that key, a POSIX shared memory object
 *    of that name is created exclusively (O_CREAT | O_EXCL), a shared_data_t
 *    record is allocated, the object is mapped with shm_map(), its name is
 *    unlinked right away (a failure only produces a warning) and the record is
 *    stored in `allocs';
 *  - when a record already exists, its file descriptor is mapped again with
 *    shm_map(), so the same object backs every allocation from that site.
 * With the option disabled, the memory comes from xbt_malloc().
 *
 * Errors from shm_open() are reported through xbt_die(); the "Please cleanup
 * /dev/shm/..." message presumably corresponds to the name already existing.
 *
 * NOTE(review): the tests selecting between these branches, the initialisation
 * of the record's fields, the release of `loc' and the return statement are
 * not visible in this excerpt. */
406 void *smpi_shared_malloc(size_t size, const char *file, int line)
409 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
410 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
413 loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
416 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
418 data = xbt_dict_get_or_null(allocs, loc);
420 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL,
421 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
425 xbt_die("Please cleanup /dev/shm/%s", loc);
427 xbt_die("An unhandled error occured while opening %s. shm_open: %s", loc, strerror(errno));
430 data = xbt_new(shared_data_t, 1);
434 mem = shm_map(fd, size, data);
435 if (shm_unlink(loc) < 0) {
436 XBT_WARN("Could not early unlink %s. shm_unlink: %s", loc, strerror(errno));
438 xbt_dict_set(allocs, loc, data, NULL);
439 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
442 mem = shm_map(data->fd, size, data);
445 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
447 mem = xbt_malloc(size);
448 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
/* Release memory obtained from smpi_shared_malloc().
 *
 * With the "smpi/use_shared_malloc" option enabled:
 *  - a warning is emitted when nothing, or no metadata, was ever allocated;
 *  - the metadata record is looked up in `allocs_metadata' under the "%p" text
 *    of `ptr'; an unknown pointer or a broken metadata link only produces a
 *    warning;
 *  - the mapping is removed with munmap(ptr, meta->size), a failure being
 *    reported as a warning;
 *  - once the record's count has dropped to zero or below, the entry
 *    `data->loc' is removed from `allocs'.
 * With the option disabled, the pointer is released the classic way.
 *
 * NOTE(review): the tests guarding the warnings (and whether they return
 * early), the way `data' is obtained from `meta', the decrement of
 * data->count and the actual free call of the classic path are not visible in
 * this excerpt. */
453 void smpi_shared_free(void *ptr)
455 char loc[PTR_STRLEN];
456 shared_metadata_t* meta;
458 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
461 XBT_WARN("Cannot free: nothing was allocated");
464 if(!allocs_metadata) {
465 XBT_WARN("Cannot free: no metadata was allocated");
/* Same key construction as in shm_map(): the textual form of the address */
467 snprintf(loc, PTR_STRLEN, "%p", ptr);
468 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
470 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
475 XBT_WARN("Cannot free: something is broken in the metadata link");
478 if(munmap(ptr, meta->size) < 0) {
479 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
482 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
483 if (data->count <= 0) {
485 xbt_dict_remove(allocs, data->loc);
486 XBT_DEBUG("Shared free - with removal - of %p", ptr);
489 XBT_DEBUG("Classic free of %p", ptr);
/* Tell whether a result has been recorded for the call `func' with `input'.
 *
 * The key "<func>:<input>" is looked up in the `calls' dictionary (created
 * with a NULL constructor argument when needed) using xbt_dict_get(), which
 * either succeeds or throws; a thrown exception of category not_found_error is
 * recognised specifically.
 *
 * NOTE(review): the try/catch lines, the handling of other exception
 * categories, the release of `loc' and the returned values are not visible in
 * this excerpt; presumably non-zero means "known". */
495 int smpi_shared_known_call(const char* func, const char* input) {
496 char* loc = bprintf("%s:%s", func, input);
501 calls = xbt_dict_new_homogeneous(NULL);
504 xbt_dict_get(calls, loc); /* Succeed or throw */
508 if(ex.category == not_found_error) {
/* Fetch the data recorded for the call `func' with `input'.
 *
 * The key "<func>:<input>" is looked up in the `calls' dictionary (created
 * with a NULL constructor argument when needed) using xbt_dict_get(); per the
 * remark in smpi_shared_known_call(), that lookup throws when the key is
 * absent, so callers presumably check smpi_shared_known_call() first.
 *
 * NOTE(review): the test guarding the dictionary creation, the release of
 * `loc' and the return statement are not visible in this excerpt. */
519 void* smpi_shared_get_call(const char* func, const char* input) {
520 char* loc = bprintf("%s:%s", func, input);
524 calls = xbt_dict_new_homogeneous(NULL);
526 data = xbt_dict_get(calls, loc);
531 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
532 char* loc = bprintf("%s:%s", func, input);
535 calls = xbt_dict_new_homogeneous(NULL);
537 xbt_dict_set(calls, loc, data, NULL);