1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
13 #include "simgrid/sg_config.h"
19 #include <sys/types.h>
22 #include <math.h> // sqrt
// Logging category used by this file: "smpi_bench", declared as a sub-category
// of "smpi" and installed as the default category for the XBT_DEBUG, XBT_WARN
// and XBT_ERROR calls below.
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
28 "Logging specific to SMPI (benchmarking)");
30 /* Shared allocations are handled through shared memory segments.
31 * Associated data and metadata are used as follows:
34 * `allocs' dict ---- -.
35 * ---------- shared_data_t shared_metadata_t / | | |
36 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
37 * | ---------- | fd of <name> | | | size of mmap | --| | | |
38 * | | count (2) | |-- | data | \ | | |
39 * `----------------- | <name> | | ----------------- ---- |
40 * -------------------- | ^ |
42 * | | `allocs_metadata' dict |
43 * | | ---------------------- |
44 * | `-- | <addr of mmap #1> |<-'
45 * | .-- | <addr of mmap #2> |<-.
46 * | | ---------------------- |
52 * | shared_metadata_t / | |
53 * | ----------------- | | |
54 * | | size of mmap | --| | |
56 * ----------------- | | |
// Size of the buffers that receive a pointer formatted with "%p" to build a
// dictionary key (see shm_map and smpi_shared_free).
// NOTE(review): presumably "0x" + two hex digits per byte + terminating NUL;
// the "%p" rendering is implementation-defined, so confirm this bound holds on
// every supported platform.
61 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
// Dictionaries shared by the whole file; smpi_bench_destroy() frees all four.
//  - allocs:          hashed allocation-site name -> shared_data_t
//  - allocs_metadata: "%p" text of a mapping address -> shared_metadata_t
//  - samples:         sample location string -> local_data_t
//  - calls:           "func:input" string -> pointer supplied by the caller
63 xbt_dict_t allocs = NULL; /* Allocated on first use */
64 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
65 xbt_dict_t samples = NULL; /* Allocated on first use */
66 xbt_dict_t calls = NULL; /* Allocated on first use */
// Thread-local rank; smpi_bench_begin() stores smpi_process_index() into it.
67 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
// Return the size, in bytes, that fstat() reports for the object behind `fd`.
// A failing fstat() is reported through xbt_die() together with the errno text.
80 static size_t shm_size(int fd) {
83 if(fstat(fd, &st) < 0) {
84 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
86 return (size_t)st.st_size;
// Map `size` bytes of the object behind `fd` as shared, readable and writable
// memory, and register a metadata record for the new mapping.
//  - fd:   descriptor of the backing object
//  - size: number of bytes to map, starting at offset 0
//  - data: NOTE(review): presumably linked from the metadata record so that
//          smpi_shared_free() can reach the owning shared_data_t -- confirm
// Failures of ftruncate() or mmap() are reported through xbt_die().
90 static void* shm_map(int fd, size_t size, shared_data_t* data) {
93 shared_metadata_t* meta;
// Grow the backing object when it is smaller than the request; never shrink it.
95 if(size > shm_size(fd)) {
96 if(ftruncate(fd, (off_t)size) < 0) {
97 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
101 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
102 if(mem == MAP_FAILED) {
103 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
// The metadata dictionary is created on first use; xbt_free is presumably the
// per-entry destructor (confirm against xbt_dict_new_homogeneous).
105 if(!allocs_metadata) {
106 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
// Key the record by the textual address of the mapping: smpi_shared_free()
// rebuilds the same key from the pointer it is given.
108 snprintf(loc, PTR_STRLEN, "%p", mem);
109 meta = xbt_new(shared_metadata_t, 1);
112 xbt_dict_set(allocs_metadata, loc, meta, NULL);
113 XBT_DEBUG("MMAP %zu to %p", size, mem);
// Release the four file-level dictionaries (allocs, allocs_metadata, samples,
// calls). Each call receives the address of the global variable.
118 void smpi_bench_destroy(void)
120 xbt_dict_free(&allocs);
121 xbt_dict_free(&allocs_metadata);
122 xbt_dict_free(&samples);
123 xbt_dict_free(&calls);
// Simulate `flops` floating-point operations on the host of the calling
// process: an execution action named "computation" is created on that host and
// the caller waits for its completion. Tracing hooks bracket the computation.
// NOTE(review): TRACE_smpi_computing_out() is listed before the wait on the
// action; confirm the intended ordering and whether the tracing calls are
// guarded by preprocessor conditionals.
126 void smpi_execute_flops(double flops) {
129 host = SIMIX_host_self();
130 int rank = smpi_process_index();
132 TRACE_smpi_computing_in(rank);
135 XBT_DEBUG("Handle real computation time: %f flops", flops);
136 action = simcall_host_execute("computation", host, flops, 1);
139 TRACE_smpi_computing_out(rank,flops);
// Attach the SMPI tracing category to the action before waiting on it.
144 simcall_set_category (action, TRACE_internal_smpi_get_category());
146 simcall_host_execution_wait(action);
// Inject a measured `duration` into the simulation.
// Durations at or above the "smpi/cpu_threshold" option are converted to flops
// by multiplying with "smpi/running_power" and handed to smpi_execute_flops();
// shorter durations are only logged and otherwise ignored.
149 static void smpi_execute(double duration)
151 /* FIXME: a global variable would be less expensive to consult than a call to xbt_cfg_get_double() right on the critical path */
152 if (duration >= sg_cfg_get_double("smpi/cpu_threshold")) {
153 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
154 smpi_execute_flops(duration *
155 sg_cfg_get_double("smpi/running_power"));
157 XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
158 duration, sg_cfg_get_double("smpi/cpu_threshold"));
// Start measuring a stretch of user computation: starts the calling process'
// thread timer and stores the process index in smpi_current_rank.
162 void smpi_bench_begin(void)
164 xbt_os_threadtimer_start(smpi_process_timer());
165 smpi_current_rank = smpi_process_index();
// Stop the calling process' thread timer and pass the elapsed time to
// smpi_execute(), which decides whether it is injected into the simulation.
168 void smpi_bench_end(void)
170 xbt_os_timer_t timer = smpi_process_timer();
172 xbt_os_threadtimer_stop(timer);
173 smpi_execute(xbt_os_timer_elapsed(timer));
// Simulated sleep: `secs` seconds are converted into the number of flops the
// current host computes in that time, then executed as a "computation" action
// on which the caller waits.
// NOTE(review): the value returned to the caller is not established by the
// statements below -- confirm before relying on it.
176 unsigned int smpi_sleep(unsigned int secs)
// Host speed is read from the simulated host the caller runs on.
182 double flops = (double) secs*simcall_host_get_speed(SIMIX_host_self());
183 XBT_DEBUG("Sleep for: %f flops", flops);
184 action = simcall_host_execute("computation", SIMIX_host_self(), flops, 1);
186 simcall_set_category (action, TRACE_internal_smpi_get_category());
188 simcall_host_execution_wait(action);
// Fill `tv` from the simulated clock instead of the wall clock.
//  - tv_sec receives the integral part of SIMIX_get_clock().
//  - tv_usec receives the fractional part scaled to microseconds.
// NOTE(review): the two tv_usec assignments differ only by the cast type
// (useconds_t vs suseconds_t); presumably they are alternatives selected by a
// preprocessor conditional -- confirm.
194 int smpi_gettimeofday(struct timeval *tv)
198 now = SIMIX_get_clock();
200 tv->tv_sec = (time_t)now;
202 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
204 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
// Precision variable defined in another translation unit.
211 extern double sg_maxmin_precision;
// Return the number of timestamp ticks per simulated second, computed as
// 1 / sg_maxmin_precision and truncated to an unsigned long long.
212 unsigned long long smpi_rastro_resolution (void)
215 double resolution = (1/sg_maxmin_precision);
217 return (unsigned long long)resolution;
// Return the simulated clock expressed in ticks of smpi_rastro_resolution():
// whole seconds times the resolution, plus the fractional part scaled by the
// resolution.
220 unsigned long long smpi_rastro_timestamp (void)
223 double now = SIMIX_get_clock();
// Split the clock into its integral and fractional parts.
225 unsigned long long sec = (unsigned long long)now;
// The product is a double; storing it in `pre` truncates it toward zero.
226 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
228 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
231 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
// Fields of the per-location benchmarking record (used as local_data_t by the
// smpi_sample_* functions and stored in the `samples` dictionary).
233 int iters; /* amount of requested iterations */
234 int count; /* amount of iterations done so far */
// threshold and relstderr are compared against each other by
// sample_enough_benchs().
235 double threshold; /* maximal stderr requested (if positive) */
236 double relstderr; /* observed stderr so far */
237 double mean; /* mean of benched times, to be used if the block is disabled */
238 double sum; /* sum of benched times (to compute the mean and stderr) */
239 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
240 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
// Counter incremented by smpi_sample_1() and decremented in smpi_sample_2()
// next to the call that replays the recorded duration.
243 int smpi_sample_is_running = 0;
// Build the key that identifies a sampled block in the `samples` dictionary.
// Two forms exist: "file:line", and "file:line:rank" where rank is
// smpi_process_index().
// NOTE(review): presumably the first form is used when `global` is non-zero, so
// that all processes share one record -- confirm. The string comes from
// bprintf(); ownership of the result lies with the caller (confirm who frees).
245 static char *sample_location(int global, const char *file, int line) {
247 return bprintf("%s:%d", file, line);
249 return bprintf("%s:%d:%d", file, line, smpi_process_index());
// Decide whether the record holds enough measurements to stop benchmarking.
// The starting verdict is "count has reached iters"; when a positive threshold
// is set, the verdict is forced to 0 while the observed relative standard error
// is still above that threshold (and in one more "not enough data" case).
252 static int sample_enough_benchs(local_data_t *data) {
253 int res = data->count >= data->iters;
254 if (data->threshold>0.0) {
256 res = 0; // not enough data
257 if (data->relstderr > data->threshold)
258 res = 0; // stderr too high yet
260 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
261 (res?"enough benchs":"need more data"),
262 data->count, data->iters, data->relstderr, data->threshold, data->mean);
// Entry hook of a sampled block (first of the three smpi_sample_* steps).
//  - global, file, line: identify the block; see sample_location()
//  - iters:     requested number of benchmark iterations
//  - threshold: requested maximal relative standard error
// The time spent since the previous benchmark start is accounted first. The
// record for this location is then created on first visit, or re-evaluated on
// later visits to decide whether smpi_sample_2() should keep benchmarking.
266 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
268 char *loc = sample_location(global, file, line);
271 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
272 smpi_sample_is_running++;
// `free` is handed to the dictionary constructor, presumably as the destructor
// of the stored records.
275 samples = xbt_dict_new_homogeneous(free);
277 data = xbt_dict_get_or_null(samples, loc);
// First visit: at least one stopping criterion must be usable.
279 xbt_assert(threshold>0 || iters>0,
280 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
281 data = (local_data_t *) xbt_new(local_data_t, 1);
284 data->sum_pow2 = 0.0;
286 data->threshold = threshold;
287 data->benching = 1; // If we have no data, we need at least one
289 xbt_dict_set(samples, loc, data, NULL);
290 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
// Later visits: the settings recorded for this location must not change.
292 if (data->iters != iters || data->threshold != threshold) {
293 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
294 loc, data->iters, data->threshold, iters,threshold);
298 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
299 data->benching = !sample_enough_benchs(data);
300 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
// Loop-condition hook of a sampled block (second smpi_sample_* step).
// Looks up the record created by smpi_sample_1() and either lets one more
// benchmark iteration run (data->benching == 1) or replays data->mean through
// smpi_execute() and restarts the benchmark timer for the code that follows.
// NOTE(review): presumably returns non-zero when the block body must be
// executed and zero otherwise -- confirm.
305 int smpi_sample_2(int global, const char *file, int line)
307 char *loc = sample_location(global, file, line);
// The dictionary only exists once smpi_sample_1() has been called.
310 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
311 data = xbt_dict_get(samples, loc);
312 XBT_DEBUG("sample2 %s",loc);
315 if (data->benching==1) {
316 // we need to run a new bench
317 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
318 data->count, data->iters, data->relstderr, data->threshold, data->mean);
322 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
323 // Just sleep instead
324 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
325 data->count, data->iters, data->relstderr, data->threshold, data->mean);
// data->mean holds either the running mean or the last sample; see
// smpi_sample_3().
326 smpi_execute(data->mean);
328 smpi_sample_is_running--;
329 smpi_bench_begin(); // prepare to capture future, unrelated computations
// End-of-iteration hook of a sampled block (third smpi_sample_* step).
// Stops the timer, folds the elapsed time into the record's running statistics
// (sum of squares, mean, relative standard error) and prepares data->mean for
// the replay done by smpi_sample_2().
335 void smpi_sample_3(int global, const char *file, int line)
337 char *loc = sample_location(global, file, line);
340 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
341 data = xbt_dict_get(samples, loc);
342 XBT_DEBUG("sample3 %s",loc);
345 if (data->benching==0) {
349 // ok, benchmarking this loop is over
350 xbt_os_threadtimer_stop(smpi_process_timer());
355 sample = xbt_os_timer_elapsed(smpi_process_timer());
357 data->sum_pow2 += sample * sample;
358 n = (double)data->count;
359 data->mean = data->sum / n;
// Standard error of the mean divided by the mean.
// NOTE(review): this divides by data->mean; a block whose measured times are
// all zero would yield a NaN or infinite relstderr -- confirm this is handled.
360 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
361 if (!sample_enough_benchs(data)) {
362 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
364 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
365 data->mean, data->relstderr, sample);
367 // That's enough for now, prevent sample_2 from running the same code over and over
// Callback handed to xbt_dict_new_homogeneous() when the `allocs` dictionary is
// created; `p` is treated as a shared_data_t record.
// NOTE(review): presumably releases the record and what it owns -- confirm.
372 static void smpi_shared_alloc_free(void *p)
374 shared_data_t *data = p;
// Replace the allocation-site string `loc` by a short, fixed-length name built
// from a 40-character hexadecimal digest held in `hash`. `loc` is reallocated
// to 30 bytes, so the caller must use the returned pointer from then on.
// The digest is consumed six hex digits (24 bits) at a time; each group yields
// four characters written from index 1 onwards (highest index written: 28).
// NOTE(review): index 0 and the terminator are not set by the loop below;
// presumably index 0 holds the leading '/' that shm_open() names need --
// confirm.
// NOTE(review): i takes the values 0, 6, ..., 36, so the last memcpy reads
// hash[36..41]; `hash` must therefore provide at least 42 bytes -- confirm.
379 static char *smpi_shared_alloc_hash(char *loc)
389 loc = xbt_realloc(loc, 30);
391 for (i = 0; i < 40; i += 6) { /* base64 encode */
392 memcpy(s, hash + i, 6);
393 val = strtoul(s, NULL, 16);
394 for (j = 0; j < 4; j++) {
// NOTE(review): a base64 split of 24 bits uses shifts 18, 12, 6, 0, i.e.
// (18 - 6 * j). With (18 - 3 * j) the shifts are 18, 15, 12, 9: the 6-bit
// windows overlap and the low 9 bits of `val` never contribute.
395 unsigned char x = (val >> (18 - 3 * j)) & 0x3f;
396 loc[1 + 4 * i / 6 + j] =
// NOTE(review): the alphabet contains 'Z' and 'z' twice and has no 'W' or 'w'
// ("...UVZXYZ", "...uvzxyz"), so two pairs of 6-bit values map to the same
// character. Both remarks only raise the collision probability of the
// generated names; nothing in this function decodes them.
397 "ABCDEFGHIJKLMNOPQRSTUVZXYZabcdefghijklmnopqrstuvzxyz0123456789-_"[x];
// Allocate `size` bytes for the allocation site (file, line).
//
// With the "smpi/use_shared_malloc" option enabled, every call made from the
// same site within this process is backed by one shared memory object:
//  - the site name is built from the pid, file and line, then shortened by
//    smpi_shared_alloc_hash();
//  - on the first call for a site the object is created with shm_open() using
//    O_CREAT | O_EXCL (owner read/write, group and others read), mapped through
//    shm_map(), unlinked right away so that it does not outlive its
//    descriptor, and recorded in the `allocs` dictionary;
//  - on later calls the recorded descriptor (data->fd) is mapped again, which
//    gives a new address range over the same object.
// Errors from shm_open() are reported through xbt_die(); one message asks the
// user to clean up /dev/shm (presumably the "name already exists" case --
// confirm). A failing shm_unlink() only produces a warning.
//
// With the option disabled the memory comes from xbt_malloc().
//
// NOTE(review): the bookkeeping of data->count and the release of `loc` are not
// established by the statements below -- confirm.
404 void *smpi_shared_malloc(size_t size, const char *file, int line)
407 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
408 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
411 loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
414 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
416 data = xbt_dict_get_or_null(allocs, loc);
418 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL,
419 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
423 xbt_die("Please cleanup /dev/shm/%s", loc);
425 xbt_die("An unhandled error occured while opening %s. shm_open: %s", loc, strerror(errno));
428 data = xbt_new(shared_data_t, 1);
432 mem = shm_map(fd, size, data);
433 if (shm_unlink(loc) < 0) {
434 XBT_WARN("Could not early unlink %s. shm_unlink: %s", loc, strerror(errno));
436 xbt_dict_set(allocs, loc, data, NULL);
437 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
440 mem = shm_map(data->fd, size, data);
443 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
445 mem = xbt_malloc(size);
446 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
// Release memory obtained from smpi_shared_malloc().
//
// With "smpi/use_shared_malloc" enabled, the mapping is found in
// `allocs_metadata` under the "%p" rendering of `ptr`, unmapped with the size
// recorded at mapping time, and the owning record is removed from `allocs`
// once its count has dropped to zero or below. Every inconsistency (missing
// dictionaries, unknown pointer, broken metadata link, failing munmap) is
// reported with XBT_WARN rather than treated as fatal.
// With the option disabled this is a classic free.
451 void smpi_shared_free(void *ptr)
453 char loc[PTR_STRLEN];
454 shared_metadata_t* meta;
456 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
459 XBT_WARN("Cannot free: nothing was allocated");
462 if(!allocs_metadata) {
463 XBT_WARN("Cannot free: no metadata was allocated");
// Same key construction as in shm_map().
465 snprintf(loc, PTR_STRLEN, "%p", ptr);
466 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
468 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
473 XBT_WARN("Cannot free: something is broken in the metadata link");
476 if(munmap(ptr, meta->size) < 0) {
477 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
480 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
// Last user gone: drop the record, keyed by the name stored in data->loc.
481 if (data->count <= 0) {
483 xbt_dict_remove(allocs, data->loc);
484 XBT_DEBUG("Shared free - with removal - of %p", ptr);
487 XBT_DEBUG("Classic free of %p", ptr);
// Tell whether a value has been recorded in `calls` for the pair (func, input).
// The lookup key is "func:input". The dictionary is created on first use with
// a NULL argument, presumably meaning that stored values are not freed by it.
// xbt_dict_get() throws when the key is absent; the not_found_error category
// is the one examined here.
// NOTE(review): presumably returns non-zero when the key exists and zero when
// it does not -- confirm, along with who frees `loc`.
493 int smpi_shared_known_call(const char* func, const char* input) {
494 char* loc = bprintf("%s:%s", func, input);
499 calls = xbt_dict_new_homogeneous(NULL);
502 xbt_dict_get(calls, loc); /* Succeed or throw */
506 if(ex.category == not_found_error) {
// Fetch the value recorded in `calls` for the pair (func, input), using the
// key "func:input". The dictionary is created on first use.
// xbt_dict_get() is the throwing lookup (see smpi_shared_known_call), so
// callers are presumably expected to check smpi_shared_known_call() first --
// confirm.
517 void* smpi_shared_get_call(const char* func, const char* input) {
518 char* loc = bprintf("%s:%s", func, input);
522 calls = xbt_dict_new_homogeneous(NULL);
524 data = xbt_dict_get(calls, loc);
529 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
530 char* loc = bprintf("%s:%s", func, input);
533 calls = xbt_dict_new_homogeneous(NULL);
535 xbt_dict_set(calls, loc, data, NULL);