1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
13 #include "simgrid/sg_config.h"
19 #include <sys/types.h>
22 #include <math.h> // sqrt
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
28 "Logging specific to SMPI (benchmarking)");
30 /* Shared allocations are handled through shared memory segments.
31 * Associated data and metadata are used as follows:
34 * `allocs' dict ---- -.
35 * ---------- shared_data_t shared_metadata_t / | | |
36 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
37 * | ---------- | fd of <name> | | | size of mmap | --| | | |
38 * | | count (2) | |-- | data | \ | | |
39 * `----------------- | <name> | | ----------------- ---- |
40 * -------------------- | ^ |
42 * | | `allocs_metadata' dict |
43 * | | ---------------------- |
44 * | `-- | <addr of mmap #1> |<-'
45 * | .-- | <addr of mmap #2> |<-.
46 * | | ---------------------- |
52 * | shared_metadata_t / | |
53 * | ----------------- | | |
54 * | | size of mmap | --| | |
56 * ----------------- | | |
// Size of the buffer used to format a pointer with "%p" as a dictionary key
// (presumably "0x" + two hex digits per pointer byte + terminating NUL; TODO confirm).
61 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
// Shared segments, looked up by hashed allocation site in smpi_shared_malloc().
63 xbt_dict_t allocs = NULL; /* Allocated on first use */
// Per-mapping metadata, keyed by the "%p" string of the mapped address (see shm_map()).
64 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
// Benchmark statistics of the sampled blocks, keyed by the string from sample_location().
65 xbt_dict_t samples = NULL; /* Allocated on first use */
// Values stored by smpi_shared_set_call(), keyed by "func:input".
66 xbt_dict_t calls = NULL; /* Allocated on first use */
// Also refreshed from smpi_process_index() in smpi_bench_begin().
67 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* Return the size in bytes reported by fstat() for the object behind `fd`.
 * A failing fstat() is fatal: the process dies through xbt_die(). */
80 static size_t shm_size(int fd) {
83 if(fstat(fd, &st) < 0) {
84 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
86 return (size_t)st.st_size;
/* Map `size` bytes of the object behind `fd` as shared, readable and writable memory.
 *
 * The object is first grown with ftruncate() when it is smaller than `size`.
 * A shared_metadata_t is then allocated and registered in `allocs_metadata`
 * (created on first use) under the "%p" representation of the mapped address.
 * Any ftruncate() or mmap() failure is fatal (xbt_die).
 *
 * NOTE(review): the statements that fill `meta` (presumably from `size` and
 * `data`) and the final return (presumably `mem`) are not visible in this
 * excerpt -- confirm against the complete file.
 */
90 static void* shm_map(int fd, size_t size, shared_data_t* data) {
93 shared_metadata_t* meta;
/* Only ever grow the backing object, never shrink it. */
95 if(size > shm_size(fd)) {
96 if(ftruncate(fd, (off_t)size) < 0) {
97 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
101 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
102 if(mem == MAP_FAILED) {
103 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
105 if(!allocs_metadata) {
/* The dictionary releases its values with xbt_free. */
106 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
/* The textual address of the mapping is the lookup key used by smpi_shared_free(). */
108 snprintf(loc, PTR_STRLEN, "%p", mem);
109 meta = xbt_new(shared_metadata_t, 1);
112 xbt_dict_set(allocs_metadata, loc, meta, NULL);
113 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Release the four module-level dictionaries (allocs, allocs_metadata,
 * samples and calls). */
118 void smpi_bench_destroy(void)
120 xbt_dict_free(&allocs);
121 xbt_dict_free(&allocs_metadata);
122 xbt_dict_free(&samples);
123 xbt_dict_free(&calls);
/* Simulate a computation of `flops` on the host of the calling process:
 * an execution named "computation" is started on SIMIX_host_self(), tagged
 * with the SMPI tracing category, and waited for. */
126 void smpi_execute_flops(double flops) {
129 host = SIMIX_host_self();
131 XBT_DEBUG("Handle real computation time: %f flops", flops);
132 action = simcall_host_execute("computation", host, flops, 1);
134 simcall_set_category (action, TRACE_internal_smpi_get_category());
/* Returns only once the simulated execution is over. */
136 simcall_host_execution_wait(action);
/* Inject a measured computation time into the simulation.
 *
 * `duration` is compared with the "smpi/cpu_threshold" option: at or above the
 * threshold it is converted to flops by multiplying it by "smpi/running_power"
 * and handed to smpi_execute_flops(); below the threshold it is only logged
 * and otherwise ignored. */
139 static void smpi_execute(double duration)
141 /* FIXME: a global variable would be less expensive to consult than a call to sg_cfg_get_double() right on the critical path */
142 if (duration >= sg_cfg_get_double("smpi/cpu_threshold")) {
143 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
144 smpi_execute_flops(duration *
145 sg_cfg_get_double("smpi/running_power"));
147 XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
148 duration, sg_cfg_get_double("smpi/cpu_threshold"));
/* Start the timer of the calling process (smpi_process_timer()) and record
 * its index in smpi_current_rank. Counterpart of smpi_bench_end(). */
152 void smpi_bench_begin(void)
154 xbt_os_threadtimer_start(smpi_process_timer());
155 smpi_current_rank = smpi_process_index();
/* Stop the timer of the calling process and pass the elapsed time to
 * smpi_execute(), which decides whether it is injected into the simulation. */
158 void smpi_bench_end(void)
160 xbt_os_timer_t timer = smpi_process_timer();
162 xbt_os_threadtimer_stop(timer);
163 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulated sleep: executes `secs` times the speed of the current host as
 * flops through smpi_execute_flops().
 * NOTE(review): the returned value is not visible in this excerpt. */
166 unsigned int smpi_sleep(unsigned int secs)
169 smpi_execute_flops((double) secs*simcall_host_get_speed(SIMIX_host_self()));
/* Fill `tv` from the simulated clock (SIMIX_get_clock()): tv_sec receives the
 * whole seconds, tv_usec the remaining fraction scaled to microseconds.
 * NOTE(review): the two tv_usec assignments differ only by the cast type
 * (useconds_t vs suseconds_t); they are presumably alternative branches of a
 * preprocessor conditional whose lines are not visible here -- confirm.
 * The returned value is not visible in this excerpt either. */
174 int smpi_gettimeofday(struct timeval *tv)
178 now = SIMIX_get_clock();
180 tv->tv_sec = (time_t)now;
182 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
184 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Defined outside this file. */
191 extern double sg_maxmin_precision;
/* Return the inverse of sg_maxmin_precision, truncated to an unsigned integer.
 * smpi_rastro_timestamp() uses it as the number of ticks per simulated second. */
192 unsigned long long smpi_rastro_resolution (void)
195 double resolution = (1/sg_maxmin_precision);
197 return (unsigned long long)resolution;
/* Return the simulated clock as an integer number of ticks, with
 * smpi_rastro_resolution() ticks per second. */
200 unsigned long long smpi_rastro_timestamp (void)
203 double now = SIMIX_get_clock();
/* Whole seconds and fractional part are converted separately. */
205 unsigned long long sec = (unsigned long long)now;
206 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
208 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
211 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Benchmarking state kept for each sampled block. These are the fields the
 * smpi_sample_* functions access through local_data_t; the opening line of the
 * type definition is not visible in this excerpt. */
213 int iters; /* number of requested iterations */
214 int count; /* number of iterations done so far */
215 double threshold; /* maximal relative stderr requested (only used if positive) */
216 double relstderr; /* relative stderr observed so far */
217 double mean; /* mean of benched times, to be used if the block is disabled */
218 double sum; /* sum of benched times (to compute the mean and stderr) */
219 double sum_pow2; /* sum of the squares of the benched times (to compute the stderr) */
220 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Incremented by smpi_sample_1() and decremented in smpi_sample_2(). */
223 int smpi_sample_is_running = 0;
/* Build, with bprintf(), the dictionary key that identifies a sampled block:
 * either "file:line" or "file:line:<process index>".
 * NOTE(review): the condition selecting between the two forms (presumably
 * `global`) is not visible in this excerpt. */
225 static char *sample_location(int global, const char *file, int line) {
227 return bprintf("%s:%d", file, line);
229 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Tell whether the block described by `data` has been benchmarked enough.
 *
 * The starting verdict is `count >= iters`. When a positive threshold was
 * requested, the verdict is reset to 0 in two cases: one whose condition is
 * not visible in this excerpt ("not enough data"), and when the observed
 * relative stderr still exceeds the threshold. The decision is logged.
 * NOTE(review): the return statement (presumably `res`) is not visible here. */
232 static int sample_enough_benchs(local_data_t *data) {
233 int res = data->count >= data->iters;
234 if (data->threshold>0.0) {
236 res = 0; // not enough data
237 if (data->relstderr > data->threshold)
238 res = 0; // stderr too high yet
240 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
241 (res?"enough benchs":"need more data"),
242 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* Entry of a sampled block.
 *
 * Accounts for the computation done before the block (smpi_bench_end()), then
 * looks up the statistics of this location in `samples`.
 *  - Unknown location: at least one of `threshold` and `iters` must be
 *    positive; a new local_data_t is created with benching enabled and
 *    stored under the location key.
 *  - Known location: the settings must match the recorded ones (an error is
 *    logged otherwise) and `benching` is recomputed from
 *    sample_enough_benchs().
 * NOTE(review): several initialisation lines of the new entry are not visible
 * in this excerpt. */
246 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
248 char *loc = sample_location(global, file, line);
251 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
252 smpi_sample_is_running++;
/* The dictionary releases its values with free(). */
255 samples = xbt_dict_new_homogeneous(free);
257 data = xbt_dict_get_or_null(samples, loc);
259 xbt_assert(threshold>0 || iters>0,
260 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
261 data = (local_data_t *) xbt_new(local_data_t, 1);
264 data->sum_pow2 = 0.0;
266 data->threshold = threshold;
267 data->benching = 1; // If we have no data, we need at least one
269 xbt_dict_set(samples, loc, data, NULL);
270 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
272 if (data->iters != iters || data->threshold != threshold) {
273 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
274 loc, data->iters, data->threshold, iters,threshold);
278 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
279 data->benching = !sample_enough_benchs(data);
280 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Decide whether the sampled block must be executed once more.
 *
 * The statistics of the location must already exist in `samples`. While
 * `benching` is 1 a new bench is needed; otherwise the recorded `mean` is
 * injected through smpi_execute(), smpi_sample_is_running is decremented and
 * timing of the code following the block is restarted with smpi_bench_begin().
 * NOTE(review): the statements of the benching branch and the returned values
 * are not visible in this excerpt. */
285 int smpi_sample_2(int global, const char *file, int line)
287 char *loc = sample_location(global, file, line);
290 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
291 data = xbt_dict_get(samples, loc);
292 XBT_DEBUG("sample2 %s",loc);
295 if (data->benching==1) {
296 // we need to run a new bench
297 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
298 data->count, data->iters, data->relstderr, data->threshold, data->mean);
302 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
303 // Just sleep instead
304 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
305 data->count, data->iters, data->relstderr, data->threshold, data->mean);
306 smpi_execute(data->mean);
308 smpi_sample_is_running--;
309 smpi_bench_begin(); // prepare to capture future, unrelated computations
/* End of one benchmarked iteration of a sampled block.
 *
 * Stops the process timer, takes the elapsed time as the new sample and
 * refreshes the statistics of the location: sum of squares, mean and relative
 * standard error, computed as sqrt((sum_pow2/n - mean^2) / n) / mean.
 * While more benchmarking is needed, `mean` is overwritten with the last
 * sample so that smpi_sample_2() injects exactly that duration.
 * NOTE(review): the updates of `count` and `sum`, the body of the
 * `benching==0` test and the statement following the last comment are not
 * visible in this excerpt. */
315 void smpi_sample_3(int global, const char *file, int line)
317 char *loc = sample_location(global, file, line);
320 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
321 data = xbt_dict_get(samples, loc);
322 XBT_DEBUG("sample3 %s",loc);
325 if (data->benching==0) {
329 // ok, benchmarking this loop is over
330 xbt_os_threadtimer_stop(smpi_process_timer());
335 sample = xbt_os_timer_elapsed(smpi_process_timer());
337 data->sum_pow2 += sample * sample;
338 n = (double)data->count;
339 data->mean = data->sum / n;
// NOTE(review): dividing by data->mean gives a non-finite value when the mean is 0 -- confirm this cannot happen.
340 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
341 if (!sample_enough_benchs(data)) {
342 data->mean = sample; // Still in benching process; we want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
344 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
345 data->mean, data->relstderr, sample);
347 // That's enough for now, prevent sample_2 from running the same code over and over
/* Value destructor registered on the `allocs` dictionary by
 * smpi_shared_malloc(); `p` is interpreted as a shared_data_t.
 * NOTE(review): the clean-up statements are not visible in this excerpt. */
352 static void smpi_shared_alloc_free(void *p)
354 shared_data_t *data = p;
/* Replace `loc` by a short name derived from a hexadecimal hash string.
 *
 * `loc` is reallocated to 30 bytes. The hash is consumed 6 hexadecimal digits
 * at a time (7 groups); each group is parsed with strtoul() and produces 4
 * characters, written to loc[1] .. loc[28].
 * NOTE(review): the computation of `hash`, the writes to loc[0] and loc[29]
 * and the return statement are not visible in this excerpt.
 * NOTE(review): this is not a standard base64 encoding. The alphabet literal
 * has 'Z' and 'z' where 'W' and 'w' would be expected, and the shift step is
 * 3 bits, so the four 6-bit windows overlap. This may be acceptable if the
 * result is only ever used as an opaque name, but it reduces the number of
 * distinct names -- confirm before changing anything. */
359 static char *smpi_shared_alloc_hash(char *loc)
369 loc = xbt_realloc(loc, 30);
371 for (i = 0; i < 40; i += 6) { /* base64 encode */
/* NOTE(review): for i == 36 this reads hash[36..41]; check the size of `hash`. */
372 memcpy(s, hash + i, 6);
373 val = strtoul(s, NULL, 16);
374 for (j = 0; j < 4; j++) {
375 unsigned char x = (val >> (18 - 3 * j)) & 0x3f;
376 loc[1 + 4 * i / 6 + j] =
377 "ABCDEFGHIJKLMNOPQRSTUVZXYZabcdefghijklmnopqrstuvzxyz0123456789-_"[x];
/* Allocate `size` bytes for the allocation site (`file`, `line`).
 *
 * When the "smpi/use_shared_malloc" option is set, a name is built from the
 * pid, file and line, hashed with smpi_shared_alloc_hash(), and looked up in
 * the `allocs` dictionary (created on first use):
 *  - unknown name: a shared memory object is created with shm_open()
 *    (O_CREAT | O_EXCL; failures are fatal), a shared_data_t is allocated,
 *    the object is mapped with shm_map() and immediately unlinked (a failed
 *    unlink only logs a warning), and the entry is stored in `allocs`;
 *  - known name: the recorded file descriptor is mapped once more.
 * When the option is not set, the memory comes from xbt_malloc().
 *
 * NOTE(review): the reference counting and the return statement (presumably
 * `mem`) are not visible in this excerpt.
 */
384 void *smpi_shared_malloc(size_t size, const char *file, int line)
387 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
388 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
391 loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
394 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
396 data = xbt_dict_get_or_null(allocs, loc);
398 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL,
399 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
403 xbt_die("Please cleanup /dev/shm/%s", loc);
405 xbt_die("An unhandled error occured while opening %s. shm_open: %s", loc, strerror(errno));
408 data = xbt_new(shared_data_t, 1);
412 mem = shm_map(fd, size, data);
413 if (shm_unlink(loc) < 0) {
414 XBT_WARN("Could not early unlink %s. shm_unlink: %s", loc, strerror(errno));
416 xbt_dict_set(allocs, loc, data, NULL);
417 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
420 mem = shm_map(data->fd, size, data);
423 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
425 mem = xbt_malloc(size);
426 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
// Release memory obtained from smpi_shared_malloc().
//
// With the "smpi/use_shared_malloc" option set, the metadata of `ptr` is looked
// up in `allocs_metadata` under its "%p" representation. Every inconsistency
// (missing dictionaries, unknown pointer, broken metadata link) only logs a
// warning. Otherwise the mapping is removed with munmap() using the recorded
// size, and once the reference count of the shared segment is no longer
// positive its entry is removed from `allocs`.
// Without the option the pointer is released the classic way.
// NOTE(review): the conditions guarding most warnings, the derivation of `data`
// from `meta`, the update of `data->count` and the classic free call are not
// visible in this excerpt.
431 void smpi_shared_free(void *ptr)
433 char loc[PTR_STRLEN];
434 shared_metadata_t* meta;
436 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
439 XBT_WARN("Cannot free: nothing was allocated");
442 if(!allocs_metadata) {
443 XBT_WARN("Cannot free: no metadata was allocated");
// Same key format as the one used by shm_map() when registering the mapping.
445 snprintf(loc, PTR_STRLEN, "%p", ptr);
446 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
448 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
453 XBT_WARN("Cannot free: something is broken in the metadata link");
456 if(munmap(ptr, meta->size) < 0) {
457 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
460 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
461 if (data->count <= 0) {
463 xbt_dict_remove(allocs, data->loc);
464 XBT_DEBUG("Shared free - with removal - of %p", ptr);
467 XBT_DEBUG("Classic free of %p", ptr);
// Check whether a value is recorded in `calls` for the key "func:input".
// The lookup uses xbt_dict_get(), which throws when the key is missing; the
// not_found_error category is then handled explicitly.
// NOTE(review): the values returned in each case and the handling of other
// exception categories are not visible in this excerpt.
473 int smpi_shared_known_call(const char* func, const char* input) {
474 char* loc = bprintf("%s:%s", func, input);
// No free function is registered for the values of this dictionary.
479 calls = xbt_dict_new_homogeneous(NULL);
482 xbt_dict_get(calls, loc); /* Succeed or throw */
486 if(ex.category == not_found_error) {
// Fetch from `calls` the value recorded under the key "func:input", using
// xbt_dict_get().
// NOTE(review): the return statement (presumably `data`) is not visible in
// this excerpt.
497 void* smpi_shared_get_call(const char* func, const char* input) {
498 char* loc = bprintf("%s:%s", func, input);
502 calls = xbt_dict_new_homogeneous(NULL);
504 data = xbt_dict_get(calls, loc);
// Record `data` in `calls` under the key "func:input". The dictionary is
// created without a free function for its values.
// NOTE(review): the returned value is not visible in this excerpt.
509 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
510 char* loc = bprintf("%s:%s", func, input);
513 calls = xbt_dict_new_homogeneous(NULL);
515 xbt_dict_set(calls, loc, data, NULL);