1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
13 #include "simgrid/sg_config.h"
19 #include <sys/types.h>
22 #include <math.h> // sqrt
/* Declares the "smpi_bench" logging category as a child of "smpi".
 * Going by the macro name, it presumably also becomes the default category
 * used by the XBT_DEBUG / XBT_WARN / XBT_ERROR calls of this file. */
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
28 "Logging specific to SMPI (benchmarking)");
30 /* Shared allocations are handled through shared memory segments.
31 * Associated data and metadata are used as follows:
34 * `allocs' dict ---- -.
35 * ---------- shared_data_t shared_metadata_t / | | |
36 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
37 * | ---------- | fd of <name> | | | size of mmap | --| | | |
38 * | | count (2) | |-- | data | \ | | |
39 * `----------------- | <name> | | ----------------- ---- |
40 * -------------------- | ^ |
42 * | | `allocs_metadata' dict |
43 * | | ---------------------- |
44 * | `-- | <addr of mmap #1> |<-'
45 * | .-- | <addr of mmap #2> |<-.
46 * | | ---------------------- |
52 * | shared_metadata_t / | |
53 * | ----------------- | | |
54 * | | size of mmap | --| | |
56 * ----------------- | | |
/* Size of the buffers that receive a pointer formatted with "%p" (see the
 * snprintf(loc, PTR_STRLEN, "%p", ...) calls in this file): 2 characters,
 * plus two per pointer byte, plus one for the terminating NUL.
 * NOTE(review): the leading 2 presumably accounts for a "0x" prefix; the
 * exact "%p" rendering is implementation-defined -- confirm it always fits. */
61 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
/* Module-wide dictionaries. Each one is created by the first code path that
 * finds it NULL and all four are released by smpi_bench_destroy().
 *  - allocs:          hashed call-site name -> shared_data_t
 *  - allocs_metadata: "%p" string of a mapping address -> shared_metadata_t
 *  - samples:         sample_location() key -> local_data_t
 *  - calls:           "func:input" key -> caller-supplied pointer */
63 xbt_dict_t allocs = NULL; /* Allocated on first use */
64 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
65 xbt_dict_t samples = NULL; /* Allocated on first use */
66 xbt_dict_t calls = NULL; /* Allocated on first use */
/* Thread-local copy of smpi_process_index(), refreshed by smpi_bench_begin(). */
67 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* Return the size in bytes, as reported by fstat(), of the object behind
 * descriptor `fd`. A failing fstat() is reported through xbt_die() together
 * with the strerror() text.
 * NOTE(review): the declaration of `st` is not visible in this excerpt. */
80 static size_t shm_size(int fd) {
83 if(fstat(fd, &st) < 0) {
84 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
86 return (size_t)st.st_size;
/* Map `size` bytes of descriptor `fd` (read/write, MAP_SHARED, offset 0) and
 * register a freshly allocated shared_metadata_t for the mapping in
 * `allocs_metadata`, keyed by the "%p" rendering of the mapped address.
 *
 * fd   - descriptor to map; grown with ftruncate() first when the object is
 *        currently smaller than `size`.
 * size - number of bytes to map.
 * data - shared_data_t of the allocation.
 *
 * ftruncate() and mmap() failures are reported through xbt_die().
 * NOTE(review): the declarations of `mem` and `loc`, the lines that fill the
 * metadata fields (presumably from `size` and `data`) and the return
 * statement are not visible in this excerpt. */
90 static void* shm_map(int fd, size_t size, shared_data_t* data) {
93 shared_metadata_t* meta;
/* Grow the backing object if it cannot hold the requested mapping. */
95 if(size > shm_size(fd)) {
96 if(ftruncate(fd, (off_t)size) < 0) {
97 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
101 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
102 if(mem == MAP_FAILED) {
103 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* The metadata dictionary is created on first use; xbt_free is registered as
 * the function releasing its values. */
105 if(!allocs_metadata) {
106 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
/* The mapping address, printed with "%p", is the dictionary key. */
108 snprintf(loc, PTR_STRLEN, "%p", mem);
109 meta = xbt_new(shared_metadata_t, 1);
112 xbt_dict_set(allocs_metadata, loc, meta, NULL);
113 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Release the four module dictionaries (allocs, allocs_metadata, samples and
 * calls) by passing the address of each handle to xbt_dict_free().
 * NOTE(review): whether remaining entries go through the free functions
 * registered at creation time depends on xbt_dict_free() -- confirm. */
118 void smpi_bench_destroy(void)
120 xbt_dict_free(&allocs);
121 xbt_dict_free(&allocs_metadata);
122 xbt_dict_free(&samples);
123 xbt_dict_free(&calls);
/* Run a simulated computation of `flops` on the host of the calling process:
 * create a "computation" execution with simcall_host_execute(), attach the
 * category returned by TRACE_internal_smpi_get_category() to it, then wait
 * for its completion with simcall_host_execution_wait().
 * NOTE(review): the declarations of `host` and `action` are not visible in
 * this excerpt, nor is any preprocessor guard around the category line. */
126 void smpi_execute_flops(double flops) {
129 host = SIMIX_host_self();
131 XBT_DEBUG("Handle real computation time: %f flops", flops);
132 action = simcall_host_execute("computation", host, flops, 1, 0, 0);
134 simcall_set_category (action, TRACE_internal_smpi_get_category());
136 simcall_host_execution_wait(action);
/* Account for `duration` of measured computation in the simulation.
 * When `duration` reaches the "smpi/cpu_threshold" option it is multiplied by
 * the "smpi/running_power" option and handed to smpi_execute_flops().
 * The second debug message ("=> ignore it") belongs to the alternative
 * branch, whose header line is not visible in this excerpt. */
139 static void smpi_execute(double duration)
141 /* FIXME: a global variable would be less expensive to consult than a call to sg_cfg_get_double() right on the critical path */
142 if (duration >= sg_cfg_get_double("smpi/cpu_threshold")) {
143 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
144 smpi_execute_flops(duration *
145 sg_cfg_get_double("smpi/running_power"));
147 XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
148 duration, sg_cfg_get_double("smpi/cpu_threshold"));
/* Start the timer returned by smpi_process_timer() and refresh the
 * thread-local smpi_current_rank from smpi_process_index(). */
152 void smpi_bench_begin(void)
154 xbt_os_threadtimer_start(smpi_process_timer());
155 smpi_current_rank = smpi_process_index();
/* Stop the timer returned by smpi_process_timer() and pass the elapsed time
 * it reports to smpi_execute(). */
158 void smpi_bench_end(void)
160 xbt_os_timer_t timer = smpi_process_timer();
162 xbt_os_threadtimer_stop(timer);
163 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulate a sleep of `secs` by executing secs * (speed of the current host,
 * from simcall_host_get_speed()) flops through smpi_execute_flops().
 * NOTE(review): the return statement and any statements around the call are
 * not visible in this excerpt. */
166 unsigned int smpi_sleep(unsigned int secs)
169 smpi_execute_flops((double) secs*simcall_host_get_speed(SIMIX_host_self()));
/* Fill `tv` from SIMIX_get_clock(): tv_sec receives the value truncated to
 * time_t, tv_usec receives the remaining fraction multiplied by 1e6.
 * NOTE(review): the two tv_usec assignments differ only in the cast type
 * (useconds_t vs suseconds_t); they presumably sit in alternative
 * preprocessor branches that are not visible in this excerpt. The declaration
 * of `now` and the return statement are not visible either. */
174 int smpi_gettimeofday(struct timeval *tv)
178 now = SIMIX_get_clock();
180 tv->tv_sec = (time_t)now;
182 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
184 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Defined elsewhere; only read here. */
191 extern double sg_maxmin_precision;
/* Return 1 / sg_maxmin_precision, truncated to unsigned long long.
 * NOTE(review): lines between the visible statements are missing from this
 * excerpt. */
192 unsigned long long smpi_rastro_resolution (void)
195 double resolution = (1/sg_maxmin_precision);
197 return (unsigned long long)resolution;
/* Return the value of SIMIX_get_clock() expressed in units of
 * 1 / smpi_rastro_resolution(): the whole part of the clock times the
 * resolution, plus the fractional part times the resolution (truncated when
 * stored into `pre`).
 * NOTE(review): lines between the visible statements are missing from this
 * excerpt. */
200 unsigned long long smpi_rastro_timestamp (void)
203 double now = SIMIX_get_clock();
205 unsigned long long sec = (unsigned long long)now;
206 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
208 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
211 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Per-location benchmarking state, stored in the `samples` dictionary and
 * accessed through local_data_t pointers by the smpi_sample_* functions.
 * NOTE(review): the opening and closing lines of the type declaration are
 * not visible in this excerpt. */
213 int iters; /* amount of requested iterations */
214 int count; /* amount of iterations done so far */
215 double threshold; /* maximal stderr requested (if positive) */
216 double relstderr; /* observed stderr so far */
217 double mean; /* mean of benched times, to be used if the block is disabled */
218 double sum; /* sum of benched times (to compute the mean and stderr) */
219 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
220 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Build the `samples` dictionary key of a benchmarked block.
 * Two formats exist: "file:line", and "file:line:index" where index is
 * smpi_process_index().
 * NOTE(review): the condition choosing between them is not visible in this
 * excerpt; presumably a non-zero `global` selects the key without the process
 * index -- confirm. The string comes from bprintf(). */
223 static char *sample_location(int global, const char *file, int line) {
225 return bprintf("%s:%d", file, line);
227 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Tell whether the block described by `data` has been benchmarked enough.
 * The starting answer is count >= iters. When a positive threshold is set,
 * the answer is forced to 0 while relstderr exceeds it.
 * NOTE(review): the condition guarding the first "res = 0" ("not enough
 * data") and the return statement are not visible in this excerpt. */
230 static int sample_enough_benchs(local_data_t *data) {
231 int res = data->count >= data->iters;
232 if (data->threshold>0.0) {
234 res = 0; // not enough data
235 if (data->relstderr > data->threshold)
236 res = 0; // stderr too high yet
238 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
239 (res?"enough benchs":"need more data"),
240 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* Entry step of a sampled block located at file:line.
 * Ends the current bench interval with smpi_bench_end(), then looks the
 * location up in `samples`:
 *  - unknown location: requires threshold > 0 or iters > 0, allocates a
 *    local_data_t, marks it as benching and stores it under the key;
 *  - known location: reports an error when iters/threshold differ from the
 *    stored ones, and recomputes `benching` as !sample_enough_benchs().
 * NOTE(review): several lines (declaration of `data`, the tests selecting
 * each case, other field initialisations, any release of `loc`) are not
 * visible in this excerpt. */
244 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
246 char *loc = sample_location(global, file, line);
249 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
/* Values stored in `samples` are released with free(). */
251 samples = xbt_dict_new_homogeneous(free);
253 data = xbt_dict_get_or_null(samples, loc);
255 xbt_assert(threshold>0 || iters>0,
256 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
257 data = (local_data_t *) xbt_new(local_data_t, 1);
260 data->sum_pow2 = 0.0;
262 data->threshold = threshold;
263 data->benching = 1; // If we have no data, we need at least one
265 xbt_dict_set(samples, loc, data, NULL);
266 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
268 if (data->iters != iters || data->threshold != threshold) {
269 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
270 loc, data->iters, data->threshold, iters,threshold);
274 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
275 data->benching = !sample_enough_benchs(data);
276 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Loop-condition step of a sampled block located at file:line.
 * Requires `samples` to exist and fetches the entry with xbt_dict_get().
 * When data->benching is 1, only a debug trace is visible for that case.
 * Otherwise the stored mean is injected with smpi_execute(data->mean) and a
 * new bench interval is opened with smpi_bench_begin().
 * NOTE(review): the declaration of `data`, the statements following the
 * "benchmarking" trace, the branch separator and the return values are not
 * visible in this excerpt. */
281 int smpi_sample_2(int global, const char *file, int line)
283 char *loc = sample_location(global, file, line);
286 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
287 data = xbt_dict_get(samples, loc);
288 XBT_DEBUG("sample2 %s",loc);
291 if (data->benching==1) {
292 // we need to run a new bench
293 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
294 data->count, data->iters, data->relstderr, data->threshold, data->mean);
298 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
299 // Just sleep instead
300 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
301 data->count, data->iters, data->relstderr, data->threshold, data->mean);
302 smpi_execute(data->mean);
304 smpi_bench_begin(); // prepare to capture future, unrelated computations
/* End-of-iteration step of a sampled block located at file:line.
 * Stops the process timer, reads the elapsed time as `sample`, and updates
 * the statistics of the entry: sum of squares, mean = sum / count, and
 * relstderr = sqrt((sum_pow2/n - mean^2) / n) / mean.
 * While sample_enough_benchs() still answers no, `mean` is overwritten with
 * the latest sample so that smpi_sample_2() replays this exact duration.
 * NOTE(review): the updates of `count` and `sum`, the body of the
 * "benching==0" test, the declarations and the final statements are not
 * visible in this excerpt.
 * NOTE(review): relstderr divides by data->mean; a zero mean yields inf/NaN,
 * and rounding can make the variance term slightly negative so that sqrt()
 * returns NaN. Both would make the "relstderr > threshold" comparison in
 * sample_enough_benchs() false -- confirm this is acceptable. */
310 void smpi_sample_3(int global, const char *file, int line)
312 char *loc = sample_location(global, file, line);
315 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
316 data = xbt_dict_get(samples, loc);
317 XBT_DEBUG("sample3 %s",loc);
320 if (data->benching==0) {
324 // ok, benchmarking this loop is over
325 xbt_os_threadtimer_stop(smpi_process_timer());
330 sample = xbt_os_timer_elapsed(smpi_process_timer());
332 data->sum_pow2 += sample * sample;
333 n = (double)data->count;
334 data->mean = data->sum / n;
335 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
336 if (!sample_enough_benchs(data)) {
337 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
339 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
340 data->mean, data->relstderr, sample);
342 // That's enough for now, prevent sample_2 from running the same code over and over
/* Value destructor registered for the `allocs` dictionary (it is the argument
 * of xbt_dict_new_homogeneous() in smpi_shared_malloc()). `p` is treated as a
 * shared_data_t.
 * NOTE(review): the statements releasing the entry are not visible in this
 * excerpt. */
347 static void smpi_shared_alloc_free(void *p)
349 shared_data_t *data = p;
/* Shorten the heap string `loc` into a 30-byte buffer holding a name derived
 * from the 40 hexadecimal digits found in `hash`: each group of 6 hex digits
 * (24 bits) is parsed with strtoul() and emitted as four 6-bit symbols of a
 * URL-safe base64 alphabet, written from loc[1] onwards.
 *
 * Fixed here:
 *  - the shift stride was 3 instead of 6, so the four symbols of a group
 *    overlapped and the low 9 bits of every 24-bit group never reached the
 *    output;
 *  - the alphabet literal read "UVZXYZ" / "uvzxyz" (no 'W'/'w', 'Z'/'z'
 *    twice), so distinct 6-bit values produced the same character.
 * Both defects only reduced the entropy of the generated name; the name is
 * produced and consumed by this file alone.
 *
 * NOTE(review): the declarations, the code filling `hash`, whatever sets
 * loc[0] and terminates the string, and the return statement are not visible
 * in this excerpt. The last group (i = 36) reads hash[36..41], i.e. past the
 * 40 digits -- presumably `hash` is large enough and NUL-padded; confirm. */
354 static char *smpi_shared_alloc_hash(char *loc)
364 loc = xbt_realloc(loc, 30);
366 for (i = 0; i < 40; i += 6) { /* base64 encode */
367 memcpy(s, hash + i, 6);
368 val = strtoul(s, NULL, 16);
369 for (j = 0; j < 4; j++) {
/* 24 bits = four 6-bit groups, most significant first: shifts 18, 12, 6, 0 */
370 unsigned char x = (val >> (18 - 6 * j)) & 0x3f;
371 loc[1 + 4 * i / 6 + j] =
372 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"[x];
/* Allocate `size` bytes on behalf of the call site (file, line).
 *
 * With the "smpi/use_shared_malloc" option set:
 *  - a key "<pid>_<file>_<line>" is built and shortened by
 *    smpi_shared_alloc_hash();
 *  - the key is looked up in `allocs` (created on first use, with
 *    smpi_shared_alloc_free as value destructor);
 *  - when creating the entry, a shared-memory object is opened with
 *    shm_open(O_RDWR | O_CREAT | O_EXCL; owner read/write, group and others
 *    read), mapped through shm_map(), its name is unlinked right away (a
 *    failure there is only a warning) and the entry is stored in `allocs`;
 *  - otherwise the descriptor already stored in the entry is mapped again
 *    through shm_map().
 * Without the option, the memory comes from xbt_malloc().
 *
 * NOTE(review): the declarations, the tests selecting each case, the errno
 * test choosing between the two xbt_die() messages, the initialisation of the
 * new shared_data_t, the reference counting and the return statement are not
 * visible in this excerpt. The comment opened on the hashing line is cut off
 * here as well. */
379 void *smpi_shared_malloc(size_t size, const char *file, int line)
382 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
383 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
386 loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
389 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
391 data = xbt_dict_get_or_null(allocs, loc);
393 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL,
394 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
398 xbt_die("Please cleanup /dev/shm/%s", loc);
400 xbt_die("An unhandled error occured while opening %s: %s", loc, strerror(errno));
403 data = xbt_new(shared_data_t, 1);
407 mem = shm_map(fd, size, data);
408 if (shm_unlink(loc) < 0) {
409 XBT_WARN("Could not early unlink %s: %s", loc, strerror(errno));
411 xbt_dict_set(allocs, loc, data, NULL);
412 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
415 mem = shm_map(data->fd, size, data);
418 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
420 mem = xbt_malloc(size);
421 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
/* Release memory obtained from smpi_shared_malloc().
 *
 * With the "smpi/use_shared_malloc" option set, the metadata of `ptr` is
 * looked up in `allocs_metadata` under its "%p" rendering, the mapping is
 * removed with munmap(ptr, meta->size), and the owning entry is removed from
 * `allocs` once its count is <= 0. Each inconsistency (nothing allocated, no
 * metadata dictionary, unknown pointer, broken metadata link, failing
 * munmap) only produces a warning.
 *
 * NOTE(review): the tests guarding the warnings, the way `data` is obtained
 * from the metadata, the count update, and the statements of the non-shared
 * case other than its debug trace are not visible in this excerpt. */
426 void smpi_shared_free(void *ptr)
428 char loc[PTR_STRLEN];
429 shared_metadata_t* meta;
431 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
434 XBT_WARN("Cannot free: nothing was allocated");
437 if(!allocs_metadata) {
438 XBT_WARN("Cannot free: no metadata was allocated");
/* Same key format as the one used by shm_map() when registering. */
440 snprintf(loc, PTR_STRLEN, "%p", ptr);
441 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
443 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
448 XBT_WARN("Cannot free: something is broken in the metadata link");
451 if(munmap(ptr, meta->size) < 0) {
452 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
455 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
456 if (data->count <= 0) {
458 xbt_dict_remove(allocs, data->loc);
459 XBT_DEBUG("Shared free - with removal - of %p", ptr);
462 XBT_DEBUG("Classic free of %p", ptr);
/* Probe the `calls` dictionary for the key "func:input".
 * The lookup uses xbt_dict_get(), which either succeeds or throws; the
 * handler visible here tests for the not_found_error category.
 * `calls` is created on first use with no value destructor (NULL).
 * NOTE(review): the try/catch lines, the returned values and the release of
 * `loc` are not visible in this excerpt. */
468 int smpi_shared_known_call(const char* func, const char* input) {
469 char* loc = bprintf("%s:%s", func, input);
474 calls = xbt_dict_new_homogeneous(NULL);
477 xbt_dict_get(calls, loc); /* Succeed or throw */
481 if(ex.category == not_found_error) {
/* Fetch from `calls` the pointer stored under the key "func:input", using
 * xbt_dict_get(). `calls` is created on first use with no value destructor.
 * NOTE(review): the declaration of `data`, the release of `loc` and the
 * return statement are not visible in this excerpt. */
492 void* smpi_shared_get_call(const char* func, const char* input) {
493 char* loc = bprintf("%s:%s", func, input);
497 calls = xbt_dict_new_homogeneous(NULL);
499 data = xbt_dict_get(calls, loc);
/* Store `data` in `calls` under the key "func:input". `calls` is created on
 * first use with no value destructor (NULL), so the dictionary presumably
 * never frees the stored pointers -- confirm.
 * NOTE(review): the rest of the function (release of `loc`, return value) is
 * not visible in this excerpt. */
504 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
505 char* loc = bprintf("%s:%s", func, input);
508 calls = xbt_dict_new_homogeneous(NULL);
510 xbt_dict_set(calls, loc, data, NULL);