1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
13 #include "simgrid/sg_config.h"
19 #include <sys/types.h>
22 #include <math.h> // sqrt
/* Declares the log category smpi_bench as a subcategory of smpi; the XBT_DEBUG,
 * XBT_WARN, XBT_ERROR and XBT_CRITICAL calls of this file presumably report
 * under it (TODO confirm against the xbt log documentation). */
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
28 "Logging specific to SMPI (benchmarking)");
30 /* Shared allocations are handled through shared memory segments.
31 * Associated data and metadata are used as follows:
34 * `allocs' dict ---- -.
35 * ---------- shared_data_t shared_metadata_t / | | |
36 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
37 * | ---------- | fd of <name> | | | size of mmap | --| | | |
38 * | | count (2) | |-- | data | \ | | |
39 * `----------------- | <name> | | ----------------- ---- |
40 * -------------------- | ^ |
42 * | | `allocs_metadata' dict |
43 * | | ---------------------- |
44 * | `-- | <addr of mmap #1> |<-'
45 * | .-- | <addr of mmap #2> |<-.
46 * | | ---------------------- |
52 * | shared_metadata_t / | |
53 * | ----------------- | | |
54 * | | size of mmap | --| | |
56 * ----------------- | | |
// PTR_STRLEN is the buffer length used when a pointer is formatted with %p:
// 2 characters, plus two per byte of a void*, plus 1 (presumably the 0x prefix,
// the hex digits and the terminating NUL).
61 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
// Global dictionaries, all released by smpi_bench_destroy():
//  - allocs: keyed by the hashed call-site name, holds shared_data_t records
//  - allocs_metadata: keyed by the %p form of a mapped address, holds shared_metadata_t records
//  - samples: keyed by the sample location string, holds local_data_t records
//  - calls: keyed by the string func:input, holds caller-provided pointers
63 xbt_dict_t allocs = NULL; /* Allocated on first use */
64 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
65 xbt_dict_t samples = NULL; /* Allocated on first use */
66 xbt_dict_t calls = NULL; /* Allocated on first use */
// Set from smpi_process_index() in smpi_bench_begin().
67 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
// smpi_execute() ignores durations strictly below this threshold.
69 double smpi_cpu_threshold;
// smpi_execute() multiplies a duration by this factor to obtain an amount of flops.
70 double smpi_running_power;
/* Return the size in bytes (st_size as reported by fstat()) of the file behind
 * descriptor `fd`. Calls xbt_die() with the errno message when fstat() fails. */
83 static size_t shm_size(int fd) {
86 if(fstat(fd, &st) < 0) {
87 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
89 return (size_t)st.st_size;
/* Map `size` bytes of the file behind `fd` as shared, readable and writable
 * memory and register the new mapping in the `allocs_metadata` dictionary.
 *
 * fd:   descriptor of the backing file; it is grown with ftruncate() when it
 *       is smaller than `size`.
 * size: number of bytes to map, starting at offset 0.
 * data: shared_data_t record of the segment. NOTE(review): presumably stored
 *       in the metadata record together with `size`; those assignments and the
 *       return statement are not visible here -- confirm.
 *
 * Any failure of ftruncate() or mmap() ends in xbt_die(). */
93 static void* shm_map(int fd, size_t size, shared_data_t* data) {
96 shared_metadata_t* meta;
/* Grow the backing file first when it is smaller than the requested mapping. */
98 if(size > shm_size(fd)) {
99 if(ftruncate(fd, (off_t)size) < 0) {
100 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
104 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
105 if(mem == MAP_FAILED) {
106 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* The metadata dictionary is created lazily, on the first mapping. */
108 if(!allocs_metadata) {
109 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
/* The dictionary key is the mapped address printed with %p; smpi_shared_free()
 * builds the same key from the pointer it is given. */
111 snprintf(loc, PTR_STRLEN, "%p", mem);
112 meta = xbt_new(shared_metadata_t, 1);
115 xbt_dict_set(allocs_metadata, loc, meta, NULL);
116 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Release the four global dictionaries of this file: allocs, allocs_metadata,
 * samples and calls. */
121 void smpi_bench_destroy(void)
123 xbt_dict_free(&allocs);
124 xbt_dict_free(&allocs_metadata);
125 xbt_dict_free(&samples);
126 xbt_dict_free(&calls);
/* Run a simulated execution named "computation" of `flops` on the host
 * returned by SIMIX_host_self(), and block until that execution completes.
 * The action is tagged with the category given by
 * TRACE_internal_smpi_get_category(). */
129 void smpi_execute_flops(double flops) {
132 host = SIMIX_host_self();
133 XBT_DEBUG("Handle real computation time: %g flops", flops);
134 action = simcall_host_execute("computation", host, flops, 1);
136 simcall_set_category (action, TRACE_internal_smpi_get_category());
/* Wait for the simulated execution to finish before returning. */
138 simcall_host_execution_wait(action);
/* Inject a measured computation time into the simulation.
 *
 * duration: elapsed time to account for (smpi_bench_end() passes the value of
 *           xbt_os_timer_elapsed()).
 *
 * When `duration` reaches smpi_cpu_threshold it is converted to flops with
 * smpi_running_power and executed through smpi_execute_flops(), bracketed by
 * TRACE_smpi_computing_in()/TRACE_smpi_computing_out(). The last debug message
 * reports durations that are ignored; NOTE(review): it presumably sits in the
 * else branch of the threshold test, which is not visible here. */
141 void smpi_execute(double duration)
143 if (duration >= smpi_cpu_threshold) {
144 XBT_DEBUG("Sleep for %g to handle real computation time", duration);
145 double flops = duration * smpi_running_power;
147 int rank = smpi_process_index();
/* Tracing record describing this computation and its size in flops. */
148 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
149 extra->type=TRACING_COMPUTING;
150 extra->comp_size=flops;
151 TRACE_smpi_computing_in(rank, extra);
153 smpi_execute_flops(flops);
156 TRACE_smpi_computing_out(rank);
160 XBT_DEBUG("Real computation took %g while option smpi/cpu_threshold is set to %g => ignore it",
161 duration, smpi_cpu_threshold);
/* Start measuring a computation phase: starts the timer returned by
 * smpi_process_timer() and stores smpi_process_index() in smpi_current_rank. */
165 void smpi_bench_begin(void)
167 xbt_os_threadtimer_start(smpi_process_timer());
168 smpi_current_rank = smpi_process_index();
/* Stop measuring a computation phase and hand the elapsed time of the process
 * timer to smpi_execute().
 * If smpi_process_get_sampling() is true, two critical messages and a
 * backtrace are emitted and xbt_die() is called. */
171 void smpi_bench_end(void)
173 xbt_os_timer_t timer = smpi_process_timer();
174 xbt_os_threadtimer_stop(timer);
/* Ending a benchmark while the sampling flag is set is treated as fatal. */
175 if (smpi_process_get_sampling()) {
176 XBT_CRITICAL("Cannot do recursive benchmarks.");
177 XBT_CRITICAL("Are you trying to make a call to MPI within a SMPI_SAMPLE_ block?");
178 xbt_backtrace_display_current();
179 xbt_die("Aborting.");
181 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulated sleep of `secs` seconds, implemented as a computation: the
 * duration is multiplied by the speed of the current host to get an amount of
 * flops, which is then executed on that host and waited for.
 * NOTE(review): the return statement is not visible here -- confirm what is
 * returned to the caller. */
184 unsigned int smpi_sleep(unsigned int secs)
190 double flops = (double) secs*simcall_host_get_speed(SIMIX_host_self());
191 XBT_DEBUG("Sleep for: %f flops", flops);
192 action = simcall_host_execute("computation", SIMIX_host_self(), flops, 1);
194 simcall_set_category (action, TRACE_internal_smpi_get_category());
196 simcall_host_execution_wait(action);
/* Fill `tv` from the simulated clock (SIMIX_get_clock()) instead of the wall
 * clock: tv_sec receives the integral part and tv_usec the fractional part
 * scaled to microseconds.
 * NOTE(review): the two tv_usec assignments differ only by the cast type
 * (useconds_t vs suseconds_t); presumably a preprocessor conditional that is
 * not visible here selects one of them -- confirm. The return value is not
 * visible either. */
202 int smpi_gettimeofday(struct timeval *tv)
206 now = SIMIX_get_clock();
208 tv->tv_sec = (time_t)now;
210 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
212 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Return the number of timestamp ticks per unit of simulated time: the inverse
 * of sg_maxmin_precision, truncated to an unsigned long long. */
219 extern double sg_maxmin_precision;
220 unsigned long long smpi_rastro_resolution (void)
223 double resolution = (1/sg_maxmin_precision);
225 return (unsigned long long)resolution;
/* Return the simulated clock (SIMIX_get_clock()) as an integer count of ticks,
 * where one unit of simulated time is smpi_rastro_resolution() ticks. */
228 unsigned long long smpi_rastro_timestamp (void)
231 double now = SIMIX_get_clock();
/* Split the clock into its integral part and its fractional part; the
 * fractional part is scaled to ticks and truncated by the conversion. */
233 unsigned long long sec = (unsigned long long)now;
234 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
236 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
239 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Statistics kept for one sampled code location. NOTE(review): these are
 * presumably the members of local_data_t (the type allocated by
 * smpi_sample_1() and stored in the `samples` dictionary); the enclosing
 * struct declaration is not visible here -- confirm. */
241 double threshold; /* maximal stderr requested (if positive) */
242 double relstderr; /* observed stderr so far */
243 double mean; /* mean of benched times, to be used if the block is disabled */
244 double sum; /* sum of benched times (to compute the mean and stderr) */
245 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
246 int iters; /* amount of requested iterations */
247 int count; /* amount of iterations done so far */
248 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Build the string that identifies a sampled block in the `samples`
 * dictionary: either "file:line", or "file:line:rank" where rank is
 * smpi_process_index().
 * NOTE(review): the test on `global` that picks between the two forms is not
 * visible here; presumably the rank-free form is used when `global` is
 * non-zero -- confirm. */
251 static char *sample_location(int global, const char *file, int line) {
253 return bprintf("%s:%d", file, line);
255 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Tell whether the sampling record `data` holds enough measurements.
 * The answer starts as "count has reached iters". When a positive threshold
 * was requested, it is turned into 0 if relstderr is still above that
 * threshold, and also under a second condition ("not enough data") whose test
 * is not visible here.
 * NOTE(review): the return statement is not visible; presumably `res` is
 * returned -- confirm. */
258 static int sample_enough_benchs(local_data_t *data) {
259 int res = data->count >= data->iters;
260 if (data->threshold>0.0) {
262 res = 0; // not enough data
263 if (data->relstderr > data->threshold)
264 res = 0; // stderr too high yet
266 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
267 (res?"enough benchs":"need more data"),
268 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* First step of a sampled block: close the benchmark that was running, raise
 * the per-process sampling flag, and find or create the sampling record of
 * this location.
 *
 * global, file, line: identify the block (see sample_location()).
 * iters:     requested number of iterations to bench.
 * threshold: requested maximal stderr.
 *
 * A record created here starts with benching = 1 and is stored in `samples`.
 * For a record that already exists, an error is logged when `iters` or
 * `threshold` differ from the stored values, and `benching` is recomputed
 * from sample_enough_benchs().
 * NOTE(review): the tests separating the "new record" and "existing record"
 * paths, and several field initialisations, are not visible here. */
272 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
274 char *loc = sample_location(global, file, line);
277 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
278 smpi_process_set_sampling(1);
281 samples = xbt_dict_new_homogeneous(free);
283 data = xbt_dict_get_or_null(samples, loc);
/* At least one stopping criterion (iteration count or stderr bound) is required. */
285 xbt_assert(threshold>0 || iters>0,
286 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
287 data = (local_data_t *) xbt_new(local_data_t, 1);
290 data->sum_pow2 = 0.0;
292 data->threshold = threshold;
293 data->benching = 1; // If we have no data, we need at least one
295 xbt_dict_set(samples, loc, data, NULL);
296 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
298 if (data->iters != iters || data->threshold != threshold) {
299 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
300 loc, data->iters, data->threshold, iters,threshold);
304 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
305 data->benching = !sample_enough_benchs(data);
306 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Second step of a sampled block: decide whether the block body must be
 * executed and benchmarked once more.
 *
 * The record of this location must already exist in `samples` (created by
 * smpi_sample_1()). When its `benching` flag is 1, a new bench is needed.
 * Otherwise the stored mean is injected with smpi_execute(), the sampling
 * flag is cleared and `res` is set to 0.
 * NOTE(review): the code of the benching branch and the return statement are
 * not visible here; presumably `res` is returned -- confirm. */
311 int smpi_sample_2(int global, const char *file, int line)
313 char *loc = sample_location(global, file, line);
317 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
318 data = xbt_dict_get(samples, loc);
319 XBT_DEBUG("sample2 %s",loc);
322 if (data->benching==1) {
323 // we need to run a new bench
324 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
325 data->count, data->iters, data->relstderr, data->threshold, data->mean);
328 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
329 // Just sleep instead
330 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
331 data->count, data->iters, data->relstderr, data->threshold, data->mean);
332 smpi_execute(data->mean);
333 smpi_process_set_sampling(0);
334 res = 0; // prepare to capture future, unrelated computations
/* Third step of a sampled block: record the duration of the iteration that
 * just ran and update the statistics of this location.
 *
 * The process timer is stopped and its elapsed time becomes the new sample.
 * mean is sum / count, and relstderr is
 *   sqrt((sum_pow2 / n - mean * mean) / n) / mean
 * that is, the standard error of the mean divided by the mean.
 * While sample_enough_benchs() still asks for more data, `mean` is overwritten
 * with the last sample so that smpi_sample_2() simulates exactly this
 * iteration.
 * NOTE(review): the body of the benching==0 test and the updates of `count`
 * and `sum` are not visible here. relstderr divides by `mean`; confirm that a
 * zero mean cannot occur, as the result would not be finite. */
341 void smpi_sample_3(int global, const char *file, int line)
343 char *loc = sample_location(global, file, line);
346 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
347 data = xbt_dict_get(samples, loc);
348 XBT_DEBUG("sample3 %s",loc);
351 if (data->benching==0) {
355 // ok, benchmarking this loop is over
356 xbt_os_threadtimer_stop(smpi_process_timer());
361 sample = xbt_os_timer_elapsed(smpi_process_timer());
363 data->sum_pow2 += sample * sample;
364 n = (double)data->count;
365 data->mean = data->sum / n;
366 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
367 if (!sample_enough_benchs(data)) {
368 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
370 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
371 data->mean, data->relstderr, sample);
373 // That's enough for now, prevent sample_2 to run the same code over and over
/* Callback handed to xbt_dict_new_homogeneous() when the `allocs` dictionary
 * is created by smpi_shared_malloc(). It receives an entry as a void pointer
 * and views it as a shared_data_t.
 * NOTE(review): the cleanup performed on the record is not visible here. */
378 static void smpi_shared_alloc_free(void *p)
380 shared_data_t *data = p;
/* Turn the call-site string `loc` into a short name made only of characters
 * from the URL-safe base64 alphabet, derived from the hexadecimal digest held
 * in `hash`. `loc` is resized to 30 bytes and reused for the result.
 * NOTE(review): the computation of `hash`, the first and last bytes of `loc`
 * and the return statement are not visible here.
 *
 * Two encoding defects are fixed so that no digest bits are thrown away:
 *  - consecutive symbols now come from disjoint 6-bit groups (shifts 18, 12,
 *    6, 0); the previous step of 3 produced overlapping windows and never
 *    used the low 9 bits of each 24-bit group;
 *  - the alphabet now contains W and w; it previously listed Z and z twice,
 *    so only 62 of its 64 entries were distinct. */
385 static char *smpi_shared_alloc_hash(char *loc)
395 loc = xbt_realloc(loc, 30);
/* Six hex digits are 24 bits, that is four 6-bit symbols per round. */
397 for (i = 0; i < 40; i += 6) { /* base64 encode */
398 memcpy(s, hash + i, 6);
399 val = strtoul(s, NULL, 16);
400 for (j = 0; j < 4; j++) {
401 unsigned char x = (val >> (18 - 6 * j)) & 0x3f;
402 loc[1 + 4 * i / 6 + j] =
403 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"[x];
/* Allocate `size` bytes on behalf of the call site (file, line).
 *
 * With the configuration flag smpi/use_shared_malloc set, the memory is a
 * shared mapping of a POSIX shared memory object whose name is derived from
 * the pid, file and line: the object is created with shm_open() (exclusive
 * creation), mapped with shm_map(), unlinked right away and registered in the
 * `allocs` dictionary; when a record already exists in `allocs`, its stored
 * descriptor is mapped again. Without the flag, xbt_malloc() is used.
 * NOTE(review): the tests separating these paths, the initialisation of the
 * shared_data_t record and the return statement are not visible here. */
410 void *smpi_shared_malloc(size_t size, const char *file, int line)
413 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
414 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
417 loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
420 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
422 data = xbt_dict_get_or_null(allocs, loc);
// O_EXCL makes shm_open() fail when an object of that name already exists.
424 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL,
425 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
429 xbt_die("Please cleanup /dev/shm/%s", loc);
431 xbt_die("An unhandled error occured while opening %s. shm_open: %s", loc, strerror(errno));
434 data = xbt_new(shared_data_t, 1);
438 mem = shm_map(fd, size, data);
// The name is unlinked immediately after mapping; a failure only produces a warning.
439 if (shm_unlink(loc) < 0) {
440 XBT_WARN("Could not early unlink %s. shm_unlink: %s", loc, strerror(errno));
442 xbt_dict_set(allocs, loc, data, NULL);
443 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
// Known call site: map the descriptor stored in the existing record again.
446 mem = shm_map(data->fd, size, data);
449 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
451 mem = xbt_malloc(size);
452 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
// Release memory obtained from smpi_shared_malloc().
//
// With the configuration flag smpi/use_shared_malloc set, the metadata of ptr
// is looked up in allocs_metadata under the %p form of the pointer, the
// mapping is removed with munmap() using the recorded size, and the entry of
// the segment is removed from allocs once its count is not positive any more.
// Every inconsistency (missing dictionaries, unknown pointer, broken metadata
// link, munmap failure) is reported with a warning instead of aborting.
// NOTE(review): the tests guarding each warning, the update of data->count and
// the non-shared branch (only its debug message) are not fully visible here.
457 void smpi_shared_free(void *ptr)
459 char loc[PTR_STRLEN];
460 shared_metadata_t* meta;
462 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
465 XBT_WARN("Cannot free: nothing was allocated");
468 if(!allocs_metadata) {
469 XBT_WARN("Cannot free: no metadata was allocated");
// Same key format as the one used by shm_map() when registering the mapping.
471 snprintf(loc, PTR_STRLEN, "%p", ptr);
472 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
474 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
479 XBT_WARN("Cannot free: something is broken in the metadata link");
482 if(munmap(ptr, meta->size) < 0) {
483 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
486 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
// No mapping of this segment is left: drop its record from allocs.
487 if (data->count <= 0) {
489 xbt_dict_remove(allocs, data->loc);
490 XBT_DEBUG("Shared free - with removal - of %p", ptr);
493 XBT_DEBUG("Classic free of %p", ptr);
// Check whether a result is registered in the calls dictionary under the key
// func:input. The lookup uses xbt_dict_get(), which throws when the key is
// missing; the visible handler distinguishes not_found_error from any other
// exception category.
// NOTE(review): the exception-handling scaffolding, the action taken for other
// categories and the return statement are not visible here.
499 int smpi_shared_known_call(const char* func, const char* input)
501 char* loc = bprintf("%s:%s", func, input);
506 calls = xbt_dict_new_homogeneous(NULL);
509 xbt_dict_get(calls, loc); /* Succeed or throw */
516 if (ex.category != not_found_error)
/* Fetch the pointer registered in the `calls` dictionary under the key
 * "func:input", using xbt_dict_get().
 * NOTE(review): the return statement and the release of `loc` are not visible
 * here; presumably the fetched pointer is returned -- confirm. */
523 void* smpi_shared_get_call(const char* func, const char* input) {
524 char* loc = bprintf("%s:%s", func, input);
528 calls = xbt_dict_new_homogeneous(NULL);
530 data = xbt_dict_get(calls, loc);
/* Register `data` in the `calls` dictionary under the key "func:input".
 * The dictionary is created with a NULL free function, so it presumably does
 * not take ownership of `data` (TODO confirm against the xbt dict API).
 * NOTE(review): the return statement is not visible here. */
535 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
536 char* loc = bprintf("%s:%s", func, input);
539 calls = xbt_dict_new_homogeneous(NULL);
541 xbt_dict_set(calls, loc, data, NULL);