1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
13 #include "simgrid/sg_config.h"
19 #include <sys/types.h>
22 #include <math.h> // sqrt
27 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
28 "Logging specific to SMPI (benchmarking)");
/* Shared allocations are handled through shared memory segments.
 * Associated data and metadata are used as follows:
 *
 *                                                                    mmap #1
 *    `allocs' dict                                                     ---- -.
 *    ----------      shared_data_t               shared_metadata_t   / |  |  |
 * .->| <name> | ---> -------------------- <--.   -----------------   | |  |  |
 * |  ----------      | fd of <name>     |    |   | size of mmap  | --| |  |  |
 * |                  | count (2)        |    |-- | data          |   \ |  |  |
 * `----------------- | <name>           |    |   -----------------     ----  |
 *                    --------------------    |   ^                           |
 *                                            |   |                           |
 *                                            |   |   `allocs_metadata' dict  |
 *                                            |   |   ----------------------  |
 *                                            |   `-- | <addr of mmap #1>  |<-'
 *                                            |   .-- | <addr of mmap #2>  |<-.
 *                                            |   |   ----------------------  |
 *                                            |   |                           |
 *                                            |   |                           |
 *                                            |   |                           |
 *                                            |   |                   mmap #2 |
 *                                            |   v                     ---- -'
 *                                            |   shared_metadata_t   / |  |
 *                                            |   -----------------   | |  |
 *                                            |   | size of mmap  | --| |  |
 *                                            `-- | data          |   | |  |
 *                                                -----------------   | |  |
 *                                                                    \ |  |
 *                                                                      ----
 */
// Size of the buffers that receive a pointer printed with "%p" (see shm_map
// and smpi_shared_free): 2 leading characters, two characters per byte of a
// void*, and one terminating NUL.
// NOTE(review): presumably the 2 leading characters are a "0x" prefix; the
// output format of "%p" is implementation-defined, so confirm the bound.
61 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
// The four dictionaries below start out NULL, are created lazily by the code
// that first needs them, and are all released by smpi_bench_destroy().
// allocs: keyed by the hashed allocation-site name built in smpi_shared_malloc.
63 xbt_dict_t allocs = NULL; /* Allocated on first use */
// allocs_metadata: keyed by the "%p" rendering of a mapping address (shm_map).
64 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
// samples: keyed by the string returned by sample_location().
65 xbt_dict_t samples = NULL; /* Allocated on first use */
// calls: keyed by "<func>:<input>" (smpi_shared_*_call functions).
66 xbt_dict_t calls = NULL; /* Allocated on first use */
// Thread-local; assigned from smpi_process_index() in smpi_bench_begin().
67 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
// Durations below this threshold are not simulated by smpi_execute().
69 double smpi_cpu_threshold;
// Factor used by smpi_execute() to turn a measured duration into flops.
70 double smpi_running_power;
// Return the size (st_size from fstat) of the object behind descriptor `fd`.
// Aborts through xbt_die() with the errno text when fstat fails.
// NOTE(review): the declaration of `st` is not visible in this excerpt.
83 static size_t shm_size(int fd) {
86 if(fstat(fd, &st) < 0) {
87 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
89 return (size_t)st.st_size;
// Map `size` bytes of descriptor `fd` as a shared, readable and writable
// region, and register a shared_metadata_t for the new mapping in
// `allocs_metadata` under the "%p" rendering of the mapping address.
// Any failure of ftruncate or mmap aborts through xbt_die().
// NOTE(review): the declarations of `mem` and `loc`, the writes to the fields
// of `meta`, the use made of `data`, and the return statement are not visible
// in this excerpt; presumably `meta` records `size` and `data`, and the
// mapping address is returned -- confirm against the full file.
93 static void* shm_map(int fd, size_t size, shared_data_t* data) {
96 shared_metadata_t* meta;
// Grow the underlying object first when it is smaller than the request.
98 if(size > shm_size(fd)) {
99 if(ftruncate(fd, (off_t)size) < 0) {
100 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
104 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
105 if(mem == MAP_FAILED) {
106 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
// Lazily create the metadata dictionary; xbt_free is handed over as the
// function the dictionary uses on its values.
108 if(!allocs_metadata) {
109 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
// The dictionary key is the textual form of the mapping address.
111 snprintf(loc, PTR_STRLEN, "%p", mem);
112 meta = xbt_new(shared_metadata_t, 1);
115 xbt_dict_set(allocs_metadata, loc, meta, NULL);
116 XBT_DEBUG("MMAP %zu to %p", size, mem);
// Release the four module-level dictionaries (allocs, allocs_metadata,
// samples, calls) by passing the address of each to xbt_dict_free().
121 void smpi_bench_destroy(void)
123 xbt_dict_free(&allocs);
124 xbt_dict_free(&allocs_metadata);
125 xbt_dict_free(&samples);
126 xbt_dict_free(&calls);
// Run a simulated computation of `flops` on the calling process's host and
// block until it completes: an execution action named "computation" is
// created, tagged with the SMPI tracing category, then waited for.
// NOTE(review): the meaning of the last argument (1) of simcall_host_execute
// and the declarations of `host` and `action` are not visible here.
129 void smpi_execute_flops(double flops) {
132 host = SIMIX_host_self();
133 XBT_DEBUG("Handle real computation time: %g flops", flops);
134 action = simcall_host_execute("computation", host, flops, 1);
136 simcall_set_category (action, TRACE_internal_smpi_get_category());
138 simcall_host_execution_wait(action);
// Inject `duration` of measured computation into the simulation.
// When duration >= smpi_cpu_threshold, it is converted to flops
// (duration * smpi_running_power), bracketed by TRACE_smpi_computing_in/out
// for the current process index, and executed with smpi_execute_flops().
// NOTE(review): the branch structure around the second debug message is not
// visible in this excerpt; its text indicates it is the below-threshold case
// in which the duration is ignored.
141 static void smpi_execute(double duration)
143 if (duration >= smpi_cpu_threshold) {
144 XBT_DEBUG("Sleep for %g to handle real computation time", duration);
145 double flops = duration * smpi_running_power;
147 int rank = smpi_process_index();
// Extra tracing payload: marks the event as computing and records its size.
148 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
149 extra->type=TRACING_COMPUTING;
150 extra->comp_size=flops;
151 TRACE_smpi_computing_in(rank, extra);
153 smpi_execute_flops(flops);
156 TRACE_smpi_computing_out(rank);
160 XBT_DEBUG("Real computation took %g while option smpi/cpu_threshold is set to %g => ignore it",
161 duration, smpi_cpu_threshold);
// Start timing a stretch of application code: starts the calling process's
// thread timer and refreshes the thread-local smpi_current_rank.
165 void smpi_bench_begin(void)
167 xbt_os_threadtimer_start(smpi_process_timer());
168 smpi_current_rank = smpi_process_index();
// Stop the calling process's thread timer and hand the elapsed time to
// smpi_execute() so that it is accounted for in the simulation.
171 void smpi_bench_end(void)
173 xbt_os_timer_t timer = smpi_process_timer();
175 xbt_os_threadtimer_stop(timer);
176 smpi_execute(xbt_os_timer_elapsed(timer));
// Simulate a sleep of `secs` seconds as a computation on the current host:
// secs is multiplied by the host speed to obtain a flop amount, which is run
// as a "computation" execution action and waited for.
// NOTE(review): the declaration of `action`, any statements before or after
// the visible ones, and the returned value are not visible in this excerpt.
179 unsigned int smpi_sleep(unsigned int secs)
185 double flops = (double) secs*simcall_host_get_speed(SIMIX_host_self());
186 XBT_DEBUG("Sleep for: %f flops", flops);
187 action = simcall_host_execute("computation", SIMIX_host_self(), flops, 1);
189 simcall_set_category (action, TRACE_internal_smpi_get_category());
191 simcall_host_execution_wait(action);
// Fill `tv` from the simulated clock (SIMIX_get_clock()): tv_sec receives the
// whole seconds, tv_usec the remaining fraction scaled by 1e6.
// NOTE(review): the two tv_usec assignments differ only in the cast type
// (useconds_t vs suseconds_t); presumably they are alternatives selected by a
// preprocessor conditional that is not visible in this excerpt. The
// declaration of `now` and the return value are not visible either.
197 int smpi_gettimeofday(struct timeval *tv)
201 now = SIMIX_get_clock();
203 tv->tv_sec = (time_t)now;
205 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
207 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
// Number of timestamp ticks per simulated second, computed as the inverse of
// sg_maxmin_precision and truncated to an unsigned long long.
214 extern double sg_maxmin_precision;
215 unsigned long long smpi_rastro_resolution (void)
218 double resolution = (1/sg_maxmin_precision);
220 return (unsigned long long)resolution;
// Current simulated time expressed in ticks of smpi_rastro_resolution():
// whole seconds times the resolution, plus the fractional part of the clock
// scaled by the resolution (truncated when stored in `pre`).
223 unsigned long long smpi_rastro_timestamp (void)
226 double now = SIMIX_get_clock();
228 unsigned long long sec = (unsigned long long)now;
229 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
231 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
234 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
// Per-location sampling state stored in the `samples` dictionary and used by
// smpi_sample_1/2/3 and sample_enough_benchs (fields of local_data_t).
// NOTE(review): the enclosing type definition lines are not visible here.
// `relstderr` is computed in smpi_sample_3 as
// sqrt((sum_pow2/n - mean^2)/n) / mean, i.e. it is relative to the mean.
236 double threshold; /* maximal stderr requested (if positive) */
237 double relstderr; /* observed stderr so far */
238 double mean; /* mean of benched times, to be used if the block is disabled */
239 double sum; /* sum of benched times (to compute the mean and stderr) */
240 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
241 int iters; /* amount of requested iterations */
242 int count; /* amount of iterations done so far */
243 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
// Build the `samples` dictionary key for a sampled code location: either
// "<file>:<line>" or "<file>:<line>:<process index>".
// NOTE(review): the condition choosing between the two forms is not visible
// in this excerpt; presumably `global` selects the form without the process
// index. The string comes from bprintf(); ownership presumably passes to the
// caller -- confirm.
246 static char *sample_location(int global, const char *file, int line) {
248 return bprintf("%s:%d", file, line);
250 return bprintf("%s:%d:%d", file, line, smpi_process_index());
// Decide whether enough benchmark runs were collected for `data`.
// The starting verdict is count >= iters. When a positive threshold is set,
// the verdict is forced to 0 under a first condition that is not visible in
// this excerpt, and also while relstderr is still above the threshold.
// The verdict and the current statistics are logged at debug level.
// NOTE(review): the return statement is not visible; presumably `res`.
253 static int sample_enough_benchs(local_data_t *data) {
254 int res = data->count >= data->iters;
255 if (data->threshold>0.0) {
257 res = 0; // not enough data
258 if (data->relstderr > data->threshold)
259 res = 0; // stderr too high yet
261 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
262 (res?"enough benchs":"need more data"),
263 data->count, data->iters, data->relstderr, data->threshold, data->mean);
// Entry point of a sampled block (first of the three smpi_sample_* steps).
// Accounts for the computation that preceded the block (smpi_bench_end),
// flags the process as sampling, then looks up the state for this location:
//  - unknown location: requires threshold > 0 or iters > 0, allocates a new
//    local_data_t with benching = 1 and stores it in `samples`;
//  - known location: logs an error when iters/threshold differ from the
//    stored ones, then sets benching to the negation of
//    sample_enough_benchs().
// NOTE(review): the declaration of `data`, the conditions guarding each
// branch, the remaining field initialisations and the disposal of `loc` are
// not visible in this excerpt.
267 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
269 char *loc = sample_location(global, file, line);
272 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
273 smpi_process_set_sampling(1);
// `samples` is created with free() handed over as the value destructor.
276 samples = xbt_dict_new_homogeneous(free);
278 data = xbt_dict_get_or_null(samples, loc);
280 xbt_assert(threshold>0 || iters>0,
281 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
282 data = (local_data_t *) xbt_new(local_data_t, 1);
285 data->sum_pow2 = 0.0;
287 data->threshold = threshold;
288 data->benching = 1; // If we have no data, we need at least one
290 xbt_dict_set(samples, loc, data, NULL);
291 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
293 if (data->iters != iters || data->threshold != threshold) {
294 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
295 loc, data->iters, data->threshold, iters,threshold);
299 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
300 data->benching = !sample_enough_benchs(data);
301 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
// Second sampling step: tells the caller whether the sampled block must be
// run (and benchmarked) once more.
// Requires `samples` to exist and the location to be registered already
// (xbt_dict_get). When data->benching is 1 a new benchmark run is needed;
// otherwise the stored mean is injected through smpi_execute(), the sampling
// flag of the process is cleared and `res` is set to 0.
// NOTE(review): the declarations of `data` and `res`, the statements of the
// benching branch after the debug message, the disposal of `loc` and the
// return statement are not visible in this excerpt.
306 int smpi_sample_2(int global, const char *file, int line)
308 char *loc = sample_location(global, file, line);
312 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
313 data = xbt_dict_get(samples, loc);
314 XBT_DEBUG("sample2 %s",loc);
317 if (data->benching==1) {
318 // we need to run a new bench
319 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
320 data->count, data->iters, data->relstderr, data->threshold, data->mean);
323 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
324 // Just sleep instead
325 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
326 data->count, data->iters, data->relstderr, data->threshold, data->mean);
327 smpi_execute(data->mean);
328 smpi_process_set_sampling(0);
329 res = 0; // prepare to capture future, unrelated computations
// Third sampling step, run after one execution of the sampled block: stops
// the process timer, reads the elapsed time as `sample`, and refreshes the
// statistics of this location (sum_pow2, mean, relstderr).
// If more benchmarking is still needed, data->mean is overwritten with this
// single sample so that smpi_sample_2 simulates exactly this occurrence.
// NOTE(review): the declarations of `data`, `sample` and `n`, what happens
// when data->benching is already 0, the updates of data->count and
// data->sum, the statement that follows the final comment and the disposal
// of `loc` are not visible in this excerpt.
// NOTE(review): relstderr divides by data->mean (and by n); a zero mean or
// zero count would give a non-finite value -- check whether that can occur.
336 void smpi_sample_3(int global, const char *file, int line)
338 char *loc = sample_location(global, file, line);
341 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
342 data = xbt_dict_get(samples, loc);
343 XBT_DEBUG("sample3 %s",loc);
346 if (data->benching==0) {
350 // ok, benchmarking this loop is over
351 xbt_os_threadtimer_stop(smpi_process_timer());
356 sample = xbt_os_timer_elapsed(smpi_process_timer());
358 data->sum_pow2 += sample * sample;
359 n = (double)data->count;
360 data->mean = data->sum / n;
361 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
362 if (!sample_enough_benchs(data)) {
363 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
365 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
366 data->mean, data->relstderr, sample);
368 // That's enough for now, prevent sample_2 from running the same code over and over
// Value destructor registered for the `allocs` dictionary (see
// smpi_shared_malloc); `p` is treated as a shared_data_t.
// NOTE(review): the statements that actually release the entry are not
// visible in this excerpt.
373 static void smpi_shared_alloc_free(void *p)
375 shared_data_t *data = p;
// Turn the allocation-site string `loc` into a short fixed-size name.
// `loc` is reallocated to 30 bytes; a hexadecimal digest held in `hash` is
// consumed six hex characters at a time (strtoul base 16 gives 24 bits), and
// each group emits four characters taken from a 64-entry alphabet, written
// at loc[1 + 4*i/6 + j]. The highest index written by the loop is 28.
// NOTE(review): the computation of `hash`, the declarations of `hash`, `s`,
// `val`, `i`, `j`, the writes to loc[0] and to the terminator, and the
// return statement are not visible in this excerpt.
// NOTE(review): the last loop iteration (i = 36) copies hash[36..41];
// confirm that `hash` is large enough for that read.
// NOTE(review): this is labelled base64 but the shift step is 3 * j, so the
// four 6-bit groups overlap (bits 23-18, 20-15, 17-12, 14-9) and the low 9
// bits of `val` are never used; a step of 6 * j would cover all 24 bits.
// NOTE(review): the alphabet literal contains 'Z' and 'z' twice and lacks
// 'W' and 'w', so distinct 6-bit values can map to the same character.
380 static char *smpi_shared_alloc_hash(char *loc)
390 loc = xbt_realloc(loc, 30);
392 for (i = 0; i < 40; i += 6) { /* base64 encode */
393 memcpy(s, hash + i, 6);
394 val = strtoul(s, NULL, 16);
395 for (j = 0; j < 4; j++) {
396 unsigned char x = (val >> (18 - 3 * j)) & 0x3f;
397 loc[1 + 4 * i / 6 + j] =
398 "ABCDEFGHIJKLMNOPQRSTUVZXYZabcdefghijklmnopqrstuvzxyz0123456789-_"[x];
// Allocate `size` bytes for the call site (file, line).
// With the "smpi/use_shared_malloc" option enabled, every call from the same
// site is backed by one shared-memory object: the site name
// "<pid>_<file>_<line>" is hashed, looked up in `allocs`, and
//  - when unknown, a new object is created with shm_open (O_CREAT | O_EXCL),
//    mapped with shm_map, unlinked right away (a failed unlink only logs a
//    warning) and registered in `allocs`;
//  - when known, the stored descriptor is mapped again with shm_map.
// With the option disabled, plain xbt_malloc is used.
// NOTE(review): the declarations of `mem`, `data`, `fd`, the conditions
// selecting each branch and each xbt_die, the initialisation of the fields
// of `data`, any reference-count update and the return statement are not
// visible in this excerpt. The comment opened on the smpi_shared_alloc_hash
// line is cut off in this excerpt and is left untouched.
405 void *smpi_shared_malloc(size_t size, const char *file, int line)
408 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
409 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
412 loc = smpi_shared_alloc_hash(loc); /* hash loc, in order to have something
415 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
417 data = xbt_dict_get_or_null(allocs, loc);
419 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL,
420 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
424 xbt_die("Please cleanup /dev/shm/%s", loc);
426 xbt_die("An unhandled error occured while opening %s. shm_open: %s", loc, strerror(errno));
429 data = xbt_new(shared_data_t, 1);
433 mem = shm_map(fd, size, data);
434 if (shm_unlink(loc) < 0) {
435 XBT_WARN("Could not early unlink %s. shm_unlink: %s", loc, strerror(errno));
437 xbt_dict_set(allocs, loc, data, NULL);
438 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
441 mem = shm_map(data->fd, size, data);
444 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
446 mem = xbt_malloc(size);
447 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
// Release memory obtained from smpi_shared_malloc().
// With "smpi/use_shared_malloc" enabled, the metadata of `ptr` is looked up
// in `allocs_metadata` under its "%p" rendering, the region is unmapped
// using the recorded size (a failed munmap only logs a warning), and once
// data->count is <= 0 the entry is removed from `allocs`. Each failed
// precondition (no allocation dictionary, no metadata dictionary, unknown
// pointer, broken metadata link) only logs a warning.
// With the option disabled, the visible code logs a "classic free".
// NOTE(review): the declaration of `data`, the tests guarding each warning,
// the early returns, the update of data->count, the removal of the metadata
// entry and the call that frees in the classic case are not visible in this
// excerpt.
452 void smpi_shared_free(void *ptr)
454 char loc[PTR_STRLEN];
455 shared_metadata_t* meta;
457 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
460 XBT_WARN("Cannot free: nothing was allocated");
463 if(!allocs_metadata) {
464 XBT_WARN("Cannot free: no metadata was allocated");
// Same key format as the one used by shm_map when registering the mapping.
466 snprintf(loc, PTR_STRLEN, "%p", ptr);
467 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
469 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
474 XBT_WARN("Cannot free: something is broken in the metadata link");
477 if(munmap(ptr, meta->size) < 0) {
478 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
481 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
482 if (data->count <= 0) {
484 xbt_dict_remove(allocs, data->loc);
485 XBT_DEBUG("Shared free - with removal - of %p", ptr);
488 XBT_DEBUG("Classic free of %p", ptr);
// Tell whether a result is recorded in `calls` for the pair (func, input).
// The key is "<func>:<input>"; `calls` is created with no value destructor
// when needed, and the lookup relies on xbt_dict_get() either succeeding or
// throwing, with the not_found_error category being checked afterwards.
// NOTE(review): the exception-handling construct, the declaration of `ex`,
// the computed result, the disposal of `loc` and the return statement are
// not visible in this excerpt.
494 int smpi_shared_known_call(const char* func, const char* input) {
495 char* loc = bprintf("%s:%s", func, input);
500 calls = xbt_dict_new_homogeneous(NULL);
503 xbt_dict_get(calls, loc); /* Succeed or throw */
507 if(ex.category == not_found_error) {
// Fetch the value recorded in `calls` under the key "<func>:<input>".
// `calls` is created with no value destructor when needed; the lookup uses
// xbt_dict_get().
// NOTE(review): the declaration of `data`, the guard around the dictionary
// creation, the disposal of `loc` and the return statement are not visible
// in this excerpt.
518 void* smpi_shared_get_call(const char* func, const char* input) {
519 char* loc = bprintf("%s:%s", func, input);
523 calls = xbt_dict_new_homogeneous(NULL);
525 data = xbt_dict_get(calls, loc);
// Record `data` in `calls` under the key "<func>:<input>".
// `calls` is created with no value destructor when needed, so the dictionary
// is not given a function to release stored values.
// NOTE(review): the guard around the dictionary creation, the disposal of
// `loc` and the return statement are not visible in this excerpt.
530 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
531 char* loc = bprintf("%s:%s", func, input);
534 calls = xbt_dict_new_homogeneous(NULL);
536 xbt_dict_set(calls, loc, data, NULL);