1 /* Copyright (c) 2007, 2009-2013. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
11 #include "surf/surf.h"
12 #include "simgrid/sg_config.h"
18 #include <sys/types.h>
21 #include <math.h> // sqrt
/* Logging setup for this file: declares `smpi_bench' (under `smpi') with the
 * description below; the XBT_DEBUG/XBT_WARN/XBT_ERROR calls of this file
 * presumably log to it -- TODO confirm against the XBT logging macros. */
26 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
27 "Logging specific to SMPI (benchmarking)");
29 /* Shared allocations are handled through shared memory segments.
30 * Associated data and metadata are used as follows:
33 * `allocs' dict ---- -.
34 * ---------- shared_data_t shared_metadata_t / | | |
35 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
36 * | ---------- | fd of <name> | | | size of mmap | --| | | |
37 * | | count (2) | |-- | data | \ | | |
38 * `----------------- | <name> | | ----------------- ---- |
39 * -------------------- | ^ |
41 * | | `allocs_metadata' dict |
42 * | | ---------------------- |
43 * | `-- | <addr of mmap #1> |<-'
44 * | .-- | <addr of mmap #2> |<-.
45 * | | ---------------------- |
51 * | shared_metadata_t / | |
52 * | ----------------- | | |
53 * | | size of mmap | --| | |
55 * ----------------- | | |
/* Size of the buffers receiving a pointer printed with "%p" (see the snprintf()
 * calls in shm_map() and smpi_shared_free()).
 * Presumably 2 characters for a "0x" prefix, 2 hex digits per pointer byte and
 * 1 terminating byte -- TODO confirm. */
60 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
/* Module-wide dictionaries, all released by smpi_bench_destroy():
 *  - allocs:          location string -> shared_data_t (see smpi_shared_malloc())
 *  - allocs_metadata: "%p" of a mapping -> shared_metadata_t (see shm_map())
 *  - samples:         sample location -> local_data_t (see smpi_sample_1())
 *  - calls:           "func:input" -> caller-provided data (see smpi_shared_set_call()) */
62 xbt_dict_t allocs = NULL; /* Allocated on first use */
63 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
64 xbt_dict_t samples = NULL; /* Allocated on first use */
65 xbt_dict_t calls = NULL; /* Allocated on first use */
/* Thread-local; assigned from smpi_process_index() in smpi_bench_begin(). */
66 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* Return the size, in bytes, that fstat() reports for the object behind `fd'.
 * Aborts through xbt_die() (with the errno text) when fstat() fails. */
79 static size_t shm_size(int fd) {
82 if(fstat(fd, &st) < 0) {
83 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
85 return (size_t)st.st_size;
/* Map `size' bytes of the object behind `fd' and register the mapping in
 * `allocs_metadata'.
 * Any ftruncate() or mmap() failure aborts through xbt_die(). */
89 static void* shm_map(int fd, size_t size, shared_data_t* data) {
92 shared_metadata_t* meta;
/* Grow the underlying object first if it is smaller than the requested size. */
94 if(size > shm_size(fd)) {
95 if(ftruncate(fd, (off_t)size) < 0) {
96 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
/* Shared read/write mapping starting at offset 0; the kernel picks the address. */
100 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
101 if(mem == MAP_FAILED) {
102 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* The metadata dictionary is created lazily; xbt_free is handed to
 * xbt_dict_new_homogeneous(), presumably as the value destructor. */
104 if(!allocs_metadata) {
105 allocs_metadata = xbt_dict_new_homogeneous(xbt_free);
/* The dictionary key is the textual ("%p") form of the mapping address. */
107 snprintf(loc, PTR_STRLEN, "%p", mem);
108 meta = xbt_new(shared_metadata_t, 1);
111 xbt_dict_set(allocs_metadata, loc, meta, NULL);
112 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Release the four module dictionaries (allocs, allocs_metadata, samples, calls)
 * by passing the address of each to xbt_dict_free(). */
117 void smpi_bench_destroy(void)
119 xbt_dict_free(&allocs);
120 xbt_dict_free(&allocs_metadata);
121 xbt_dict_free(&samples);
122 xbt_dict_free(&calls);
/* Simulate the execution of `flops' flops on the host of the calling process:
 * creates a "computation" execution with simcall_host_execute() and waits for it. */
125 void smpi_execute_flops(double flops) {
128 host = SIMIX_host_self();
130 XBT_DEBUG("Handle real computation time: %f flops", flops);
/* NOTE(review): the last argument (1) is presumably the priority -- confirm
 * against the simcall_host_execute() prototype. */
131 action = simcall_host_execute("computation", host, flops, 1);
/* Tag the action with the category returned by TRACE_internal_smpi_get_category(). */
133 simcall_set_category (action, TRACE_internal_smpi_get_category());
135 simcall_host_execution_wait(action);
/* Account for `duration' (a measured computation time) in the simulation.
 * When it reaches the smpi/cpu_threshold option, it is converted to flops by
 * multiplying it by the smpi/running_power option and passed to
 * smpi_execute_flops(); the second debug message reports the case where the
 * duration is ignored. */
138 static void smpi_execute(double duration)
140 /* FIXME: a global variable would be less expensive to consult than a call to xbt_cfg_get_double() right on the critical path */
141 if (duration >= sg_cfg_get_double("smpi/cpu_threshold")) {
142 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
143 smpi_execute_flops(duration *
144 sg_cfg_get_double("smpi/running_power"));
146 XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
147 duration, sg_cfg_get_double("smpi/cpu_threshold"));
/* Start benchmarking: starts the timer returned by smpi_process_timer() and
 * refreshes smpi_current_rank from smpi_process_index(). */
151 void smpi_bench_begin(void)
153 xbt_os_threadtimer_start(smpi_process_timer());
154 smpi_current_rank = smpi_process_index();
/* Stop benchmarking: stops the timer returned by smpi_process_timer() and hands
 * its elapsed time to smpi_execute(). */
157 void smpi_bench_end(void)
159 xbt_os_timer_t timer = smpi_process_timer();
161 xbt_os_threadtimer_stop(timer);
162 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulated sleep: executes `secs' times the speed of the current host (as
 * returned by simcall_host_get_speed()) worth of flops, which presumably keeps
 * the host busy for `secs' simulated seconds -- TODO confirm. */
165 unsigned int smpi_sleep(unsigned int secs)
168 smpi_execute_flops((double) secs*simcall_host_get_speed(SIMIX_host_self()));
/* Fill `*tv' from the simulated clock (SIMIX_get_clock()) instead of the real
 * time: the integral part goes to tv_sec, the fractional part scaled by 1e6 to
 * tv_usec. */
173 int smpi_gettimeofday(struct timeval *tv)
177 now = SIMIX_get_clock();
179 tv->tv_sec = (time_t)now;
/* The two tv_usec assignments differ only by the cast (useconds_t vs
 * suseconds_t); presumably they are platform-specific alternatives -- TODO confirm. */
181 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
183 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Defined in another translation unit; only read here. */
190 extern double sg_maxmin_precision;
/* Return 1/sg_maxmin_precision, converted to unsigned long long. */
191 unsigned long long smpi_rastro_resolution (void)
194 double resolution = (1/sg_maxmin_precision);
196 return (unsigned long long)resolution;
/* Return the simulated clock expressed in units of 1/smpi_rastro_resolution()
 * seconds: whole seconds times the resolution, plus the fractional part times
 * the resolution (converted to an integer). */
199 unsigned long long smpi_rastro_timestamp (void)
202 double now = SIMIX_get_clock();
204 unsigned long long sec = (unsigned long long)now;
205 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
207 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
210 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Benchmarking state kept per sample location; accessed through local_data_t
 * pointers by sample_enough_benchs() and the smpi_sample_* functions. */
212 int iters; /* amount of requested iterations */
213 int count; /* amount of iterations done so far */
214 double threshold; /* maximal stderr requested (if positive) */
215 double relstderr; /* observed stderr so far */
216 double mean; /* mean of benched times, to be used if the block is disabled */
217 double sum; /* sum of benched times (to compute the mean and stderr) */
218 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
219 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Build the key identifying a sampled block in `samples': either "file:line" or
 * "file:line:<process index>". Presumably `global' selects the first,
 * process-independent form -- TODO confirm.
 * The string comes from bprintf(); NOTE(review): presumably heap-allocated and
 * owned by the caller -- confirm. */
222 static char *sample_location(int global, const char *file, int line) {
224 return bprintf("%s:%d", file, line);
226 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Decide whether `data' holds enough benchmark runs.
 * The starting verdict is count >= iters. When a positive threshold is set, the
 * verdict is forced to 0 in a "not enough data" case and whenever the observed
 * relative stderr is still above the threshold. The verdict is logged. */
229 static int sample_enough_benchs(local_data_t *data) {
230 int res = data->count >= data->iters;
231 if (data->threshold>0.0) {
233 res = 0; // not enough data
234 if (data->relstderr > data->threshold)
235 res = 0; // stderr too high yet
237 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
238 (res?"enough benchs":"need more data"),
239 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* First step of a sampled block: fetch or create the benchmarking state stored
 * in `samples' under the key built by sample_location(global, file, line).
 *  - iters:     requested number of benchmark iterations
 *  - threshold: requested maximal stderr
 * At least one of the two must be positive (asserted when the state is created). */
243 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
245 char *loc = sample_location(global, file, line);
248 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
/* `free' is handed to xbt_dict_new_homogeneous(), presumably as the value destructor. */
250 samples = xbt_dict_new_homogeneous(free);
252 data = xbt_dict_get_or_null(samples, loc);
/* First visit of this location: create its state and record the settings. */
254 xbt_assert(threshold>0 || iters>0,
255 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
256 data = (local_data_t *) xbt_new(local_data_t, 1);
259 data->sum_pow2 = 0.0;
261 data->threshold = threshold;
262 data->benching = 1; // If we have no data, we need at least one
264 xbt_dict_set(samples, loc, data, NULL);
265 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
/* Known location: the settings must match the recorded ones, otherwise an error is logged. */
267 if (data->iters != iters || data->threshold != threshold) {
268 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
269 loc, data->iters, data->threshold, iters,threshold);
273 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
274 data->benching = !sample_enough_benchs(data);
275 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Second step of a sampled block: depending on data->benching, either a new
 * benchmark run is needed or the computation is replaced by a simulated delay
 * of data->mean seconds.
 * The state must already exist in `samples' (xbt_dict_get() is used, not the
 * _or_null variant).
 * NOTE(review): presumably returns non-zero when the block body must really be
 * run -- TODO confirm. */
280 int smpi_sample_2(int global, const char *file, int line)
282 char *loc = sample_location(global, file, line);
285 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
286 data = xbt_dict_get(samples, loc);
287 XBT_DEBUG("sample2 %s",loc);
290 if (data->benching==1) {
291 // we need to run a new bench
292 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
293 data->count, data->iters, data->relstderr, data->threshold, data->mean);
297 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
298 // Just sleep instead
299 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
300 data->count, data->iters, data->relstderr, data->threshold, data->mean);
301 smpi_execute(data->mean);
303 smpi_bench_begin(); // prepare to capture future, unrelated computations
/* Third step of a sampled block, after one run of its body: stop the process
 * timer, take the elapsed time as a new sample and update the statistics of the
 * location (sum of squares, mean, relative standard error). */
309 void smpi_sample_3(int global, const char *file, int line)
311 char *loc = sample_location(global, file, line);
314 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
315 data = xbt_dict_get(samples, loc);
316 XBT_DEBUG("sample3 %s",loc);
/* NOTE(review): presumably benching == 0 is not expected at this point -- confirm. */
319 if (data->benching==0) {
323 // ok, benchmarking this loop is over
324 xbt_os_threadtimer_stop(smpi_process_timer());
329 sample = xbt_os_timer_elapsed(smpi_process_timer());
331 data->sum_pow2 += sample * sample;
332 n = (double)data->count;
333 data->mean = data->sum / n;
/* relstderr = sqrt((sum_pow2/n - mean^2) / n) / mean */
334 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
335 if (!sample_enough_benchs(data)) {
336 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
338 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
339 data->mean, data->relstderr, sample);
341 // That's enough for now, prevent sample_2 to run the same code over and over
/* Callback handed to xbt_dict_new_homogeneous() when `allocs' is created (see
 * smpi_shared_malloc()); `p' is treated as a shared_data_t pointer. */
346 static void smpi_shared_alloc_free(void *p)
348 shared_data_t *data = p;
/* Allocate `size' bytes for the call site identified by `file' and `line'.
 * When the smpi/use_shared_malloc option is set, the memory is a mapping of a
 * POSIX shared memory object registered in `allocs' under a key built from the
 * pid, file and line; otherwise xbt_malloc() is used. */
353 void *smpi_shared_malloc(size_t size, const char *file, int line)
355 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
356 size_t len = strlen(loc);
361 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
362 for(i = 0; i < len; i++) {
363 /* Make the 'loc' ID be a flat filename */
369 allocs = xbt_dict_new_homogeneous(smpi_shared_alloc_free);
371 data = xbt_dict_get_or_null(allocs, loc);
/* O_CREAT | O_EXCL: the call fails if an object named `loc' already exists. */
373 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
377 xbt_die("Please cleanup /dev/shm/%s", loc);
379 xbt_die("An unhandled error occured while opening %s: %s", loc, strerror(errno));
382 data = xbt_new(shared_data_t, 1);
386 mem = shm_map(fd, size, data);
/* The name is unlinked right after mapping; a failure only produces a warning. */
387 if(shm_unlink(loc) < 0) {
388 XBT_WARN("Could not early unlink %s: %s", loc, strerror(errno));
390 xbt_dict_set(allocs, loc, data, NULL);
391 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
/* Mapping through the fd already recorded in `data'. */
394 mem = shm_map(data->fd, size, data);
397 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, data);
399 mem = xbt_malloc(size);
400 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
/* Release memory obtained from smpi_shared_malloc().
 * With the smpi/use_shared_malloc option set, the metadata of `ptr' is looked up
 * in `allocs_metadata' under its "%p" form, the mapping is unmapped, and the
 * entry is removed from `allocs' once data->count is <= 0. Inconsistencies only
 * produce warnings. */
405 void smpi_shared_free(void *ptr)
407 char loc[PTR_STRLEN];
408 shared_metadata_t* meta;
410 if (sg_cfg_get_boolean("smpi/use_shared_malloc")){
413 XBT_WARN("Cannot free: nothing was allocated");
416 if(!allocs_metadata) {
417 XBT_WARN("Cannot free: no metadata was allocated");
/* Same key format as the one used by shm_map() when registering the mapping. */
419 snprintf(loc, PTR_STRLEN, "%p", ptr);
420 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
422 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
427 XBT_WARN("Cannot free: something is broken in the metadata link");
/* The mapping length comes from the recorded metadata. */
430 if(munmap(ptr, meta->size) < 0) {
431 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
434 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
435 if (data->count <= 0) {
437 xbt_dict_remove(allocs, data->loc);
438 XBT_DEBUG("Shared free - with removal - of %p", ptr);
441 XBT_DEBUG("Classic free of %p", ptr);
/* Tell whether something is recorded in `calls' under the key "func:input".
 * NOTE(review): presumably returns non-zero when the key is known -- TODO confirm. */
447 int smpi_shared_known_call(const char* func, const char* input) {
448 char* loc = bprintf("%s:%s", func, input);
/* NULL is handed to xbt_dict_new_homogeneous(): presumably no value destructor. */
453 calls = xbt_dict_new_homogeneous(NULL);
456 xbt_dict_get(calls, loc); /* Succeed or throw */
/* not_found_error is the exception category singled out here. */
460 if(ex.category == not_found_error) {
/* Look up the value recorded in `calls' under the key "func:input", using
 * xbt_dict_get() (the variant described as "Succeed or throw" in
 * smpi_shared_known_call()). */
471 void* smpi_shared_get_call(const char* func, const char* input) {
472 char* loc = bprintf("%s:%s", func, input);
476 calls = xbt_dict_new_homogeneous(NULL);
478 data = xbt_dict_get(calls, loc);
/* Record `data' in `calls' under the key "func:input". */
483 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
484 char* loc = bprintf("%s:%s", func, input);
487 calls = xbt_dict_new_homogeneous(NULL);
489 xbt_dict_set(calls, loc, data, NULL);