1 /* Copyright (c) 2007, 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include <math.h> // sqrt
10 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
16 #include <sys/types.h>
/* Declares the `smpi_bench` logging subcategory under `smpi`, with the
 * description string below. Judging by the macro name it presumably also
 * becomes the default category for the XBT_DEBUG/XBT_WARN/XBT_ERROR calls of
 * this file -- TODO confirm against the xbt log documentation. */
23 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
24 "Logging specific to SMPI (benchmarking)");
26 /* Shared allocations are handled through shared memory segments.
27 * Associated data and metadata are used as follows:
30 * `allocs' dict ---- -.
31 * ---------- shared_data_t shared_metadata_t / | | |
32 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
33 * | ---------- | fd of <name> | | | size of mmap | --| | | |
34 * | | count (2) | |-- | data | \ | | |
35 * `----------------- | <name> | | ----------------- ---- |
36 * -------------------- | ^ |
38 * | | `allocs_metadata' dict |
39 * | | ---------------------- |
40 * | `-- | <addr of mmap #1> |<-'
41 * | .-- | <addr of mmap #2> |<-.
42 * | | ---------------------- |
48 * | shared_metadata_t / | |
49 * | ----------------- | | |
50 * | | size of mmap | --| | |
52 * ----------------- | | |
/* Size of the buffers that receive a pointer formatted with "%p" (see the
 * snprintf() calls in shm_map() and smpi_shared_free()). Presumably budgeted
 * as 2 prefix characters + 2 hex digits per pointer byte + 1 terminator;
 * NOTE(review): the exact "%p" output format is implementation-defined. */
57 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
/* Shared-allocation records (shared_data_t), keyed by the name string built
 * in smpi_shared_malloc(). */
59 xbt_dict_t allocs = NULL; /* Allocated on first use */
/* Per-mapping records (shared_metadata_t), keyed by the "%p" text of the
 * mapped address; filled by shm_map(), queried by smpi_shared_free(). */
60 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
/* Benchmark statistics (local_data_t), keyed by the sample_location() string. */
61 xbt_dict_t samples = NULL; /* Allocated on first use */
/* Values registered through smpi_shared_set_call(), keyed by "<func>:<input>". */
62 xbt_dict_t calls = NULL; /* Allocated on first use */
/* Assigned from smpi_process_index() in smpi_bench_begin(). */
63 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* Returns the st_size reported by fstat() for descriptor `fd`, cast to size_t.
 * When fstat() fails, xbt_die() is called with the errno message.
 * NOTE(review): the declaration of `st` and the closing braces are on lines
 * that are not visible in this listing. */
76 static size_t shm_size(int fd) {
79 if(fstat(fd, &st) < 0) {
80 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
82 return (size_t)st.st_size;
/* Maps `size` bytes of descriptor `fd` as shared read/write memory and
 * registers a shared_metadata_t for the mapping in `allocs_metadata`.
 *
 *   fd   - descriptor to map; grown with ftruncate() when smaller than `size`
 *   size - number of bytes to map
 *   data - shared_data_t of the allocation; not used on the visible lines,
 *          presumably stored into the metadata on a hidden line -- TODO confirm
 *
 * Any failure of ftruncate() or mmap() ends in xbt_die().
 * NOTE(review): the declarations of `mem` and `loc`, the initialisation of
 * `meta`'s fields and the return statement are not visible in this listing. */
85 static void* shm_map(int fd, size_t size, shared_data_t* data) {
88 shared_metadata_t* meta;
/* Only ever grow the backing object; an object already large enough is kept. */
90 if(size > shm_size(fd)) {
91 if(ftruncate(fd, (off_t)size) < 0) {
92 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
95 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
96 if(mem == MAP_FAILED) {
97 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* Lazily create the metadata dictionary on first mapping. */
99 if(!allocs_metadata) {
100 allocs_metadata = xbt_dict_new();
/* The textual form of the mapped address is the dictionary key, so that
 * smpi_shared_free() can find the record from the bare pointer. */
102 snprintf(loc, PTR_STRLEN, "%p", mem);
103 meta = xbt_new(shared_metadata_t, 1);
/* `free` is passed along with the value; presumably the dict uses it to
 * release `meta` when the entry goes away -- TODO confirm. */
106 xbt_dict_set(allocs_metadata, loc, meta, &free);
107 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Releases the `allocs`, `samples` and `calls` dictionaries.
 * NOTE(review): `allocs_metadata`, the fourth dictionary declared next to
 * these three, is not released on the visible lines; unless it is freed
 * elsewhere this looks like a leak -- confirm and consider adding
 * xbt_dict_free(&allocs_metadata). */
111 void smpi_bench_destroy(void)
113 xbt_dict_free(&allocs);
114 xbt_dict_free(&samples);
115 xbt_dict_free(&calls);
/* Starts an execution named "computation" of `flops` on the host returned by
 * SIMIX_host_self() and waits for it to finish.
 * NOTE(review): the declarations of `host` and `action` are not visible in
 * this listing; the meaning of the trailing `1` argument of
 * simcall_host_execute() is presumably a priority -- TODO confirm. */
118 void smpi_execute_flops(double flops) {
121 host = SIMIX_host_self();
123 XBT_DEBUG("Handle real computation time: %f flops", flops);
124 action = simcall_host_execute("computation", host, flops, 1);
/* Attach the category returned by TRACE_internal_smpi_get_category() to the
 * action (this line may sit under a tracing #ifdef that is not visible). */
126 simcall_set_category (action, TRACE_internal_smpi_get_category());
128 simcall_host_execution_wait(action);
/* Injects a measured wall-clock `duration` into the simulation.
 * Durations at or above the "smpi/cpu_threshold" option are multiplied by the
 * "smpi/running_power" option and handed to smpi_execute_flops(); according
 * to the second debug message, shorter durations are ignored. */
131 static void smpi_execute(double duration)
133 /* FIXME: a global variable would be less expensive to consult than a call to xbt_cfg_get_double() right on the critical path */
134 if (duration >= xbt_cfg_get_double(_surf_cfg_set, "smpi/cpu_threshold")) {
135 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
136 smpi_execute_flops(duration *
137 xbt_cfg_get_double(_surf_cfg_set,
138 "smpi/running_power"));
/* Presumably the `else` branch: its keyword is on a line that is not visible
 * in this listing. Nothing is simulated here, the duration is only logged. */
140 XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
141 duration, xbt_cfg_get_double(_surf_cfg_set, "smpi/cpu_threshold"));
/* Starts the timer returned by smpi_process_timer() and refreshes
 * `smpi_current_rank` from smpi_process_index(). Paired with
 * smpi_bench_end(), which stops that timer. */
145 void smpi_bench_begin(void)
147 xbt_os_timer_start(smpi_process_timer());
148 smpi_current_rank = smpi_process_index();
/* Stops the timer returned by smpi_process_timer() and passes the elapsed
 * time to smpi_execute(), which decides whether it is worth simulating. */
151 void smpi_bench_end(void)
153 xbt_os_timer_t timer = smpi_process_timer();
155 xbt_os_timer_stop(timer);
156 smpi_execute(xbt_os_timer_elapsed(timer));
/* Handles a sleep request by passing `secs`, converted to double, to
 * smpi_execute() as a duration.
 * NOTE(review): only this call is visible; any surrounding statements and the
 * returned value are on lines missing from this listing. Also note that
 * smpi_execute() logs and ignores durations below "smpi/cpu_threshold",
 * which would apply to short sleeps too -- confirm this is intended. */
159 unsigned int smpi_sleep(unsigned int secs)
162 smpi_execute((double) secs);
/* Fills `tv` from the value of SIMIX_get_clock() (presumably the simulated
 * time in seconds -- TODO confirm) instead of the wall clock.
 * `tz` is not used on the visible lines.
 * NOTE(review): the declaration of `now`, any guard on `tv` and the return
 * statement are on lines missing from this listing. */
167 int smpi_gettimeofday(struct timeval *tv, struct timezone *tz)
171 now = SIMIX_get_clock();
/* Integer part of the clock goes to tv_sec ... */
173 tv->tv_sec = (time_t)now;
/* ... and the remaining fraction, scaled by 1e6, goes to tv_usec. */
174 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Defined outside this file; its reciprocal is used as the number of
 * timestamp ticks per clock unit below. */
180 extern double sg_maxmin_precision;
/* Returns 1 / sg_maxmin_precision truncated to an unsigned long long.
 * NOTE(review): no guard against sg_maxmin_precision being 0 is visible. */
181 unsigned long long smpi_rastro_resolution (void)
184 double resolution = (1/sg_maxmin_precision);
186 return (unsigned long long)resolution;
/* Converts SIMIX_get_clock() into an integer tick count using
 * smpi_rastro_resolution() ticks per clock unit. */
189 unsigned long long smpi_rastro_timestamp (void)
192 double now = SIMIX_get_clock();
/* Whole part of the clock value. */
194 unsigned long long sec = (unsigned long long)now;
/* Fractional part expressed in ticks (computed in double, then truncated). */
195 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
/* The cast on `sec` is redundant: it already is an unsigned long long. */
197 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
200 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Fields of the per-location benchmarking record that the smpi_sample_*
 * functions access through local_data_t pointers and store in `samples`.
 * NOTE(review): the enclosing typedef/struct lines are not visible in this
 * listing. */
202 int iters; /* amount of requested iterations */
203 int count; /* amount of iterations done so far */
204 double threshold; /* maximal stderr requested (if positive) */
205 double relstderr; /* observed stderr so far */
206 double mean; /* mean of benched times, to be used if the block is disabled */
207 double sum; /* sum of benched times (to compute the mean and stderr) */
208 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
209 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Builds the key under which a sampled block is stored in `samples`:
 * either "<file>:<line>" or "<file>:<line>:<process index>".
 * NOTE(review): the test selecting between the two forms is not visible in
 * this listing; presumably the shorter, process-independent key is used when
 * `global` is set -- TODO confirm. The string comes from bprintf(); the
 * visible callers keep it in a local `loc`, whose release is not visible. */
212 static char *sample_location(int global, const char *file, int line) {
214 return bprintf("%s:%d", file, line);
216 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Decides whether enough benchmark runs were collected for `data`.
 * The starting verdict is "count reached iters"; when a positive threshold is
 * set, the verdict is forced to 0 while relstderr exceeds it, and also under
 * a second "not enough data" condition whose test is not visible in this
 * listing. The verdict is logged; the return statement is not visible
 * either, presumably `res` is returned -- TODO confirm. */
219 static int sample_enough_benchs(local_data_t *data) {
220 int res = data->count >= data->iters;
221 if (data->threshold>0.0) {
223 res = 0; // not enough data
224 if (data->relstderr > data->threshold)
225 res = 0; // stderr too high yet
227 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
228 (res?"enough benchs":"need more data"),
229 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* Entry step of a sampled block: closes the current benchmark interval, then
 * creates or re-checks the local_data_t record for this source location.
 *
 *   global    - forwarded to sample_location() to choose the key form
 *   file,line - source location identifying the sampled block
 *   iters     - requested number of benchmark iterations
 *   threshold - requested maximal relative standard error
 *
 * NOTE(review): the declaration of `data`, the if/else guards, several field
 * initialisations and the release of `loc` are on lines missing from this
 * listing; the comments below describe the visible statements only. */
233 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
235 char *loc = sample_location(global, file, line);
238 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
/* Presumably guarded by a "first use" test on a hidden line. */
240 samples = xbt_dict_new_homogeneous(free);
242 data = xbt_dict_get_or_null(samples, loc);
/* Record creation path (presumably taken when the lookup returned NULL). */
244 xbt_assert(threshold>0 || iters>0,
245 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
246 data = (local_data_t *) xbt_new(local_data_t, 1);
249 data->sum_pow2 = 0.0;
251 data->threshold = threshold;
252 data->benching = 1; // If we have no data, we need at least one
254 xbt_dict_set(samples, loc, data, NULL);
255 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
/* Existing record path: the settings must match those of the first visit. */
257 if (data->iters != iters || data->threshold != threshold) {
258 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
259 loc, data->iters, data->threshold, iters,threshold);
263 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
264 data->benching = !sample_enough_benchs(data);
265 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Loop-condition step of a sampled block: looks up the record created by
 * smpi_sample_1() and either lets a new benchmark run happen or injects the
 * stored `mean` through smpi_execute() instead.
 * NOTE(review): the declaration of `data`, the else keyword, the statements
 * between the visible lines and the returned values are missing from this
 * listing. */
270 int smpi_sample_2(int global, const char *file, int line)
272 char *loc = sample_location(global, file, line);
275 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
/* Unlike the lookup in smpi_sample_1(), this one is not the _or_null variant. */
276 data = xbt_dict_get(samples, loc);
277 XBT_DEBUG("sample2 %s",loc);
280 if (data->benching==1) {
281 // we need to run a new bench
282 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
283 data->count, data->iters, data->relstderr, data->threshold, data->mean);
287 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
288 // Just sleep instead
289 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
290 data->count, data->iters, data->relstderr, data->threshold, data->mean);
291 smpi_execute(data->mean);
293 smpi_bench_begin(); // prepare to capture future, unrelated computations
/* Closing step of a sampled block: stops the process timer, folds the elapsed
 * time into the record's statistics and recomputes mean and relative
 * standard error.
 * NOTE(review): the declarations of `data`, `sample` and `n`, the body of the
 * `benching==0` test, the updates of `count` and `sum`, and the final
 * statements are on lines missing from this listing. */
299 void smpi_sample_3(int global, const char *file, int line)
301 char *loc = sample_location(global, file, line);
304 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
305 data = xbt_dict_get(samples, loc);
306 XBT_DEBUG("sample3 %s",loc);
308 if (data->benching==0) {
312 // ok, benchmarking this loop is over
313 xbt_os_timer_stop(smpi_process_timer());
318 sample = xbt_os_timer_elapsed(smpi_process_timer());
320 data->sum_pow2 += sample * sample;
321 n = (double)data->count;
322 data->mean = data->sum / n;
/* relstderr = sqrt((sum_pow2/n - mean^2) / n) / mean.
 * NOTE(review): this is NaN when mean is 0, and rounding can make the
 * bracketed difference slightly negative for near-identical samples, which
 * also gives NaN; a NaN never compares greater than the threshold in
 * sample_enough_benchs() -- confirm this is acceptable. */
323 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
324 if (!sample_enough_benchs(data)) {
325 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
327 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
328 data->mean, data->relstderr, sample);
330 // That's enough for now, prevent sample_2 to run the same code over and over
/* Returns memory backed by a POSIX shared-memory object that is identified by
 * the calling source location, so that callers at the same `file`:`line`
 * map the same object.
 *
 *   size      - number of bytes to map
 *   file,line - source location used, with the pid, to name the object
 *
 * NOTE(review): the declarations of `i`, `fd`, `mem` and `data`, the body of
 * the renaming loop, the if/else guards, the initialisation of the new
 * shared_data_t, any update of its use count, the release of `loc` and the
 * return statement are on lines missing from this listing. */
334 void *smpi_shared_malloc(size_t size, const char *file, int line)
/* Object name: "<pid>_<file>_<line>". */
336 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
337 size_t len = strlen(loc);
343 for(i = 0; i < len; i++) {
344 /* Make the 'loc' ID be a flat filename */
/* Presumably guarded by a "first use" test on a hidden line. */
350 allocs = xbt_dict_new_homogeneous(free);
352 data = xbt_dict_get_or_null(allocs, loc);
/* Unknown name: create the object. O_EXCL makes shm_open() fail when an
 * object of that name already exists. */
354 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
/* Two distinct failure reports; the tests choosing between them are hidden
 * (presumably an errno check for an already existing object). */
358 xbt_die("Please cleanup /dev/shm/%s", loc);
360 xbt_die("An unhandled error occured while opening %s: %s", loc, strerror(errno));
363 data = xbt_new(shared_data_t, 1);
367 mem = shm_map(fd, size, data);
/* The name is removed right after mapping; later mappings go through the
 * descriptor kept in `data` (see the shm_map(data->fd, ...) call below). */
368 if(shm_unlink(loc) < 0) {
369 XBT_WARN("Could not early unlink %s: %s", loc, strerror(errno));
371 xbt_dict_set(allocs, loc, data, NULL);
372 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
/* Known name: map the already opened descriptor again. */
374 mem = shm_map(data->fd, size, data);
377 XBT_DEBUG("Malloc %zu in %p (metadata at %p)", size, mem, data);
/* Releases a pointer obtained from smpi_shared_malloc(): finds its metadata
 * by the "%p" text of `ptr`, unmaps it, and removes the allocation record
 * from `allocs` once its count is no longer positive.
 * Every problem is reported with XBT_WARN rather than a fatal error.
 * NOTE(review): the declaration of `data`, the tests guarding most warnings,
 * the early returns, the count update and the remaining cleanup are on lines
 * missing from this listing. */
381 void smpi_shared_free(void *ptr)
383 char loc[PTR_STRLEN];
384 shared_metadata_t* meta;
388 XBT_WARN("Cannot free: nothing was allocated");
391 if(!allocs_metadata) {
392 XBT_WARN("Cannot free: no metadata was allocated");
/* Same key construction as in shm_map(). */
394 snprintf(loc, PTR_STRLEN, "%p", ptr);
395 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
397 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
402 XBT_WARN("Cannot free: something is broken in the metadata link");
/* Unmap with the size recorded at mapping time. */
405 if(munmap(ptr, meta->size) < 0) {
406 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
/* Last user gone: drop the record keyed by the allocation name. */
409 if (data->count <= 0) {
411 xbt_dict_remove(allocs, data->loc);
/* Tells whether a value is registered in `calls` under "<func>:<input>".
 * The lookup uses xbt_dict_get(), which per the inline comment either
 * succeeds or throws; the visible handler line singles out the
 * not_found_error category.
 * NOTE(review): the TRY/CATCH keywords, the declaration of `ex`, the result
 * variable, the release of `loc` and the return statement are on lines
 * missing from this listing. */
416 int smpi_shared_known_call(const char* func, const char* input) {
417 char* loc = bprintf("%s:%s", func, input);
/* Presumably guarded by a "first use" test on a hidden line. */
422 calls = xbt_dict_new_homogeneous(NULL);
425 xbt_dict_get(calls, loc); /* Succeed or throw */
429 if(ex.category == not_found_error) {
/* Fetches the value registered in `calls` under "<func>:<input>" with
 * xbt_dict_get(). No exception handling is visible around the lookup here,
 * unlike in smpi_shared_known_call().
 * NOTE(review): the declaration of `data`, the guard around the dictionary
 * creation, the release of `loc` and the return statement are on lines
 * missing from this listing. */
440 void* smpi_shared_get_call(const char* func, const char* input) {
441 char* loc = bprintf("%s:%s", func, input);
445 calls = xbt_dict_new_homogeneous(NULL);
447 data = xbt_dict_get(calls, loc);
/* Registers `data` in `calls` under "<func>:<input>".
 * The dictionary is created with a NULL free function and the value is set
 * with a NULL free function as well, so presumably the dictionary never
 * releases the stored pointers -- TODO confirm ownership with callers.
 * NOTE(review): the guard around the dictionary creation, the release of
 * `loc` and the return statement are on lines missing from this listing. */
452 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
453 char* loc = bprintf("%s:%s", func, input);
456 calls = xbt_dict_new_homogeneous(NULL);
458 xbt_dict_set(calls, loc, data, NULL);