1 /* Copyright (c) 2007, 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
9 #include "xbt/sysdep.h"
11 #include "surf/surf.h"
12 #include "simgrid/sg_config.h"
18 #include <sys/types.h>
21 #include <math.h> // sqrt
/* Declares the `smpi_bench' log subcategory under `smpi'. The XBT_DEBUG / XBT_WARN /
 * XBT_ERROR calls in this file name no category, so presumably they log through this
 * default one. */
26 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
27 "Logging specific to SMPI (benchmarking)");
29 /* Shared allocations are handled through shared memory segments.
30  * Associated data and metadata are used as follows:
33  *    `allocs' dict                                                     ---- -.
34  *    ----------      shared_data_t               shared_metadata_t   / |  |  |
35  * .->| <name> | ---> -------------------- <--.   -----------------   | |  |  |
36  * |  ----------      | fd of <name>     |    |   | size of mmap  | --| |  |  |
37  * |                  | count (2)        |    |-- | data          |   \ |  |  |
38  * `----------------- | <name>           |    |   -----------------     ----  |
39  *                    --------------------    |   ^                           |
41  *                                            |   |   `allocs_metadata' dict  |
42  *                                            |   |   ----------------------  |
43  *                                            |   `-- | <addr of mmap #1>  |<-'
44  *                                            |   .-- | <addr of mmap #2>  |<-.
45  *                                            |   |   ----------------------  |
51  *                                            |   shared_metadata_t   / |  |
52  *                                            |   -----------------   | |  |
53  *                                            |   | size of mmap  | --| |  |
55  *                                                -----------------   | |  |
/* Size of the buffers that receive a pointer formatted with "%p" (see the snprintf() calls
 * in shm_map() and smpi_shared_free()). Presumably: 2 chars for a "0x" prefix, two hex
 * digits per pointer byte, and 1 for the terminating NUL -- TODO confirm for the target libc. */
60 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
/* Shared allocations, keyed by the flattened "<pid>_<file>_<line>" name built in
 * smpi_shared_malloc(); values are shared_data_t records. */
62 xbt_dict_t allocs = NULL; /* Allocated on first use */
/* Per-mapping metadata, keyed by the "%p" rendering of the mapped address (see shm_map());
 * values are shared_metadata_t records. */
63 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
/* Benchmark records of the smpi_sample_* functions, keyed by sample_location(). */
64 xbt_dict_t samples = NULL; /* Allocated on first use */
/* Data registered through smpi_shared_set_call(), keyed by "<func>:<input>". */
65 xbt_dict_t calls = NULL; /* Allocated on first use */
/* Thread-local rank; smpi_bench_begin() assigns it from smpi_process_index(). */
66 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* Return the size, in bytes, that fstat() reports for file descriptor `fd'.
 * Aborts through xbt_die() (with the errno message) when fstat() fails.
 * NOTE(review): the declaration of `st' and the closing braces are not visible in this view. */
79 static size_t shm_size(int fd) {
82 if(fstat(fd, &st) < 0) {
83 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
85 return (size_t)st.st_size;
/* Map `size' bytes of the object behind `fd' as shared, read/write memory and register
 * a shared_metadata_t for the new mapping in `allocs_metadata'.
 *
 * fd:   descriptor of the backing object; it is grown with ftruncate() when it is
 *       currently smaller than `size'.
 * size: number of bytes to map.
 * data: shared_data_t of the allocation. Its use is not visible here; presumably it is
 *       stored in the metadata record -- TODO confirm against the full source.
 *
 * Any failure of fstat/ftruncate/mmap aborts through xbt_die().
 * NOTE(review): the declarations of `mem' and `loc', the initialisation of `meta' and the
 * return statement are not visible in this view. */
89 static void* shm_map(int fd, size_t size, shared_data_t* data) {
92 shared_metadata_t* meta;
/* Only ever grow the backing object: an existing, larger object is left untouched. */
94 if(size > shm_size(fd)) {
95 if(ftruncate(fd, (off_t)size) < 0) {
96 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
100 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
101 if(mem == MAP_FAILED) {
102 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* Lazily create the metadata dictionary on the first mapping. */
104 if(!allocs_metadata) {
105 allocs_metadata = xbt_dict_new();
/* The dictionary key is the textual form of the mapped address. */
107 snprintf(loc, PTR_STRLEN, "%p", mem);
108 meta = xbt_new(shared_metadata_t, 1);
/* free() is registered as the value destructor of this entry. */
111 xbt_dict_set(allocs_metadata, loc, meta, &free);
112 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Release the `allocs', `samples' and `calls' dictionaries.
 * NOTE(review): `allocs_metadata' is not released in the lines visible here -- confirm
 * whether that is intentional or handled elsewhere. */
117 void smpi_bench_destroy(void)
119 xbt_dict_free(&allocs);
120 xbt_dict_free(&samples);
121 xbt_dict_free(&calls);
/* Start a "computation" execution of `flops' on the host returned by SIMIX_host_self()
 * and wait for it to finish.
 * NOTE(review): the declarations of `host' and `action' are not visible in this view, and
 * the category call may be guarded by a condition on a missing line. */
124 void smpi_execute_flops(double flops) {
127 host = SIMIX_host_self();
129 XBT_DEBUG("Handle real computation time: %f flops", flops);
130 action = simcall_host_execute("computation", host, flops, 1);
/* Tag the action with the SMPI tracing category. */
132 simcall_set_category (action, TRACE_internal_smpi_get_category());
134 simcall_host_execution_wait(action);
/* Inject `duration' (in the unit compared against the "smpi/cpu_threshold" option) into
 * the simulation. Durations at or above the threshold are converted to flops by
 * multiplying by the "smpi/running_power" option and passed to smpi_execute_flops().
 * Per the second debug message, shorter durations are ignored; the `else' line itself
 * is not visible in this view. */
137 static void smpi_execute(double duration)
139 /* FIXME: a global variable would be less expensive to consult than a call to xbt_cfg_get_double() right on the critical path */
140 if (duration >= sg_cfg_get_double("smpi/cpu_threshold")) {
141 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
142 smpi_execute_flops(duration *
143 sg_cfg_get_double("smpi/running_power"));
145 XBT_DEBUG("Real computation took %f while option smpi/cpu_threshold is set to %f => ignore it",
146 duration, sg_cfg_get_double("smpi/cpu_threshold"));
/* Start the timer returned by smpi_process_timer() and record the value of
 * smpi_process_index() in `smpi_current_rank'. */
150 void smpi_bench_begin(void)
152 xbt_os_timer_start(smpi_process_timer());
153 smpi_current_rank = smpi_process_index();
/* Stop the timer returned by smpi_process_timer() and hand the elapsed time to
 * smpi_execute(), which decides whether to inject it into the simulation. */
156 void smpi_bench_end(void)
158 xbt_os_timer_t timer = smpi_process_timer();
160 xbt_os_timer_stop(timer);
161 smpi_execute(xbt_os_timer_elapsed(timer));
/* Pass `secs' to smpi_execute() as a duration.
 * NOTE(review): the surrounding statements and the returned value are not visible in
 * this view. */
164 unsigned int smpi_sleep(unsigned int secs)
167 smpi_execute((double) secs);
/* Fill `tv' from the clock returned by SIMIX_get_clock(): tv_sec receives the integral
 * part and tv_usec the fractional part scaled by 1e6.
 * NOTE(review): the two tv_usec assignments differ only in the cast type (useconds_t vs
 * suseconds_t); presumably they sit in alternative preprocessor branches that are not
 * visible here. The declaration of `now' and the return value are also not visible. */
172 int smpi_gettimeofday(struct timeval *tv)
176 now = SIMIX_get_clock();
178 tv->tv_sec = (time_t)now;
180 tv->tv_usec = (useconds_t)((now - tv->tv_sec) * 1e6);
182 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Defined elsewhere; its inverse is used below as the timestamp resolution. */
189 extern double sg_maxmin_precision;
/* Return 1 / sg_maxmin_precision, truncated to an unsigned integer.
 * NOTE(review): some lines of this function are not visible in this view. */
190 unsigned long long smpi_rastro_resolution (void)
193 double resolution = (1/sg_maxmin_precision);
195 return (unsigned long long)resolution;
/* Return the clock from SIMIX_get_clock() as an integer count of resolution units:
 * whole seconds times smpi_rastro_resolution(), plus the fractional part of the clock
 * scaled by the same resolution and truncated.
 * NOTE(review): some lines of this function are not visible in this view. */
198 unsigned long long smpi_rastro_timestamp (void)
201 double now = SIMIX_get_clock();
203 unsigned long long sec = (unsigned long long)now;
/* Fractional part of the clock expressed in resolution units (truncated on assignment). */
204 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
206 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
209 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
/* Fields of the per-location benchmark record (used below as local_data_t) that the
 * smpi_sample_* functions store in the `samples' dictionary. */
211 int iters; /* number of iterations requested (compared against count) */
212 int count; /* number of iterations done so far */
213 double threshold; /* maximal relative stderr accepted (only checked if positive) */
214 double relstderr; /* relative standard error observed so far, as computed in smpi_sample_3() */
215 double mean; /* mean of benched times, to be used if the block is disabled; holds the last sample while benching continues */
216 double sum; /* sum of benched times (to compute the mean and stderr) */
217 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
218 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
/* Build the `samples' dictionary key for a sampled block, using bprintf(): either
 * "<file>:<line>" or "<file>:<line>:<process index>".
 * NOTE(review): the condition choosing between the two forms is not visible in this
 * view; presumably it tests `global', giving one shared record for global samples and
 * one record per process otherwise -- confirm. The caller presumably has to free the
 * returned string. */
221 static char *sample_location(int global, const char *file, int line) {
223 return bprintf("%s:%d", file, line);
225 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* Decide whether the record `data' holds enough benchmark runs.
 * The starting verdict is "count >= iters". When a positive threshold is set, the verdict
 * is forced to 0 while the observed relative stderr exceeds it, and also under a
 * "not enough data" condition whose test is on a line not visible in this view.
 * The verdict is logged; the return statement is not visible (presumably `res'). */
228 static int sample_enough_benchs(local_data_t *data) {
229 int res = data->count >= data->iters;
230 if (data->threshold>0.0) {
232 res = 0; // not enough data
233 if (data->relstderr > data->threshold)
234 res = 0; // stderr too high yet
236 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
237 (res?"enough benchs":"need more data"),
238 data->count, data->iters, data->relstderr, data->threshold, data->mean);
/* First step of a sampled block: close the benchmark that is currently running, then
 * find or create the record for this source location and decide whether the block still
 * needs to be benchmarked.
 *
 * global:    forwarded to sample_location() to build the record key.
 * file/line: source location of the sampled block.
 * iters:     requested number of benchmark iterations.
 * threshold: requested maximal relative stderr; at least one of iters/threshold must be
 *            positive (asserted when the record is created).
 *
 * NOTE(review): several lines are not visible in this view (declaration of `data', the
 * tests guarding dictionary creation and record creation, the remaining field
 * initialisations, and the handling of `loc' afterwards). */
242 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
244 char *loc = sample_location(global, file, line);
247 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
/* Records stored in `samples' are released with free(). */
249 samples = xbt_dict_new_homogeneous(free);
251 data = xbt_dict_get_or_null(samples, loc);
253 xbt_assert(threshold>0 || iters>0,
254 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
255 data = (local_data_t *) xbt_new(local_data_t, 1);
258 data->sum_pow2 = 0.0;
260 data->threshold = threshold;
261 data->benching = 1; // If we have no data, we need at least one
263 xbt_dict_set(samples, loc, data, NULL);
264 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
/* Existing record: the same location must always be sampled with the same settings. */
266 if (data->iters != iters || data->threshold != threshold) {
267 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. How did you manage to give two numbers at the same line??",
268 loc, data->iters, data->threshold, iters,threshold);
272 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate the computation instead
273 data->benching = !sample_enough_benchs(data);
274 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s",loc, (data->benching?"more benching needed":"we have enough data, skip computes"));
/* Second step of a sampled block: look up the record created by smpi_sample_1() and
 * either let a new benchmark run (benching == 1) or replay the recorded mean through
 * smpi_execute() and restart the process timer.
 * xbt_dict_get() is used for the lookup, so a missing record is an error rather than a
 * NULL result.
 * NOTE(review): the declaration of `data', the statements that follow the first debug
 * message, the `else' line and the returned value are not visible in this view. */
279 int smpi_sample_2(int global, const char *file, int line)
281 char *loc = sample_location(global, file, line);
284 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
285 data = xbt_dict_get(samples, loc);
286 XBT_DEBUG("sample2 %s",loc);
289 if (data->benching==1) {
290 // we need to run a new bench
291 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
292 data->count, data->iters, data->relstderr, data->threshold, data->mean);
296 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just ran one bench and need to bail out now that our job is done).
297 // Just sleep instead
298 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f). apply the %fs delay instead",
299 data->count, data->iters, data->relstderr, data->threshold, data->mean);
300 smpi_execute(data->mean);
302 smpi_bench_begin(); // prepare to capture future, unrelated computations
/* Third step of a sampled block: stop the process timer, fold the elapsed time into the
 * record for this location and refresh its statistics (mean and relative standard error).
 * NOTE(review): several lines are not visible in this view: the declarations of `data',
 * `sample' and `n', the body of the `benching==0' branch, the updates of `count' and
 * `sum', and the statement announced by the final comment. */
308 void smpi_sample_3(int global, const char *file, int line)
310 char *loc = sample_location(global, file, line);
313 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
314 data = xbt_dict_get(samples, loc);
315 XBT_DEBUG("sample3 %s",loc);
318 if (data->benching==0) {
322 // ok, benchmarking this loop is over
323 xbt_os_timer_stop(smpi_process_timer());
328 sample = xbt_os_timer_elapsed(smpi_process_timer());
330 data->sum_pow2 += sample * sample;
331 n = (double)data->count;
332 data->mean = data->sum / n;
/* Relative standard error: sqrt((E[x^2] - mean^2) / n) divided by the mean.
 * NOTE(review): a zero mean makes this a division by zero -- confirm it cannot happen. */
333 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
334 if (!sample_enough_benchs(data)) {
335 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop occurrence before leaving, not the mean over the history
337 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
338 data->mean, data->relstderr, sample);
340 // That's enough for now, prevent sample_2 from running the same code over and over
/* Return `size' bytes of memory for the allocation site (`file', `line'), backed by a
 * shared memory object named after the process id and that site.
 * The first request for a site creates the object with shm_open(), maps it through
 * shm_map(), unlinks the name right away and records a shared_data_t in `allocs'.
 * A later request for the same site maps the recorded descriptor again.
 * Failure to create the object aborts through xbt_die().
 * NOTE(review): several lines are not visible in this view: the declarations of `i',
 * `fd', `data' and `mem', the body of the loop that flattens the name, the tests
 * selecting between the branches, the initialisation of `data' and the return. */
345 void *smpi_shared_malloc(size_t size, const char *file, int line)
347 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
348 size_t len = strlen(loc);
354 for(i = 0; i < len; i++) {
355 /* Make the 'loc' ID be a flat filename */
361 allocs = xbt_dict_new_homogeneous(free);
363 data = xbt_dict_get_or_null(allocs, loc);
/* O_CREAT | O_EXCL: creation fails if an object with this name already exists. */
365 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
369 xbt_die("Please cleanup /dev/shm/%s", loc);
371 xbt_die("An unhandled error occured while opening %s: %s", loc, strerror(errno));
374 data = xbt_new(shared_data_t, 1);
378 mem = shm_map(fd, size, data);
/* Remove the name immediately; the mapping and the open descriptor stay usable. */
379 if(shm_unlink(loc) < 0) {
380 XBT_WARN("Could not early unlink %s: %s", loc, strerror(errno));
382 xbt_dict_set(allocs, loc, data, NULL);
383 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
/* Known allocation site: map the already-open descriptor once more. */
385 mem = shm_map(data->fd, size, data);
388 XBT_DEBUG("Malloc %zu in %p (metadata at %p)", size, mem, data);
/* Unmap `ptr', which is expected to be an address registered by shm_map(), and drop the
 * owning entry from `allocs' once its count is no longer positive.
 * Every failure is reported with XBT_WARN rather than treated as fatal.
 * NOTE(review): several lines are not visible in this view: the declaration of `data',
 * the tests guarding each warning, the early returns, the count update and the
 * remaining cleanup. */
391 void smpi_shared_free(void *ptr)
393 char loc[PTR_STRLEN];
394 shared_metadata_t* meta;
398 XBT_WARN("Cannot free: nothing was allocated");
401 if(!allocs_metadata) {
402 XBT_WARN("Cannot free: no metadata was allocated");
/* Metadata is keyed by the textual form of the mapped address (same format as shm_map()). */
404 snprintf(loc, PTR_STRLEN, "%p", ptr);
405 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
407 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
412 XBT_WARN("Cannot free: something is broken in the metadata link");
/* Unmap exactly the size recorded in the metadata for this mapping. */
415 if(munmap(ptr, meta->size) < 0) {
416 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
419 if (data->count <= 0) {
421 xbt_dict_remove(allocs, data->loc);
/* Check whether an entry for the key "<func>:<input>" exists in the `calls' dictionary.
 * The lookup uses xbt_dict_get(), which throws when the key is missing; the
 * not_found_error category is then examined.
 * NOTE(review): the exception-handling scaffolding, the test guarding dictionary
 * creation, the release of `loc' and the returned value are not visible in this view. */
427 int smpi_shared_known_call(const char* func, const char* input) {
428 char* loc = bprintf("%s:%s", func, input);
/* Passing NULL registers no destructor for the stored values. */
433 calls = xbt_dict_new_homogeneous(NULL);
436 xbt_dict_get(calls, loc); /* Succeed or throw */
440 if(ex.category == not_found_error) {
/* Look up the data stored in the `calls' dictionary under the key "<func>:<input>".
 * xbt_dict_get() is used, so a missing key raises an error rather than yielding NULL.
 * NOTE(review): the declaration of `data', the test guarding dictionary creation, the
 * release of `loc' and the return statement are not visible in this view. */
451 void* smpi_shared_get_call(const char* func, const char* input) {
452 char* loc = bprintf("%s:%s", func, input);
456 calls = xbt_dict_new_homogeneous(NULL);
458 data = xbt_dict_get(calls, loc);
/* Store `data' in the `calls' dictionary under the key "<func>:<input>".
 * No destructor is registered (NULL), so the dictionary does not release `data'.
 * NOTE(review): this definition continues past the visible lines; the test guarding
 * dictionary creation, the release of `loc' and the returned value are not shown. */
463 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
464 char* loc = bprintf("%s:%s", func, input);
467 calls = xbt_dict_new_homogeneous(NULL);
469 xbt_dict_set(calls, loc, data, NULL);