1 /* Copyright (c) 2007, 2009, 2010. The SimGrid Team.
2 * All rights reserved. */
4 /* This program is free software; you can redistribute it and/or modify it
5 * under the terms of the license (GNU LGPL) which comes with this package. */
7 #include <math.h> // sqrt
10 #include "xbt/sysdep.h"
12 #include "surf/surf.h"
16 #include <sys/types.h>
/* Declare the `smpi_bench' logging sub-category under the `smpi' category.
 * NOTE(review): presumably this is the category used by the XBT_DEBUG and
 * XBT_WARN calls in this file -- confirm against the xbt log macros. */
23 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi,
24 "Logging specific to SMPI (benchmarking)");
26 /* Shared allocations are handled through shared memory segments.
27 * Associated data and metadata are used as follows:
30 * `allocs' dict ---- -.
31 * ---------- shared_data_t shared_metadata_t / | | |
32 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
33 * | ---------- | fd of <name> | | | size of mmap | --| | | |
34 * | | count (2) | |-- | data | \ | | |
35 * `----------------- | <name> | | ----------------- ---- |
36 * -------------------- | ^ |
38 * | | `allocs_metadata' dict |
39 * | | ---------------------- |
40 * | `-- | <addr of mmap #1> |<-'
41 * | .-- | <addr of mmap #2> |<-.
42 * | | ---------------------- |
48 * | shared_metadata_t / | |
49 * | ----------------- | | |
50 * | | size of mmap | --| | |
52 * ----------------- | | |
/* Size of the buffers that receive a pointer formatted with "%p" (see the
 * snprintf() calls in shm_map() and smpi_shared_free()).
 * NOTE(review): presumably "0x" + two hex digits per pointer byte + the
 * terminating NUL; the "%p" output format is implementation-defined, so
 * confirm this bound on the supported platforms. */
57 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
/* Shared segments, keyed by the flattened "<pid>_<file>_<line>" id built in
 * smpi_shared_malloc(); values are shared_data_t records. */
59 xbt_dict_t allocs = NULL; /* Allocated on first use */
/* Per-mapping metadata, keyed by the "%p" text of the mapped address; values
 * are shared_metadata_t records registered by shm_map(). */
60 xbt_dict_t allocs_metadata = NULL; /* Allocated on first use */
/* Sampling state, keyed by the string returned by sample_location(); values
 * are local_data_t records created in smpi_sample_1(). */
61 xbt_dict_t samples = NULL; /* Allocated on first use */
/* Memoized call results, keyed by "<func>:<input>"; values are the pointers
 * passed to smpi_shared_set_call(). */
62 xbt_dict_t calls = NULL; /* Allocated on first use */
/* Thread-local copy of smpi_process_index(), refreshed by smpi_bench_begin(). */
63 __thread int smpi_current_rank = 0; /* Updated after each MPI call */
/* Return the size, in bytes, of the object referred to by descriptor `fd',
 * as reported by fstat(). A failing fstat() is reported through xbt_die()
 * together with the errno message.
 * NOTE(review): the declaration of `st' is not visible in this excerpt. */
76 static size_t shm_size(int fd) {
79 if(fstat(fd, &st) < 0) {
80 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
82 return (size_t)st.st_size;
/* Map `size' bytes of the object behind `fd' as a shared read/write mapping
 * and register a shared_metadata_t for it in `allocs_metadata'.
 *
 * fd:   descriptor of the backing object.
 * size: number of bytes to map; the object is grown first when it is smaller.
 * data: shared_data_t describing the segment.
 *
 * Failures of ftruncate() and mmap() are reported through xbt_die().
 * NOTE(review): the declarations of `mem' and `loc', the initialisation of
 * the fields of `meta', any use of `data' and the return statement are not
 * visible in this excerpt. */
85 static void* shm_map(int fd, size_t size, shared_data_t* data) {
88 shared_metadata_t* meta;
/* Grow the backing object only when it is smaller than the requested size. */
90 if(size > shm_size(fd)) {
91 if(ftruncate(fd, (off_t)size) < 0) {
92 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
/* Map from offset 0 and let the kernel choose the address. */
95 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
96 if(mem == MAP_FAILED) {
97 xbt_die("Could not map fd %d: %s", fd, strerror(errno));
/* The metadata dictionary is created lazily, on the first mapping. */
99 if(!allocs_metadata) {
100 allocs_metadata = xbt_dict_new();
/* The dictionary key is the textual form of the mapped address; the same
 * format is used for the lookup in smpi_shared_free(). */
102 snprintf(loc, PTR_STRLEN, "%p", mem);
103 meta = xbt_new(shared_metadata_t, 1);
/* free() is registered as the destructor of the metadata record. */
106 xbt_dict_set(allocs_metadata, loc, meta, &free);
107 XBT_DEBUG("MMAP %zu to %p", size, mem);
/* Release the `allocs', `samples' and `calls' dictionaries.
 * NOTE(review): `allocs_metadata' is created in shm_map() but is not freed
 * here -- confirm whether it is released elsewhere or leaks at shutdown. */
122 void smpi_bench_destroy(void)
124 xbt_dict_free(&allocs);
125 xbt_dict_free(&samples);
126 xbt_dict_free(&calls);
/* Run an execution action named "computation" for `flops' on the host
 * returned by SIMIX_host_self(), and block until it completes.
 * NOTE(review): the declarations of `host' and `action', and any
 * preprocessor guard around the tracing call, are not visible in this
 * excerpt. */
129 static void smpi_execute_flops(double flops)
133 host = SIMIX_host_self();
135 XBT_DEBUG("Handle real computation time: %f flops", flops);
136 action = simcall_host_execute("computation", host, flops, 1);
/* Tag the action with the SMPI tracing category. */
138 simcall_set_category (action, TRACE_internal_smpi_get_category());
140 simcall_host_execution_wait(action);
/* Account for `duration' of measured computation time in the simulation.
 * When `duration' reaches the "smpi/cpu_threshold" configuration value it is
 * multiplied by "smpi/running_power" and passed to smpi_execute_flops();
 * the second debug message indicates that shorter durations are ignored. */
143 static void smpi_execute(double duration)
145 /* FIXME: a global variable would be less expensive to consult than a call to xbt_cfg_get_double() right on the critical path */
146 if (duration >= xbt_cfg_get_double(_surf_cfg_set, "smpi/cpu_threshold")) {
147 XBT_DEBUG("Sleep for %f to handle real computation time", duration);
/* duration * running_power is the amount handed to smpi_execute_flops(). */
148 smpi_execute_flops(duration *
149 xbt_cfg_get_double(_surf_cfg_set,
150 "smpi/running_power"));
152 XBT_DEBUG("Real computation took %f while threshold is set to %f; ignore it",
153 duration, xbt_cfg_get_double(_surf_cfg_set, "smpi/cpu_threshold"));
/* Start the timer returned by smpi_process_timer() and cache the value of
 * smpi_process_index() in the thread-local `smpi_current_rank'. */
157 void smpi_bench_begin(void)
159 xbt_os_timer_start(smpi_process_timer());
160 smpi_current_rank = smpi_process_index();
/* Stop the timer returned by smpi_process_timer() and hand the elapsed time
 * to smpi_execute() so that it is accounted for in the simulation. */
163 void smpi_bench_end(void)
165 xbt_os_timer_t timer = smpi_process_timer();
167 xbt_os_timer_stop(timer);
168 smpi_execute(xbt_os_timer_elapsed(timer));
/* Simulated sleep: `secs' is passed to smpi_execute() as a duration.
 * NOTE(review): smpi_execute() ignores durations below "smpi/cpu_threshold",
 * so a short sleep may not advance the simulation -- confirm this is
 * intended. The return value is not visible in this excerpt. */
171 unsigned int smpi_sleep(unsigned int secs)
174 smpi_execute((double) secs);
/* Fill `tv' from the simulated clock returned by SIMIX_get_clock().
 * `tz' is not used in the visible lines.
 * NOTE(review): the declaration of `now', any NULL check on `tv' and the
 * return statement are not visible in this excerpt. */
179 int smpi_gettimeofday(struct timeval *tv, struct timezone *tz)
183 now = SIMIX_get_clock();
/* Whole seconds first, then the fractional remainder in microseconds. */
185 tv->tv_sec = (time_t)now;
186 tv->tv_usec = (suseconds_t)((now - tv->tv_sec) * 1e6);
/* Defined outside this file. */
192 extern double sg_maxmin_precision;
/* Return 1 / sg_maxmin_precision truncated to an unsigned integer;
 * smpi_rastro_timestamp() uses it as the number of ticks per second. */
193 unsigned long long smpi_rastro_resolution (void)
196 double resolution = (1/sg_maxmin_precision);
198 return (unsigned long long)resolution;
/* Return the simulated clock as an integer count of ticks, with
 * smpi_rastro_resolution() ticks per second. */
201 unsigned long long smpi_rastro_timestamp (void)
204 double now = SIMIX_get_clock();
/* Integer seconds, then the fractional part scaled to ticks; the double
 * product is converted implicitly to unsigned long long. */
206 unsigned long long sec = (unsigned long long)now;
207 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
209 return (unsigned long long)sec * smpi_rastro_resolution() + pre;
/* Build the key identifying a sampling site: either "<file>:<line>" or
 * "<file>:<line>:<process index>".
 * NOTE(review): the condition selecting between the two forms is not
 * visible in this excerpt; presumably `global' selects the first one.
 * The string comes from bprintf(); presumably the caller owns it -- confirm. */
212 static char *sample_location(int global, const char *file, int line)
215 return bprintf("%s:%d", file, line);
217 return bprintf("%s:%d:%d", file, line, smpi_process_index());
/* First step of the sampling protocol: close the current benchmark interval
 * and look up the local_data_t record for this sampling site in `samples',
 * creating and inserting one when the lookup returns NULL.
 * NOTE(review): the guards around the dictionary creation and the record
 * allocation, the initialisation of the remaining fields, the release of
 * `loc' and the return values are not visible in this excerpt. */
221 int smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
223 char *loc = sample_location(global, file, line);
226 smpi_bench_end(); /* Take time from previous MPI call into account */
/* free() is the destructor applied to every value of this dictionary. */
228 samples = xbt_dict_new_homogeneous(free);
230 data = xbt_dict_get_or_null(samples, loc);
232 data = (local_data_t *) xbt_new(local_data_t, 1);
235 data->sum_pow2 = 0.0;
237 data->threshold = threshold;
239 xbt_dict_set(samples, loc, data, NULL);
/* Second step of the sampling protocol: decide whether the sampled code
 * must run again. Returns data->started.
 * Requires that `samples' exists and holds a record for this site (both are
 * asserted).
 * NOTE(review): the branch that sets `started' and updates `count', and the
 * release of `loc', are not visible in this excerpt. */
246 int smpi_sample_2(int global, const char *file, int line)
248 char *loc = sample_location(global, file, line);
251 xbt_assert(samples, "You did something very inconsistent, didn't you?");
252 data = xbt_dict_get_or_null(samples, loc);
254 xbt_assert(data, "Please, do thing in order");
256 if (!data->started) {
/* Enough samples: either the iteration cap is set (iters > 0) and reached,
 * or at least two samples exist and the relative standard error is within
 * the positive threshold. */
257 if ((data->iters > 0 && data->count >= data->iters)
258 || (data->count > 1 && data->threshold > 0.0 && data->relstderr <= data->threshold)) {
/* Inject the measured mean instead of running the code again. */
259 XBT_DEBUG("Perform some wait of %f", data->mean);
260 smpi_execute(data->mean);
270 smpi_process_simulated_start();
271 return data->started;
/* Third step of the sampling protocol: record the duration of the run that
 * just finished and update the running statistics of the site.
 * NOTE(review): smpi_sample_2() applies the iteration cap only when
 * iters > 0, but the guard below requires count < iters; with iters == 0
 * that guard is never true, so no sample would be recorded -- confirm.
 * NOTE(review): relstderr divides by `mean', which is not checked against
 * zero in the visible lines.
 * The update of `sum', the reset of `started' and the release of `loc' are
 * not visible in this excerpt. */
274 void smpi_sample_3(int global, const char *file, int line)
276 char *loc = sample_location(global, file, line);
280 xbt_assert(samples, "You did something very inconsistent, didn't you?");
281 data = xbt_dict_get_or_null(samples, loc);
283 if(data && data->started && data->count < data->iters) {
284 sample = smpi_process_simulated_elapsed();
286 data->sum_pow2 += sample * sample;
287 n = (double)data->count;
288 data->mean = data->sum / n;
/* sqrt((sum_pow2 / n - mean * mean) / n) divided by the mean. */
289 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
290 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
291 data->mean, data->relstderr, sample);
/* Public wrapper around smpi_execute_flops(): `flops' is passed through
 * unchanged. */
296 void smpi_sample_flops(double flops)
298 smpi_execute_flops(flops);
/* Return a mapping of `size' bytes backed by a POSIX shared memory object
 * identified by the calling site (`file', `line') and the process id.
 * The first call for a site creates the object and records a shared_data_t
 * in `allocs'; the other visible branch maps the stored descriptor again
 * through shm_map().
 * NOTE(review): the guards selecting each branch, the initialisation of
 * `data', the update of its `count', the release of `loc' and the return
 * statement are not visible in this excerpt. */
301 void *smpi_shared_malloc(size_t size, const char *file, int line)
303 char *loc = bprintf("%zu_%s_%d", (size_t)getpid(), file, line);
304 size_t len = strlen(loc);
310 for(i = 0; i < len; i++) {
311 /* Make the 'loc' ID be a flat filename */
/* free() is the destructor applied to every value of this dictionary. */
317 allocs = xbt_dict_new_homogeneous(free);
319 data = xbt_dict_get_or_null(allocs, loc);
/* O_EXCL makes the creation fail when an object of that name already exists. */
321 fd = shm_open(loc, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
325 xbt_die("Please cleanup /dev/shm/%s", loc);
327 xbt_die("An unhandled error occured while opening %s: %s", loc, strerror(errno));
330 data = xbt_new(shared_data_t, 1);
334 mem = shm_map(fd, size, data);
/* The name is removed right after mapping; a failure only produces a warning. */
335 if(shm_unlink(loc) < 0) {
336 XBT_WARN("Could not early unlink %s: %s", loc, strerror(errno));
338 xbt_dict_set(allocs, loc, data, NULL);
339 XBT_DEBUG("Mapping %s at %p through %d", loc, mem, fd);
/* Map the descriptor stored in the existing record. */
341 mem = shm_map(data->fd, size, data);
344 XBT_DEBUG("Malloc %zu in %p (metadata at %p)", size, mem, data);
/* Unmap a block obtained from smpi_shared_malloc(). Each visible failure
 * case only emits a warning.
 * NOTE(review): the guards in front of the warnings, the declaration and
 * assignment of `data', the update of `data->count' and the removal of the
 * metadata entry are not visible in this excerpt. */
348 void smpi_shared_free(void *ptr)
350 char loc[PTR_STRLEN];
351 shared_metadata_t* meta;
355 XBT_WARN("Cannot free: nothing was allocated");
358 if(!allocs_metadata) {
359 XBT_WARN("Cannot free: no metadata was allocated");
/* Same "%p" key format as the one used by shm_map() at registration. */
361 snprintf(loc, PTR_STRLEN, "%p", ptr);
362 meta = (shared_metadata_t*)xbt_dict_get_or_null(allocs_metadata, loc);
364 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI", ptr);
369 XBT_WARN("Cannot free: something is broken in the metadata link");
/* The length passed to munmap() is the size stored in the metadata record. */
372 if(munmap(ptr, meta->size) < 0) {
373 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
/* Once the count is zero or less, the record is removed from `allocs'. */
376 if (data->count <= 0) {
378 xbt_dict_remove(allocs, data->loc);
/* Test whether a result is stored in `calls' under the key "<func>:<input>".
 * The lookup uses xbt_dict_get(), which throws when the key is absent; the
 * not_found_error category is the case examined afterwards.
 * NOTE(review): the guard around the dictionary creation, the exception
 * handling construct, the release of `loc' and the return values are not
 * visible in this excerpt. */
383 int smpi_shared_known_call(const char* func, const char* input) {
384 char* loc = bprintf("%s:%s", func, input);
/* A NULL destructor is passed for the stored values. */
389 calls = xbt_dict_new_homogeneous(NULL);
392 xbt_dict_get(calls, loc); /* Succeed or throw */
396 if(ex.category == not_found_error) {
/* Fetch the value stored in `calls' under the key "<func>:<input>" with
 * xbt_dict_get(); smpi_shared_known_call() relies on that call throwing
 * when the key is absent.
 * NOTE(review): the guard around the dictionary creation, the release of
 * `loc' and the return statement are not visible in this excerpt. */
407 void* smpi_shared_get_call(const char* func, const char* input) {
408 char* loc = bprintf("%s:%s", func, input);
412 calls = xbt_dict_new_homogeneous(NULL);
414 data = xbt_dict_get(calls, loc);
/* Store `data' in `calls' under the key "<func>:<input>". No destructor is
 * registered for the value (NULL is passed to xbt_dict_set()).
 * NOTE(review): the guard around the dictionary creation, the release of
 * `loc' and the return statement are not visible in this excerpt. */
419 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
420 char* loc = bprintf("%s:%s", func, input);
423 calls = xbt_dict_new_homogeneous(NULL);
425 xbt_dict_set(calls, loc, data, NULL);