1 /* Copyright (c) 2007, 2009-2017. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
8 #include <unordered_map>
11 #include "src/internal_config.h"
13 #include "private.hpp"
16 #include "xbt/sysdep.h"
18 #include "surf/surf.h"
19 #include "simgrid/sg_config.h"
20 #include "simgrid/modelchecker.h"
21 #include "src/mc/mc_replay.h"
23 #include <sys/types.h>
30 #include <math.h> // sqrt
40 #define MAP_ANONYMOUS MAP_ANON
43 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi, "Logging specific to SMPI (benchmarking)");
45 /* Shared allocations are handled through shared memory segments.
46 * Associated data and metadata are used as follows:
49 * `allocs' dict ---- -.
50 * ---------- shared_data_t shared_metadata_t / | | |
51 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
52 * | ---------- | fd of <name> | | | size of mmap | --| | | |
53 * | | count (2) | |-- | data | \ | | |
54 * `----------------- | <name> | | ----------------- ---- |
55 * -------------------- | ^ |
57 * | | `allocs_metadata' dict |
58 * | | ---------------------- |
59 * | `-- | <addr of mmap #1> |<-'
60 * | .-- | <addr of mmap #2> |<-.
61 * | | ---------------------- |
67 * | shared_metadata_t / | |
68 * | ----------------- | | |
69 * | | size of mmap | --| | |
71 * ----------------- | | |
76 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
// Module-wide state of the SMPI benchmarking, shared-allocation and privatisation code.
// `samples' holds the per-location data of the smpi_sample_* functions and `calls' the values stored by
// smpi_shared_set_call(); both dicts are created lazily by the functions that use them.
78 xbt_dict_t samples = nullptr; /* Allocated on first use */
79 xbt_dict_t calls = nullptr; /* Allocated on first use */
// Durations below this threshold are not simulated by smpi_execute().
81 double smpi_cpu_threshold;
// Multiplied by a duration in smpi_execute() to obtain the amount of flops to simulate.
82 double smpi_host_speed;
// Index of the privatisation region currently mapped over the data segment; -1 until the first switch.
84 int smpi_loaded_page = -1;
// Start address and size of the executable's bss+data segment; a size of 0 disables segment switching.
85 char* smpi_start_data_exe = nullptr;
86 int smpi_size_data_exe = 0;
// When true, smpi_bench_begin() switches to the data segment of the calling process.
87 bool smpi_privatize_global_variables;
// Strategy selected in smpi_shared_malloc() and smpi_shared_free().
88 shared_malloc_type smpi_cfg_shared_malloc = shmalloc_global;
// Sum of the durations read from the process timers in smpi_bench_end().
89 double smpi_total_benched_time = 0;
// One entry (file descriptor + mapped address) per simulated process, filled by
// smpi_initialize_global_memory_segments().
90 smpi_privatisation_region_t smpi_privatisation_regions;
94 /** Some location in the source code
96 * This information is used by SMPI_SHARED_MALLOC to allocate some shared memory for all simulated processes.
// A (file name, line) pair; it is the key type of the `allocs' map.
98 class smpi_source_location {
// The constructor stores an xbt_strdup() copy of `filename', its strlen() and the line number.
// NOTE(review): no release of that copy is visible in this listing -- confirm who frees it.
100 smpi_source_location(const char* filename, int line)
101 : filename(xbt_strdup(filename)), filename_length(strlen(filename)), line(line)
105 /** Pointer to the file name (the constructor stores an xbt_strdup() copy of its argument) */
106 char* filename = nullptr;
// Length of `filename', cached so that comparison and hashing do not need to call strlen() again.
107 int filename_length = 0;
// Two locations are equal when the name lengths and line numbers match and the names are byte-wise identical.
110 bool operator==(smpi_source_location const& that) const
112 return filename_length == that.filename_length && line == that.line &&
113 std::memcmp(filename, that.filename, filename_length) == 0;
// Negation of operator==.
115 bool operator!=(smpi_source_location const& that) const { return !(*this == that); }
// Hash functor for smpi_source_location, needed to use it as an unordered_map key (see `allocs').
// The result is the XOR of the hash of the file name bytes and the hash of the raw bytes of the line number.
// NOTE(review): the enclosing namespace is not visible in this listing; presumably namespace std.
121 template <> class hash<smpi_source_location> {
123 typedef smpi_source_location argument_type;
124 typedef std::size_t result_type;
125 result_type operator()(smpi_source_location const& loc) const
127 return xbt_str_hash_ext(loc.filename, loc.filename_length) ^
128 xbt_str_hash_ext((const char*)&loc.line, sizeof(loc.line));
// Shared allocations of the shmalloc_local strategy, keyed by the source location that requested them.
140 std::unordered_map<smpi_source_location, shared_data_t> allocs;
// Type of one (location, shared_data_t) entry of `allocs'.
141 typedef std::unordered_map<smpi_source_location, shared_data_t>::value_type shared_data_key_type;
// Pointer to the `allocs' entry a mapping belongs to; smpi_shared_free() reads it through shared_metadata_t.
// (The other lines of the enclosing declaration are not visible in this listing.)
145 shared_data_key_type* data;
// Per-mapping metadata keyed by the address returned by mmap(); filled in shm_map(), looked up in
// smpi_shared_free().
148 std::unordered_map<void*, shared_metadata_t> allocs_metadata;
// Return the size in bytes of the file designated by `fd' (st_size as reported by fstat()).
// Aborts through xbt_die() when fstat() fails.
151 static size_t shm_size(int fd) {
154 if(fstat(fd, &st) < 0) {
155 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
157 return static_cast<size_t>(st.st_size);
// Map `size' bytes of the file `fd' as shared read/write memory and register the mapping in allocs_metadata
// under the mapped address.
// `data' is the `allocs' entry the mapping belongs to; the lines using it are not visible in this listing.
// The value returned is not visible either; presumably the mapped address `mem'.
161 static void* shm_map(int fd, size_t size, shared_data_key_type* data) {
// NOTE(review): `loc' is formatted below but is not read on any visible line.
162 char loc[PTR_STRLEN];
163 shared_metadata_t meta;
// Grow the file first when it is smaller than the requested size; a failure to do so is fatal.
165 if(size > shm_size(fd) && (ftruncate(fd, static_cast<off_t>(size)) < 0)) {
166 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
169 void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
170 if(mem == MAP_FAILED) {
172 "Failed to map fd %d with size %zu: %s\n"
173 "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n"
174 "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n"
175 "Please see http://simgrid.gforge.inria.fr/simgrid/latest/doc/html/options.html#options_virt for more info.",
176 fd, size, strerror(errno));
178 snprintf(loc, PTR_STRLEN, "%p", mem);
// Remember the metadata of this mapping so that smpi_shared_free() can find it from the address.
181 allocs_metadata[mem] = meta;
182 XBT_DEBUG("MMAP %zu to %p", size, mem);
// Release the bookkeeping of this module: empty allocs_metadata and free the `samples' and `calls' dicts.
// (One line of the body is not visible in this listing.)
187 void smpi_bench_destroy()
190 allocs_metadata.clear();
191 xbt_dict_free(&samples);
192 xbt_dict_free(&calls);
// C-linkage wrapper around smpi_execute_flops() taking its argument by pointer.
// NOTE(review): the trailing underscore and by-reference argument suggest a Fortran binding -- confirm.
195 extern "C" XBT_PUBLIC(void) smpi_execute_flops_(double *flops);
196 void smpi_execute_flops_(double *flops)
198 smpi_execute_flops(*flops);
// C-linkage wrapper around smpi_execute() taking its argument by pointer.
// NOTE(review): the trailing underscore and by-reference argument suggest a Fortran binding -- confirm.
201 extern "C" XBT_PUBLIC(void) smpi_execute_(double *duration);
202 void smpi_execute_(double *duration)
204 smpi_execute(*duration);
// Simulate a computation of `flops' flops: start an execution named "computation", attach the SMPI tracing
// category to it and block until it completes.
207 void smpi_execute_flops(double flops) {
208 XBT_DEBUG("Handle real computation time: %f flops", flops);
209 smx_activity_t action = simcall_execution_start("computation", flops, 1, 0);
210 simcall_set_category (action, TRACE_internal_smpi_get_category());
211 simcall_execution_wait(action);
// Once the wait is over, make sure the data segment of the calling process is the one mapped.
212 smpi_switch_data_segment(smpi_process_index());
// Inject a computation that lasted `duration' into the simulation.
// Durations of at least smpi_cpu_threshold are converted to flops with smpi_host_speed and simulated through
// smpi_execute_flops(), bracketed by "computing" trace events; the other branch only logs that the duration
// is ignored (its remaining lines are not visible in this listing).
215 void smpi_execute(double duration)
217 if (duration >= smpi_cpu_threshold) {
218 XBT_DEBUG("Sleep for %g to handle real computation time", duration);
219 double flops = duration * smpi_host_speed;
220 int rank = smpi_process_index();
// Extra data handed to the tracing layer: the kind of event and the computation size in flops.
221 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
222 extra->type=TRACING_COMPUTING;
223 extra->comp_size=flops;
224 TRACE_smpi_computing_in(rank, extra);
226 smpi_execute_flops(flops);
228 TRACE_smpi_computing_out(rank);
231 XBT_DEBUG("Real computation took %g while option smpi/cpu_threshold is set to %g => ignore it", duration,
// Start measuring the computation of the calling process: switch to its data segment when privatisation is
// enabled, start the PAPI counters when the "smpi/papi-events" option is not empty, then start the process
// timer.
236 void smpi_bench_begin()
238 if (smpi_privatize_global_variables) {
239 smpi_switch_data_segment(smpi_process_index());
// The statement guarded by this test is not visible in this listing; presumably an early return, as no
// benchmarking is wanted under the model checker or in record/replay.
242 if (MC_is_active() || MC_record_replay_is_active())
// NOTE(review): the PAPI code below is presumably enclosed in a preprocessor conditional that is not visible.
246 if (xbt_cfg_get_string("smpi/papi-events")[0] != '\0') {
247 int event_set = smpi_process_papi_event_set();
248 // PAPI_start sets everything to 0! See man(3) PAPI_start
249 if (PAPI_LOW_LEVEL_INITED == PAPI_is_initialized()) {
250 if (PAPI_start(event_set) != PAPI_OK) {
251 // TODO This needs some proper handling.
252 XBT_CRITICAL("Could not start PAPI counters.\n");
258 xbt_os_threadtimer_start(smpi_process_timer());
// Stop measuring the computation of the calling process and inject the measured time into the simulation.
// Visible steps: stop the process timer; accumulate the PAPI counters; abort when called while a sampling
// block is active; look up a speedup factor for the current call location; simulate the elapsed time divided
// by that factor; export the PAPI counters to the trace; add the elapsed time to smpi_total_benched_time.
261 void smpi_bench_end()
// The statement guarded by this test is not visible in this listing; presumably an early return.
263 if (MC_is_active() || MC_record_replay_is_active())
267 xbt_os_timer_t timer = smpi_process_timer();
268 xbt_os_threadtimer_stop(timer);
272 * An MPI function has been called and now is the right time to update
273 * our PAPI counters for this process.
275 if (xbt_cfg_get_string("smpi/papi-events")[0] != '\0') {
276 papi_counter_t& counter_data = smpi_process_papi_counters();
277 int event_set = smpi_process_papi_event_set();
// One slot per configured counter, filled by PAPI_stop().
278 std::vector<long long> event_values = std::vector<long long>(counter_data.size());
280 if (PAPI_stop(event_set, &event_values[0]) != PAPI_OK) { // Error
281 XBT_CRITICAL("Could not stop PAPI counters.\n");
// Add the values read for this interval to the running totals of the process.
284 for (unsigned int i = 0; i < counter_data.size(); i++) {
285 counter_data[i].second += event_values[i];
286 // XBT_DEBUG("[%i] PAPI: Counter %s: Value is now %lli (got increment by %lli\n", smpi_process_index(),
287 // counter_data[i].first.c_str(), counter_data[i].second, event_values[i]);
// Reaching this function while the sampling flag is set is treated as a fatal usage error.
293 if (smpi_process_get_sampling()) {
294 XBT_CRITICAL("Cannot do recursive benchmarks.");
295 XBT_CRITICAL("Are you trying to make a call to MPI within a SMPI_SAMPLE_ block?");
296 xbt_backtrace_display_current();
297 xbt_die("Aborting.");
// `speedup' is declared on a line that is not visible in this listing.
300 if (xbt_cfg_get_string("smpi/comp-adjustment-file")[0] != '\0') { // Maybe we need to artificially speed up or slow
301 // down our computation based on our statistical analysis.
303 smpi_trace_call_location_t* loc = smpi_process_get_call_location();
304 std::string key = loc->get_composed_key();
305 std::unordered_map<std::string, double>::const_iterator it = location2speedup.find(key);
306 if (it != location2speedup.end()) {
307 speedup = it->second;
311 // Simulate the benchmarked computation unless disabled via command-line argument
312 if (xbt_cfg_get_boolean("smpi/simulate-computation")) {
313 smpi_execute(xbt_os_timer_elapsed(timer)/speedup);
// When tracing is enabled, publish the current total of every PAPI counter as a variable of the container
// of this process.
317 if (xbt_cfg_get_string("smpi/papi-events")[0] != '\0' && TRACE_smpi_is_enabled()) {
318 char container_name[INSTR_DEFAULT_STR_SIZE];
319 smpi_container(smpi_process_index(), container_name, INSTR_DEFAULT_STR_SIZE);
320 container_t container = PJ_container_get(container_name);
321 papi_counter_t& counter_data = smpi_process_papi_counters();
323 for (auto& pair : counter_data) {
324 new_pajeSetVariable(surf_get_clock(), container,
325 PJ_type_get(/* countername */ pair.first.c_str(), container->type), pair.second);
// The unscaled elapsed time (not divided by `speedup') is what gets accumulated here.
330 smpi_total_benched_time += xbt_os_timer_elapsed(timer);
333 /* Private sleep function used by smpi_sleep() and smpi_usleep() */
// Makes the calling process sleep for `secs' simulated seconds through simcall_process_sleep(), bracketed by
// "sleeping" trace events attributed to its rank in MPI_COMM_WORLD.
// The returned value is produced on lines that are not visible in this listing.
334 static unsigned int private_sleep(double secs)
338 XBT_DEBUG("Sleep for: %lf secs", secs);
339 int rank = smpi_comm_rank(MPI_COMM_WORLD);
// Extra data handed to the tracing layer: the kind of event and the sleep duration.
340 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
341 extra->type=TRACING_SLEEPING;
342 extra->sleep_duration=secs;
343 TRACE_smpi_sleeping_in(rank, extra);
345 simcall_process_sleep(secs);
347 TRACE_smpi_sleeping_out(rank);
// Sleep for `secs' simulated seconds; returns whatever private_sleep() returns.
353 unsigned int smpi_sleep(unsigned int secs)
355 return private_sleep(static_cast<double>(secs));
// Sleep for `usecs' simulated microseconds (converted to seconds); returns private_sleep()'s result as an int.
358 int smpi_usleep(useconds_t usecs)
360 return static_cast<int>(private_sleep(static_cast<double>(usecs) / 1000000.0));
// Only compiled when _POSIX_TIMERS > 0 (the matching #endif is not visible in this listing).
363 #if _POSIX_TIMERS > 0
// Sleep for the simulated duration tp->tv_sec + tp->tv_nsec / 1e9 seconds.
// The second argument `t' is not used on the visible line.
364 int smpi_nanosleep(const struct timespec *tp, struct timespec * t)
366 return static_cast<int>(private_sleep(static_cast<double>(tp->tv_sec + tp->tv_nsec / 1000000000.0)));
// Fill `tv' with the simulated clock returned by SIMIX_get_clock(): whole seconds in tv_sec and the
// fractional part, in microseconds, in tv_usec. `tz' is not used on the visible lines.
// The returned value is produced on a line that is not visible in this listing.
370 int smpi_gettimeofday(struct timeval *tv, void* tz)
373 double now = SIMIX_get_clock();
375 tv->tv_sec = static_cast<time_t>(now);
// NOTE(review): the two assignments below differ only by the cast type; presumably they are alternatives
// selected by a preprocessor conditional that is not visible here.
377 tv->tv_usec = static_cast<useconds_t>((now - tv->tv_sec) * 1e6);
379 tv->tv_usec = static_cast<suseconds_t>((now - tv->tv_sec) * 1e6);
// Only compiled when _POSIX_TIMERS > 0 (the matching #endif is not visible in this listing).
386 #if _POSIX_TIMERS > 0
// Fill `tp' with the simulated clock returned by SIMIX_get_clock(): whole seconds in tv_sec and the
// fractional part, in nanoseconds, in tv_nsec.
// The returned value is produced on a line that is not visible in this listing.
387 int smpi_clock_gettime(clockid_t clk_id, struct timespec *tp)
389 //there is only one time in SMPI, so clk_id is ignored.
391 double now = SIMIX_get_clock();
393 tp->tv_sec = static_cast<time_t>(now);
394 tp->tv_nsec = static_cast<long int>((now - tp->tv_sec) * 1e9);
// Declared here, defined elsewhere.
401 extern double sg_surf_precision;
// Return the number of timestamp ticks per simulated second, computed as 1 / sg_surf_precision and
// truncated to an unsigned integer.
402 unsigned long long smpi_rastro_resolution ()
405 double resolution = (1/sg_surf_precision);
407 return static_cast<unsigned long long>(resolution);
// Return the simulated clock expressed in ticks of smpi_rastro_resolution() per second.
410 unsigned long long smpi_rastro_timestamp ()
413 double now = SIMIX_get_clock();
// Whole seconds and fractional part are converted separately, then added.
415 unsigned long long sec = (unsigned long long)now;
// The product is computed in floating point and truncated when stored in `pre'.
416 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
418 return static_cast<unsigned long long>(sec) * smpi_rastro_resolution() + pre;
421 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
// Fields of local_data_t, the statistics kept in the `samples' dict for each sampled location.
// (The opening and closing lines of the type declaration are not visible in this listing.)
423 double threshold; /* maximal stderr requested (if positive) */
424 double relstderr; /* observed stderr so far */
425 double mean; /* mean of benched times, to be used if the block is disabled */
426 double sum; /* sum of benched times (to compute the mean and stderr) */
427 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
428 int iters; /* amount of requested iterations */
429 int count; /* amount of iterations done so far */
430 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
// Build the key under which a sampled block is stored in `samples': either "file:line" or
// "file:line:process index". The string is allocated by bprintf().
// NOTE(review): the test choosing between the two forms is not visible in this listing; presumably `global'
// selects the first one, which is shared by all processes.
433 static char *sample_location(int global, const char *file, int line) {
435 return bprintf("%s:%d", file, line);
437 return bprintf("%s:%d:%d", file, line, smpi_process_index());
// Decide whether `data' holds enough measurements: starts from count >= iters and, when a positive
// threshold was requested, also requires the observed relative stderr not to exceed it.
// The returned value is produced on a line that is not visible in this listing; presumably `res'.
441 static int sample_enough_benchs(local_data_t *data) {
442 int res = data->count >= data->iters;
443 if (data->threshold>0.0) {
// NOTE(review): the condition guarding the next assignment is not visible in this listing.
445 res = 0; // not enough data
446 if (data->relstderr > data->threshold)
447 res = 0; // stderr too high yet
449 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
450 (res?"enough benchs":"need more data"), data->count, data->iters, data->relstderr, data->threshold, data->mean);
// First step of a sampled block: account for the computation that preceded it, flag the process as sampling
// and create or update the local_data_t entry of the block.
// `global', `file' and `line' identify the block (see sample_location()); `iters' is the requested number of
// iterations and `threshold' the maximal stderr requested. At least one of the two must be positive.
454 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
456 char *loc = sample_location(global, file, line);
458 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
459 smpi_process_set_sampling(1);
461 if (samples==nullptr)
462 samples = xbt_dict_new_homogeneous(free);
464 local_data_t *data = static_cast<local_data_t *>(xbt_dict_get_or_null(samples, loc));
// First visit of this block: create its entry. (The test selecting this branch and some of the field
// initialisations are not visible in this listing.)
466 xbt_assert(threshold>0 || iters>0,
467 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
468 data = static_cast<local_data_t *>( xbt_new(local_data_t, 1));
471 data->sum_pow2 = 0.0;
473 data->threshold = threshold;
474 data->benching = 1; // If we have no data, we need at least one
476 xbt_dict_set(samples, loc, data, nullptr);
477 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
// Later visits: the settings must match the ones recorded on the first visit.
479 if (data->iters != iters || data->threshold != threshold) {
480 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. "
481 "How did you manage to give two numbers at the same line??",
482 loc, data->iters, data->threshold, iters, threshold);
486 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate
487 // the computation instead
488 data->benching = (sample_enough_benchs(data) == 0);
489 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s", loc,
490 (data->benching ? "more benching needed" : "we have enough data, skip computes"));
// Second step of a sampled block: tell the caller whether the block must be run once more.
// When the entry is still benchmarking, a new measurement is prepared (those lines are not visible in this
// listing); otherwise the recorded mean is simulated with smpi_execute(), the sampling flag is cleared and
// `res' is set to 0.
// The declaration of `res' and the return statement are not visible; presumably `res' is returned.
495 int smpi_sample_2(int global, const char *file, int line)
497 char *loc = sample_location(global, file, line);
500 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
501 local_data_t *data = static_cast<local_data_t *>(xbt_dict_get(samples, loc));
502 XBT_DEBUG("sample2 %s",loc);
505 if (data->benching==1) {
506 // we need to run a new bench
507 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
508 data->count, data->iters, data->relstderr, data->threshold, data->mean);
511 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just
512 //ran one bench and need to bail out now that our job is done). Just sleep instead
513 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f)."
514 " apply the %fs delay instead",
515 data->count, data->iters, data->relstderr, data->threshold, data->mean);
516 smpi_execute(data->mean);
517 smpi_process_set_sampling(0);
518 res = 0; // prepare to capture future, unrelated computations
// Third step of a sampled block: stop the process timer and fold the measured duration into the statistics
// of the block (sum of squares, mean, relative standard error).
// The updates of `count' and `sum' are on lines that are not visible in this listing.
524 void smpi_sample_3(int global, const char *file, int line)
526 char *loc = sample_location(global, file, line);
528 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
529 local_data_t *data = static_cast<local_data_t *>(xbt_dict_get(samples, loc));
530 XBT_DEBUG("sample3 %s",loc);
// The statement guarded by this test is not visible in this listing.
533 if (data->benching==0)
536 // ok, benchmarking this loop is over
537 xbt_os_threadtimer_stop(smpi_process_timer());
541 double sample = xbt_os_timer_elapsed(smpi_process_timer());
543 data->sum_pow2 += sample * sample;
544 double n = static_cast<double>(data->count);
545 data->mean = data->sum / n;
// Relative standard error: sqrt((E[x^2] - mean^2) / n) / mean.
// NOTE(review): this divides by data->mean, so a mean of 0 does not give a finite value.
546 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
547 if (sample_enough_benchs(data)==0) {
548 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop
549 // occurrence before leaving, not the mean over the history
551 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
552 data->mean, data->relstderr, sample);
554 // That's enough for now, prevent sample_2 to run the same code over and over
// File descriptor of the file that the shmalloc_global strategy maps repeatedly; -1 until it is created.
559 static int smpi_shared_malloc_bogusfile = -1;
// Size of that file and of each mapping of it (1 MiB).
560 static unsigned long smpi_shared_malloc_blocksize = 1UL << 20;
// Allocate `size' bytes for the shared-malloc request located at file:line.
// The strategy depends on smpi_cfg_shared_malloc:
//  - shmalloc_local (and size > 0): every request from the same source location maps the same POSIX
//    shared-memory object, whose entry in `allocs' counts the mappings;
//  - shmalloc_global: an anonymous area of `size' bytes is reserved, then overlaid block by block with
//    mappings of one single 1 MiB file, so that all blocks are backed by the same file content;
//  - otherwise: plain xbt_malloc().
// The declaration of `mem' and the return statement are not visible in this listing; presumably `mem' is
// returned.
561 void *smpi_shared_malloc(size_t size, const char *file, int line)
564 if (size > 0 && smpi_cfg_shared_malloc == shmalloc_local) {
565 smpi_source_location loc(file, line);
566 auto res = allocs.insert(std::make_pair(loc, shared_data_t()));
567 auto data = res.first;
// NOTE(review): the branch below creates a fresh segment with count = 1, i.e. it handles a NEW entry; the
// wording of the next comment looks inverted. Confirm against the condition, which is not visible here.
569 // The insertion did not take place.
570 // Generate a shared memory name from the address of the shared_data:
571 char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on Mac OS X (shm_open raises ENAMETOOLONG otherwise)
572 snprintf(shmname, 31, "/shmalloc%p", &*data);
573 int fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
// Both failure messages are fatal; the tests selecting between them are not visible in this listing.
576 xbt_die("Please cleanup /dev/shm/%s", shmname);
578 xbt_die("An unhandled error occurred while opening %s. shm_open: %s", shmname, strerror(errno));
580 data->second.fd = fd;
581 data->second.count = 1;
582 mem = shm_map(fd, size, &*data);
// The name is removed right away; the object stays reachable through `fd'.
583 if (shm_unlink(shmname) < 0) {
584 XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno));
586 XBT_DEBUG("Mapping %s at %p through %d", shmname, mem, fd);
// Existing entry: map the already opened object once more and count the new mapping.
588 mem = shm_map(data->second.fd, size, &*data);
589 data->second.count++;
591 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, &*data);
593 } else if (smpi_cfg_shared_malloc == shmalloc_global) {
594 /* First reserve memory area */
595 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
597 xbt_assert(mem != MAP_FAILED, "Failed to allocate %luMiB of memory. Run \"sysctl vm.overcommit_memory=1\" as root "
598 "to allow big allocations.\n",
599 (unsigned long)(size >> 20));
601 /* Create bogus file if not done already */
602 if (smpi_shared_malloc_bogusfile == -1) {
603 /* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it.
604 * It still exists in memory but not in the file system (thus it cannot be leaked). */
605 char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX");
606 smpi_shared_malloc_bogusfile = mkstemp(name);
// NOTE(review): the result of calloc() is used unchecked and the result of write() is discarded, so a
// failed or short write goes unnoticed here.
609 char* dumb = (char*)calloc(1, smpi_shared_malloc_blocksize);
610 write(smpi_shared_malloc_bogusfile, dumb, smpi_shared_malloc_blocksize);
614 /* Map the bogus file in place of the anonymous memory */
// Full blocks first (`i' is declared on a line that is not visible in this listing) ...
616 for (i = 0; i < size / smpi_shared_malloc_blocksize; i++) {
617 void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
618 void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
619 smpi_shared_malloc_bogusfile, 0);
620 xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
621 "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
// ... then the remaining partial block, if any, placed right after the last full one.
624 if (size % smpi_shared_malloc_blocksize) {
625 void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
626 void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE,
627 MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0);
628 xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
629 "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
634 mem = xbt_malloc(size);
635 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
// Release memory obtained from smpi_shared_malloc(), following the same strategy switch:
//  - shmalloc_local: look the address up in allocs_metadata, unmap it and drop the `allocs' entry once its
//    count is no longer positive;
//  - shmalloc_global: call munmap() on the address;
//  - otherwise: classic free (the call itself is not visible in this listing).
641 void smpi_shared_free(void *ptr)
643 if (smpi_cfg_shared_malloc == shmalloc_local) {
// NOTE(review): `loc' is formatted below but is not read on any visible line.
644 char loc[PTR_STRLEN];
645 snprintf(loc, PTR_STRLEN, "%p", ptr);
646 auto meta = allocs_metadata.find(ptr);
647 if (meta == allocs_metadata.end()) {
648 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr);
651 shared_data_t* data = &meta->second.data->second;
652 if (munmap(ptr, meta->second.size) < 0) {
653 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
// The update of data->count is on a line that is not visible in this listing.
656 if (data->count <= 0) {
658 allocs.erase(allocs.find(meta->second.data->first));
659 XBT_DEBUG("Shared free - with removal - of %p", ptr);
661 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
664 } else if (smpi_cfg_shared_malloc == shmalloc_global) {
// NOTE(review): the allocation size is not recorded for this strategy, hence the length of 0 below;
// confirm that this call really unmaps anything on the targeted platforms.
665 munmap(ptr, 0); // the POSIX says that I should not give 0 as a length, but it seems to work OK
668 XBT_DEBUG("Classic free of %p", ptr);
// Look up the key "func:input" in the `calls' dict, creating the dict when needed.
// The lookup either succeeds or throws; the visible test singles out the not_found_error category.
// The try/catch structure and the returned values are not visible in this listing; presumably the function
// reports whether the key is known.
674 int smpi_shared_known_call(const char* func, const char* input)
676 char* loc = bprintf("%s:%s", func, input);
679 if (calls==nullptr) {
680 calls = xbt_dict_new_homogeneous(nullptr);
683 xbt_dict_get(calls, loc); /* Succeed or throw */
689 if (ex.category != not_found_error)
// Fetch the value stored under the key "func:input" in the `calls' dict, creating the dict when needed.
// The return statement is not visible in this listing; presumably `data' is returned.
699 void* smpi_shared_get_call(const char* func, const char* input) {
700 char* loc = bprintf("%s:%s", func, input);
702 if (calls == nullptr)
703 calls = xbt_dict_new_homogeneous(nullptr);
704 void* data = xbt_dict_get(calls, loc);
// Store `data' under the key "func:input" in the `calls' dict, creating the dict when needed.
// The dict is created without a free function, so it does not release the stored values itself.
// The return statement is not visible in this listing.
709 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
710 char* loc = bprintf("%s:%s", func, input);
712 if (calls == nullptr)
713 calls = xbt_dict_new_homogeneous(nullptr);
714 xbt_dict_set(calls, loc, data, nullptr);
720 /** Map a given SMPI privatization segment (make a SMPI process active) */
// `dest' is the index of the process whose segment must become the mapped one. The actual work is delegated
// to smpi_really_switch_data_segment(); the statement guarded by the test below is not visible in this
// listing, presumably an early return.
721 void smpi_switch_data_segment(int dest) {
722 if (smpi_loaded_page == dest)//no need to switch, we've already loaded the one we want
726 smpi_really_switch_data_segment(dest);
729 /** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active
731 * When doing a state restoration, the state of the restored variables might not be consistent with the state of the
732 * virtual memory. In this case, we have to change the data segment.
// `dest' is the index of the process whose privatisation region gets mapped over the data segment.
734 void smpi_really_switch_data_segment(int dest)
736 if(smpi_size_data_exe == 0)//no need to switch
739 #if HAVE_PRIVATIZATION
// On the very first switch, every region is (re)initialised with the current content of the real segment.
740 if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here
741 for (int i=0; i< smpi_process_count(); i++){
742 memcpy(smpi_privatisation_regions[i].address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
746 // FIXME, cross-process support (mmap across process when necessary)
747 int current = smpi_privatisation_regions[dest].file_descriptor;
748 XBT_DEBUG("Switching data frame to the one of process %d", dest);
// Map the file of `dest' at the fixed address of the data segment; `tmp' is declared on a line that is not
// visible in this listing.
750 mmap(TOPAGE(smpi_start_data_exe), smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0);
751 if (tmp != TOPAGE(smpi_start_data_exe))
752 xbt_die("Couldn't map the new region");
753 smpi_loaded_page = dest;
// Return non-zero when `file' starts with the prefix "/dev/shm/my-buffer-".
// NOTE(review): smpi_initialize_global_memory_segments() creates its shared-memory objects under the name
// "/smpi-buffer-%06x", which does not match this prefix -- confirm which of the two names is intended.
757 int smpi_is_privatisation_file(char* file)
759 return strncmp("/dev/shm/my-buffer-", file, std::strlen("/dev/shm/my-buffer-")) == 0;
// Set up one private copy of the executable's bss+data segment per simulated process.
// For each process a uniquely named POSIX shared-memory object is created, sized to the segment, mapped,
// unlinked, filled with the current segment content and recorded in smpi_privatisation_regions.
// Any failure of these steps is fatal (xbt_die).
762 void smpi_initialize_global_memory_segments()
// Without privatisation support the feature is switched off and the simulation aborts.
765 #if !HAVE_PRIVATIZATION
766 smpi_privatize_global_variables=false;
767 xbt_die("You are trying to use privatization on a system that does not support it. Don't.");
771 smpi_get_executable_global_size();
773 XBT_DEBUG ("bss+data segment found : size %d starting at %p", smpi_size_data_exe, smpi_start_data_exe );
// Nothing to privatise; the rest of this branch is not visible in this listing, presumably an early return.
775 if (smpi_size_data_exe == 0){//no need to switch
776 smpi_privatize_global_variables=false;
780 smpi_privatisation_regions = static_cast<smpi_privatisation_region_t>(
781 xbt_malloc(smpi_process_count() * sizeof(struct s_smpi_privatisation_region)));
783 for (int i=0; i< smpi_process_count(); i++){
784 // create SIMIX_process_count() mappings of this size with the same data inside
786 void* address = nullptr;
// Draw random names until shm_open() stops failing with EEXIST (O_EXCL guarantees a fresh object).
// `path', `file_descriptor' and `status' are declared on lines that are not visible in this listing.
791 snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffff);
792 file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
793 } while (file_descriptor == -1 && errno == EEXIST);
794 if (file_descriptor < 0) {
795 if (errno == EMFILE) {
796 xbt_die("Impossible to create temporary file for memory mapping: %s\n\
797 The open() system call failed with the EMFILE error code (too many files). \n\n\
798 This means that you reached the system limits concerning the amount of files per process. \
799 This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
800 Don't panic -- you should simply increase your system limits and try again. \n\n\
801 First, check what your limits are:\n\
802 cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
803 ulimit -Hn # Gives you the per process hard limit\n\
804 ulimit -Sn # Gives you the per process soft limit\n\
805 cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
806 If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
807 Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
810 xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno));
// Give the object the size of the segment (the test on `status' is not visible in this listing).
813 status = ftruncate(file_descriptor, smpi_size_data_exe);
815 xbt_die("Impossible to set the size of the temporary file for memory mapping");
817 /* Ask for a free region */
818 address = mmap(nullptr, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0);
819 if (address == MAP_FAILED)
820 xbt_die("Couldn't find a free region for memory mapping");
// The name is removed right away; the object stays reachable through the descriptor and the mapping.
822 status = shm_unlink(path);
824 xbt_die("Impossible to unlink temporary file for memory mapping");
826 // initialize the values
827 memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
829 // store the address of the mapping for further switches
830 smpi_privatisation_regions[i].file_descriptor = file_descriptor;
831 smpi_privatisation_regions[i].address = address;
// Undo smpi_initialize_global_memory_segments(): unmap every per-process region, close its file descriptor
// and free the array of regions. A failing munmap() only produces a warning.
836 void smpi_destroy_global_memory_segments(){
// The statement guarded by this test is not visible in this listing; presumably an early return.
837 if (smpi_size_data_exe == 0)//no need to switch
839 #if HAVE_PRIVATIZATION
840 for (int i=0; i< smpi_process_count(); i++) {
841 if (munmap(smpi_privatisation_regions[i].address, smpi_size_data_exe) < 0)
842 XBT_WARN("Unmapping of fd %d failed: %s", smpi_privatisation_regions[i].file_descriptor, strerror(errno));
843 close(smpi_privatisation_regions[i].file_descriptor);
845 xbt_free(smpi_privatisation_regions);
849 extern "C" { /** These functions will be called from the user code **/
// Return the call-location record of the calling process, as given by smpi_process_get_call_location().
850 smpi_trace_call_location_t* smpi_trace_get_call_location() {
851 return smpi_process_get_call_location();
// Record file:line as the current call location of the calling process, after saving the location recorded
// so far as the previous one.
// The `file' pointer is stored as is, not copied, so the string must outlive the record.
854 void smpi_trace_set_call_location(const char* file, const int line) {
855 smpi_trace_call_location_t* loc = smpi_process_get_call_location();
857 loc->previous_filename = loc->filename;
858 loc->previous_linenumber = loc->linenumber;
859 loc->filename = file;
860 loc->linenumber = line;
864 * Required for Fortran bindings
// Same as smpi_trace_set_call_location(), with the line number passed by pointer.
866 void smpi_trace_set_call_location_(const char* file, int* line) {
867 smpi_trace_set_call_location(file, *line);
871 * Required for Fortran if -fsecond-underscore is activated
// Same as smpi_trace_set_call_location(), with the line number passed by pointer.
873 void smpi_trace_set_call_location__(const char* file, int* line) {
874 smpi_trace_set_call_location(file, *line);