1 /* Copyright (c) 2007, 2009-2017. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
8 #include <unordered_map>
11 #include "src/internal_config.h"
13 #include "private.hpp"
16 #include "xbt/sysdep.h"
18 #include "surf/surf.h"
19 #include "simgrid/sg_config.h"
20 #include "simgrid/modelchecker.h"
21 #include "src/mc/mc_replay.h"
23 #include <sys/types.h>
30 #include <math.h> // sqrt
// Fallback: alias MAP_ANONYMOUS to MAP_ANON.
// NOTE(review): the preprocessor guard around this define is not visible in this view.
40 #define MAP_ANONYMOUS MAP_ANON
// Fallback: MAP_POPULATE expands to 0, so OR-ing it into mmap flags has no effect.
// NOTE(review): the preprocessor guard around this define is not visible in this view.
44 #define MAP_POPULATE 0
// Declares the smpi_bench logging subcategory (child of smpi); presumably the default category
// of the XBT_DEBUG / XBT_WARN / XBT_CRITICAL calls of this file.
47 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi, "Logging specific to SMPI (benchmarking)");
49 /* Shared allocations are handled through shared memory segments.
50 * Associated data and metadata are used as follows:
53 * `allocs' dict ---- -.
54 * ---------- shared_data_t shared_metadata_t / | | |
55 * .->| <name> | ---> -------------------- <--. ----------------- | | | |
56 * | ---------- | fd of <name> | | | size of mmap | --| | | |
57 * | | count (2) | |-- | data | \ | | |
58 * `----------------- | <name> | | ----------------- ---- |
59 * -------------------- | ^ |
61 * | | `allocs_metadata' dict |
62 * | | ---------------------- |
63 * | `-- | <addr of mmap #1> |<-'
64 * | .-- | <addr of mmap #2> |<-.
65 * | | ---------------------- |
71 * | shared_metadata_t / | |
72 * | ----------------- | | |
73 * | | size of mmap | --| | |
75 * ----------------- | | |
// Size of a buffer able to hold a pointer printed with %p: 2 characters (presumably the 0x prefix),
// two hexadecimal digits per byte of the pointer, and the terminating NUL.
80 #define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
// Sampling statistics, keyed by the location string built by sample_location().
82 xbt_dict_t samples = nullptr; /* Allocated on first use */
// Data memoized by smpi_shared_set_call(), keyed by a func:input string.
83 xbt_dict_t calls = nullptr; /* Allocated on first use */
// Durations below this value are not injected in the simulation (see smpi_execute).
85 double smpi_cpu_threshold;
// Factor used by smpi_execute to convert a duration into an amount of flops.
86 double smpi_host_speed;
// Index of the privatization segment currently mapped; -1 until the first switch.
88 int smpi_loaded_page = -1;
// Start address and size of the bss+data segment of the executable (see smpi_initialize_global_memory_segments).
89 char* smpi_start_data_exe = nullptr;
90 int smpi_size_data_exe = 0;
// When true, smpi_bench_begin switches to the data segment of the current process.
91 bool smpi_privatize_global_variables;
// Strategy selected in smpi_shared_malloc / smpi_shared_free.
92 shared_malloc_type smpi_cfg_shared_malloc = shmalloc_global;
// Sum of the timer values measured by smpi_bench_end.
93 double smpi_total_benched_time = 0;
// Array with one entry (file descriptor + address) per simulated process.
94 smpi_privatisation_region_t smpi_privatisation_regions;
/** Some location in the source code
 *
 *  This information is used by SMPI_SHARED_MALLOC to allocate some shared memory for all simulated processes.
 *
 *  The object owns a private, NUL-terminated copy of the file name. The copy is released by the
 *  destructor, and copying a location duplicates the name, so locations can be freely stored in
 *  standard containers (they are the key of the `allocs` map) without leaking or double-freeing.
 */
class smpi_source_location {
public:
  /** Build a location from a NUL-terminated file name (copied) and a line number */
  smpi_source_location(const char* filename, int line)
      : filename(duplicate(filename, std::strlen(filename)))
      , filename_length(static_cast<int>(std::strlen(filename)))
      , line(line)
  {
  }

  /** Deep copy: the new object gets its own copy of the file name */
  smpi_source_location(smpi_source_location const& that)
      : filename(duplicate(that.filename, static_cast<std::size_t>(that.filename_length)))
      , filename_length(that.filename_length)
      , line(that.line)
  {
  }

  /** Move: steal the buffer and leave `that` empty (null name, zero length) */
  smpi_source_location(smpi_source_location&& that) noexcept
      : filename(that.filename), filename_length(that.filename_length), line(that.line)
  {
    that.filename        = nullptr;
    that.filename_length = 0;
  }

  /** Unified copy/move assignment: `that` is a fresh copy (or moved-in value) whose content we swap in */
  smpi_source_location& operator=(smpi_source_location that) noexcept
  {
    char* old_name       = filename;
    filename             = that.filename;
    that.filename        = old_name; // released when `that` is destroyed
    filename_length      = that.filename_length;
    line                 = that.line;
    return *this;
  }

  ~smpi_source_location() { delete[] filename; }

  /** Owned, NUL-terminated copy of the file name (nullptr only in a moved-from object) */
  char* filename      = nullptr;
  /** Length of the file name, not counting the terminating NUL */
  int filename_length = 0;
  /** Line number within the file */
  int line            = 0;

  /** Two locations are equal when they have the same line and the same file name (compared bytewise) */
  bool operator==(smpi_source_location const& that) const
  {
    return filename_length == that.filename_length && line == that.line &&
           (filename_length == 0 || std::memcmp(filename, that.filename, filename_length) == 0);
  }
  bool operator!=(smpi_source_location const& that) const { return !(*this == that); }

private:
  /** Return a freshly allocated NUL-terminated copy of the `length` first bytes of `src` (nullptr if `src` is null) */
  static char* duplicate(const char* src, std::size_t length)
  {
    if (src == nullptr)
      return nullptr;
    char* copy = new char[length + 1];
    std::memcpy(copy, src, length);
    copy[length] = '\0';
    return copy;
  }
};
// Hash functor for smpi_source_location, needed to use it as the key of the unordered_map allocs.
// NOTE(review): the opening of the enclosing namespace is not visible in this view.
125 template <> class hash<smpi_source_location> {
127 typedef smpi_source_location argument_type;
128 typedef std::size_t result_type;
// XOR of the hash of the file name bytes and the hash of the raw bytes of the line number.
129 result_type operator()(smpi_source_location const& loc) const
131 return xbt_str_hash_ext(loc.filename, loc.filename_length) ^
132 xbt_str_hash_ext((const char*)&loc.line, sizeof(loc.line));
// Shared segments of the shmalloc_local strategy, keyed by the source location given to smpi_shared_malloc().
144 std::unordered_map<smpi_source_location, shared_data_t> allocs;
// Type of one (key, value) entry of allocs.
145 typedef std::unordered_map<smpi_source_location, shared_data_t>::value_type shared_data_key_type;
// Back pointer from a mapping to the allocs entry it belongs to (read in smpi_shared_free).
// NOTE(review): the enclosing struct declaration is not visible in this view.
149 shared_data_key_type* data;
// Metadata of each live mapping, keyed by the address returned by mmap (filled in shm_map).
152 std::unordered_map<void*, shared_metadata_t> allocs_metadata;
// Returns the size in bytes of the object behind fd, as reported by fstat.
// A failing fstat is fatal (xbt_die).
// NOTE(review): the declaration of st is not visible in this view.
155 static size_t shm_size(int fd) {
158 if(fstat(fd, &st) < 0) {
159 xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
161 return static_cast<size_t>(st.st_size);
// Maps size bytes of the object behind fd (shared, readable and writable) and records the new
// mapping in allocs_metadata under the address returned by mmap.
//   fd   - descriptor of the shared memory object to map
//   size - number of bytes to map; the object is grown with ftruncate when it is smaller
//   data - allocs entry owning fd (its use is in lines that are not visible here)
// NOTE(review): several short lines are missing from this view (the call reporting the mmap
// failure, the initialisation of meta, the return statement).
165 static void* shm_map(int fd, size_t size, shared_data_key_type* data) {
// loc only receives the printed address (snprintf further down); no reader of it is visible here.
166 char loc[PTR_STRLEN];
167 shared_metadata_t meta;
// Grow the object first: only when it is currently smaller than the requested size.
169 if(size > shm_size(fd) && (ftruncate(fd, static_cast<off_t>(size)) < 0)) {
170 xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
173 void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
174 if(mem == MAP_FAILED) {
// Message of the mmap failure report (the reporting call itself is not visible here).
176 "Failed to map fd %d with size %zu: %s\n"
177 "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n"
178 "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n"
179 "Please see http://simgrid.gforge.inria.fr/simgrid/latest/doc/html/options.html#options_virt for more info.",
180 fd, size, strerror(errno));
182 snprintf(loc, PTR_STRLEN, "%p", mem);
// Remember the mapping so that smpi_shared_free can find its size and owner.
185 allocs_metadata[mem] = meta;
186 XBT_DEBUG("MMAP %zu to %p", size, mem);
// Releases the benchmarking state of this file: empties allocs_metadata and frees the samples and
// calls dictionaries.
// NOTE(review): other short statements of this function may be missing from this view.
191 void smpi_bench_destroy()
194 allocs_metadata.clear();
195 xbt_dict_free(&samples);
196 xbt_dict_free(&calls);
// C-linkage wrapper around smpi_execute_flops taking its argument by pointer.
// NOTE(review): trailing underscore + pointer argument looks like a Fortran binding - confirm.
199 extern "C" XBT_PUBLIC(void) smpi_execute_flops_(double *flops);
200 void smpi_execute_flops_(double *flops)
202 smpi_execute_flops(*flops);
// C-linkage wrapper around smpi_execute taking its argument by pointer.
// NOTE(review): trailing underscore + pointer argument looks like a Fortran binding - confirm.
205 extern "C" XBT_PUBLIC(void) smpi_execute_(double *duration);
206 void smpi_execute_(double *duration)
208 smpi_execute(*duration);
// Injects a computation of the given amount of flops in the simulation and waits for its completion.
211 void smpi_execute_flops(double flops) {
212 XBT_DEBUG("Handle real computation time: %f flops", flops);
213 smx_activity_t action = simcall_execution_start("computation", flops, 1, 0);
// Tag the activity with the category returned by the SMPI tracing layer.
214 simcall_set_category (action, TRACE_internal_smpi_get_category());
215 simcall_execution_wait(action);
// Back from the simulation kernel: make sure the data segment of the calling process is the mapped one.
216 smpi_switch_data_segment(smpi_process_index());
// Injects a computation lasting duration (as measured on the real machine) in the simulation.
// Durations below smpi_cpu_threshold are only logged. Otherwise the duration is converted to
// flops with smpi_host_speed and executed between the computing_in / computing_out trace events.
219 void smpi_execute(double duration)
221 if (duration >= smpi_cpu_threshold) {
222 XBT_DEBUG("Sleep for %g to handle real computation time", duration);
223 double flops = duration * smpi_host_speed;
224 int rank = smpi_process_index();
// Extra data attached to the computing_in trace event: event type and amount of flops.
225 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
226 extra->type=TRACING_COMPUTING;
227 extra->comp_size=flops;
228 TRACE_smpi_computing_in(rank, extra);
230 smpi_execute_flops(flops);
232 TRACE_smpi_computing_out(rank);
// Below the threshold: nothing is simulated (the end of this statement is not visible in this view).
235 XBT_DEBUG("Real computation took %g while option smpi/cpu_threshold is set to %g => ignore it", duration,
// Starts measuring the computation time of the calling process.
// Visible steps: switch to the data segment of the process when privatization is enabled, start
// the PAPI counters when the smpi/papi-events option is not empty, then start the per-process thread timer.
// NOTE(review): the statement controlled by the model-checker test and any preprocessor guard
// around the PAPI part are not visible in this view.
240 void smpi_bench_begin()
242 if (smpi_privatize_global_variables) {
243 smpi_switch_data_segment(smpi_process_index());
// Model checking or record/replay active (presumably no benchmarking then - consequent not visible).
246 if (MC_is_active() || MC_record_replay_is_active())
250 if (xbt_cfg_get_string("smpi/papi-events")[0] != '\0') {
251 int event_set = smpi_process_papi_event_set();
252 // PAPI_start sets everything to 0! See man(3) PAPI_start
253 if (PAPI_LOW_LEVEL_INITED == PAPI_is_initialized()) {
254 if (PAPI_start(event_set) != PAPI_OK) {
255 // TODO This needs some proper handling.
256 XBT_CRITICAL("Could not start PAPI counters.\n");
262 xbt_os_threadtimer_start(smpi_process_timer());
// Stops the measurement started by smpi_bench_begin and injects the measured time in the simulation.
// Visible steps:
//  - stop the per-process timer;
//  - when smpi/papi-events is set, stop the PAPI counters and add the read values to the per-process counters;
//  - abort if the process is inside a sampling block (nested benchmarks are not supported);
//  - when smpi/comp-adjustment-file is set, look up a speedup factor for the current call location;
//  - when smpi/simulate-computation is true, execute elapsed / speedup through smpi_execute;
//  - when PAPI and tracing are both on, emit one trace variable per counter;
//  - add the elapsed time to smpi_total_benched_time.
// NOTE(review): short lines (the declaration of speedup, the consequent of the model-checker test,
// preprocessor guards, braces) are not visible in this view.
265 void smpi_bench_end()
267 if (MC_is_active() || MC_record_replay_is_active())
271 xbt_os_timer_t timer = smpi_process_timer();
272 xbt_os_threadtimer_stop(timer);
276 * An MPI function has been called and now is the right time to update
277 * our PAPI counters for this process.
279 if (xbt_cfg_get_string("smpi/papi-events")[0] != '\0') {
280 papi_counter_t& counter_data = smpi_process_papi_counters();
281 int event_set = smpi_process_papi_event_set();
// One slot per configured counter, filled by PAPI_stop.
282 std::vector<long long> event_values = std::vector<long long>(counter_data.size());
284 if (PAPI_stop(event_set, &event_values[0]) != PAPI_OK) { // Error
285 XBT_CRITICAL("Could not stop PAPI counters.\n");
// Accumulate the values read during this computation phase.
288 for (unsigned int i = 0; i < counter_data.size(); i++) {
289 counter_data[i].second += event_values[i];
290 // XBT_DEBUG("[%i] PAPI: Counter %s: Value is now %lli (got increment by %lli\n", smpi_process_index(),
291 // counter_data[i].first.c_str(), counter_data[i].second, event_values[i]);
297 if (smpi_process_get_sampling()) {
298 XBT_CRITICAL("Cannot do recursive benchmarks.");
299 XBT_CRITICAL("Are you trying to make a call to MPI within a SMPI_SAMPLE_ block?");
300 xbt_backtrace_display_current();
301 xbt_die("Aborting.");
304 if (xbt_cfg_get_string("smpi/comp-adjustment-file")[0] != '\0') { // Maybe we need to artificially speed up or slow
305 // down our computation based on our statistical analysis.
307 smpi_trace_call_location_t* loc = smpi_process_get_call_location();
308 std::string key = loc->get_composed_key();
309 std::unordered_map<std::string, double>::const_iterator it = location2speedup.find(key);
310 if (it != location2speedup.end()) {
311 speedup = it->second;
315 // Simulate the benchmarked computation unless disabled via command-line argument
316 if (xbt_cfg_get_boolean("smpi/simulate-computation")) {
317 smpi_execute(xbt_os_timer_elapsed(timer)/speedup);
321 if (xbt_cfg_get_string("smpi/papi-events")[0] != '\0' && TRACE_smpi_is_enabled()) {
322 char container_name[INSTR_DEFAULT_STR_SIZE];
323 smpi_container(smpi_process_index(), container_name, INSTR_DEFAULT_STR_SIZE);
324 container_t container = PJ_container_get(container_name);
325 papi_counter_t& counter_data = smpi_process_papi_counters();
// pair.first is the counter name, pair.second its accumulated value.
327 for (auto& pair : counter_data) {
328 new_pajeSetVariable(surf_get_clock(), container,
329 PJ_type_get(/* countername */ pair.first.c_str(), container->type), pair.second);
334 smpi_total_benched_time += xbt_os_timer_elapsed(timer);
337 /* Private sleep function used by smpi_sleep() and smpi_usleep() */
// Makes the calling simulated process sleep for secs seconds of simulated time, between the
// sleeping_in / sleeping_out trace events.
// NOTE(review): the first and last statements and the returned value are not visible in this view.
338 static unsigned int private_sleep(double secs)
342 XBT_DEBUG("Sleep for: %lf secs", secs);
343 int rank = MPI_COMM_WORLD->rank();
// Extra data attached to the sleeping_in trace event: event type and duration.
344 instr_extra_data extra = xbt_new0(s_instr_extra_data_t,1);
345 extra->type=TRACING_SLEEPING;
346 extra->sleep_duration=secs;
347 TRACE_smpi_sleeping_in(rank, extra);
349 simcall_process_sleep(secs);
351 TRACE_smpi_sleeping_out(rank);
// Simulated counterpart of sleep: sleeps secs seconds of simulated time and returns what private_sleep returns.
357 unsigned int smpi_sleep(unsigned int secs)
359 return private_sleep(static_cast<double>(secs));
// Simulated counterpart of usleep: converts usecs microseconds to seconds and delegates to private_sleep.
362 int smpi_usleep(useconds_t usecs)
364 return static_cast<int>(private_sleep(static_cast<double>(usecs) / 1000000.0));
// Simulated counterpart of nanosleep, only compiled when _POSIX_TIMERS > 0.
// The requested time tp (seconds + nanoseconds) is converted to seconds and given to private_sleep.
// The second argument t is not used in the visible code.
367 #if _POSIX_TIMERS > 0
368 int smpi_nanosleep(const struct timespec *tp, struct timespec * t)
370 return static_cast<int>(private_sleep(static_cast<double>(tp->tv_sec + tp->tv_nsec / 1000000000.0)));
// Simulated counterpart of gettimeofday: fills tv with the simulated clock (SIMIX_get_clock),
// split into whole seconds and microseconds. tz is not used in the visible code.
// NOTE(review): the two tv_usec assignments differ only by the cast type and are presumably
// alternatives selected by a preprocessor conditional that is not visible in this view.
374 int smpi_gettimeofday(struct timeval *tv, void* tz)
377 double now = SIMIX_get_clock();
379 tv->tv_sec = static_cast<time_t>(now);
381 tv->tv_usec = static_cast<useconds_t>((now - tv->tv_sec) * 1e6);
383 tv->tv_usec = static_cast<suseconds_t>((now - tv->tv_sec) * 1e6);
// Simulated counterpart of clock_gettime, only compiled when _POSIX_TIMERS > 0.
// Fills tp with the simulated clock split into whole seconds and nanoseconds.
390 #if _POSIX_TIMERS > 0
391 int smpi_clock_gettime(clockid_t clk_id, struct timespec *tp)
393 //there is only one time in SMPI, so clk_id is ignored.
395 double now = SIMIX_get_clock();
397 tp->tv_sec = static_cast<time_t>(now);
398 tp->tv_nsec = static_cast<long int>((now - tp->tv_sec) * 1e9);
// Returns the number of timestamp ticks per simulated second, computed as 1 / sg_surf_precision
// and truncated to an integer.
405 extern double sg_surf_precision;
406 unsigned long long smpi_rastro_resolution ()
409 double resolution = (1/sg_surf_precision);
411 return static_cast<unsigned long long>(resolution);
// Returns the simulated clock expressed in ticks of smpi_rastro_resolution():
// whole seconds times the resolution, plus the fractional part scaled by the resolution.
414 unsigned long long smpi_rastro_timestamp ()
417 double now = SIMIX_get_clock();
// sec: whole seconds; pre: sub-second part converted to ticks (truncated).
419 unsigned long long sec = (unsigned long long)now;
420 unsigned long long pre = (now - sec) * smpi_rastro_resolution();
422 return static_cast<unsigned long long>(sec) * smpi_rastro_resolution() + pre;
425 /* ****************************** Functions related to the SMPI_SAMPLE_ macros ************************************/
// Fields of the statistics record kept for each sampled code block (used as local_data_t by the
// smpi_sample_ functions of this file).
// NOTE(review): the enclosing struct declaration is not visible in this view.
427 double threshold; /* maximal stderr requested (if positive) */
428 double relstderr; /* observed stderr so far */
429 double mean; /* mean of benched times, to be used if the block is disabled */
430 double sum; /* sum of benched times (to compute the mean and stderr) */
431 double sum_pow2; /* sum of the square of the benched times (to compute the stderr) */
432 int iters; /* amount of requested iterations */
433 int count; /* amount of iterations done so far */
434 int benching; /* 1: we are benchmarking; 0: we have enough data, no bench anymore */
// Builds the key identifying a sampled block in the samples dictionary: either file:line, or
// file:line:process_index. The returned string is produced by bprintf.
// NOTE(review): the test choosing between the two forms is not visible in this view; presumably
// the shorter, process-independent key is used when global is set - confirm.
437 static char *sample_location(int global, const char *file, int line) {
439 return bprintf("%s:%d", file, line);
441 return bprintf("%s:%d:%d", file, line, smpi_process_index());
// Tells whether enough measurements were collected for this sampled block.
// Starts from count >= iters; when a positive threshold was requested, the answer is additionally
// forced to 0 while the relative standard error is above that threshold.
// NOTE(review): the guard of the first forced 0 (not enough data) and the return statement are
// not visible in this view.
445 static int sample_enough_benchs(local_data_t *data) {
446 int res = data->count >= data->iters;
447 if (data->threshold>0.0) {
449 res = 0; // not enough data
450 if (data->relstderr > data->threshold)
451 res = 0; // stderr too high yet
453 XBT_DEBUG("%s (count:%d iter:%d stderr:%f thres:%f mean:%fs)",
454 (res?"enough benchs":"need more data"), data->count, data->iters, data->relstderr, data->threshold, data->mean);
// Entry of a sampled block: closes the current benchmark, flags the process as sampling, and
// creates or re-examines the statistics record of this location.
//   global    - forwarded to sample_location to build the key
//   file/line - source location of the sampled block
//   iters     - requested amount of iterations
//   threshold - requested maximal standard error
// NOTE(review): short lines (the tests separating first visit from later visits, some field
// initialisations, braces) are not visible in this view.
458 void smpi_sample_1(int global, const char *file, int line, int iters, double threshold)
460 char *loc = sample_location(global, file, line);
462 smpi_bench_end(); /* Take time from previous, unrelated computation into account */
463 smpi_process_set_sampling(1);
// The dictionary is created lazily; its values are released with free.
465 if (samples==nullptr)
466 samples = xbt_dict_new_homogeneous(free);
468 local_data_t *data = static_cast<local_data_t *>(xbt_dict_get_or_null(samples, loc));
// First visit of this location (presumably guarded by a null test on data): create the record.
470 xbt_assert(threshold>0 || iters>0,
471 "You should provide either a positive amount of iterations to bench, or a positive maximal stderr (or both)");
472 data = static_cast<local_data_t *>( xbt_new(local_data_t, 1));
475 data->sum_pow2 = 0.0;
477 data->threshold = threshold;
478 data->benching = 1; // If we have no data, we need at least one
480 xbt_dict_set(samples, loc, data, nullptr);
481 XBT_DEBUG("XXXXX First time ever on benched nest %s.",loc);
// Later visits: the settings must match those recorded on the first visit.
483 if (data->iters != iters || data->threshold != threshold) {
484 XBT_ERROR("Asked to bench block %s with different settings %d, %f is not %d, %f. "
485 "How did you manage to give two numbers at the same line??",
486 loc, data->iters, data->threshold, iters, threshold);
490 // if we already have some data, check whether sample_2 should get one more bench or whether it should emulate
491 // the computation instead
492 data->benching = (sample_enough_benchs(data) == 0);
493 XBT_DEBUG("XXXX Re-entering the benched nest %s. %s", loc,
494 (data->benching ? "more benching needed" : "we have enough data, skip computes"));
// Loop test of a sampled block: decides whether the block body must be run (and measured) once more.
// When the record is no longer benching, the recorded mean is injected with smpi_execute, the
// sampling flag of the process is cleared and res is set to 0.
// NOTE(review): the declaration of res, the statements of the benching branch and the return are
// not visible in this view.
499 int smpi_sample_2(int global, const char *file, int line)
501 char *loc = sample_location(global, file, line);
504 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
505 local_data_t *data = static_cast<local_data_t *>(xbt_dict_get(samples, loc));
506 XBT_DEBUG("sample2 %s",loc);
509 if (data->benching==1) {
510 // we need to run a new bench
511 XBT_DEBUG("benchmarking: count:%d iter:%d stderr:%f thres:%f; mean:%f",
512 data->count, data->iters, data->relstderr, data->threshold, data->mean);
515 // Enough data, no more bench (either we got enough data from previous visits to this benched nest, or we just
516 //ran one bench and need to bail out now that our job is done). Just sleep instead
517 XBT_DEBUG("No benchmark (either no need, or just ran one): count >= iter (%d >= %d) or stderr<thres (%f<=%f)."
518 " apply the %fs delay instead",
519 data->count, data->iters, data->relstderr, data->threshold, data->mean);
520 smpi_execute(data->mean);
521 smpi_process_set_sampling(0);
522 res = 0; // prepare to capture future, unrelated computations
// End of one iteration of a sampled block: stops the timer and folds the measured time into the
// statistics of this location (sum of squares, mean, relative standard error).
// NOTE(review): short lines (the updates of count and sum, the statement run when benching is 0,
// the final update of benching) are not visible in this view.
528 void smpi_sample_3(int global, const char *file, int line)
530 char *loc = sample_location(global, file, line);
532 xbt_assert(samples, "Y U NO use SMPI_SAMPLE_* macros? Stop messing directly with smpi_sample_* functions!");
533 local_data_t *data = static_cast<local_data_t *>(xbt_dict_get(samples, loc));
534 XBT_DEBUG("sample3 %s",loc);
537 if (data->benching==0)
540 // ok, benchmarking this loop is over
541 xbt_os_threadtimer_stop(smpi_process_timer());
545 double sample = xbt_os_timer_elapsed(smpi_process_timer());
547 data->sum_pow2 += sample * sample;
548 double n = static_cast<double>(data->count);
549 data->mean = data->sum / n;
// Standard error of the mean, sqrt(variance / n), expressed relatively to the mean.
// NOTE(review): divides by the mean, so a zero mean gives a non-finite value; rounding may also
// make the variance estimate slightly negative - confirm that callers tolerate this.
550 data->relstderr = sqrt((data->sum_pow2 / n - data->mean * data->mean) / n) / data->mean;
551 if (sample_enough_benchs(data)==0) {
552 data->mean = sample; // Still in benching process; We want sample_2 to simulate the exact time of this loop
553 // occurrence before leaving, not the mean over the history
555 XBT_DEBUG("Average mean after %d steps is %f, relative standard error is %f (sample was %f)", data->count,
556 data->mean, data->relstderr, sample);
558 // That's enough for now, prevent sample_2 to run the same code over and over
// Descriptor of the backing file folded under every shmalloc_global allocation; -1 until created.
563 static int smpi_shared_malloc_bogusfile = -1;
// Size of one folded block (1 MiB).
564 static unsigned long smpi_shared_malloc_blocksize = 1UL << 20;
// Allocates size bytes according to smpi_cfg_shared_malloc:
//  - shmalloc_local with size > 0: one POSIX shared memory object per calling source location;
//    every call from the same (file, line) maps the same object;
//  - shmalloc_global: an anonymous reservation of size bytes whose pages are then remapped, block
//    by block, onto offset 0 of one small temporary file, so all blocks alias the same content;
//  - the xbt_malloc path is presumably the fallback for the remaining cases.
// NOTE(review): many short lines (declarations of mem and i, branch guards, cleanup of temporaries,
// closing braces, return statement) are not visible in this view.
565 void *smpi_shared_malloc(size_t size, const char *file, int line)
568 if (size > 0 && smpi_cfg_shared_malloc == shmalloc_local) {
569 smpi_source_location loc(file, line);
// res.first is the entry for this location (new or already present).
570 auto res = allocs.insert(std::make_pair(loc, shared_data_t()));
571 auto data = res.first;
573 // First allocation from this location: the shared object must be created (it is opened with O_EXCL
// and its count is set to 1 below). NOTE(review): the guarding test is not visible in this view.
574 // Generate a shared memory name from the address of the shared_data:
575 char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on Mac OS X (shm_open raises ENAMETOOLONG otherwise)
576 snprintf(shmname, 31, "/shmalloc%p", &*data);
577 int fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
// Two fatal reports for a failed shm_open (the tests selecting between them are not visible).
580 xbt_die("Please cleanup /dev/shm/%s", shmname);
582 xbt_die("An unhandled error occurred while opening %s. shm_open: %s", shmname, strerror(errno));
584 data->second.fd = fd;
585 data->second.count = 1;
586 mem = shm_map(fd, size, &*data);
// The name is removed right away; the object stays reachable through fd.
587 if (shm_unlink(shmname) < 0) {
588 XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno));
590 XBT_DEBUG("Mapping %s at %p through %d", shmname, mem, fd);
// Location already known: map the existing object once more and count the new user.
592 mem = shm_map(data->second.fd, size, &*data);
593 data->second.count++;
595 XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, &*data);
597 } else if (smpi_cfg_shared_malloc == shmalloc_global) {
598 /* First reserve memory area */
599 mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
601 xbt_assert(mem != MAP_FAILED, "Failed to allocate %luMiB of memory. Run \"sysctl vm.overcommit_memory=1\" as root "
602 "to allow big allocations.\n",
603 (unsigned long)(size >> 20));
605 /* Create bogus file if not done already */
606 if (smpi_shared_malloc_bogusfile == -1) {
607 /* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it.
608 * It still exists in memory but not in the file system (thus it cannot be leaked). */
609 char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX");
610 smpi_shared_malloc_bogusfile = mkstemp(name);
// One zero-filled block is written so that the file is exactly one block long.
613 char* dumb = (char*)calloc(1, smpi_shared_malloc_blocksize);
614 ssize_t err = write(smpi_shared_malloc_bogusfile, dumb, smpi_shared_malloc_blocksize);
616 xbt_die("Could not write bogus file for shared malloc");
620 /* Map the bogus file in place of the anonymous memory */
// Full blocks first: each one is mapped at a fixed address over the reservation, always at file offset 0.
622 for (i = 0; i < size / smpi_shared_malloc_blocksize; i++) {
623 void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
624 void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
625 smpi_shared_malloc_bogusfile, 0);
626 xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
627 "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
// Then the remainder, when size is not a multiple of the block size.
630 if (size % smpi_shared_malloc_blocksize) {
631 void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
632 void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE,
633 MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0);
634 xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
635 "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
640 mem = xbt_malloc(size);
641 XBT_DEBUG("Classic malloc %zu in %p", size, mem);
// Releases memory obtained from smpi_shared_malloc, according to smpi_cfg_shared_malloc.
// For shmalloc_local, the mapping is looked up in allocs_metadata, unmapped, and the entry of
// allocs is erased once its count is no longer positive.
// NOTE(review): short lines (the count decrement, the close of the descriptor, the removal from
// allocs_metadata, the classic free, returns and braces) are not visible in this view.
647 void smpi_shared_free(void *ptr)
649 if (smpi_cfg_shared_malloc == shmalloc_local) {
// loc only receives the printed address; no reader of it is visible here.
650 char loc[PTR_STRLEN];
651 snprintf(loc, PTR_STRLEN, "%p", ptr);
652 auto meta = allocs_metadata.find(ptr);
653 if (meta == allocs_metadata.end()) {
654 XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr);
// data: the shared segment (descriptor and user count) that this mapping belongs to.
657 shared_data_t* data = &meta->second.data->second;
658 if (munmap(ptr, meta->second.size) < 0) {
659 XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
662 if (data->count <= 0) {
664 allocs.erase(allocs.find(meta->second.data->first));
665 XBT_DEBUG("Shared free - with removal - of %p", ptr);
667 XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
670 } else if (smpi_cfg_shared_malloc == shmalloc_global) {
// NOTE(review): the allocation size is not recorded for this strategy, hence the length 0. The Linux
// munmap manual lists EINVAL for a zero length, so this call presumably unmaps nothing and its
// result is ignored - confirm and consider recording the size at allocation time.
671 munmap(ptr, 0); // the POSIX says that I should not give 0 as a length, but it seems to work OK
674 XBT_DEBUG("Classic free of %p", ptr);
// Tells whether data was recorded by smpi_shared_set_call for this (func, input) pair.
// The lookup key is the string func:input; xbt_dict_get throws when the key is absent, and the
// visible handler distinguishes not_found_error from other failures.
// NOTE(review): the try/catch structure, the result variable and the return are not visible in this view.
680 int smpi_shared_known_call(const char* func, const char* input)
682 char* loc = bprintf("%s:%s", func, input);
// No per-value destructor: the dictionary does not own the stored data.
685 if (calls==nullptr) {
686 calls = xbt_dict_new_homogeneous(nullptr);
689 xbt_dict_get(calls, loc); /* Succeed or throw */
695 if (ex.category != not_found_error)
// Retrieves the data recorded by smpi_shared_set_call for this (func, input) pair, using the key
// func:input. The calls dictionary is created on first use.
// NOTE(review): the release of loc and the return statement are not visible in this view.
705 void* smpi_shared_get_call(const char* func, const char* input) {
706 char* loc = bprintf("%s:%s", func, input);
708 if (calls == nullptr)
709 calls = xbt_dict_new_homogeneous(nullptr);
710 void* data = xbt_dict_get(calls, loc);
// Records data for this (func, input) pair under the key func:input. The calls dictionary is
// created on first use, without a destructor for its values.
// NOTE(review): the release of loc and the return statement are not visible in this view.
715 void* smpi_shared_set_call(const char* func, const char* input, void* data) {
716 char* loc = bprintf("%s:%s", func, input);
718 if (calls == nullptr)
719 calls = xbt_dict_new_homogeneous(nullptr);
720 xbt_dict_set(calls, loc, data, nullptr);
726 /** Map a given SMPI privatization segment (make a SMPI process active) */
// dest is the index of the process whose segment must be mapped. The real switch is delegated to
// smpi_really_switch_data_segment; the test below detects that dest is already the loaded page
// (its consequent is not visible in this view).
727 void smpi_switch_data_segment(int dest) {
728 if (smpi_loaded_page == dest)//no need to switch, we've already loaded the one we want
732 smpi_really_switch_data_segment(dest);
735 /** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active
737 * When doing a state restoration, the state of the restored variables might not be consistent with the state of the
738 * virtual memory. In this case, we have to change the data segment.
// dest is the index of the process whose segment must be mapped.
// NOTE(review): short lines (early return, declaration of tmp, braces, end of the preprocessor
// conditional) are not visible in this view.
740 void smpi_really_switch_data_segment(int dest)
742 if(smpi_size_data_exe == 0)//no need to switch
745 #if HAVE_PRIVATIZATION
746 if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here
747 for (int i=0; i< smpi_process_count(); i++){
748 memcpy(smpi_privatisation_regions[i].address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
752 // FIXME, cross-process support (mmap across process when necessary)
753 int current = smpi_privatisation_regions[dest].file_descriptor;
754 XBT_DEBUG("Switching data frame to the one of process %d", dest);
// Map the file of process dest at the fixed address of the data segment of the executable.
756 mmap(TOPAGE(smpi_start_data_exe), smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0);
757 if (tmp != TOPAGE(smpi_start_data_exe))
758 xbt_die("Couldn't map the new region");
759 smpi_loaded_page = dest;
/** Tell whether `file` is the path of a file backing one of the SMPI privatization segments.
 *
 *  The segments are created by smpi_initialize_global_memory_segments() through
 *  shm_open("/smpi-buffer-<hex>"); on Linux such objects live under /dev/shm, so their path starts
 *  with "/dev/shm/smpi-buffer-". The historical "/dev/shm/my-buffer-" prefix, which was the only
 *  one recognized before and no longer matches the created files, is still accepted for
 *  backward compatibility.
 *
 *  @param file path to test (NUL-terminated); a null pointer is reported as "not a privatization file"
 *  @return 1 if the path starts with one of the known prefixes, 0 otherwise
 */
int smpi_is_privatisation_file(char* file)
{
  static const char* const known_prefixes[] = {"/dev/shm/smpi-buffer-", "/dev/shm/my-buffer-"};

  if (file == nullptr)
    return 0;

  for (const char* prefix : known_prefixes) {
    if (strncmp(prefix, file, std::strlen(prefix)) == 0)
      return 1;
  }
  return 0;
}
// Prepares the privatization of global variables: for each simulated process, creates a shared
// memory object of the size of the data segment of the executable, maps it, fills it with the
// current content of that segment, and records its descriptor and address in
// smpi_privatisation_regions.
// Privatization is disabled when the platform does not support it or when the data segment is empty.
// NOTE(review): short lines (declarations of path, file_descriptor and status, the opening of the
// retry loop, tests on status, early returns, braces, end of preprocessor conditionals) are not
// visible in this view.
768 void smpi_initialize_global_memory_segments()
771 #if !HAVE_PRIVATIZATION
772 smpi_privatize_global_variables=false;
773 xbt_die("You are trying to use privatization on a system that does not support it. Don't.");
// Presumably fills smpi_start_data_exe and smpi_size_data_exe, which are logged right after.
777 smpi_get_executable_global_size();
779 XBT_DEBUG ("bss+data segment found : size %d starting at %p", smpi_size_data_exe, smpi_start_data_exe );
781 if (smpi_size_data_exe == 0){//no need to switch
782 smpi_privatize_global_variables=false;
786 smpi_privatisation_regions = static_cast<smpi_privatisation_region_t>(
787 xbt_malloc(smpi_process_count() * sizeof(struct s_smpi_privatisation_region)));
789 for (int i=0; i< smpi_process_count(); i++){
790 // create SIMIX_process_count() mappings of this size with the same data inside
792 void* address = nullptr;
// Draw random object names until shm_open no longer fails with EEXIST (O_EXCL refuses existing names).
797 snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffff);
798 file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
799 } while (file_descriptor == -1 && errno == EEXIST);
800 if (file_descriptor < 0) {
// Running out of file descriptors gets a detailed explanation; other errors a short report.
801 if (errno == EMFILE) {
802 xbt_die("Impossible to create temporary file for memory mapping: %s\n\
803 The open() system call failed with the EMFILE error code (too many files). \n\n\
804 This means that you reached the system limits concerning the amount of files per process. \
805 This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
806 Don't panic -- you should simply increase your system limits and try again. \n\n\
807 First, check what your limits are:\n\
808 cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
809 ulimit -Hn # Gives you the per process hard limit\n\
810 ulimit -Sn # Gives you the per process soft limit\n\
811 cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
812 If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
813 Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
816 xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno));
// Give the object the size of the data segment.
819 status = ftruncate(file_descriptor, smpi_size_data_exe);
821 xbt_die("Impossible to set the size of the temporary file for memory mapping");
823 /* Ask for a free region */
824 address = mmap(nullptr, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0);
825 if (address == MAP_FAILED)
826 xbt_die("Couldn't find a free region for memory mapping");
// The name is removed right away; the object stays reachable through its descriptor and mapping.
828 status = shm_unlink(path);
830 xbt_die("Impossible to unlink temporary file for memory mapping");
832 // initialize the values
833 memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
835 // store the address of the mapping for further switches
836 smpi_privatisation_regions[i].file_descriptor = file_descriptor;
837 smpi_privatisation_regions[i].address = address;
// Releases what smpi_initialize_global_memory_segments created: unmaps the region of every process,
// closes its descriptor, then frees the regions array. Nothing is done when the data segment is
// empty (the consequent of that test is not visible in this view).
842 void smpi_destroy_global_memory_segments(){
843 if (smpi_size_data_exe == 0)//no need to switch
845 #if HAVE_PRIVATIZATION
846 for (int i=0; i< smpi_process_count(); i++) {
// A failing munmap is only reported; the descriptor is closed in any case.
847 if (munmap(smpi_privatisation_regions[i].address, smpi_size_data_exe) < 0)
848 XBT_WARN("Unmapping of fd %d failed: %s", smpi_privatisation_regions[i].file_descriptor, strerror(errno));
849 close(smpi_privatisation_regions[i].file_descriptor);
851 xbt_free(smpi_privatisation_regions);
855 extern "C" { /** These functions will be called from the user code **/
// Returns the call location record of the calling process (thin wrapper around
// smpi_process_get_call_location).
856 smpi_trace_call_location_t* smpi_trace_get_call_location() {
857 return smpi_process_get_call_location();
// Records (file, line) as the current call location of the calling process, after saving the
// former location in the previous_ fields. The file pointer is stored as is, not copied.
860 void smpi_trace_set_call_location(const char* file, const int line) {
861 smpi_trace_call_location_t* loc = smpi_process_get_call_location();
863 loc->previous_filename = loc->filename;
864 loc->previous_linenumber = loc->linenumber;
865 loc->filename = file;
866 loc->linenumber = line;
870 * Required for Fortran bindings
// Same as smpi_trace_set_call_location, with the line number passed by pointer.
872 void smpi_trace_set_call_location_(const char* file, int* line) {
873 smpi_trace_set_call_location(file, *line);
877 * Required for Fortran if -fsecond-underscore is activated
// Same as smpi_trace_set_call_location, with the line number passed by pointer.
879 void smpi_trace_set_call_location__(const char* file, int* line) {
880 smpi_trace_set_call_location(file, *line);