1 /* Copyright (c) 2015-2019. The SimGrid Team. All rights reserved. */
3 /* This program is free software; you can redistribute it and/or modify it
4 * under the terms of the license (GNU LGPL) which comes with this package. */
16 #include <sys/types.h>
23 #include "src/internal_config.h"
24 #include "src/xbt/memory_map.hpp"
26 #include "private.hpp"
27 #include "src/smpi/include/smpi_actor.hpp"
29 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_memory, smpi, "Memory layout support for SMPI");
31 int smpi_loaded_page = -1;
32 char* smpi_data_exe_start = nullptr;
33 int smpi_data_exe_size = 0;
34 SmpiPrivStrategies smpi_privatize_global_variables;
35 static void* smpi_data_exe_copy;
37 // Initialized by smpi_prepare_global_memory_segment().
38 static std::vector<simgrid::xbt::VmMap> initial_vm_map;
40 // We keep a copy of all the privatization regions: We can then delete everything easily by iterating over this
41 // collection and nothing can be leaked. We could also iterate over all actors but we would have to be diligent when two
42 // actors use the same privatization region (so, smart pointers would have to be used etc.)
43 // Use a std::deque so that pointers remain valid after push_back().
44 static std::deque<s_smpi_privatization_region_t> smpi_privatization_regions;
46 static constexpr int PROT_RWX = PROT_READ | PROT_WRITE | PROT_EXEC;
47 static constexpr int PROT_RW = PROT_READ | PROT_WRITE;
49 /** Take a snapshot of the process' memory map.
51 void smpi_prepare_global_memory_segment()
53 initial_vm_map = simgrid::xbt::get_memory_map(getpid());
56 static void smpi_get_executable_global_size()
58 char buffer[PATH_MAX];
59 char* full_name = realpath(xbt_binary_name, buffer);
60 if (full_name == nullptr)
61 xbt_die("Could not resolve binary file name");
63 std::vector<simgrid::xbt::VmMap> map = simgrid::xbt::get_memory_map(getpid());
64 for (auto i = map.begin(); i != map.end() ; ++i) {
65 // TODO, In practice, this implementation would not detect a completely
66 // anonymous data segment. This does not happen in practice, however.
68 // File backed RW entry:
69 if (i->pathname == full_name && (i->prot & PROT_RWX) == PROT_RW) {
70 smpi_data_exe_start = (char*)i->start_addr;
71 smpi_data_exe_size = i->end_addr - i->start_addr;
72 /* Here we are making the assumption that a suitable empty region
73 following the rw- area is the end of the data segment. It would
74 be better to check with the size of the data segment. */
76 if (i != map.end() && i->pathname.empty() && (i->prot & PROT_RWX) == PROT_RW &&
77 (char*)i->start_addr == smpi_data_exe_start + smpi_data_exe_size) {
78 // Only count the portion of this region not present in the initial map.
79 auto found = std::find_if(initial_vm_map.begin(), initial_vm_map.end(), [&i](const simgrid::xbt::VmMap& m) {
80 return i->start_addr <= m.start_addr && m.start_addr < i->end_addr;
82 auto end_addr = (found == initial_vm_map.end() ? i->end_addr : found->start_addr);
83 smpi_data_exe_size = (char*)end_addr - smpi_data_exe_start;
88 xbt_die("Did not find my data segment.");
#if HAVE_SANITIZER_ADDRESS
#include <sanitizer/asan_interface.h>
/** memcpy() variant that skips the bytes of the source that AddressSanitizer reports as poisoned.
 *
 *  The source range is walked as an alternation of poisoned and readable runs: poisoned bytes are skipped (the
 *  corresponding destination bytes are left untouched), readable runs are copied with memcpy().
 *
 *  @param dest destination buffer, at least n bytes long
 *  @param src  source buffer, n bytes long, possibly partly poisoned
 *  @param n    number of bytes to process
 *  @return dest, as memcpy() does
 */
static void* asan_safe_memcpy(void* dest, void* src, size_t n)
{
  char* psrc  = static_cast<char*>(src);
  char* pdest = static_cast<char*>(dest);
  for (size_t i = 0; i < n;) {
    // Skip the poisoned bytes, one at a time.
    while (i < n && __asan_address_is_poisoned(psrc + i))
      ++i;
    if (i < n) {
      // Find where the readable run ends: at the next poisoned byte, or at the end of the range.
      char* p  = static_cast<char*>(__asan_region_is_poisoned(psrc + i, n - i));
      size_t j = p ? (p - psrc) : n;
      memcpy(pdest + i, psrc + i, j - i);
      i = j; // resume after the copied run, otherwise the loop would never progress
    }
  }
  return dest;
}
#else
// Without AddressSanitizer, every byte is readable: a plain memcpy() does the job.
#define asan_safe_memcpy(dest, src, n) memcpy(dest, src, n)
#endif
114 /** Map a given SMPI privatization segment (make a SMPI process active) */
115 void smpi_switch_data_segment(simgrid::s4u::ActorPtr actor)
117 if (smpi_loaded_page == actor->get_pid()) // no need to switch, we've already loaded the one we want
121 smpi_really_switch_data_segment(actor);
124 /** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active
126 * When doing a state restoration, the state of the restored variables might not be consistent with the state of the
127 * virtual memory. In this case, we to change the data segment.
129 void smpi_really_switch_data_segment(simgrid::s4u::ActorPtr actor)
131 if (smpi_data_exe_size == 0) // no need to switch
134 #if HAVE_PRIVATIZATION
135 // FIXME, cross-process support (mmap across process when necessary)
136 XBT_DEBUG("Switching data frame to the one of process %ld", actor->get_pid());
137 simgrid::smpi::ActorExt* process = smpi_process_remote(actor);
138 int current = process->privatized_region()->file_descriptor;
139 void* tmp = mmap(TOPAGE(smpi_data_exe_start), smpi_data_exe_size, PROT_RW, MAP_FIXED | MAP_SHARED, current, 0);
140 if (tmp != TOPAGE(smpi_data_exe_start))
141 xbt_die("Couldn't map the new region (errno %d): %s", errno, strerror(errno));
142 smpi_loaded_page = actor->get_pid();
147 * @brief Makes a backup of the segment in memory that stores the global variables of a process.
148 * This backup is then used to initialize the global variables for every single
149 * process that is added, regardless of the progress of the simulation.
151 void smpi_backup_global_memory_segment()
153 #if HAVE_PRIVATIZATION
154 smpi_get_executable_global_size();
155 initial_vm_map.clear();
156 initial_vm_map.shrink_to_fit();
158 XBT_DEBUG("bss+data segment found : size %d starting at %p", smpi_data_exe_size, smpi_data_exe_start);
160 if (smpi_data_exe_size == 0) { // no need to do anything as global variables don't exist
161 smpi_privatize_global_variables = SmpiPrivStrategies::NONE;
165 smpi_data_exe_copy = ::operator new(smpi_data_exe_size);
166 // Make a copy of the data segment. This clean copy is retained over the whole runtime
167 // of the simulation and can be used to initialize a dynamically added, new process.
168 asan_safe_memcpy(smpi_data_exe_copy, TOPAGE(smpi_data_exe_start), smpi_data_exe_size);
169 #else /* ! HAVE_PRIVATIZATION */
170 xbt_die("You are trying to use privatization on a system that does not support it. Don't.");
174 // Initializes the memory mapping for a single process and returns the privatization region
175 smpi_privatization_region_t smpi_init_global_memory_segment_process()
178 void* address = nullptr;
182 constexpr unsigned VAL_MASK = 0xffffffU;
183 static unsigned prev_val = VAL_MASK;
184 for (unsigned i = (prev_val + 1) & VAL_MASK; i != prev_val; i = (i + 1) & VAL_MASK) {
185 snprintf(path, sizeof(path), "/smpi-buffer-%06x", i);
186 file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
187 if (file_descriptor != -1 || errno != EEXIST) {
192 if (file_descriptor < 0) {
193 if (errno == EMFILE) {
194 xbt_die("Impossible to create temporary file for memory mapping: %s\n\
195 The open() system call failed with the EMFILE error code (too many files). \n\n\
196 This means that you reached the system limits concerning the amount of files per process. \
197 This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
198 Don't panic -- you should simply increase your system limits and try again. \n\n\
199 First, check what your limits are:\n\
200 cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
201 ulimit -Hn # Gives you the per process hard limit\n\
202 ulimit -Sn # Gives you the per process soft limit\n\
203 cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
204 If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
205 Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
208 xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno));
211 status = ftruncate(file_descriptor, smpi_data_exe_size);
213 xbt_die("Impossible to set the size of the temporary file for memory mapping");
215 /* Ask for a free region */
216 address = mmap(nullptr, smpi_data_exe_size, PROT_RW, MAP_SHARED, file_descriptor, 0);
217 if (address == MAP_FAILED)
218 xbt_die("Couldn't find a free region for memory mapping");
220 status = shm_unlink(path);
222 xbt_die("Impossible to unlink temporary file for memory mapping");
224 // initialize the values
225 asan_safe_memcpy(address, smpi_data_exe_copy, smpi_data_exe_size);
227 // store the address of the mapping for further switches
228 smpi_privatization_regions.emplace_back(s_smpi_privatization_region_t{address, file_descriptor});
230 return &smpi_privatization_regions.back();
233 void smpi_destroy_global_memory_segments(){
234 if (smpi_data_exe_size == 0) // no need to switch
236 #if HAVE_PRIVATIZATION
237 for (auto const& region : smpi_privatization_regions) {
238 if (munmap(region.address, smpi_data_exe_size) < 0)
239 XBT_WARN("Unmapping of fd %d failed: %s", region.file_descriptor, strerror(errno));
240 close(region.file_descriptor);
242 smpi_privatization_regions.clear();
243 ::operator delete(smpi_data_exe_copy);
247 static std::vector<unsigned char> sendbuffer;
248 static std::vector<unsigned char> recvbuffer;
250 //allocate a single buffer for all sends, growing it if needed
251 unsigned char* smpi_get_tmp_sendbuffer(size_t size)
253 if (not smpi_process()->replaying())
254 return new unsigned char[size];
255 // FIXME: a resize() may invalidate a previous pointer. Maybe we need to handle a queue of buffers with a reference
256 // counter. The same holds for smpi_get_tmp_recvbuffer.
257 if (sendbuffer.size() < size)
258 sendbuffer.resize(size);
259 return sendbuffer.data();
262 //allocate a single buffer for all recv
263 unsigned char* smpi_get_tmp_recvbuffer(size_t size)
265 if (not smpi_process()->replaying())
266 return new unsigned char[size];
267 if (recvbuffer.size() < size)
268 recvbuffer.resize(size);
269 return recvbuffer.data();
272 void smpi_free_tmp_buffer(const unsigned char* buf)
274 if (not smpi_process()->replaying())
278 void smpi_free_replay_tmp_buffers()
280 std::vector<unsigned char>().swap(sendbuffer);
281 std::vector<unsigned char>().swap(recvbuffer);