-/* Copyright (c) 2015-2018. The SimGrid Team. All rights reserved. */
+/* Copyright (c) 2015-2023. The SimGrid Team. All rights reserved. */
/* This program is free software; you can redistribute it and/or modify it
* under the terms of the license (GNU LGPL) which comes with this package. */
+#include "private.hpp"
+#include "src/internal_config.h"
+#include "src/kernel/EngineImpl.hpp"
+#include "src/smpi/include/smpi_actor.hpp"
+#include "src/xbt/memory_map.hpp"
+
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstring>
#include <deque>
#include <fcntl.h>
+#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
-#include <vector>
-
-#ifndef WIN32
-#include <sys/mman.h>
#include <unistd.h>
-
-#include "src/internal_config.h"
-#include "src/xbt/memory_map.hpp"
-
-#include "private.hpp"
-#include "src/smpi/include/smpi_actor.hpp"
+#include <vector>
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_memory, smpi, "Memory layout support for SMPI");
-int smpi_loaded_page = -1;
-char* smpi_data_exe_start = nullptr;
-int smpi_data_exe_size = 0;
-SmpiPrivStrategies smpi_privatize_global_variables;
+static char* smpi_data_exe_start = nullptr; // start of the data+bss segment of the executable
+static size_t smpi_data_exe_size = 0; // size of the data+bss segment of the executable
+static SmpiPrivStrategies smpi_privatize_global_variables; // privatization strategy; set to NONE when smpi_data_exe_size is 0
static void* smpi_data_exe_copy; // source image copied into each process's freshly mapped region
// Initialized by smpi_prepare_global_memory_segment().
/**
 * Work out the extent of the executable's data segment from the memory map of the current process.
 *
 * The visible part resolves the real path of the binary (first command-line entry), walks the map, and extends
 * smpi_data_exe_size over an anonymous rw- region that directly follows the segment, leaving out the part that
 * overlaps a region of initial_vm_map. Dies with xbt_die() if the loop completes without returning.
 *
 * NOTE(review): the condition guarding the 'return' inside the loop, and the assignment of smpi_data_exe_start,
 * lie in a part of the body that is not shown here -- confirm against the full file.
 */
static void smpi_get_executable_global_size()
{
- char buffer[PATH_MAX];
- char* full_name = realpath(xbt_binary_name, buffer);
- if (full_name == nullptr)
- xbt_die("Could not resolve binary file name");
+ const auto* binary_name = simgrid::kernel::EngineImpl::get_instance()->get_cmdline().front().c_str();
+ char* buffer = realpath(binary_name, nullptr); // with a null buffer, realpath() returns a malloc()ed string
+ xbt_assert(buffer != nullptr, "Could not resolve real path of binary file '%s'", binary_name);
+ std::string full_name = buffer;
+ free(buffer); // the path now lives in full_name
std::vector<simgrid::xbt::VmMap> map = simgrid::xbt::get_memory_map(getpid()); // map of the current process
for (auto i = map.begin(); i != map.end() ; ++i) {
/* Here we are making the assumption that a suitable empty region
following the rw- area is the end of the data segment. It would
be better to check with the size of the data segment. */
- ++i;
- if (i != map.end() && i->pathname.empty() && (i->prot & PROT_RWX) == PROT_RW &&
- (char*)i->start_addr == smpi_data_exe_start + smpi_data_exe_size) {
- // Only count this region if it was not already present in the initial map.
- auto found = std::find_if(begin(initial_vm_map), end(initial_vm_map),
- [&i](const simgrid::xbt::VmMap& m) { return m.start_addr == i->start_addr; });
- if (found == end(initial_vm_map))
- smpi_data_exe_size = (char*)i->end_addr - smpi_data_exe_start;
+ if (auto j = i + 1; j != map.end() && j->pathname.empty() && (j->prot & PROT_RWX) == PROT_RW && // j: entry right after i
+ (char*)j->start_addr == smpi_data_exe_start + smpi_data_exe_size) {
+ // Only count the portion of this region not present in the initial map.
+ auto found = std::find_if(initial_vm_map.begin(), initial_vm_map.end(), [&j](const simgrid::xbt::VmMap& m) {
+ return j->start_addr <= m.start_addr && m.start_addr < j->end_addr;
+ });
+ auto end_addr = (found == initial_vm_map.end() ? j->end_addr : found->start_addr); // cut at the first initial-map region found starting inside j
+ smpi_data_exe_size = (char*)end_addr - smpi_data_exe_start;
}
return;
}
}
xbt_die("Did not find my data segment.");
}
-#endif
#if HAVE_SANITIZER_ADDRESS
#include <sanitizer/asan_interface.h>
while (i < n && __asan_address_is_poisoned(psrc + i))
++i;
if (i < n) {
- char* p = static_cast<char*>(__asan_region_is_poisoned(psrc + i, n - i));
+ const char* p = static_cast<char*>(__asan_region_is_poisoned(psrc + i, n - i));
size_t j = p ? (p - psrc) : n;
memcpy(pdest + i, psrc + i, j - i);
i = j;
return dest;
}
#else
-#define asan_safe_memcpy(dest, src, n) memcpy(dest, src, n)
+#define asan_safe_memcpy(dest, src, n) memcpy((dest), (src), (n))
#endif
-/** Map a given SMPI privatization segment (make a SMPI process active) */
-void smpi_switch_data_segment(simgrid::s4u::ActorPtr actor)
+/**
+ * @brief Uses shm_open to get a temporary shm, and returns its file descriptor.
+ *
+ * Candidate names have the form "/smpi-buffer-<hex index>". The index comes from a function-local static counter
+ * that is advanced until shm_open() succeeds, fails with an error other than EEXIST, or the counter comes back to
+ * the value it had on entry. The name is unlinked before returning; a failed unlink only logs a warning.
+ * Dies through xbt_die() when no shm object could be created.
+ *
+ * @return the file descriptor returned by the successful shm_open() call
+ */
+int smpi_temp_shm_get()
{
- if (smpi_loaded_page == actor->get_pid()) // no need to switch, we've already loaded the one we want
- return;
+ constexpr unsigned INDEX_MASK = 0xffffffffUL;
+ static unsigned index = INDEX_MASK; // the very first increment below wraps this to 0
+ char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on macOS (shm_open raises ENAMETOOLONG otherwise)
+ int fd;
- // So the job:
- smpi_really_switch_data_segment(actor);
+ unsigned limit = index; // value on entry, used to stop after one full cycle of indices
+ do {
+ index = (index + 1) & INDEX_MASK;
+ snprintf(shmname, sizeof(shmname), "/smpi-buffer-%016x", index);
+ fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); // O_CREAT | O_EXCL: EEXIST if the name is taken
+ } while (fd == -1 && errno == EEXIST && index != limit); // retry on name collisions only
+
+ if (fd < 0) {
+ if (errno == EMFILE) {
+ xbt_die("Impossible to create temporary file for memory mapping: %s\n\
+The shm_open() system call failed with the EMFILE error code (too many files). \n\n\
+This means that you reached the system limits concerning the amount of files per process. \
+This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
+Don't panic -- you should simply increase your system limits and try again. \n\n\
+First, check what your limits are:\n\
+ cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
+ ulimit -Hn # Gives you the per process hard limit\n\
+ ulimit -Sn # Gives you the per process soft limit\n\
+ cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
+If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
+Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
+ strerror(errno));
+ }
+ xbt_die("Impossible to create temporary file for memory mapping. shm_open: %s", strerror(errno)); // any failure other than EMFILE
+ }
+ XBT_DEBUG("Got temporary shm %s (fd = %d)", shmname, fd);
+ if (shm_unlink(shmname) < 0)
+ XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno));
+ return fd;
}
-/** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active
+/**
+ * @brief Mmap a region of size bytes from temporary shm with file descriptor fd.
+ *
+ * The file behind fd is resized with ftruncate() only when fstat() reports it as smaller than size.
+ * The region is then mapped readable and writable, MAP_SHARED, from offset 0, at an address chosen by mmap().
+ * Every failure (fstat, ftruncate, mmap) is reported through xbt_assert().
+ *
+ * @param fd   file descriptor to map
+ * @param size number of bytes to map
+ * @return the address returned by mmap()
+ */
+void* smpi_temp_shm_mmap(int fd, size_t size)
+{
+ struct stat st;
+ xbt_assert(fstat(fd, &st) == 0, "Could not stat fd %d: %s", fd, strerror(errno));
+ xbt_assert(static_cast<off_t>(size) <= st.st_size || ftruncate(fd, static_cast<off_t>(size)) == 0, // short-circuit: no ftruncate() if already large enough
+ "Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
+ void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ xbt_assert(
+ mem != MAP_FAILED,
+ "Failed to map fd %d with size %zu: %s\n"
+ "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n"
+ "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n"
+ "Please see https://simgrid.org/doc/latest/Configuring_SimGrid.html#configuring-the-user-code-virtualization for "
+ "more information.",
+ fd, size, strerror(errno));
+ return mem;
+}
+
+/** Map a given SMPI privatization segment (make an SMPI process active)
*
* When doing a state restoration, the state of the restored variables might not be consistent with the state of the
- * virtual memory. In this case, we to change the data segment.
+ * virtual memory. In this case, we have to change the data segment.
+ *
+ * If 'addr' is not null, only switch if it's an address from the data segment.
+ *
+ * Returns 'true' if the segment has to be switched (mmap privatization and 'addr' in data segment).
*/
-void smpi_really_switch_data_segment(simgrid::s4u::ActorPtr actor)
+bool smpi_switch_data_segment(simgrid::s4u::ActorPtr actor, const void* addr)
{
- if (smpi_data_exe_size == 0) // no need to switch
- return;
+ if (smpi_cfg_privatization() != SmpiPrivStrategies::MMAP || smpi_data_exe_size == 0)
+ return false; // no need to switch
+
+ if (addr != nullptr &&
+ not(static_cast<const char*>(addr) >= smpi_data_exe_start &&
+ static_cast<const char*>(addr) < smpi_data_exe_start + smpi_data_exe_size))
+ return false; // no need to switch, addr is not concerned
+
+ static aid_t smpi_loaded_page = -1; // pid of the actor whose segment was mapped last; -1 before any switch
+ if (smpi_loaded_page == actor->get_pid()) // no need to switch, we've already loaded the one we want
+ return true; // return 'true' anyway
#if HAVE_PRIVATIZATION
// FIXME, cross-process support (mmap across process when necessary)
XBT_DEBUG("Switching data frame to the one of process %ld", actor->get_pid());
- simgrid::smpi::ActorExt* process = smpi_process_remote(actor);
+ const simgrid::smpi::ActorExt* process = smpi_process_remote(actor);
int current = process->privatized_region()->file_descriptor; // fd of this actor's privatized region
- void* tmp = mmap(TOPAGE(smpi_data_exe_start), smpi_data_exe_size, PROT_RW, MAP_FIXED | MAP_SHARED, current, 0);
- if (tmp != TOPAGE(smpi_data_exe_start))
- xbt_die("Couldn't map the new region (errno %d): %s", errno, strerror(errno));
+ xbt_assert(mmap(TOPAGE(smpi_data_exe_start), smpi_data_exe_size, PROT_RW, MAP_FIXED | MAP_SHARED, current, 0) == // MAP_FIXED: map the actor's fd at the data segment's own address
+ TOPAGE(smpi_data_exe_start),
+ "Couldn't map the new region (errno %d): %s", errno, strerror(errno));
smpi_loaded_page = actor->get_pid(); // remember it so the next call for the same actor returns early
#endif
-}
-int smpi_is_privatization_file(char* file)
-{
- const std::string buffer_path("/dev/shm/my-buffer-");
- return buffer_path.compare(0, std::string::npos, file, buffer_path.length()) == 0;
+ return true;
}
/**
initial_vm_map.clear();
initial_vm_map.shrink_to_fit();
- XBT_DEBUG("bss+data segment found : size %d starting at %p", smpi_data_exe_size, smpi_data_exe_start);
+ XBT_DEBUG("bss+data segment found : size %zu starting at %p", smpi_data_exe_size, smpi_data_exe_start);
if (smpi_data_exe_size == 0) { // no need to do anything as global variables don't exist
smpi_privatize_global_variables = SmpiPrivStrategies::NONE;
// Initializes the memory mapping for a single process and returns the privatization region
// The visible steps: get a temporary shm descriptor, map smpi_data_exe_size bytes of it, and fill the mapping
// from smpi_data_exe_copy.
// NOTE(review): the opening #if and the statement building the returned region are not shown here -- confirm
// against the full file.
smpi_privatization_region_t smpi_init_global_memory_segment_process()
{
- int file_descriptor;
- void* address = nullptr;
- char path[24];
- int status;
-
- do {
- snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffffU);
- file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
- } while (file_descriptor == -1 && errno == EEXIST);
- if (file_descriptor < 0) {
- if (errno == EMFILE) {
- xbt_die("Impossible to create temporary file for memory mapping: %s\n\
-The open() system call failed with the EMFILE error code (too many files). \n\n\
-This means that you reached the system limits concerning the amount of files per process. \
-This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
-Don't panic -- you should simply increase your system limits and try again. \n\n\
-First, check what your limits are:\n\
- cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
- ulimit -Hn # Gives you the per process hard limit\n\
- ulimit -Sn # Gives you the per process soft limit\n\
- cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
-If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
-Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
- strerror(errno));
- }
- xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno));
- }
-
- status = ftruncate(file_descriptor, smpi_data_exe_size);
- if (status)
- xbt_die("Impossible to set the size of the temporary file for memory mapping");
+ int file_descriptor = smpi_temp_shm_get(); // creation, error reporting and early unlink are done by the helper
- /* Ask for a free region */
- address = mmap(nullptr, smpi_data_exe_size, PROT_RW, MAP_SHARED, file_descriptor, 0);
- if (address == MAP_FAILED)
- xbt_die("Couldn't find a free region for memory mapping");
-
- status = shm_unlink(path);
- if (status)
- xbt_die("Impossible to unlink temporary file for memory mapping");
+ // ask for a free region
+ void* address = smpi_temp_shm_mmap(file_descriptor, smpi_data_exe_size); // the helper also sizes the file if needed
// initialize the values
asan_safe_memcpy(address, smpi_data_exe_copy, smpi_data_exe_size);
#endif
}
-static int sendbuffer_size = 0;
-static char* sendbuffer = nullptr;
-static int recvbuffer_size = 0;
-static char* recvbuffer = nullptr;
+static std::vector<unsigned char> sendbuffer; // storage returned by smpi_get_tmp_sendbuffer() when replaying
+static std::vector<unsigned char> recvbuffer; // storage returned by smpi_get_tmp_recvbuffer() when replaying
// Allocate a single buffer for all sends, growing it if needed.
// That shared buffer is only used when the calling process is replaying; otherwise every call returns a fresh
// new[] allocation of 'size' bytes, which smpi_free_tmp_buffer() releases.
-void* smpi_get_tmp_sendbuffer(int size)
+unsigned char* smpi_get_tmp_sendbuffer(size_t size)
{
if (not smpi_process()->replaying())
- return xbt_malloc(size);
- if (sendbuffer_size<size){
- sendbuffer=static_cast<char*>(xbt_realloc(sendbuffer,size));
- sendbuffer_size=size;
- }
- return sendbuffer;
+ return new unsigned char[size];
+ // FIXME: a resize() may invalidate a previous pointer. Maybe we need to handle a queue of buffers with a reference
+ // counter. The same holds for smpi_get_tmp_recvbuffer.
+ if (sendbuffer.size() < size)
+ sendbuffer.resize(size); // only ever grows here
+ return sendbuffer.data();
}
// Allocate a single buffer for all receives, growing it if needed.
// That shared buffer is only used when the calling process is replaying; otherwise every call returns a fresh
// new[] allocation of 'size' bytes, which smpi_free_tmp_buffer() releases.
-void* smpi_get_tmp_recvbuffer(int size){
+unsigned char* smpi_get_tmp_recvbuffer(size_t size)
+{
if (not smpi_process()->replaying())
- return xbt_malloc(size);
- if (recvbuffer_size<size){
- recvbuffer=static_cast<char*>(xbt_realloc(recvbuffer,size));
- recvbuffer_size=size;
- }
- return recvbuffer;
+ return new unsigned char[size];
+ if (recvbuffer.size() < size)
+ recvbuffer.resize(size); // only ever grows here; a resize may invalidate a pointer returned earlier
+ return recvbuffer.data();
}
// Release a buffer obtained from smpi_get_tmp_sendbuffer() or smpi_get_tmp_recvbuffer().
// Nothing is freed while replaying: in that mode those functions return the storage of the file-level vectors.
-void smpi_free_tmp_buffer(void* buf){
+void smpi_free_tmp_buffer(const unsigned char* buf)
+{
if (not smpi_process()->replaying())
- xbt_free(buf);
+ delete[] buf; // matches the new unsigned char[size] done when not replaying
}
// Release the storage of the shared send and receive buffers used while replaying.
-void smpi_free_replay_tmp_buffers(){
- xbt_free(sendbuffer);
- xbt_free(recvbuffer);
+void smpi_free_replay_tmp_buffers()
+{
+ std::vector<unsigned char>().swap(sendbuffer); // swapping with an empty temporary frees the capacity too
+ std::vector<unsigned char>().swap(recvbuffer);
}