The privatization code is moved to smpi_memory.cpp (which will be renamed later).
The smpi_shared* functions are moved to smpi_shared.cpp.
I don't know what to do with trace_call_location yet. smpi_bench.cpp should only contain timing-related functions: smpi_sample* and the sleep-related functions.
XBT_PRIVATE void smpi_bench_destroy();
XBT_PRIVATE void smpi_bench_begin();
XBT_PRIVATE void smpi_bench_end();
+XBT_PRIVATE void smpi_shared_destroy();
XBT_PRIVATE void* smpi_get_tmp_sendbuffer(int size);
XBT_PRIVATE void* smpi_get_tmp_recvbuffer(int size);
#include <papi.h>
#endif
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-#ifndef MAP_POPULATE
-#define MAP_POPULATE 0
-#endif
-
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi, "Logging specific to SMPI (benchmarking)");
-/* Shared allocations are handled through shared memory segments.
- * Associated data and metadata are used as follows:
- *
- * mmap #1
- * `allocs' dict ---- -.
- * ---------- shared_data_t shared_metadata_t / | | |
- * .->| <name> | ---> -------------------- <--. ----------------- | | | |
- * | ---------- | fd of <name> | | | size of mmap | --| | | |
- * | | count (2) | |-- | data | \ | | |
- * `----------------- | <name> | | ----------------- ---- |
- * -------------------- | ^ |
- * | | |
- * | | `allocs_metadata' dict |
- * | | ---------------------- |
- * | `-- | <addr of mmap #1> |<-'
- * | .-- | <addr of mmap #2> |<-.
- * | | ---------------------- |
- * | | |
- * | | |
- * | | |
- * | | mmap #2 |
- * | v ---- -'
- * | shared_metadata_t / | |
- * | ----------------- | | |
- * | | size of mmap | --| | |
- * `-- | data | | | |
- * ----------------- | | |
- * \ | |
- * ----
- */
-
-#define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
xbt_dict_t samples = nullptr; /* Allocated on first use */
-xbt_dict_t calls = nullptr; /* Allocated on first use */
double smpi_cpu_threshold;
double smpi_host_speed;
-int smpi_loaded_page = -1;
-char* smpi_start_data_exe = nullptr;
-int smpi_size_data_exe = 0;
-bool smpi_privatize_global_variables;
shared_malloc_type smpi_cfg_shared_malloc = shmalloc_global;
double smpi_total_benched_time = 0;
smpi_privatisation_region_t smpi_privatisation_regions;
-namespace {
-
-/** Some location in the source code
- *
- * This information is used by SMPI_SHARED_MALLOC to allocate some shared memory for all simulated processes.
- */
-class smpi_source_location {
-public:
- smpi_source_location(const char* filename, int line)
- : filename(xbt_strdup(filename)), filename_length(strlen(filename)), line(line)
- {
- }
-
- /** Pointer to a static string containing the file name */
- char* filename = nullptr;
- int filename_length = 0;
- int line = 0;
-
- bool operator==(smpi_source_location const& that) const
- {
- return filename_length == that.filename_length && line == that.line &&
- std::memcmp(filename, that.filename, filename_length) == 0;
- }
- bool operator!=(smpi_source_location const& that) const { return !(*this == that); }
-};
-}
-
-namespace std {
-
-template <> class hash<smpi_source_location> {
-public:
- typedef smpi_source_location argument_type;
- typedef std::size_t result_type;
- result_type operator()(smpi_source_location const& loc) const
- {
- return xbt_str_hash_ext(loc.filename, loc.filename_length) ^
- xbt_str_hash_ext((const char*)&loc.line, sizeof(loc.line));
- }
-};
-}
-
-namespace {
-
-typedef struct {
- int fd = -1;
- int count = 0;
-} shared_data_t;
-
-std::unordered_map<smpi_source_location, shared_data_t> allocs;
-typedef std::unordered_map<smpi_source_location, shared_data_t>::value_type shared_data_key_type;
-
-typedef struct {
- size_t size;
- shared_data_key_type* data;
-} shared_metadata_t;
-
-std::unordered_map<void*, shared_metadata_t> allocs_metadata;
-}
-
-static size_t shm_size(int fd) {
- struct stat st;
-
- if(fstat(fd, &st) < 0) {
- xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
- }
- return static_cast<size_t>(st.st_size);
-}
-
-#ifndef WIN32
-static void* shm_map(int fd, size_t size, shared_data_key_type* data) {
- char loc[PTR_STRLEN];
- shared_metadata_t meta;
-
- if(size > shm_size(fd) && (ftruncate(fd, static_cast<off_t>(size)) < 0)) {
- xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
- }
-
- void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if(mem == MAP_FAILED) {
- xbt_die(
- "Failed to map fd %d with size %zu: %s\n"
- "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n"
- "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n"
- "Please see http://simgrid.gforge.inria.fr/simgrid/latest/doc/html/options.html#options_virt for more info.",
- fd, size, strerror(errno));
- }
- snprintf(loc, PTR_STRLEN, "%p", mem);
- meta.size = size;
- meta.data = data;
- allocs_metadata[mem] = meta;
- XBT_DEBUG("MMAP %zu to %p", size, mem);
- return mem;
-}
-#endif
-
void smpi_bench_destroy()
{
- allocs.clear();
- allocs_metadata.clear();
xbt_dict_free(&samples);
- xbt_dict_free(&calls);
}
extern "C" XBT_PUBLIC(void) smpi_execute_flops_(double *flops);
data->benching = 0;
}
-#ifndef WIN32
-static int smpi_shared_malloc_bogusfile = -1;
-static unsigned long smpi_shared_malloc_blocksize = 1UL << 20;
-void *smpi_shared_malloc(size_t size, const char *file, int line)
-{
- void* mem;
- if (size > 0 && smpi_cfg_shared_malloc == shmalloc_local) {
- smpi_source_location loc(file, line);
- auto res = allocs.insert(std::make_pair(loc, shared_data_t()));
- auto data = res.first;
- if (res.second) {
- // The insertion did not take place.
- // Generate a shared memory name from the address of the shared_data:
- char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on Mac OS X (shm_open raises ENAMETOOLONG otherwise)
- snprintf(shmname, 31, "/shmalloc%p", &*data);
- int fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (fd < 0) {
- if (errno == EEXIST)
- xbt_die("Please cleanup /dev/shm/%s", shmname);
- else
- xbt_die("An unhandled error occurred while opening %s. shm_open: %s", shmname, strerror(errno));
- }
- data->second.fd = fd;
- data->second.count = 1;
- mem = shm_map(fd, size, &*data);
- if (shm_unlink(shmname) < 0) {
- XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno));
- }
- XBT_DEBUG("Mapping %s at %p through %d", shmname, mem, fd);
- } else {
- mem = shm_map(data->second.fd, size, &*data);
- data->second.count++;
- }
- XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, &*data);
-
- } else if (smpi_cfg_shared_malloc == shmalloc_global) {
- /* First reserve memory area */
- mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
- xbt_assert(mem != MAP_FAILED, "Failed to allocate %luMiB of memory. Run \"sysctl vm.overcommit_memory=1\" as root "
- "to allow big allocations.\n",
- (unsigned long)(size >> 20));
-
- /* Create bogus file if not done already */
- if (smpi_shared_malloc_bogusfile == -1) {
- /* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it.
- * It still exists in memory but not in the file system (thus it cannot be leaked). */
- char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX");
- smpi_shared_malloc_bogusfile = mkstemp(name);
- unlink(name);
- xbt_free(name);
- char* dumb = (char*)calloc(1, smpi_shared_malloc_blocksize);
- ssize_t err = write(smpi_shared_malloc_bogusfile, dumb, smpi_shared_malloc_blocksize);
- if(err<0)
- xbt_die("Could not write bogus file for shared malloc");
- xbt_free(dumb);
- }
-
- /* Map the bogus file in place of the anonymous memory */
- unsigned int i;
- for (i = 0; i < size / smpi_shared_malloc_blocksize; i++) {
- void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
- void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
- smpi_shared_malloc_bogusfile, 0);
- xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
- "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
- strerror(errno));
- }
- if (size % smpi_shared_malloc_blocksize) {
- void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
- void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0);
- xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
- "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
- strerror(errno));
- }
-
- shared_metadata_t newmeta;
- //register metadata for memcpy avoidance
- shared_data_key_type* data = (shared_data_key_type*)xbt_malloc(sizeof(shared_data_key_type));
- data->second.fd = -1;
- data->second.count = 1;
- newmeta.size = size;
- newmeta.data = data;
- allocs_metadata[mem] = newmeta;
- } else {
- mem = xbt_malloc(size);
- XBT_DEBUG("Classic malloc %zu in %p", size, mem);
- }
-
- return mem;
-}
-
-int smpi_is_shared(void*ptr){
- if ( smpi_cfg_shared_malloc == shmalloc_local || smpi_cfg_shared_malloc == shmalloc_global) {
- if (allocs_metadata.count(ptr) != 0)
- return 1;
- for(auto it : allocs_metadata){
- if (ptr >= it.first && ptr < (char*)it.first + it.second.size)
- return 1;
- }
- return 0;
- } else {
- return 0;
- }
-}
-
-void smpi_shared_free(void *ptr)
-{
- if (smpi_cfg_shared_malloc == shmalloc_local) {
- char loc[PTR_STRLEN];
- snprintf(loc, PTR_STRLEN, "%p", ptr);
- auto meta = allocs_metadata.find(ptr);
- if (meta == allocs_metadata.end()) {
- XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr);
- return;
- }
- shared_data_t* data = &meta->second.data->second;
- if (munmap(ptr, meta->second.size) < 0) {
- XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
- }
- data->count--;
- if (data->count <= 0) {
- close(data->fd);
- allocs.erase(allocs.find(meta->second.data->first));
- allocs_metadata.erase(ptr);
- XBT_DEBUG("Shared free - with removal - of %p", ptr);
- } else {
- XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
- }
-
- } else if (smpi_cfg_shared_malloc == shmalloc_global) {
- auto meta = allocs_metadata.find(ptr);
- if (meta != allocs_metadata.end()){
- meta->second.data->second.count--;
- if(meta->second.data->second.count==0)
- xbt_free(meta->second.data);
- }
-
- munmap(ptr, 0); // the POSIX says that I should not give 0 as a length, but it seems to work OK
- } else {
- XBT_DEBUG("Classic free of %p", ptr);
- xbt_free(ptr);
- }
-}
-#endif
-
-int smpi_shared_known_call(const char* func, const char* input)
-{
- char* loc = bprintf("%s:%s", func, input);
- int known = 0;
-
- if (calls==nullptr) {
- calls = xbt_dict_new_homogeneous(nullptr);
- }
- try {
- xbt_dict_get(calls, loc); /* Succeed or throw */
- known = 1;
- xbt_free(loc);
- }
- catch (xbt_ex& ex) {
- xbt_free(loc);
- if (ex.category != not_found_error)
- throw;
- }
- catch(...) {
- xbt_free(loc);
- throw;
- }
- return known;
-}
-
-void* smpi_shared_get_call(const char* func, const char* input) {
- char* loc = bprintf("%s:%s", func, input);
-
- if (calls == nullptr)
- calls = xbt_dict_new_homogeneous(nullptr);
- void* data = xbt_dict_get(calls, loc);
- xbt_free(loc);
- return data;
-}
-
-void* smpi_shared_set_call(const char* func, const char* input, void* data) {
- char* loc = bprintf("%s:%s", func, input);
-
- if (calls == nullptr)
- calls = xbt_dict_new_homogeneous(nullptr);
- xbt_dict_set(calls, loc, data, nullptr);
- xbt_free(loc);
- return data;
-}
-
-
-/** Map a given SMPI privatization segment (make a SMPI process active) */
-void smpi_switch_data_segment(int dest) {
- if (smpi_loaded_page == dest)//no need to switch, we've already loaded the one we want
- return;
-
- // So the job:
- smpi_really_switch_data_segment(dest);
-}
-
-/** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active
- *
- * When doing a state restoration, the state of the restored variables might not be consistent with the state of the
- * virtual memory. In this case, we to change the data segment.
- */
-void smpi_really_switch_data_segment(int dest)
-{
- if(smpi_size_data_exe == 0)//no need to switch
- return;
-
-#if HAVE_PRIVATIZATION
- if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here
- for (int i=0; i< smpi_process_count(); i++){
- memcpy(smpi_privatisation_regions[i].address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
- }
- }
-
- // FIXME, cross-process support (mmap across process when necessary)
- int current = smpi_privatisation_regions[dest].file_descriptor;
- XBT_DEBUG("Switching data frame to the one of process %d", dest);
- void* tmp =
- mmap(TOPAGE(smpi_start_data_exe), smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0);
- if (tmp != TOPAGE(smpi_start_data_exe))
- xbt_die("Couldn't map the new region");
- smpi_loaded_page = dest;
-#endif
-}
-
-int smpi_is_privatisation_file(char* file)
-{
- return strncmp("/dev/shm/my-buffer-", file, std::strlen("/dev/shm/my-buffer-")) == 0;
-}
-
-void smpi_initialize_global_memory_segments()
-{
-
-#if !HAVE_PRIVATIZATION
- smpi_privatize_global_variables=false;
- xbt_die("You are trying to use privatization on a system that does not support it. Don't.");
- return;
-#else
-
- smpi_get_executable_global_size();
-
- XBT_DEBUG ("bss+data segment found : size %d starting at %p", smpi_size_data_exe, smpi_start_data_exe );
-
- if (smpi_size_data_exe == 0){//no need to switch
- smpi_privatize_global_variables=false;
- return;
- }
-
- smpi_privatisation_regions = static_cast<smpi_privatisation_region_t>(
- xbt_malloc(smpi_process_count() * sizeof(struct s_smpi_privatisation_region)));
-
- for (int i=0; i< smpi_process_count(); i++){
- // create SIMIX_process_count() mappings of this size with the same data inside
- int file_descriptor;
- void* address = nullptr;
- char path[24];
- int status;
-
- do {
- snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffff);
- file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
- } while (file_descriptor == -1 && errno == EEXIST);
- if (file_descriptor < 0) {
- if (errno == EMFILE) {
- xbt_die("Impossible to create temporary file for memory mapping: %s\n\
-The open() system call failed with the EMFILE error code (too many files). \n\n\
-This means that you reached the system limits concerning the amount of files per process. \
-This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
-Don't panic -- you should simply increase your system limits and try again. \n\n\
-First, check what your limits are:\n\
- cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
- ulimit -Hn # Gives you the per process hard limit\n\
- ulimit -Sn # Gives you the per process soft limit\n\
- cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
-If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
-Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
- strerror(errno));
- }
- xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno));
- }
-
- status = ftruncate(file_descriptor, smpi_size_data_exe);
- if (status)
- xbt_die("Impossible to set the size of the temporary file for memory mapping");
-
- /* Ask for a free region */
- address = mmap(nullptr, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0);
- if (address == MAP_FAILED)
- xbt_die("Couldn't find a free region for memory mapping");
-
- status = shm_unlink(path);
- if (status)
- xbt_die("Impossible to unlink temporary file for memory mapping");
-
- // initialize the values
- memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
-
- // store the address of the mapping for further switches
- smpi_privatisation_regions[i].file_descriptor = file_descriptor;
- smpi_privatisation_regions[i].address = address;
- }
-#endif
-}
-
-void smpi_destroy_global_memory_segments(){
- if (smpi_size_data_exe == 0)//no need to switch
- return;
-#if HAVE_PRIVATIZATION
- for (int i=0; i< smpi_process_count(); i++) {
- if (munmap(smpi_privatisation_regions[i].address, smpi_size_data_exe) < 0)
- XBT_WARN("Unmapping of fd %d failed: %s", smpi_privatisation_regions[i].file_descriptor, strerror(errno));
- close(smpi_privatisation_regions[i].file_descriptor);
- }
- xbt_free(smpi_privatisation_regions);
-#endif
-}
-
extern "C" { /** These functions will be called from the user code **/
smpi_trace_call_location_t* smpi_trace_get_call_location() {
return smpi_process()->call_location();
int count = smpi_process_count();
smpi_bench_destroy();
+ smpi_shared_destroy();
if (MPI_COMM_WORLD != MPI_COMM_UNINITIALIZED){
delete MPI_COMM_WORLD->group();
MSG_barrier_destroy(process_data[0]->finalization_barrier());
#include <stdlib.h>
#include <sys/types.h>
+#include <string.h>
+#include <stdio.h>
+#include "simgrid/sg_config.h"
+#include <fcntl.h>
#ifndef WIN32
#include <sys/mman.h>
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_memory, smpi, "Memory layout support for SMPI");
+int smpi_loaded_page = -1; /* index of the privatization segment currently mapped; -1 until the first switch */
+char* smpi_start_data_exe = nullptr; /* start address of the executable's bss+data segment */
+int smpi_size_data_exe = 0; /* size of that segment; 0 means there is nothing to switch */
+bool smpi_privatize_global_variables; /* cleared when privatization is unsupported or no data segment was found */
+
static const int PROT_RWX = (PROT_READ | PROT_WRITE | PROT_EXEC);
static const int PROT_RW = (PROT_READ | PROT_WRITE );
XBT_ATTRIB_UNUSED static const int PROT_RX = (PROT_READ | PROT_EXEC );
xbt_die("Did not find my data segment.");
}
#endif
+
+
+/** Make the SMPI process `dest` active by mapping its privatization segment.
+ *
+ * Nothing is done when the segment of `dest` is already the one recorded in smpi_loaded_page.
+ */
+void smpi_switch_data_segment(int dest) {
+  // Only pay for the remapping when the requested segment differs from the loaded one
+  if (smpi_loaded_page != dest)
+    smpi_really_switch_data_segment(dest);
+}
+
+/** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active
+ *
+ * When doing a state restoration, the state of the restored variables might not be consistent with the state of the
+ * virtual memory. In this case, we need to change the data segment.
+ *
+ * @param dest index in smpi_privatisation_regions of the segment to map in place of the executable's data segment
+ */
+void smpi_really_switch_data_segment(int dest)
+{
+ if(smpi_size_data_exe == 0)//no need to switch
+ return;
+
+#if HAVE_PRIVATIZATION
+ if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here
+ for (int i=0; i< smpi_process_count(); i++){
+ memcpy(smpi_privatisation_regions[i].address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
+ }
+ }
+
+ // FIXME, cross-process support (mmap across process when necessary)
+ int current = smpi_privatisation_regions[dest].file_descriptor;
+ XBT_DEBUG("Switching data frame to the one of process %d", dest);
+ void* tmp =
+ mmap(TOPAGE(smpi_start_data_exe), smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0); // MAP_FIXED: the segment of dest is mapped at the address of the data segment itself
+ if (tmp != TOPAGE(smpi_start_data_exe))
+ xbt_die("Couldn't map the new region");
+ smpi_loaded_page = dest; // remembered so that smpi_switch_data_segment() can skip redundant switches
+#endif
+}
+
+int smpi_is_privatisation_file(char* file) // returns non-zero iff `file` starts with "/dev/shm/my-buffer-"
+{
+ return strncmp("/dev/shm/my-buffer-", file, std::strlen("/dev/shm/my-buffer-")) == 0; // NOTE(review): smpi_initialize_global_memory_segments() names its segments "/smpi-buffer-%06x", which this prefix would not match -- confirm which name is intended
+}
+
+void smpi_initialize_global_memory_segments() // creates one shared-memory copy of the executable's data segment per SMPI process
+{
+
+#if !HAVE_PRIVATIZATION
+ smpi_privatize_global_variables=false;
+ xbt_die("You are trying to use privatization on a system that does not support it. Don't.");
+ return;
+#else
+
+ smpi_get_executable_global_size();
+
+ XBT_DEBUG ("bss+data segment found : size %d starting at %p", smpi_size_data_exe, smpi_start_data_exe );
+
+ if (smpi_size_data_exe == 0){//no need to switch
+ smpi_privatize_global_variables=false;
+ return;
+ }
+
+ smpi_privatisation_regions = static_cast<smpi_privatisation_region_t>(
+ xbt_malloc(smpi_process_count() * sizeof(struct s_smpi_privatisation_region))); // released by smpi_destroy_global_memory_segments()
+
+ for (int i=0; i< smpi_process_count(); i++){
+ // create SIMIX_process_count() mappings of this size with the same data inside
+ int file_descriptor;
+ void* address = nullptr;
+ char path[24];
+ int status;
+
+ do { // draw random names until shm_open(O_EXCL) succeeds or fails for another reason than EEXIST
+ snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffff);
+ file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ } while (file_descriptor == -1 && errno == EEXIST);
+ if (file_descriptor < 0) {
+ if (errno == EMFILE) {
+ xbt_die("Impossible to create temporary file for memory mapping: %s\n\
+The open() system call failed with the EMFILE error code (too many files). \n\n\
+This means that you reached the system limits concerning the amount of files per process. \
+This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
+Don't panic -- you should simply increase your system limits and try again. \n\n\
+First, check what your limits are:\n\
+ cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
+ ulimit -Hn # Gives you the per process hard limit\n\
+ ulimit -Sn # Gives you the per process soft limit\n\
+ cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
+If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
+Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
+ strerror(errno));
+ }
+ xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno));
+ }
+
+ status = ftruncate(file_descriptor, smpi_size_data_exe); // give the new segment the size of the data segment
+ if (status)
+ xbt_die("Impossible to set the size of the temporary file for memory mapping");
+
+ /* Ask for a free region */
+ address = mmap(nullptr, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0);
+ if (address == MAP_FAILED)
+ xbt_die("Couldn't find a free region for memory mapping");
+
+ status = shm_unlink(path); // the name is dropped right away; only the descriptor and the mapping stored below are used afterwards
+ if (status)
+ xbt_die("Impossible to unlink temporary file for memory mapping");
+
+ // initialize the values
+ memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
+
+ // store the address of the mapping for further switches
+ smpi_privatisation_regions[i].file_descriptor = file_descriptor;
+ smpi_privatisation_regions[i].address = address;
+ }
+#endif
+}
+
+/** Unmap and close every per-process privatization segment, then release the table describing them.
+ *
+ * Does nothing when no data segment was found (smpi_size_data_exe == 0) or when privatization is compiled out.
+ */
+void smpi_destroy_global_memory_segments(){
+  if (smpi_size_data_exe == 0) // nothing was mapped
+    return;
+#if HAVE_PRIVATIZATION
+  for (int rank = 0; rank < smpi_process_count(); rank++) {
+    auto& region = smpi_privatisation_regions[rank];
+    // A failed unmapping is only reported: the descriptor is closed in every case
+    if (munmap(region.address, smpi_size_data_exe) < 0) {
+      XBT_WARN("Unmapping of fd %d failed: %s", region.file_descriptor, strerror(errno));
+    }
+    close(region.file_descriptor);
+  }
+  xbt_free(smpi_privatisation_regions);
+#endif
+}
+
--- /dev/null
+/* Copyright (c) 2007, 2009-2017. The SimGrid Team. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* Shared allocations are handled through shared memory segments.
+ * Associated data and metadata are used as follows:
+ *
+ *                                                                    mmap #1
+ *    `allocs' dict                                                     ---- -.
+ *    ----------      shared_data_t               shared_metadata_t   / |  |  |
+ * .->| <name> | ---> -------------------- <--.   -----------------   | |  |  |
+ * |  ----------      | fd of <name>     |    |   | size of mmap  | --| |  |  |
+ * |                  | count (2)        |    |-- | data          |   \ |  |  |
+ * `----------------- | <name>           |    |   -----------------     ----  |
+ *                    --------------------    |   ^                           |
+ *                                            |   |                           |
+ *                                            |   |   `allocs_metadata' dict  |
+ *                                            |   |   ----------------------  |
+ *                                            |   `-- | <addr of mmap #1>  |<-'
+ *                                            |   .-- | <addr of mmap #2>  |<-.
+ *                                            |   |   ----------------------  |
+ *                                            |   |                           |
+ *                                            |   |                           |
+ *                                            |   |                           |
+ *                                            |   |                   mmap #2 |
+ *                                            |   v                     ---- -'
+ *                                            |   shared_metadata_t   / |  |
+ *                                            |   -----------------   | |  |
+ *                                            |   | size of mmap  | --| |  |
+ *                                            `-- | data          |   | |  |
+ *                                                -----------------   | |  |
+ *                                                                    \ |  |
+ *                                                                      ----
+ */
+#include <cstring>
+
+#include <unordered_map>
+#include <utility>
+
+#include "src/internal_config.h"
+#include "private.h"
+#include "private.hpp"
+#include <xbt/ex.hpp>
+#include "xbt/dict.h"
+//#include "xbt/sysdep.h"
+//#include "xbt/ex.h"
+#include "surf/surf.h"
+#include "simgrid/sg_config.h"
+//#include "simgrid/modelchecker.h"
+//#include "src/mc/mc_replay.h"
+
+#include <sys/types.h>
+#ifndef WIN32
+#include <sys/mman.h>
+#endif
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+//#include <math.h> // sqrt
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#ifndef MAP_POPULATE
+#define MAP_POPULATE 0
+#endif
+
+XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_shared, smpi, "Logging specific to SMPI (shared memory macros)");
+
+#define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
+
+namespace{
+/** Some location in the source code
+ *
+ * This information is used by SMPI_SHARED_MALLOC to allocate some shared memory for all simulated processes.
+ *
+ * Every instance owns a private heap copy of the file name: copies duplicate the string, moves steal it and the
+ * destructor releases it, so that neither temporaries nor map keys leak or share the buffer.
+ */
+class smpi_source_location {
+public:
+  /** Record the location `filename`:`line`; the file name is duplicated, the caller keeps its own string. */
+  smpi_source_location(const char* filename, int line)
+      : filename(xbt_strdup(filename)), filename_length(strlen(filename)), line(line)
+  {
+  }
+
+  /** Deep copy: the new object gets its own duplicate of the file name. */
+  smpi_source_location(smpi_source_location const& that)
+      : filename(xbt_strdup(that.filename)), filename_length(that.filename_length), line(that.line)
+  {
+  }
+
+  /** Steal the file name of `that`, which is left empty (and still safely destructible). */
+  smpi_source_location(smpi_source_location&& that) noexcept
+      : filename(that.filename), filename_length(that.filename_length), line(that.line)
+  {
+    that.filename        = nullptr;
+    that.filename_length = 0;
+  }
+
+  /** Copy-and-swap assignment: serves both copy and move assignment, the previous name dies with `that`. */
+  smpi_source_location& operator=(smpi_source_location that) noexcept
+  {
+    std::swap(filename, that.filename);
+    std::swap(filename_length, that.filename_length);
+    std::swap(line, that.line);
+    return *this;
+  }
+
+  ~smpi_source_location() { xbt_free(filename); }
+
+  /** Owned, heap-allocated copy of the file name (nullptr only after being moved from) */
+  char* filename = nullptr;
+  int filename_length = 0;
+  int line = 0;
+
+  /** Two locations are equal when they have the same line and the same file name bytes. */
+  bool operator==(smpi_source_location const& that) const
+  {
+    return filename_length == that.filename_length && line == that.line &&
+           std::memcmp(filename, that.filename, filename_length) == 0;
+  }
+  bool operator!=(smpi_source_location const& that) const { return !(*this == that); }
+};
+}
+
+namespace std {
+
+/** Hash support allowing smpi_source_location to be used as the key of an unordered container. */
+template <> class hash<smpi_source_location> {
+public:
+  using argument_type = smpi_source_location;
+  using result_type   = std::size_t;
+
+  /** XOR of the hash of the file name bytes and the hash of the raw bytes of the line number. */
+  result_type operator()(argument_type const& loc) const
+  {
+    const result_type name_hash = xbt_str_hash_ext(loc.filename, loc.filename_length);
+    const result_type line_hash = xbt_str_hash_ext(reinterpret_cast<const char*>(&loc.line), sizeof(loc.line));
+    return name_hash ^ line_hash;
+  }
+};
+}
+
+namespace{
+
+/** Bookkeeping attached to one entry of `allocs`.
+ *
+ * Declared as a named struct: an unnamed class that only gets its name from a typedef may not have default member
+ * initializers.
+ */
+struct shared_data_t {
+  int fd    = -1; /* descriptor of the backing shared memory object, -1 when there is none */
+  int count = 0;  /* number of live mappings handed out for this entry */
+};
+
+/* One entry per source location calling the shared malloc (used in shmalloc_local mode) */
+std::unordered_map<smpi_source_location, shared_data_t> allocs;
+using shared_data_key_type = std::unordered_map<smpi_source_location, shared_data_t>::value_type;
+
+/** What is remembered about each mapping returned to the user. */
+struct shared_metadata_t {
+  size_t size;                /* length of the mapping, as needed to unmap it */
+  shared_data_key_type* data; /* the (location, shared_data_t) pair this mapping belongs to */
+};
+
+/* Indexed by the start address of each mapping */
+std::unordered_map<void*, shared_metadata_t> allocs_metadata;
+xbt_dict_t calls = nullptr; /* Allocated on first use */
+#ifndef WIN32
+static int smpi_shared_malloc_bogusfile = -1; /* fd of the file folded over global shared allocations, -1 until created */
+static unsigned long smpi_shared_malloc_blocksize = 1UL << 20; /* size of that file and of each folded block */
+#endif
+}
+
+
+/** Forget every shared allocation and every memoized call; meant to be called once when SMPI shuts down. */
+void smpi_shared_destroy()
+{
+  // Drop the per-mapping records first, since they only hold pointers to entries of `allocs`
+  allocs_metadata.clear();
+  allocs.clear();
+  xbt_dict_free(&calls);
+}
+
+/** Return the current size, in bytes, of the file behind descriptor `fd`; dies when it cannot be stat'ed. */
+static size_t shm_size(int fd) {
+  struct stat info;
+  const int status = fstat(fd, &info);
+
+  if (status < 0) {
+    xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
+  }
+  return static_cast<size_t>(info.st_size);
+}
+
+#ifndef WIN32
+/** Map `size` bytes of the shared memory object `fd` and register the mapping in allocs_metadata.
+ *
+ * The object is grown with ftruncate() when it is smaller than `size`.
+ *
+ * @param fd   descriptor of the shared memory object to map
+ * @param size number of bytes to map
+ * @param data entry of `allocs` the new mapping belongs to; recorded in the metadata of the mapping
+ * @return the address of the new mapping (the function dies instead of returning on failure)
+ */
+static void* shm_map(int fd, size_t size, shared_data_key_type* data) {
+  // Grow the backing object if needed, never shrink it: other mappings may rely on its current size
+  if(size > shm_size(fd) && (ftruncate(fd, static_cast<off_t>(size)) < 0)) {
+    xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
+  }
+
+  void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if(mem == MAP_FAILED) {
+    xbt_die(
+        "Failed to map fd %d with size %zu: %s\n"
+        "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n"
+        "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n"
+        "Please see http://simgrid.gforge.inria.fr/simgrid/latest/doc/html/options.html#options_virt for more info.",
+        fd, size, strerror(errno));
+  }
+
+  // Remember the size and owner of the mapping so that it can be recognized and unmapped later
+  shared_metadata_t meta;
+  meta.size = size;
+  meta.data = data;
+  allocs_metadata[mem] = meta;
+  XBT_DEBUG("MMAP %zu to %p", size, mem);
+  return mem;
+}
+
+void *smpi_shared_malloc(size_t size, const char *file, int line) // allocates `size` bytes according to smpi_cfg_shared_malloc; file/line identify the calling source location
+{
+ void* mem;
+ if (size > 0 && smpi_cfg_shared_malloc == shmalloc_local) { // one shared memory object per source location, mapped once per call
+ smpi_source_location loc(file, line);
+ auto res = allocs.insert(std::make_pair(loc, shared_data_t()));
+ auto data = res.first;
+ if (res.second) {
+ // The insertion took place: this is the first allocation from this source location.
+ // Generate a shared memory name from the address of the shared_data:
+ char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on Mac OS X (shm_open raises ENAMETOOLONG otherwise)
+ snprintf(shmname, 31, "/shmalloc%p", &*data);
+ int fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd < 0) {
+ if (errno == EEXIST)
+ xbt_die("Please cleanup /dev/shm/%s", shmname);
+ else
+ xbt_die("An unhandled error occurred while opening %s. shm_open: %s", shmname, strerror(errno));
+ }
+ data->second.fd = fd;
+ data->second.count = 1;
+ mem = shm_map(fd, size, &*data);
+ if (shm_unlink(shmname) < 0) {
+ XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno));
+ }
+ XBT_DEBUG("Mapping %s at %p through %d", shmname, mem, fd);
+ } else { // location already known: map its existing object once more
+ mem = shm_map(data->second.fd, size, &*data);
+ data->second.count++;
+ }
+ XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, &*data);
+
+ } else if (smpi_cfg_shared_malloc == shmalloc_global) { // every block of the allocation is backed by the same small file
+ /* First reserve memory area */
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ xbt_assert(mem != MAP_FAILED, "Failed to allocate %luMiB of memory. Run \"sysctl vm.overcommit_memory=1\" as root "
+ "to allow big allocations.\n",
+ (unsigned long)(size >> 20));
+
+ /* Create bogus file if not done already */
+ if (smpi_shared_malloc_bogusfile == -1) {
+ /* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it.
+ * It still exists in memory but not in the file system (thus it cannot be leaked). */
+ char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX");
+ smpi_shared_malloc_bogusfile = mkstemp(name);
+ unlink(name);
+ xbt_free(name);
+ char* dumb = (char*)calloc(1, smpi_shared_malloc_blocksize);
+ ssize_t err = write(smpi_shared_malloc_bogusfile, dumb, smpi_shared_malloc_blocksize);
+ if(err<0)
+ xbt_die("Could not write bogus file for shared malloc");
+ xbt_free(dumb);
+ }
+
+ /* Map the bogus file in place of the anonymous memory */
+ unsigned int i;
+ for (i = 0; i < size / smpi_shared_malloc_blocksize; i++) { // full blocks: each one maps offset 0 of the bogus file
+ void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
+ void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
+ smpi_shared_malloc_bogusfile, 0);
+ xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
+ "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
+ strerror(errno));
+ }
+ if (size % smpi_shared_malloc_blocksize) { // trailing partial block, if any
+ void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
+ void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0);
+ xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
+ "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
+ strerror(errno));
+ }
+
+ shared_metadata_t newmeta;
+ //register metadata for memcpy avoidance
+ shared_data_key_type* data = (shared_data_key_type*)xbt_malloc(sizeof(shared_data_key_type)); // raw storage: only `second` is initialized below, `first` is never constructed
+ data->second.fd = -1;
+ data->second.count = 1;
+ newmeta.size = size;
+ newmeta.data = data;
+ allocs_metadata[mem] = newmeta;
+ } else { // no sharing requested; also reached in shmalloc_local mode when size is 0
+ mem = xbt_malloc(size);
+ XBT_DEBUG("Classic malloc %zu in %p", size, mem);
+ }
+
+ return mem;
+}
+
+/** Tell whether `ptr` points into a block handed out by smpi_shared_malloc() in one of the shared modes.
+ *
+ * @param ptr any address, not necessarily the start of an allocation
+ * @return 1 when `ptr` is the start of, or lies inside, a registered shared mapping; 0 otherwise (always 0 when
+ *         the shared malloc is configured neither as local nor as global)
+ */
+int smpi_is_shared(void*ptr){
+  if (smpi_cfg_shared_malloc != shmalloc_local && smpi_cfg_shared_malloc != shmalloc_global)
+    return 0;
+
+  // Fast path: ptr is exactly the start of a registered mapping
+  if (allocs_metadata.count(ptr) != 0)
+    return 1;
+
+  // Slow path: ptr may point inside a mapping. Iterate by reference, copying each entry is useless work.
+  for (auto const& entry : allocs_metadata) {
+    char* begin = static_cast<char*>(entry.first);
+    if (ptr >= entry.first && ptr < begin + entry.second.size)
+      return 1;
+  }
+  return 0;
+}
+
+/** Release a block obtained from smpi_shared_malloc().
+ *
+ * In both shared modes the mapping is unmapped with the size recorded at allocation time and its metadata entry is
+ * always removed, so that smpi_is_shared() never reports memory that is not mapped anymore. In local mode the
+ * backing shared memory object is closed and forgotten once its last mapping is gone.
+ *
+ * @param ptr address previously returned by smpi_shared_malloc(); a warning is emitted and nothing is done when a
+ *            shared mode is active and `ptr` is not a registered mapping
+ */
+void smpi_shared_free(void *ptr)
+{
+  if (smpi_cfg_shared_malloc == shmalloc_local) {
+    auto meta = allocs_metadata.find(ptr);
+    if (meta == allocs_metadata.end()) {
+      XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr);
+      return;
+    }
+    shared_data_key_type* entry = meta->second.data;
+    shared_data_t* data         = &entry->second;
+    if (munmap(ptr, meta->second.size) < 0) {
+      XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
+    }
+    // This mapping is gone whatever happens to the segment: never keep a stale record of it
+    allocs_metadata.erase(meta);
+    data->count--;
+    if (data->count <= 0) {
+      close(data->fd);
+      allocs.erase(allocs.find(entry->first));
+      XBT_DEBUG("Shared free - with removal - of %p", ptr);
+    } else {
+      XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
+    }
+
+  } else if (smpi_cfg_shared_malloc == shmalloc_global) {
+    auto meta = allocs_metadata.find(ptr);
+    if (meta == allocs_metadata.end()) {
+      XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr);
+      return;
+    }
+    const size_t size           = meta->second.size;
+    shared_data_key_type* entry = meta->second.data;
+    entry->second.count--;
+    if (entry->second.count <= 0)
+      xbt_free(entry);
+    // Remove the record together with its (possibly just freed) data pointer
+    allocs_metadata.erase(meta);
+    // Unmap the whole folded area: a length of 0 is rejected by munmap() and would leave everything mapped
+    if (munmap(ptr, size) < 0) {
+      XBT_WARN("Unmapping of %p failed: %s", ptr, strerror(errno));
+    }
+  } else {
+    XBT_DEBUG("Classic free of %p", ptr);
+    xbt_free(ptr);
+  }
+}
+#endif
+
+/** Tell whether a result was already stored for the call `func`:`input`.
+ *
+ * @return 1 when `calls` has an entry for "func:input", 0 when the lookup fails with not_found_error; any other
+ *         exception is propagated to the caller
+ */
+int smpi_shared_known_call(const char* func, const char* input)
+{
+  if (calls == nullptr)
+    calls = xbt_dict_new_homogeneous(nullptr);
+
+  char* key = bprintf("%s:%s", func, input);
+  int found;
+  try {
+    xbt_dict_get(calls, key); /* Succeed or throw */
+    found = 1;
+  } catch (xbt_ex& ex) {
+    if (ex.category != not_found_error) {
+      xbt_free(key);
+      throw;
+    }
+    found = 0; // a missing key is the normal "unknown call" answer
+  } catch (...) {
+    xbt_free(key);
+    throw;
+  }
+  xbt_free(key);
+  return found;
+}
+
+/** Return the data stored for the call `func`:`input`.
+ *
+ * The lookup throws when no data was stored for that call (see smpi_shared_known_call()); the temporary key is
+ * released in that case too before the exception is propagated.
+ */
+void* smpi_shared_get_call(const char* func, const char* input) {
+  char* loc = bprintf("%s:%s", func, input);
+
+  if (calls == nullptr)
+    calls = xbt_dict_new_homogeneous(nullptr);
+
+  void* data = nullptr;
+  try {
+    data = xbt_dict_get(calls, loc);
+  } catch (...) {
+    xbt_free(loc); // do not leak the key when the lookup fails
+    throw;
+  }
+  xbt_free(loc);
+  return data;
+}
+
+/** Store `data` as the result of the call `func`:`input` and hand `data` back to the caller. */
+void* smpi_shared_set_call(const char* func, const char* input, void* data) {
+  // The dictionary is created lazily by whichever call function runs first
+  if (calls == nullptr) {
+    calls = xbt_dict_new_homogeneous(nullptr);
+  }
+
+  char* key = bprintf("%s:%s", func, input);
+  xbt_dict_set(calls, key, data, nullptr);
+  xbt_free(key);
+
+  return data;
+}
+
src/smpi/instr_smpi.cpp
src/smpi/smpi_bench.cpp
src/smpi/smpi_memory.cpp
+ src/smpi/smpi_shared.cpp
src/smpi/smpi_static_variables.cpp
src/smpi/smpi_coll.cpp
src/smpi/smpi_coll.hpp