From: degomme Date: Tue, 28 Mar 2017 11:55:37 +0000 (+0200) Subject: Split up smpi_bench.cpp . X-Git-Tag: v3.16~420^2 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/9bf049cea1d0bdabc36c9ffbb69ded21bf462ce4 Split up smpi_bench.cpp . Privatization is sent to smpi_memory.cpp (will be renamed later). smpi_shared* functions are sent to smpi_shared.cpp. I don't know what to do with trace_call_location yet. bench should only be timing related functions, smpi_sample* and sleep stuff. --- diff --git a/src/smpi/private.h b/src/smpi/private.h index 2ddc806da3..2fed1121fe 100644 --- a/src/smpi/private.h +++ b/src/smpi/private.h @@ -112,6 +112,7 @@ XBT_PRIVATE void smpi_destroy_global_memory_segments(); XBT_PRIVATE void smpi_bench_destroy(); XBT_PRIVATE void smpi_bench_begin(); XBT_PRIVATE void smpi_bench_end(); +XBT_PRIVATE void smpi_shared_destroy(); XBT_PRIVATE void* smpi_get_tmp_sendbuffer(int size); XBT_PRIVATE void* smpi_get_tmp_recvbuffer(int size); diff --git a/src/smpi/smpi_bench.cpp b/src/smpi/smpi_bench.cpp index 8a6cdb9294..a284bb180e 100644 --- a/src/smpi/smpi_bench.cpp +++ b/src/smpi/smpi_bench.cpp @@ -36,164 +36,21 @@ #include #endif -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -#ifndef MAP_POPULATE -#define MAP_POPULATE 0 -#endif - XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi, "Logging specific to SMPI (benchmarking)"); -/* Shared allocations are handled through shared memory segments. - * Associated data and metadata are used as follows: - * - * mmap #1 - * `allocs' dict ---- -. - * ---------- shared_data_t shared_metadata_t / | | | - * .->| | ---> -------------------- <--. ----------------- | | | | - * | ---------- | fd of | | | size of mmap | --| | | | - * | | count (2) | |-- | data | \ | | | - * `----------------- | | | ----------------- ---- | - * -------------------- | ^ | - * | | | - * | | `allocs_metadata' dict | - * | | ---------------------- | - * | `-- | |<-' - * | .-- | |<-. - * | | ---------------------- | - * | | | - * | | | - * | | | - * | | mmap #2 | - * | v ---- -' - * | shared_metadata_t / | | - * | ----------------- | | | - * | | size of mmap | --| | | - * `-- | data | | | | - * ----------------- | | | - * \ | | - * ---- - */ - -#define PTR_STRLEN (2 + 2 * sizeof(void*) + 1) xbt_dict_t samples = nullptr; /* Allocated on first use */ -xbt_dict_t calls = nullptr; /* Allocated on first use */ double smpi_cpu_threshold; double smpi_host_speed; -int smpi_loaded_page = -1; -char* smpi_start_data_exe = nullptr; -int smpi_size_data_exe = 0; -bool smpi_privatize_global_variables; shared_malloc_type smpi_cfg_shared_malloc = shmalloc_global; double smpi_total_benched_time = 0; smpi_privatisation_region_t smpi_privatisation_regions; -namespace { - -/** Some location in the source code - * - * This information is used by SMPI_SHARED_MALLOC to allocate some shared memory for all simulated processes. - */ -class smpi_source_location { -public: - smpi_source_location(const char* filename, int line) - : filename(xbt_strdup(filename)), filename_length(strlen(filename)), line(line) - { - } - - /** Pointer to a static string containing the file name */ - char* filename = nullptr; - int filename_length = 0; - int line = 0; - - bool operator==(smpi_source_location const& that) const - { - return filename_length == that.filename_length && line == that.line && - std::memcmp(filename, that.filename, filename_length) == 0; - } - bool operator!=(smpi_source_location const& that) const { return !(*this == that); } -}; -} - -namespace std { - -template <> class hash { -public: - typedef smpi_source_location argument_type; - typedef std::size_t result_type; - result_type operator()(smpi_source_location const& loc) const - { - return xbt_str_hash_ext(loc.filename, loc.filename_length) ^ - xbt_str_hash_ext((const char*)&loc.line, sizeof(loc.line)); - } -}; -} - -namespace { - -typedef struct { - int fd = -1; - int count = 0; -} shared_data_t; - -std::unordered_map allocs; -typedef std::unordered_map::value_type shared_data_key_type; - -typedef struct { - size_t size; - shared_data_key_type* data; -} shared_metadata_t; - -std::unordered_map allocs_metadata; -} - -static size_t shm_size(int fd) { - struct stat st; - - if(fstat(fd, &st) < 0) { - xbt_die("Could not stat fd %d: %s", fd, strerror(errno)); - } - return static_cast(st.st_size); -} - -#ifndef WIN32 -static void* shm_map(int fd, size_t size, shared_data_key_type* data) { - char loc[PTR_STRLEN]; - shared_metadata_t meta; - - if(size > shm_size(fd) && (ftruncate(fd, static_cast(size)) < 0)) { - xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno)); - } - - void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if(mem == MAP_FAILED) { - xbt_die( - "Failed to map fd %d with size %zu: %s\n" - "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n" - "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n" - "Please see http://simgrid.gforge.inria.fr/simgrid/latest/doc/html/options.html#options_virt for more info.", - fd, size, strerror(errno)); - } - snprintf(loc, PTR_STRLEN, "%p", mem); - meta.size = size; - meta.data = data; - allocs_metadata[mem] = meta; - XBT_DEBUG("MMAP %zu to %p", size, mem); - return mem; -} -#endif - void smpi_bench_destroy() { - allocs.clear(); - allocs_metadata.clear(); xbt_dict_free(&samples); - xbt_dict_free(&calls); } extern "C" XBT_PUBLIC(void) smpi_execute_flops_(double *flops); @@ -559,328 +416,6 @@ void smpi_sample_3(int global, const char *file, int line) data->benching = 0; } -#ifndef WIN32 -static int smpi_shared_malloc_bogusfile = -1; -static unsigned long smpi_shared_malloc_blocksize = 1UL << 20; -void *smpi_shared_malloc(size_t size, const char *file, int line) -{ - void* mem; - if (size > 0 && smpi_cfg_shared_malloc == shmalloc_local) { - smpi_source_location loc(file, line); - auto res = allocs.insert(std::make_pair(loc, shared_data_t())); - auto data = res.first; - if (res.second) { - // The insertion did not take place. - // Generate a shared memory name from the address of the shared_data: - char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on Mac OS X (shm_open raises ENAMETOOLONG otherwise) - snprintf(shmname, 31, "/shmalloc%p", &*data); - int fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (fd < 0) { - if (errno == EEXIST) - xbt_die("Please cleanup /dev/shm/%s", shmname); - else - xbt_die("An unhandled error occurred while opening %s. shm_open: %s", shmname, strerror(errno)); - } - data->second.fd = fd; - data->second.count = 1; - mem = shm_map(fd, size, &*data); - if (shm_unlink(shmname) < 0) { - XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno)); - } - XBT_DEBUG("Mapping %s at %p through %d", shmname, mem, fd); - } else { - mem = shm_map(data->second.fd, size, &*data); - data->second.count++; - } - XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, &*data); - - } else if (smpi_cfg_shared_malloc == shmalloc_global) { - /* First reserve memory area */ - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - - xbt_assert(mem != MAP_FAILED, "Failed to allocate %luMiB of memory. Run \"sysctl vm.overcommit_memory=1\" as root " - "to allow big allocations.\n", - (unsigned long)(size >> 20)); - - /* Create bogus file if not done already */ - if (smpi_shared_malloc_bogusfile == -1) { - /* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it. - * It still exists in memory but not in the file system (thus it cannot be leaked). */ - char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX"); - smpi_shared_malloc_bogusfile = mkstemp(name); - unlink(name); - xbt_free(name); - char* dumb = (char*)calloc(1, smpi_shared_malloc_blocksize); - ssize_t err = write(smpi_shared_malloc_bogusfile, dumb, smpi_shared_malloc_blocksize); - if(err<0) - xbt_die("Could not write bogus file for shared malloc"); - xbt_free(dumb); - } - - /* Map the bogus file in place of the anonymous memory */ - unsigned int i; - for (i = 0; i < size / smpi_shared_malloc_blocksize; i++) { - void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize); - void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE, - smpi_shared_malloc_bogusfile, 0); - xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the " - "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", - strerror(errno)); - } - if (size % smpi_shared_malloc_blocksize) { - void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize); - void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0); - xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the " - "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", - strerror(errno)); - } - - shared_metadata_t newmeta; - //register metadata for memcpy avoidance - shared_data_key_type* data = (shared_data_key_type*)xbt_malloc(sizeof(shared_data_key_type)); - data->second.fd = -1; - data->second.count = 1; - newmeta.size = size; - newmeta.data = data; - allocs_metadata[mem] = newmeta; - } else { - mem = xbt_malloc(size); - XBT_DEBUG("Classic malloc %zu in %p", size, mem); - } - - return mem; -} - -int smpi_is_shared(void*ptr){ - if ( smpi_cfg_shared_malloc == shmalloc_local || smpi_cfg_shared_malloc == shmalloc_global) { - if (allocs_metadata.count(ptr) != 0) - return 1; - for(auto it : allocs_metadata){ - if (ptr >= it.first && ptr < (char*)it.first + it.second.size) - return 1; - } - return 0; - } else { - return 0; - } -} - -void smpi_shared_free(void *ptr) -{ - if (smpi_cfg_shared_malloc == shmalloc_local) { - char loc[PTR_STRLEN]; - snprintf(loc, PTR_STRLEN, "%p", ptr); - auto meta = allocs_metadata.find(ptr); - if (meta == allocs_metadata.end()) { - XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr); - return; - } - shared_data_t* data = &meta->second.data->second; - if (munmap(ptr, meta->second.size) < 0) { - XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno)); - } - data->count--; - if (data->count <= 0) { - close(data->fd); - allocs.erase(allocs.find(meta->second.data->first)); - allocs_metadata.erase(ptr); - XBT_DEBUG("Shared free - with removal - of %p", ptr); - } else { - XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count); - } - - } else if (smpi_cfg_shared_malloc == shmalloc_global) { - auto meta = allocs_metadata.find(ptr); - if (meta != allocs_metadata.end()){ - meta->second.data->second.count--; - if(meta->second.data->second.count==0) - xbt_free(meta->second.data); - } - - munmap(ptr, 0); // the POSIX says that I should not give 0 as a length, but it seems to work OK - } else { - XBT_DEBUG("Classic free of %p", ptr); - xbt_free(ptr); - } -} -#endif - -int smpi_shared_known_call(const char* func, const char* input) -{ - char* loc = bprintf("%s:%s", func, input); - int known = 0; - - if (calls==nullptr) { - calls = xbt_dict_new_homogeneous(nullptr); - } - try { - xbt_dict_get(calls, loc); /* Succeed or throw */ - known = 1; - xbt_free(loc); - } - catch (xbt_ex& ex) { - xbt_free(loc); - if (ex.category != not_found_error) - throw; - } - catch(...) { - xbt_free(loc); - throw; - } - return known; -} - -void* smpi_shared_get_call(const char* func, const char* input) { - char* loc = bprintf("%s:%s", func, input); - - if (calls == nullptr) - calls = xbt_dict_new_homogeneous(nullptr); - void* data = xbt_dict_get(calls, loc); - xbt_free(loc); - return data; -} - -void* smpi_shared_set_call(const char* func, const char* input, void* data) { - char* loc = bprintf("%s:%s", func, input); - - if (calls == nullptr) - calls = xbt_dict_new_homogeneous(nullptr); - xbt_dict_set(calls, loc, data, nullptr); - xbt_free(loc); - return data; -} - - -/** Map a given SMPI privatization segment (make a SMPI process active) */ -void smpi_switch_data_segment(int dest) { - if (smpi_loaded_page == dest)//no need to switch, we've already loaded the one we want - return; - - // So the job: - smpi_really_switch_data_segment(dest); -} - -/** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active - * - * When doing a state restoration, the state of the restored variables might not be consistent with the state of the - * virtual memory. In this case, we to change the data segment. - */ -void smpi_really_switch_data_segment(int dest) -{ - if(smpi_size_data_exe == 0)//no need to switch - return; - -#if HAVE_PRIVATIZATION - if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here - for (int i=0; i< smpi_process_count(); i++){ - memcpy(smpi_privatisation_regions[i].address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe); - } - } - - // FIXME, cross-process support (mmap across process when necessary) - int current = smpi_privatisation_regions[dest].file_descriptor; - XBT_DEBUG("Switching data frame to the one of process %d", dest); - void* tmp = - mmap(TOPAGE(smpi_start_data_exe), smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0); - if (tmp != TOPAGE(smpi_start_data_exe)) - xbt_die("Couldn't map the new region"); - smpi_loaded_page = dest; -#endif -} - -int smpi_is_privatisation_file(char* file) -{ - return strncmp("/dev/shm/my-buffer-", file, std::strlen("/dev/shm/my-buffer-")) == 0; -} - -void smpi_initialize_global_memory_segments() -{ - -#if !HAVE_PRIVATIZATION - smpi_privatize_global_variables=false; - xbt_die("You are trying to use privatization on a system that does not support it. Don't."); - return; -#else - - smpi_get_executable_global_size(); - - XBT_DEBUG ("bss+data segment found : size %d starting at %p", smpi_size_data_exe, smpi_start_data_exe ); - - if (smpi_size_data_exe == 0){//no need to switch - smpi_privatize_global_variables=false; - return; - } - - smpi_privatisation_regions = static_cast( - xbt_malloc(smpi_process_count() * sizeof(struct s_smpi_privatisation_region))); - - for (int i=0; i< smpi_process_count(); i++){ - // create SIMIX_process_count() mappings of this size with the same data inside - int file_descriptor; - void* address = nullptr; - char path[24]; - int status; - - do { - snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffff); - file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); - } while (file_descriptor == -1 && errno == EEXIST); - if (file_descriptor < 0) { - if (errno == EMFILE) { - xbt_die("Impossible to create temporary file for memory mapping: %s\n\ -The open() system call failed with the EMFILE error code (too many files). \n\n\ -This means that you reached the system limits concerning the amount of files per process. \ -This is not a surprise if you are trying to virtualize many processes on top of SMPI. \ -Don't panic -- you should simply increase your system limits and try again. \n\n\ -First, check what your limits are:\n\ - cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\ - ulimit -Hn # Gives you the per process hard limit\n\ - ulimit -Sn # Gives you the per process soft limit\n\ - cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\ -If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \ -Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/", - strerror(errno)); - } - xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno)); - } - - status = ftruncate(file_descriptor, smpi_size_data_exe); - if (status) - xbt_die("Impossible to set the size of the temporary file for memory mapping"); - - /* Ask for a free region */ - address = mmap(nullptr, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0); - if (address == MAP_FAILED) - xbt_die("Couldn't find a free region for memory mapping"); - - status = shm_unlink(path); - if (status) - xbt_die("Impossible to unlink temporary file for memory mapping"); - - // initialize the values - memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe); - - // store the address of the mapping for further switches - smpi_privatisation_regions[i].file_descriptor = file_descriptor; - smpi_privatisation_regions[i].address = address; - } -#endif -} - -void smpi_destroy_global_memory_segments(){ - if (smpi_size_data_exe == 0)//no need to switch - return; -#if HAVE_PRIVATIZATION - for (int i=0; i< smpi_process_count(); i++) { - if (munmap(smpi_privatisation_regions[i].address, smpi_size_data_exe) < 0) - XBT_WARN("Unmapping of fd %d failed: %s", smpi_privatisation_regions[i].file_descriptor, strerror(errno)); - close(smpi_privatisation_regions[i].file_descriptor); - } - xbt_free(smpi_privatisation_regions); -#endif -} - extern "C" { /** These functions will be called from the user code **/ smpi_trace_call_location_t* smpi_trace_get_call_location() { return smpi_process()->call_location(); diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index e66b8351ef..7998f94e24 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -304,6 +304,7 @@ void smpi_global_destroy() int count = smpi_process_count(); smpi_bench_destroy(); + smpi_shared_destroy(); if (MPI_COMM_WORLD != MPI_COMM_UNINITIALIZED){ delete MPI_COMM_WORLD->group(); MSG_barrier_destroy(process_data[0]->finalization_barrier()); diff --git a/src/smpi/smpi_memory.cpp b/src/smpi/smpi_memory.cpp index 934885afa7..9296d975c8 100644 --- a/src/smpi/smpi_memory.cpp +++ b/src/smpi/smpi_memory.cpp @@ -11,6 +11,10 @@ #include #include +#include +#include +#include "simgrid/sg_config.h" +#include #ifndef WIN32 #include @@ -23,6 +27,11 @@ XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_memory, smpi, "Memory layout support for SMPI"); +int smpi_loaded_page = -1; +char* smpi_start_data_exe = nullptr; +int smpi_size_data_exe = 0; +bool smpi_privatize_global_variables; + static const int PROT_RWX = (PROT_READ | PROT_WRITE | PROT_EXEC); static const int PROT_RW = (PROT_READ | PROT_WRITE ); XBT_ATTRIB_UNUSED static const int PROT_RX = (PROT_READ | PROT_EXEC ); @@ -57,3 +66,134 @@ void smpi_get_executable_global_size() xbt_die("Did not find my data segment."); } #endif + + +/** Map a given SMPI privatization segment (make a SMPI process active) */ +void smpi_switch_data_segment(int dest) { + if (smpi_loaded_page == dest)//no need to switch, we've already loaded the one we want + return; + + // So the job: + smpi_really_switch_data_segment(dest); +} + +/** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active + * + * When doing a state restoration, the state of the restored variables might not be consistent with the state of the + * virtual memory. In this case, we to change the data segment. + */ +void smpi_really_switch_data_segment(int dest) +{ + if(smpi_size_data_exe == 0)//no need to switch + return; + +#if HAVE_PRIVATIZATION + if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here + for (int i=0; i< smpi_process_count(); i++){ + memcpy(smpi_privatisation_regions[i].address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe); + } + } + + // FIXME, cross-process support (mmap across process when necessary) + int current = smpi_privatisation_regions[dest].file_descriptor; + XBT_DEBUG("Switching data frame to the one of process %d", dest); + void* tmp = + mmap(TOPAGE(smpi_start_data_exe), smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0); + if (tmp != TOPAGE(smpi_start_data_exe)) + xbt_die("Couldn't map the new region"); + smpi_loaded_page = dest; +#endif +} + +int smpi_is_privatisation_file(char* file) +{ + return strncmp("/dev/shm/my-buffer-", file, std::strlen("/dev/shm/my-buffer-")) == 0; +} + +void smpi_initialize_global_memory_segments() +{ + +#if !HAVE_PRIVATIZATION + smpi_privatize_global_variables=false; + xbt_die("You are trying to use privatization on a system that does not support it. Don't."); + return; +#else + + smpi_get_executable_global_size(); + + XBT_DEBUG ("bss+data segment found : size %d starting at %p", smpi_size_data_exe, smpi_start_data_exe ); + + if (smpi_size_data_exe == 0){//no need to switch + smpi_privatize_global_variables=false; + return; + } + + smpi_privatisation_regions = static_cast( + xbt_malloc(smpi_process_count() * sizeof(struct s_smpi_privatisation_region))); + + for (int i=0; i< smpi_process_count(); i++){ + // create SIMIX_process_count() mappings of this size with the same data inside + int file_descriptor; + void* address = nullptr; + char path[24]; + int status; + + do { + snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffff); + file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + } while (file_descriptor == -1 && errno == EEXIST); + if (file_descriptor < 0) { + if (errno == EMFILE) { + xbt_die("Impossible to create temporary file for memory mapping: %s\n\ +The open() system call failed with the EMFILE error code (too many files). \n\n\ +This means that you reached the system limits concerning the amount of files per process. \ +This is not a surprise if you are trying to virtualize many processes on top of SMPI. \ +Don't panic -- you should simply increase your system limits and try again. \n\n\ +First, check what your limits are:\n\ + cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\ + ulimit -Hn # Gives you the per process hard limit\n\ + ulimit -Sn # Gives you the per process soft limit\n\ + cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\ +If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \ +Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/", + strerror(errno)); + } + xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno)); + } + + status = ftruncate(file_descriptor, smpi_size_data_exe); + if (status) + xbt_die("Impossible to set the size of the temporary file for memory mapping"); + + /* Ask for a free region */ + address = mmap(nullptr, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0); + if (address == MAP_FAILED) + xbt_die("Couldn't find a free region for memory mapping"); + + status = shm_unlink(path); + if (status) + xbt_die("Impossible to unlink temporary file for memory mapping"); + + // initialize the values + memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe); + + // store the address of the mapping for further switches + smpi_privatisation_regions[i].file_descriptor = file_descriptor; + smpi_privatisation_regions[i].address = address; + } +#endif +} + +void smpi_destroy_global_memory_segments(){ + if (smpi_size_data_exe == 0)//no need to switch + return; +#if HAVE_PRIVATIZATION + for (int i=0; i< smpi_process_count(); i++) { + if (munmap(smpi_privatisation_regions[i].address, smpi_size_data_exe) < 0) + XBT_WARN("Unmapping of fd %d failed: %s", smpi_privatisation_regions[i].file_descriptor, strerror(errno)); + close(smpi_privatisation_regions[i].file_descriptor); + } + xbt_free(smpi_privatisation_regions); +#endif +} + diff --git a/src/smpi/smpi_shared.cpp b/src/smpi/smpi_shared.cpp new file mode 100644 index 0000000000..e0aa0d1b35 --- /dev/null +++ b/src/smpi/smpi_shared.cpp @@ -0,0 +1,372 @@ +/* Copyright (c) 2007, 2009-2017. The SimGrid Team. All rights reserved. */ + +/* This program is free software; you can redistribute it and/or modify it + * under the terms of the license (GNU LGPL) which comes with this package. */ + +/* Shared allocations are handled through shared memory segments. + * Associated data and metadata are used as follows: + * + * mmap #1 + * `allocs' dict ---- -. + * ---------- shared_data_t shared_metadata_t / | | | + * .->| | ---> -------------------- <--. ----------------- | | | | + * | ---------- | fd of | | | size of mmap | --| | | | + * | | count (2) | |-- | data | \ | | | + * `----------------- | | | ----------------- ---- | + * -------------------- | ^ | + * | | | + * | | `allocs_metadata' dict | + * | | ---------------------- | + * | `-- | |<-' + * | .-- | |<-. + * | | ---------------------- | + * | | | + * | | | + * | | | + * | | mmap #2 | + * | v ---- -' + * | shared_metadata_t / | | + * | ----------------- | | | + * | | size of mmap | --| | | + * `-- | data | | | | + * ----------------- | | | + * \ | | + * ---- + */ +#include + +#include +#include + +#include "src/internal_config.h" +#include "private.h" +#include "private.hpp" +#include +#include "xbt/dict.h" +//#include "xbt/sysdep.h" +//#include "xbt/ex.h" +#include "surf/surf.h" +#include "simgrid/sg_config.h" +//#include "simgrid/modelchecker.h" +//#include "src/mc/mc_replay.h" + +#include +#ifndef WIN32 +#include +#endif +#include +#include +#include +//#include // sqrt +#include +#include +#include + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +#ifndef MAP_POPULATE +#define MAP_POPULATE 0 +#endif + +XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_shared, smpi, "Logging specific to SMPI (shared memory macros)"); + +#define PTR_STRLEN (2 + 2 * sizeof(void*) + 1) + +namespace{ +/** Some location in the source code + * + * This information is used by SMPI_SHARED_MALLOC to allocate some shared memory for all simulated processes. + */ + +class smpi_source_location { +public: + smpi_source_location(const char* filename, int line) + : filename(xbt_strdup(filename)), filename_length(strlen(filename)), line(line) + { + } + + /** Pointer to a static string containing the file name */ + char* filename = nullptr; + int filename_length = 0; + int line = 0; + + bool operator==(smpi_source_location const& that) const + { + return filename_length == that.filename_length && line == that.line && + std::memcmp(filename, that.filename, filename_length) == 0; + } + bool operator!=(smpi_source_location const& that) const { return !(*this == that); } +}; +} + +namespace std { + +template <> class hash { +public: + typedef smpi_source_location argument_type; + typedef std::size_t result_type; + result_type operator()(smpi_source_location const& loc) const + { + return xbt_str_hash_ext(loc.filename, loc.filename_length) ^ + xbt_str_hash_ext((const char*)&loc.line, sizeof(loc.line)); + } +}; +} + +namespace{ + +typedef struct { + int fd = -1; + int count = 0; +} shared_data_t; + +std::unordered_map allocs; +typedef std::unordered_map::value_type shared_data_key_type; + +typedef struct { + size_t size; + shared_data_key_type* data; +} shared_metadata_t; + +std::unordered_map allocs_metadata; +xbt_dict_t calls = nullptr; /* Allocated on first use */ +#ifndef WIN32 +static int smpi_shared_malloc_bogusfile = -1; +static unsigned long smpi_shared_malloc_blocksize = 1UL << 20; +#endif +} + + +void smpi_shared_destroy() +{ + allocs.clear(); + allocs_metadata.clear(); + xbt_dict_free(&calls); +} + +static size_t shm_size(int fd) { + struct stat st; + + if(fstat(fd, &st) < 0) { + xbt_die("Could not stat fd %d: %s", fd, strerror(errno)); + } + return static_cast(st.st_size); +} + +#ifndef WIN32 +static void* shm_map(int fd, size_t size, shared_data_key_type* data) { + char loc[PTR_STRLEN]; + shared_metadata_t meta; + + if(size > shm_size(fd) && (ftruncate(fd, static_cast(size)) < 0)) { + xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno)); + } + + void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if(mem == MAP_FAILED) { + xbt_die( + "Failed to map fd %d with size %zu: %s\n" + "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n" + "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n" + "Please see http://simgrid.gforge.inria.fr/simgrid/latest/doc/html/options.html#options_virt for more info.", + fd, size, strerror(errno)); + } + snprintf(loc, PTR_STRLEN, "%p", mem); + meta.size = size; + meta.data = data; + allocs_metadata[mem] = meta; + XBT_DEBUG("MMAP %zu to %p", size, mem); + return mem; +} + +void *smpi_shared_malloc(size_t size, const char *file, int line) +{ + void* mem; + if (size > 0 && smpi_cfg_shared_malloc == shmalloc_local) { + smpi_source_location loc(file, line); + auto res = allocs.insert(std::make_pair(loc, shared_data_t())); + auto data = res.first; + if (res.second) { + // The insertion did not take place. + // Generate a shared memory name from the address of the shared_data: + char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on Mac OS X (shm_open raises ENAMETOOLONG otherwise) + snprintf(shmname, 31, "/shmalloc%p", &*data); + int fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + if (errno == EEXIST) + xbt_die("Please cleanup /dev/shm/%s", shmname); + else + xbt_die("An unhandled error occurred while opening %s. shm_open: %s", shmname, strerror(errno)); + } + data->second.fd = fd; + data->second.count = 1; + mem = shm_map(fd, size, &*data); + if (shm_unlink(shmname) < 0) { + XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno)); + } + XBT_DEBUG("Mapping %s at %p through %d", shmname, mem, fd); + } else { + mem = shm_map(data->second.fd, size, &*data); + data->second.count++; + } + XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, &*data); + + } else if (smpi_cfg_shared_malloc == shmalloc_global) { + /* First reserve memory area */ + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + xbt_assert(mem != MAP_FAILED, "Failed to allocate %luMiB of memory. Run \"sysctl vm.overcommit_memory=1\" as root " + "to allow big allocations.\n", + (unsigned long)(size >> 20)); + + /* Create bogus file if not done already */ + if (smpi_shared_malloc_bogusfile == -1) { + /* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it. + * It still exists in memory but not in the file system (thus it cannot be leaked). */ + char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX"); + smpi_shared_malloc_bogusfile = mkstemp(name); + unlink(name); + xbt_free(name); + char* dumb = (char*)calloc(1, smpi_shared_malloc_blocksize); + ssize_t err = write(smpi_shared_malloc_bogusfile, dumb, smpi_shared_malloc_blocksize); + if(err<0) + xbt_die("Could not write bogus file for shared malloc"); + xbt_free(dumb); + } + + /* Map the bogus file in place of the anonymous memory */ + unsigned int i; + for (i = 0; i < size / smpi_shared_malloc_blocksize; i++) { + void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize); + void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE, + smpi_shared_malloc_bogusfile, 0); + xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the " + "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", + strerror(errno)); + } + if (size % smpi_shared_malloc_blocksize) { + void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize); + void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0); + xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the " + "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?", + strerror(errno)); + } + + shared_metadata_t newmeta; + //register metadata for memcpy avoidance + shared_data_key_type* data = (shared_data_key_type*)xbt_malloc(sizeof(shared_data_key_type)); + data->second.fd = -1; + data->second.count = 1; + newmeta.size = size; + newmeta.data = data; + allocs_metadata[mem] = newmeta; + } else { + mem = xbt_malloc(size); + XBT_DEBUG("Classic malloc %zu in %p", size, mem); + } + + return mem; +} + +int smpi_is_shared(void*ptr){ + if ( smpi_cfg_shared_malloc == shmalloc_local || smpi_cfg_shared_malloc == shmalloc_global) { + if (allocs_metadata.count(ptr) != 0) + return 1; + for(auto it : allocs_metadata){ + if (ptr >= it.first && ptr < (char*)it.first + it.second.size) + return 1; + } + return 0; + } else { + return 0; + } +} + +void smpi_shared_free(void *ptr) +{ + if (smpi_cfg_shared_malloc == shmalloc_local) { + char loc[PTR_STRLEN]; + snprintf(loc, PTR_STRLEN, "%p", ptr); + auto meta = allocs_metadata.find(ptr); + if (meta == allocs_metadata.end()) { + XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr); + return; + } + shared_data_t* data = &meta->second.data->second; + if (munmap(ptr, meta->second.size) < 0) { + XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno)); + } + data->count--; + if (data->count <= 0) { + close(data->fd); + allocs.erase(allocs.find(meta->second.data->first)); + allocs_metadata.erase(ptr); + XBT_DEBUG("Shared free - with removal - of %p", ptr); + } else { + XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count); + } + + } else if (smpi_cfg_shared_malloc == shmalloc_global) { + auto meta = allocs_metadata.find(ptr); + if (meta != allocs_metadata.end()){ + meta->second.data->second.count--; + if(meta->second.data->second.count==0) + xbt_free(meta->second.data); + } + + munmap(ptr, 0); // the POSIX says that I should not give 0 as a length, but it seems to work OK + } else { + XBT_DEBUG("Classic free of %p", ptr); + xbt_free(ptr); + } +} +#endif + +int smpi_shared_known_call(const char* func, const char* input) +{ + char* loc = bprintf("%s:%s", func, input); + int known = 0; + + if (calls==nullptr) { + calls = xbt_dict_new_homogeneous(nullptr); + } + try { + xbt_dict_get(calls, loc); /* Succeed or throw */ + known = 1; + xbt_free(loc); + } + catch (xbt_ex& ex) { + xbt_free(loc); + if (ex.category != not_found_error) + throw; + } + catch(...) { + xbt_free(loc); + throw; + } + return known; +} + +void* smpi_shared_get_call(const char* func, const char* input) { + char* loc = bprintf("%s:%s", func, input); + + if (calls == nullptr) + calls = xbt_dict_new_homogeneous(nullptr); + void* data = xbt_dict_get(calls, loc); + xbt_free(loc); + return data; +} + +void* smpi_shared_set_call(const char* func, const char* input, void* data) { + char* loc = bprintf("%s:%s", func, input); + + if (calls == nullptr) + calls = xbt_dict_new_homogeneous(nullptr); + xbt_dict_set(calls, loc, data, nullptr); + xbt_free(loc); + return data; +} + diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index e7ab091d90..b4a3261869 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -207,6 +207,7 @@ set(SMPI_SRC src/smpi/instr_smpi.cpp src/smpi/smpi_bench.cpp src/smpi/smpi_memory.cpp + src/smpi/smpi_shared.cpp src/smpi/smpi_static_variables.cpp src/smpi/smpi_coll.cpp src/smpi/smpi_coll.hpp