#ifndef WIN32
#include <sys/mman.h>
#endif
-#include <sys/stat.h>
-#include <errno.h>
-#include <fcntl.h>
#include <math.h> // sqrt
-#include <unistd.h>
-#include <string.h>
#include <stdio.h>
#if HAVE_PAPI
#include <papi.h>
#endif
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-#ifndef MAP_POPULATE
-#define MAP_POPULATE 0
-#endif
-
XBT_LOG_NEW_DEFAULT_SUBCATEGORY(smpi_bench, smpi, "Logging specific to SMPI (benchmarking)");
/* Shared allocations are handled through shared memory segments.
 * Associated data and metadata are used as follows:
 *
 *                                                                    mmap #1
 *    `allocs' dict                                                     ---- -.
 *    ----------      shared_data_t               shared_metadata_t   / |  |  |
 * .->| <name> | ---> -------------------- <--.   -----------------   | |  |  |
 * |  ----------      | fd of <name>     |    |   | size of mmap  | --| |  |  |
 * |                  | count (2)        |    |-- | data          |   \ |  |  |
 * `----------------- | <name>           |    |   -----------------     ----  |
 *                    --------------------    |   ^                           |
 *                                            |   |                           |
 *                                            |   |   `allocs_metadata' dict  |
 *                                            |   |   ----------------------  |
 *                                            |   `-- | <addr of mmap #1>  |<-'
 *                                            |   .-- | <addr of mmap #2>  |<-.
 *                                            |   |   ----------------------  |
 *                                            |   |                           |
 *                                            |   |                           |
 *                                            |   |                           |
 *                                            |   |                   mmap #2 |
 *                                            |   v                     ---- -'
 *                                            |   shared_metadata_t   / |  |
 *                                            |   -----------------   | |  |
 *                                            |   | size of mmap  | --| |  |
 *                                            `-- | data          |   | |  |
 *                                                -----------------   | |  |
 *                                                                    \ |  |
 *                                                                      ----
 */
-
-#define PTR_STRLEN (2 + 2 * sizeof(void*) + 1)
xbt_dict_t samples = nullptr; /* Allocated on first use */
-xbt_dict_t calls = nullptr; /* Allocated on first use */
double smpi_cpu_threshold;
double smpi_host_speed;
-int smpi_loaded_page = -1;
-char* smpi_start_data_exe = nullptr;
-int smpi_size_data_exe = 0;
-bool smpi_privatize_global_variables;
shared_malloc_type smpi_cfg_shared_malloc = shmalloc_global;
double smpi_total_benched_time = 0;
smpi_privatisation_region_t smpi_privatisation_regions;
-namespace {
-
-/** Some location in the source code
- *
- * This information is used by SMPI_SHARED_MALLOC to allocate some shared memory for all simulated processes.
- */
-class smpi_source_location {
-public:
- smpi_source_location(const char* filename, int line)
- : filename(xbt_strdup(filename)), filename_length(strlen(filename)), line(line)
- {
- }
-
- /** Pointer to a static string containing the file name */
- char* filename = nullptr;
- int filename_length = 0;
- int line = 0;
-
- bool operator==(smpi_source_location const& that) const
- {
- return filename_length == that.filename_length && line == that.line &&
- std::memcmp(filename, that.filename, filename_length) == 0;
- }
- bool operator!=(smpi_source_location const& that) const { return !(*this == that); }
-};
-}
-
-namespace std {
-
-template <> class hash<smpi_source_location> {
-public:
- typedef smpi_source_location argument_type;
- typedef std::size_t result_type;
- result_type operator()(smpi_source_location const& loc) const
- {
- return xbt_str_hash_ext(loc.filename, loc.filename_length) ^
- xbt_str_hash_ext((const char*)&loc.line, sizeof(loc.line));
- }
-};
-}
-
-namespace {
-
-typedef struct {
- int fd = -1;
- int count = 0;
-} shared_data_t;
-
-std::unordered_map<smpi_source_location, shared_data_t> allocs;
-typedef std::unordered_map<smpi_source_location, shared_data_t>::value_type shared_data_key_type;
-
-typedef struct {
- size_t size;
- shared_data_key_type* data;
-} shared_metadata_t;
-
-std::unordered_map<void*, shared_metadata_t> allocs_metadata;
-}
-
-static size_t shm_size(int fd) {
- struct stat st;
-
- if(fstat(fd, &st) < 0) {
- xbt_die("Could not stat fd %d: %s", fd, strerror(errno));
- }
- return static_cast<size_t>(st.st_size);
-}
-
-#ifndef WIN32
-static void* shm_map(int fd, size_t size, shared_data_key_type* data) {
- char loc[PTR_STRLEN];
- shared_metadata_t meta;
-
- if(size > shm_size(fd) && (ftruncate(fd, static_cast<off_t>(size)) < 0)) {
- xbt_die("Could not truncate fd %d to %zu: %s", fd, size, strerror(errno));
- }
-
- void* mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if(mem == MAP_FAILED) {
- xbt_die(
- "Failed to map fd %d with size %zu: %s\n"
- "If you are running a lot of ranks, you may be exceeding the amount of mappings allowed per process.\n"
- "On Linux systems, change this value with sudo sysctl -w vm.max_map_count=newvalue (default value: 65536)\n"
- "Please see http://simgrid.gforge.inria.fr/simgrid/latest/doc/html/options.html#options_virt for more info.",
- fd, size, strerror(errno));
- }
- snprintf(loc, PTR_STRLEN, "%p", mem);
- meta.size = size;
- meta.data = data;
- allocs_metadata[mem] = meta;
- XBT_DEBUG("MMAP %zu to %p", size, mem);
- return mem;
-}
-#endif
-
/** Releases the bookkeeping of this file: the `samples' and `calls' dicts and the shared allocation tables */
void smpi_bench_destroy()
{
  xbt_dict_free(&samples);
  xbt_dict_free(&calls);
  allocs_metadata.clear();
  allocs.clear();
}
extern "C" XBT_PUBLIC(void) smpi_execute_flops_(double *flops);
data->benching = 0;
}
-#ifndef WIN32
-static int smpi_shared_malloc_bogusfile = -1;
-static unsigned long smpi_shared_malloc_blocksize = 1UL << 20;
-void *smpi_shared_malloc(size_t size, const char *file, int line)
-{
- void* mem;
- if (size > 0 && smpi_cfg_shared_malloc == shmalloc_local) {
- smpi_source_location loc(file, line);
- auto res = allocs.insert(std::make_pair(loc, shared_data_t()));
- auto data = res.first;
- if (res.second) {
- // The insertion did not take place.
- // Generate a shared memory name from the address of the shared_data:
- char shmname[32]; // cannot be longer than PSHMNAMLEN = 31 on Mac OS X (shm_open raises ENAMETOOLONG otherwise)
- snprintf(shmname, 31, "/shmalloc%p", &*data);
- int fd = shm_open(shmname, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (fd < 0) {
- if (errno == EEXIST)
- xbt_die("Please cleanup /dev/shm/%s", shmname);
- else
- xbt_die("An unhandled error occurred while opening %s. shm_open: %s", shmname, strerror(errno));
- }
- data->second.fd = fd;
- data->second.count = 1;
- mem = shm_map(fd, size, &*data);
- if (shm_unlink(shmname) < 0) {
- XBT_WARN("Could not early unlink %s. shm_unlink: %s", shmname, strerror(errno));
- }
- XBT_DEBUG("Mapping %s at %p through %d", shmname, mem, fd);
- } else {
- mem = shm_map(data->second.fd, size, &*data);
- data->second.count++;
- }
- XBT_DEBUG("Shared malloc %zu in %p (metadata at %p)", size, mem, &*data);
-
- } else if (smpi_cfg_shared_malloc == shmalloc_global) {
- /* First reserve memory area */
- mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
- xbt_assert(mem != MAP_FAILED, "Failed to allocate %luMiB of memory. Run \"sysctl vm.overcommit_memory=1\" as root "
- "to allow big allocations.\n",
- (unsigned long)(size >> 20));
-
- /* Create bogus file if not done already */
- if (smpi_shared_malloc_bogusfile == -1) {
- /* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it.
- * It still exists in memory but not in the file system (thus it cannot be leaked). */
- char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX");
- smpi_shared_malloc_bogusfile = mkstemp(name);
- unlink(name);
- xbt_free(name);
- char* dumb = (char*)calloc(1, smpi_shared_malloc_blocksize);
- ssize_t err = write(smpi_shared_malloc_bogusfile, dumb, smpi_shared_malloc_blocksize);
- if(err<0)
- xbt_die("Could not write bogus file for shared malloc");
- xbt_free(dumb);
- }
-
- /* Map the bogus file in place of the anonymous memory */
- unsigned int i;
- for (i = 0; i < size / smpi_shared_malloc_blocksize; i++) {
- void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
- void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
- smpi_shared_malloc_bogusfile, 0);
- xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
- "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
- strerror(errno));
- }
- if (size % smpi_shared_malloc_blocksize) {
- void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
- void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0);
- xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
- "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
- strerror(errno));
- }
-
- shared_metadata_t newmeta;
- //register metadata for memcpy avoidance
- shared_data_key_type* data = (shared_data_key_type*)xbt_malloc(sizeof(shared_data_key_type));
- data->second.fd = -1;
- data->second.count = 1;
- newmeta.size = size;
- newmeta.data = data;
- allocs_metadata[mem] = newmeta;
- } else {
- mem = xbt_malloc(size);
- XBT_DEBUG("Classic malloc %zu in %p", size, mem);
- }
-
- return mem;
-}
-
-int smpi_is_shared(void*ptr){
- if ( smpi_cfg_shared_malloc == shmalloc_local || smpi_cfg_shared_malloc == shmalloc_global) {
- if (allocs_metadata.count(ptr) != 0)
- return 1;
- for(auto it : allocs_metadata){
- if (ptr >= it.first && ptr < (char*)it.first + it.second.size)
- return 1;
- }
- return 0;
- } else {
- return 0;
- }
-}
-
-void smpi_shared_free(void *ptr)
-{
- if (smpi_cfg_shared_malloc == shmalloc_local) {
- char loc[PTR_STRLEN];
- snprintf(loc, PTR_STRLEN, "%p", ptr);
- auto meta = allocs_metadata.find(ptr);
- if (meta == allocs_metadata.end()) {
- XBT_WARN("Cannot free: %p was not shared-allocated by SMPI - maybe its size was 0?", ptr);
- return;
- }
- shared_data_t* data = &meta->second.data->second;
- if (munmap(ptr, meta->second.size) < 0) {
- XBT_WARN("Unmapping of fd %d failed: %s", data->fd, strerror(errno));
- }
- data->count--;
- if (data->count <= 0) {
- close(data->fd);
- allocs.erase(allocs.find(meta->second.data->first));
- allocs_metadata.erase(ptr);
- XBT_DEBUG("Shared free - with removal - of %p", ptr);
- } else {
- XBT_DEBUG("Shared free - no removal - of %p, count = %d", ptr, data->count);
- }
-
- } else if (smpi_cfg_shared_malloc == shmalloc_global) {
- auto meta = allocs_metadata.find(ptr);
- if (meta != allocs_metadata.end()){
- meta->second.data->second.count--;
- if(meta->second.data->second.count==0)
- xbt_free(meta->second.data);
- }
-
- munmap(ptr, 0); // the POSIX says that I should not give 0 as a length, but it seems to work OK
- } else {
- XBT_DEBUG("Classic free of %p", ptr);
- xbt_free(ptr);
- }
-}
-#endif
-
-int smpi_shared_known_call(const char* func, const char* input)
-{
- char* loc = bprintf("%s:%s", func, input);
- int known = 0;
-
- if (calls==nullptr) {
- calls = xbt_dict_new_homogeneous(nullptr);
- }
- try {
- xbt_dict_get(calls, loc); /* Succeed or throw */
- known = 1;
- xbt_free(loc);
- }
- catch (xbt_ex& ex) {
- xbt_free(loc);
- if (ex.category != not_found_error)
- throw;
- }
- catch(...) {
- xbt_free(loc);
- throw;
- }
- return known;
-}
-
-void* smpi_shared_get_call(const char* func, const char* input) {
- char* loc = bprintf("%s:%s", func, input);
-
- if (calls == nullptr)
- calls = xbt_dict_new_homogeneous(nullptr);
- void* data = xbt_dict_get(calls, loc);
- xbt_free(loc);
- return data;
-}
-
-void* smpi_shared_set_call(const char* func, const char* input, void* data) {
- char* loc = bprintf("%s:%s", func, input);
-
- if (calls == nullptr)
- calls = xbt_dict_new_homogeneous(nullptr);
- xbt_dict_set(calls, loc, data, nullptr);
- xbt_free(loc);
- return data;
-}
-
-
-/** Map a given SMPI privatization segment (make a SMPI process active) */
-void smpi_switch_data_segment(int dest) {
- if (smpi_loaded_page == dest)//no need to switch, we've already loaded the one we want
- return;
-
- // So the job:
- smpi_really_switch_data_segment(dest);
-}
-
-/** Map a given SMPI privatization segment (make a SMPI process active) even if SMPI thinks it is already active
- *
- * When doing a state restoration, the state of the restored variables might not be consistent with the state of the
- * virtual memory. In this case, we to change the data segment.
- */
-void smpi_really_switch_data_segment(int dest)
-{
- if(smpi_size_data_exe == 0)//no need to switch
- return;
-
-#if HAVE_PRIVATIZATION
- if(smpi_loaded_page==-1){//initial switch, do the copy from the real page here
- for (int i=0; i< smpi_process_count(); i++){
- memcpy(smpi_privatisation_regions[i].address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
- }
- }
-
- // FIXME, cross-process support (mmap across process when necessary)
- int current = smpi_privatisation_regions[dest].file_descriptor;
- XBT_DEBUG("Switching data frame to the one of process %d", dest);
- void* tmp =
- mmap(TOPAGE(smpi_start_data_exe), smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, current, 0);
- if (tmp != TOPAGE(smpi_start_data_exe))
- xbt_die("Couldn't map the new region");
- smpi_loaded_page = dest;
-#endif
-}
-
/** Tells whether the path `file' starts with the prefix "/dev/shm/my-buffer-".
 *
 * NOTE(review): smpi_initialize_global_memory_segments() names its segments "/smpi-buffer-%06x", which does not
 * match this prefix -- confirm which of the two names is the intended one.
 *
 * @param file NUL-terminated path to test; a null pointer is accepted and never matches
 * @return 1 when `file' starts with the prefix, 0 otherwise
 */
int smpi_is_privatisation_file(char* file)
{
  static const char prefix[] = "/dev/shm/my-buffer-";

  if (file == nullptr)
    return 0;
  return strncmp(prefix, file, sizeof(prefix) - 1) == 0;
}
-
-void smpi_initialize_global_memory_segments()
-{
-
-#if !HAVE_PRIVATIZATION
- smpi_privatize_global_variables=false;
- xbt_die("You are trying to use privatization on a system that does not support it. Don't.");
- return;
-#else
-
- smpi_get_executable_global_size();
-
- XBT_DEBUG ("bss+data segment found : size %d starting at %p", smpi_size_data_exe, smpi_start_data_exe );
-
- if (smpi_size_data_exe == 0){//no need to switch
- smpi_privatize_global_variables=false;
- return;
- }
-
- smpi_privatisation_regions = static_cast<smpi_privatisation_region_t>(
- xbt_malloc(smpi_process_count() * sizeof(struct s_smpi_privatisation_region)));
-
- for (int i=0; i< smpi_process_count(); i++){
- // create SIMIX_process_count() mappings of this size with the same data inside
- int file_descriptor;
- void* address = nullptr;
- char path[24];
- int status;
-
- do {
- snprintf(path, sizeof(path), "/smpi-buffer-%06x", rand() % 0xffffff);
- file_descriptor = shm_open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
- } while (file_descriptor == -1 && errno == EEXIST);
- if (file_descriptor < 0) {
- if (errno == EMFILE) {
- xbt_die("Impossible to create temporary file for memory mapping: %s\n\
-The open() system call failed with the EMFILE error code (too many files). \n\n\
-This means that you reached the system limits concerning the amount of files per process. \
-This is not a surprise if you are trying to virtualize many processes on top of SMPI. \
-Don't panic -- you should simply increase your system limits and try again. \n\n\
-First, check what your limits are:\n\
- cat /proc/sys/fs/file-max # Gives you the system-wide limit\n\
- ulimit -Hn # Gives you the per process hard limit\n\
- ulimit -Sn # Gives you the per process soft limit\n\
- cat /proc/self/limits # Displays any per-process limitation (including the one given above)\n\n\
-If one of these values is less than the amount of MPI processes that you try to run, then you got the explanation of this error. \
-Ask the Internet about tutorials on how to increase the files limit such as: https://rtcamp.com/tutorials/linux/increase-open-files-limit/",
- strerror(errno));
- }
- xbt_die("Impossible to create temporary file for memory mapping: %s", strerror(errno));
- }
-
- status = ftruncate(file_descriptor, smpi_size_data_exe);
- if (status)
- xbt_die("Impossible to set the size of the temporary file for memory mapping");
-
- /* Ask for a free region */
- address = mmap(nullptr, smpi_size_data_exe, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor, 0);
- if (address == MAP_FAILED)
- xbt_die("Couldn't find a free region for memory mapping");
-
- status = shm_unlink(path);
- if (status)
- xbt_die("Impossible to unlink temporary file for memory mapping");
-
- // initialize the values
- memcpy(address, TOPAGE(smpi_start_data_exe), smpi_size_data_exe);
-
- // store the address of the mapping for further switches
- smpi_privatisation_regions[i].file_descriptor = file_descriptor;
- smpi_privatisation_regions[i].address = address;
- }
-#endif
-}
-
-void smpi_destroy_global_memory_segments(){
- if (smpi_size_data_exe == 0)//no need to switch
- return;
-#if HAVE_PRIVATIZATION
- for (int i=0; i< smpi_process_count(); i++) {
- if (munmap(smpi_privatisation_regions[i].address, smpi_size_data_exe) < 0)
- XBT_WARN("Unmapping of fd %d failed: %s", smpi_privatisation_regions[i].file_descriptor, strerror(errno));
- close(smpi_privatisation_regions[i].file_descriptor);
- }
- xbt_free(smpi_privatisation_regions);
-#endif
-}
-
extern "C" { /** These functions will be called from the user code **/
smpi_trace_call_location_t* smpi_trace_get_call_location() {
return smpi_process()->call_location();