"Whether SMPI_SHARED_MALLOC is enabled. Disable it for debugging purposes.");
xbt_cfg_register_alias("smpi/shared-malloc", "smpi/use-shared-malloc");
xbt_cfg_register_alias("smpi/shared-malloc", "smpi/use_shared_malloc");
+ xbt_cfg_register_double("smpi/shared-malloc-blocksize", 1UL << 20, nullptr, "Size of the bogus file which will be created for global shared allocations");
xbt_cfg_register_double("smpi/cpu-threshold", 1e-6, nullptr, "Minimal computation time (in seconds) not discarded, or -1 for infinity.");
xbt_cfg_register_alias("smpi/cpu-threshold", "smpi/cpu_threshold");
int Datatype::copy(void *sendbuf, int sendcount, MPI_Datatype sendtype,
void *recvbuf, int recvcount, MPI_Datatype recvtype){
int count;
+
+ if(smpi_is_shared(sendbuf)){
+ XBT_DEBUG("Copy input buf %p is shared. Let's ignore it.", sendbuf);
+ }else if(smpi_is_shared(recvbuf)){
+ XBT_DEBUG("Copy output buf %p is shared. Let's ignore it.", recvbuf);
+ }
+
if(smpi_privatize_global_variables){
smpi_switch_data_segment(smpi_process()->index());
}
Request::Request(void *buf, int count, MPI_Datatype datatype, int src, int dst, int tag, MPI_Comm comm, unsigned flags) : buf_(buf), old_type_(datatype), src_(src), dst_(dst), tag_(tag), comm_(comm), flags_(flags)
{
void *old_buf = nullptr;
- if((((flags & RECV) != 0) && ((flags & ACCUMULATE) !=0)) || (datatype->flags() & DT_FLAG_DERIVED)){
+ if(((((flags & RECV) != 0) && ((flags & ACCUMULATE) !=0)) || (datatype->flags() & DT_FLAG_DERIVED)) && (!smpi_is_shared(buf_))){
// This part handles the problem of non-contiguous memory
old_buf = buf;
buf_ = count==0 ? nullptr : xbt_malloc(count*datatype->size());
req->print_request("Finishing");
MPI_Datatype datatype = req->old_type_;
- if(((req->flags_ & ACCUMULATE) != 0) || (datatype->flags() & DT_FLAG_DERIVED)){
+ if((((req->flags_ & ACCUMULATE) != 0) || (datatype->flags() & DT_FLAG_DERIVED)) && (!smpi_is_shared(req->old_buf_))){
+
if (!smpi_process()->replaying()){
if( smpi_privatize_global_variables != 0 && (static_cast<char*>(req->old_buf_) >= smpi_start_data_exe)
&& ((char*)req->old_buf_ < smpi_start_data_exe + smpi_size_data_exe )){
* \ | |
* ----
*/
-#include <unordered_map>
+#include <map>
#include "private.h"
#include "private.hpp"
shared_data_key_type* data;
} shared_metadata_t;
-std::unordered_map<void*, shared_metadata_t> allocs_metadata;
+std::map<void*, shared_metadata_t> allocs_metadata;
xbt_dict_t calls = nullptr; /* Allocated on first use */
#ifndef WIN32
static int smpi_shared_malloc_bogusfile = -1;
if (smpi_shared_malloc_bogusfile == -1) {
/* Create a fd to a new file on disk, make it smpi_shared_malloc_blocksize big, and unlink it.
* It still exists in memory but not in the file system (thus it cannot be leaked). */
+ smpi_shared_malloc_blocksize = static_cast<unsigned long>(xbt_cfg_get_double("smpi/shared-malloc-blocksize"));
+ XBT_DEBUG("global shared allocation. Blocksize %lu", smpi_shared_malloc_blocksize);
char* name = xbt_strdup("/tmp/simgrid-shmalloc-XXXXXX");
smpi_shared_malloc_bogusfile = mkstemp(name);
unlink(name);
void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
smpi_shared_malloc_bogusfile, 0);
xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
- "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
+ "size of the mapped file using --cfg=smpi/shared-malloc-blocksize=newvalue (default 1048576) ?"
+ "You can also try using the sysctl vm.max_map_count",
strerror(errno));
}
if (size % smpi_shared_malloc_blocksize) {
void* res = mmap(pos, size % smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED | MAP_POPULATE, smpi_shared_malloc_bogusfile, 0);
xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
- "STARPU_MALLOC_SIMULATION_FOLD environment variable or the sysctl vm.max_map_count?",
+ "size of the mapped file using --cfg=smpi/shared-malloc-blocksize=newvalue (default 1048576) ?"
+ "You can also try using the sysctl vm.max_map_count",
strerror(errno));
}
return mem;
}
-int smpi_is_shared(void*ptr){
+/* Tell whether ptr points into a region described by an entry of allocs_metadata.
+ *
+ * allocs_metadata is an ordered map from a region's start address to its metadata (including its size).
+ * Returns 1 when ptr equals a recorded start address or lies inside the closest region starting below it,
+ * and 0 otherwise (also when the map is empty or shared malloc is neither local nor global).
+ */
+int smpi_is_shared(void* ptr){
+ if (allocs_metadata.empty())
+ return 0;
if ( smpi_cfg_shared_malloc == shmalloc_local || smpi_cfg_shared_malloc == shmalloc_global) {
- if (allocs_metadata.count(ptr) != 0)
- return 1;
- for(auto it : allocs_metadata){
- if (ptr >= it.first && ptr < (char*)it.first + it.second.size)
- return 1;
- }
+ // First entry whose start address is >= ptr; this is end() when ptr is above every recorded start.
+ auto low = allocs_metadata.lower_bound(ptr);
+ // end() must never be dereferenced: only compare the key when an entry was actually found.
+ if (low != allocs_metadata.end() && low->first == ptr)
+ return 1;
+ if (low == allocs_metadata.begin())
return 0;
+ // Step back to the closest region starting below ptr and check whether it extends past ptr.
+ low --;
+ if (ptr < (char*)low->first + low->second.size)
+ return 1;
+ return 0;
} else {
return 0;
}