XBT_PUBLIC(void *) smpi_shared_malloc(size_t size, const char *file, int line);
#define SMPI_SHARED_MALLOC(size) smpi_shared_malloc(size, __FILE__, __LINE__)
-XBT_PUBLIC(void *) smpi_shared_malloc_global__(size_t size, const char *file, int line, int *shared_block_offsets, int nb_shared_blocks);
+XBT_PUBLIC(void *) smpi_shared_malloc_global__(size_t size, const char *file, int line, size_t *shared_block_offsets, int nb_shared_blocks);
#define SMPI_PARTIAL_SHARED_MALLOC(size, shared_block_offsets, nb_shared_blocks)\
smpi_shared_malloc_global__(size, __FILE__, __LINE__, shared_block_offsets, nb_shared_blocks)
*/
-XBT_PUBLIC(int) smpi_is_shared(void* ptr, std::vector<std::pair<int, int>> &private_blocks, int *offset);
+XBT_PUBLIC(int) smpi_is_shared(void* ptr, std::vector<std::pair<size_t, size_t>> &private_blocks, size_t *offset);
-std::vector<std::pair<int, int>> shift_and_frame_private_blocks(const std::vector<std::pair<int, int>> vec, int offset, int buff_size);
-std::vector<std::pair<int, int>> merge_private_blocks(std::vector<std::pair<int, int>> src, std::vector<std::pair<int, int>> dst);
+std::vector<std::pair<size_t, size_t>> shift_and_frame_private_blocks(const std::vector<std::pair<size_t, size_t>> vec, size_t offset, size_t buff_size);
+std::vector<std::pair<size_t, size_t>> merge_private_blocks(std::vector<std::pair<size_t, size_t>> src, std::vector<std::pair<size_t, size_t>> dst);
#endif
smpi_comm_copy_data_callback = callback;
}
+/** Debugging helper: writes the blocks of vec to stderr as "{(0xSTART, 0xSTOP),...}" followed by
+ * a newline. Each pair is printed in hexadecimal, first member then second member. */
-void print(std::vector<std::pair<int, int>> vec) {
+void print(std::vector<std::pair<size_t, size_t>> vec) {
fprintf(stderr, "{");
for(auto elt: vec) {
- fprintf(stderr, "(0x%x, 0x%x),", elt.first, elt.second);
+ // The members are size_t now: %zx is the matching conversion, %x expects an unsigned int.
+ fprintf(stderr, "(0x%zx, 0x%zx),", elt.first, elt.second);
}
- stderr, fprintf(stderr, "}\n");
+ fprintf(stderr, "}\n");
}
+/** Copy only the listed blocks from src to dest.
+ * For every (start, stop) pair of private_blocks, the stop-start bytes located at offset start in
+ * src are copied to the same offset in dest; bytes outside the listed blocks are not written.
+ * The parameter n is accepted but not used by this function. */
-void memcpy_private(void *dest, const void *src, size_t n, std::vector<std::pair<int, int>> &private_blocks) {
+void memcpy_private(void *dest, const void *src, size_t n, std::vector<std::pair<size_t, size_t>> &private_blocks) {
for(auto block : private_blocks) {
memcpy((uint8_t*)dest+block.first, (uint8_t*)src+block.first, block.second-block.first);
}
}
-void check_blocks(std::vector<std::pair<int, int>> &private_blocks, size_t buff_size) {
+void check_blocks(std::vector<std::pair<size_t, size_t>> &private_blocks, size_t buff_size) {
for(auto block : private_blocks) {
xbt_assert(block.first >= 0 && block.second <= buff_size, "Oops, bug in shared malloc.");
}
{
simgrid::kernel::activity::Comm *comm = dynamic_cast<simgrid::kernel::activity::Comm*>(synchro);
int src_shared=0, dst_shared=0;
- int src_offset=0, dst_offset=0;
- std::vector<std::pair<int, int>> src_private_blocks;
- std::vector<std::pair<int, int>> dst_private_blocks;
+ size_t src_offset=0, dst_offset=0;
+ std::vector<std::pair<size_t, size_t>> src_private_blocks;
+ std::vector<std::pair<size_t, size_t>> dst_private_blocks;
XBT_DEBUG("Copy the data over");
if(src_shared=smpi_is_shared(buff, src_private_blocks, &src_offset)) {
XBT_DEBUG("Sender %p is shared. Let's ignore it.", buff);
+/* Record grouping a size, a list of (size_t, size_t) pairs and a pointer to a shared_data_key_type.
+ * NOTE(review): presumably one record per shared allocation, with private_blocks holding
+ * (start, stop) byte offsets of the non-shared parts - confirm against the code filling it. */
typedef struct {
size_t size;
- std::vector<std::pair<int, int>> private_blocks;
+ std::vector<std::pair<size_t, size_t>> private_blocks;
shared_data_key_type* data;
} shared_metadata_t;
+/* Round n up (ALIGN_UP) or down (ALIGN_DOWN) to a multiple of align.
+ * Both rely on masking with -(align), which only yields a multiple of align when align is a
+ * power of two. The arguments are evaluated more than once in ALIGN_UP (align appears twice). */
#define ALIGN_UP(n, align) (((n) + (align)-1) & -(align))
#define ALIGN_DOWN(n, align) ((n) & -(align))
-void *smpi_shared_malloc_global__(size_t size, const char *file, int line, int *shared_block_offsets, int nb_shared_blocks) {
+void *smpi_shared_malloc_global__(size_t size, const char *file, int line, size_t *shared_block_offsets, int nb_shared_blocks) {
void *mem;
xbt_assert(smpi_shared_malloc_blocksize % PAGE_SIZE == 0, "The block size of shared malloc should be a multiple of the page size.");
/* First reserve memory area */
/* Map the bogus file in place of the anonymous memory */
for(int i_block = 0; i_block < nb_shared_blocks; i_block ++) {
- int start_offset = shared_block_offsets[2*i_block];
- int stop_offset = shared_block_offsets[2*i_block+1];
- xbt_assert(0 <= start_offset, "start_offset (%d) should be greater than 0", start_offset);
- xbt_assert(start_offset < stop_offset, "start_offset (%d) should be lower than stop offset (%d)", start_offset, stop_offset);
- xbt_assert(stop_offset <= size, "stop_offset (%d) should be lower than size (%lu)", stop_offset, size);
+ size_t start_offset = shared_block_offsets[2*i_block];
+ size_t stop_offset = shared_block_offsets[2*i_block+1];
+ xbt_assert(0 <= start_offset, "start_offset (%lu) should be greater than 0", start_offset);
+ xbt_assert(start_offset < stop_offset, "start_offset (%lu) should be lower than stop offset (%lu)", start_offset, stop_offset);
+ xbt_assert(stop_offset <= size, "stop_offset (%lu) should be lower than size (%lu)", stop_offset, size);
if(i_block < nb_shared_blocks-1)
xbt_assert(stop_offset < shared_block_offsets[2*i_block+2],
- "stop_offset (%d) should be lower than its successor start offset (%d)", stop_offset, shared_block_offsets[2*i_block+2]);
+ "stop_offset (%lu) should be lower than its successor start offset (%lu)", stop_offset, shared_block_offsets[2*i_block+2]);
// fprintf(stderr, "shared block 0x%x - 0x%x\n", start_offset, stop_offset);
- int start_block_offset = ALIGN_UP(start_offset, smpi_shared_malloc_blocksize);
- int stop_block_offset = ALIGN_DOWN(stop_offset, smpi_shared_malloc_blocksize);
+ size_t start_block_offset = ALIGN_UP(start_offset, smpi_shared_malloc_blocksize);
+ size_t stop_block_offset = ALIGN_DOWN(stop_offset, smpi_shared_malloc_blocksize);
unsigned int i;
for (i = start_block_offset / smpi_shared_malloc_blocksize; i < stop_block_offset / smpi_shared_malloc_blocksize; i++) {
// fprintf(stderr, "\tmmap:for 0x%x - 0x%x\n", i*smpi_shared_malloc_blocksize, smpi_shared_malloc_blocksize);
"You can also try using the sysctl vm.max_map_count",
strerror(errno));
}
- int low_page_start_offset = ALIGN_UP(start_offset, PAGE_SIZE);
- int low_page_stop_offset = start_block_offset < ALIGN_DOWN(stop_offset, PAGE_SIZE) ? start_block_offset : ALIGN_DOWN(stop_offset, PAGE_SIZE);
+ size_t low_page_start_offset = ALIGN_UP(start_offset, PAGE_SIZE);
+ size_t low_page_stop_offset = start_block_offset < ALIGN_DOWN(stop_offset, PAGE_SIZE) ? start_block_offset : ALIGN_DOWN(stop_offset, PAGE_SIZE);
if(low_page_start_offset < low_page_stop_offset) {
// fprintf(stderr, "\tmmap:low 0x%x - 0x%x\n", low_page_start_offset, low_page_stop_offset-low_page_start_offset);
void* pos = (void*)((unsigned long)mem + low_page_start_offset);
strerror(errno));
}
if(low_page_stop_offset <= stop_block_offset) {
- int high_page_stop_offset = stop_offset == size ? size : ALIGN_DOWN(stop_offset, PAGE_SIZE);
+ size_t high_page_stop_offset = stop_offset == size ? size : ALIGN_DOWN(stop_offset, PAGE_SIZE);
if(high_page_stop_offset > stop_block_offset) {
// fprintf(stderr, "\tmmap:high 0x%x - 0x%x\n", stop_block_offset, high_page_stop_offset-stop_block_offset);
void* pos = (void*)((unsigned long)mem + stop_block_offset);
* Even indices are the start offsets (included), odd indices are the stop offsets (excluded).
* For instance, if shared_block_offsets == {27, 42}, then the elements mem[27], mem[28], ..., mem[41] are shared. The others are not.
*/
-void *smpi_shared_malloc_global(size_t size, const char *file, int line, int *shared_block_offsets=NULL, int nb_shared_blocks=-1) {
- int tmp_shared_block_offsets[2];
+void *smpi_shared_malloc_global(size_t size, const char *file, int line, size_t *shared_block_offsets=NULL, int nb_shared_blocks=-1) {
+ size_t tmp_shared_block_offsets[2];
if(nb_shared_blocks == -1) {
nb_shared_blocks = 1;
shared_block_offsets = tmp_shared_block_offsets;
return mem;
}
-int smpi_is_shared(void* ptr, std::vector<std::pair<int, int>> &private_blocks, int *offset){
+int smpi_is_shared(void* ptr, std::vector<std::pair<size_t, size_t>> &private_blocks, size_t *offset){
private_blocks.clear(); // being paranoid
if (allocs_metadata.empty())
return 0;
}
}
+/** @brief Express blocks relative to a buffer start and clip them to that buffer.
+ *
+ * Both bounds of every (start, stop) pair of vec are shifted down by offset and clamped into
+ * [0, buff_size]. A block is kept only if, after clamping, its stop is positive and its start is
+ * below buff_size.
+ * @param vec blocks to convert, as (start, stop) pairs
+ * @param offset amount subtracted from both bounds of every block
+ * @param buff_size upper limit applied to both bounds after the shift
+ * @return the shifted and clipped blocks, in the same order as in vec
+ */
-std::vector<std::pair<int, int>> shift_and_frame_private_blocks(const std::vector<std::pair<int, int>> vec, int offset, int buff_size) {
- std::vector<std::pair<int, int>> result;
+std::vector<std::pair<size_t, size_t>> shift_and_frame_private_blocks(const std::vector<std::pair<size_t, size_t>> vec, size_t offset, size_t buff_size) {
+ std::vector<std::pair<size_t, size_t>> result;
for(auto block: vec) {
- auto new_block = std::make_pair(std::min(std::max(0, block.first-offset), buff_size),
- std::min(std::max(0, block.second-offset), buff_size));
+ // Saturating subtraction. With size_t operands, "bound - offset" wraps around to a huge value
+ // when bound < offset, so std::max((size_t)0, bound - offset) cannot clamp it to 0: a block
+ // straddling offset would get start == buff_size and be dropped by the test below.
+ size_t shifted_first = block.first > offset ? block.first - offset : 0;
+ size_t shifted_second = block.second > offset ? block.second - offset : 0;
+ auto new_block = std::make_pair(std::min(shifted_first, buff_size), std::min(shifted_second, buff_size));
if(new_block.second > 0 && new_block.first < buff_size)
result.push_back(new_block);
}
return result;
}
-std::vector<std::pair<int, int>> merge_private_blocks(std::vector<std::pair<int, int>> src, std::vector<std::pair<int, int>> dst) {
- std::vector<std::pair<int, int>> result;
+std::vector<std::pair<size_t, size_t>> merge_private_blocks(std::vector<std::pair<size_t, size_t>> src, std::vector<std::pair<size_t, size_t>> dst) {
+ std::vector<std::pair<size_t, size_t>> result;
unsigned i_src=0, i_dst=0;
while(i_src < src.size() && i_dst < dst.size()) {
- std::pair<int, int> block;
+ std::pair<size_t, size_t> block;
if(src[i_src].second <= dst[i_dst].first) {
i_src++;
}
#include <assert.h>
// Set the elements between buf[start] and buf[stop-1] to (i+value)%256
+// buf is written at every index of the half-open range [start, stop); nothing is written when
+// start >= stop. The sum i+value is reduced modulo 256 before being stored in the byte.
-void set(uint8_t *buf, int start, int stop, uint8_t value) {
- for(int i = start; i < stop; i++) {
+void set(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
+ for(size_t i = start; i < stop; i++) {
buf[i] = (i+value)%256;
}
}
// Return the number of times that an element is equal to (i+value)%256 between buf[start] and buf[stop-1].
-int count_all(uint8_t *buf, int start, int stop, uint8_t value) {
- int occ = 0;
- for(int i = start ; i < stop ; i++) {
+int count_all(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
+ size_t occ = 0;
+ for(size_t i = start ; i < stop ; i++) {
if(buf[i] == (i+value)%256) {
occ ++;
}
}
// Return true iff the values from buf[start] to buf[stop-1] are all equal to (i+value)%256.
+// Delegates the counting to count_all and returns 1 when the number of matching bytes equals the
+// length of the range (stop-start), 0 otherwise.
-int check_all(uint8_t *buf, int start, int stop, uint8_t value) {
- int occ = count_all(buf, start, stop, value);
+int check_all(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
+ size_t occ = count_all(buf, start, stop, value);
return occ == stop-start;
}
// Return true iff "enough" elements are equal to (i+value)%256 between buf[start] and buf[stop-1].
+// Ranges of at most two pages always pass (returns 1). Longer ranges pass when the number of
+// matching bytes reported by count_all is at least the range length minus two pages.
+// The page size is hard-coded to 0x1000 bytes.
-int check_enough(uint8_t *buf, int start, int stop, uint8_t value) {
+int check_enough(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
- int page_size = 0x1000;
- int size = stop-start;
+ // Same unsigned type as the sizes it is compared with and subtracted from below.
+ const size_t page_size = 0x1000;
+ size_t size = stop-start;
if(size <= 2*page_size) // we are not sure to have a whole page that is shared
return 1;
- int occ = count_all(buf, start, stop, value);
+ size_t occ = count_all(buf, start, stop, value);
return occ >= size - 2*page_size;
}
MPI_Init(&argc, &argv);
int rank;
int size;
- int mem_size = 0x10000000;
- int shared_blocks[] = {
+ size_t mem_size = 0x10000000;
+ size_t shared_blocks[] = {
0, 0x1234567,
0x1300000, 0x1300010,
0x3456789, 0x3457890,
0x5555565, 0x5600000,
0x8000000, 0x10000000
};
- int nb_blocks = (sizeof(shared_blocks)/sizeof(int))/2;
+ int nb_blocks = (sizeof(shared_blocks)/sizeof(size_t))/2;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
//Let's Allocate a shared memory buffer
// Even processes write their rank in private blocks
if(rank%2 == 0) {
for(int i = 0; i < nb_blocks-1; i++) {
- int start = shared_blocks[2*i+1];
- int stop = shared_blocks[2*i+2];
+ size_t start = shared_blocks[2*i+1];
+ size_t stop = shared_blocks[2*i+2];
set(buf, start, stop, rank);
}
}
// Odd processes verify that they successfully received the message
if(rank%2 == 1) {
for(int i = 0; i < nb_blocks-1; i++) {
- int start = shared_blocks[2*i+1];
- int stop = shared_blocks[2*i+2];
+ size_t start = shared_blocks[2*i+1];
+ size_t stop = shared_blocks[2*i+2];
int comm = check_all(buf, start, stop, rank-1);
- printf("[%d] The result of the (normal) communication check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, comm);
+ printf("[%d] The result of the (normal) communication check for block (0x%lx, 0x%lx) is: %d\n", rank, start, stop, comm);
}
memset(buf, rank, mem_size);
}
// Odd processes verify that they successfully received the message
if(rank%2 == 1) {
for(int i = 0; i < nb_blocks-1; i++) {
- int start = shared_blocks[2*i+1];
- int stop = shared_blocks[2*i+2];
+ size_t start = shared_blocks[2*i+1];
+ size_t stop = shared_blocks[2*i+2];
int comm = check_all(buf, start, stop, rank-1);
- printf("[%d] The result of the (shifted) communication check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, comm);
+ printf("[%d] The result of the (shifted) communication check for block (0x%lx, 0x%lx) is: %d\n", rank, start, stop, comm);
}
}
#include <inttypes.h>
// Set the elements between buf[start] and buf[stop-1] to (i+value)%256
+// buf is written at every index of the half-open range [start, stop); nothing is written when
+// start >= stop. The sum i+value is reduced modulo 256 before being stored in the byte.
-void set(uint8_t *buf, int start, int stop, uint8_t value) {
- for(int i = start; i < stop; i++) {
+void set(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
+ for(size_t i = start; i < stop; i++) {
buf[i] = (i+value)%256;
}
}
// Return the number of times that an element is equal to (i+value)%256 between buf[start] and buf[stop-1].
-int count_all(uint8_t *buf, int start, int stop, uint8_t value) {
- int occ = 0;
- for(int i = start ; i < stop ; i++) {
+int count_all(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
+ size_t occ = 0;
+ for(size_t i = start ; i < stop ; i++) {
if(buf[i] == (i+value)%256) {
occ ++;
}
}
// Return true iff the values from buf[start] to buf[stop-1] are all equal to (i+value)%256.
+// Delegates the counting to count_all and returns 1 when the number of matching bytes equals the
+// length of the range (stop-start), 0 otherwise.
-int check_all(uint8_t *buf, int start, int stop, uint8_t value) {
- int occ = count_all(buf, start, stop, value);
+int check_all(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
+ size_t occ = count_all(buf, start, stop, value);
return occ == stop-start;
}
// Return true iff "enough" elements are equal to (i+value)%256 between buf[start] and buf[stop-1].
+// Ranges of at most two pages always pass (returns 1). Longer ranges pass when the number of
+// matching bytes reported by count_all is at least the range length minus two pages.
+// The page size is hard-coded to 0x1000 bytes.
-int check_enough(uint8_t *buf, int start, int stop, uint8_t value) {
+int check_enough(uint8_t *buf, size_t start, size_t stop, uint8_t value) {
- int page_size = 0x1000;
- int size = stop-start;
+ // Same unsigned type as the sizes it is compared with and subtracted from below.
+ const size_t page_size = 0x1000;
+ size_t size = stop-start;
if(size <= 2*page_size) // we are not sure to have a whole page that is shared
return 1;
- int occ = count_all(buf, start, stop, value);
+ size_t occ = count_all(buf, start, stop, value);
return occ >= size - 2*page_size;
}
MPI_Init(&argc, &argv);
int rank;
int size;
- int mem_size = 0x10000000;
- int shared_blocks[] = {
+ size_t mem_size = 0x10000000;
+ size_t shared_blocks[] = {
0, 0x1234567,
0x1300000, 0x1300010,
0x3456789, 0x3457890,
0x5555565, 0x5600000,
0x8000000, 0x10000000
};
- int nb_blocks = (sizeof(shared_blocks)/sizeof(int))/2;
+ int nb_blocks = (sizeof(shared_blocks)/sizeof(size_t))/2;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
//Let's Allocate a shared memory buffer
// Process 0 write in shared blocks
if(rank == 0) {
for(int i = 0; i < nb_blocks; i++) {
- int start = shared_blocks[2*i];
- int stop = shared_blocks[2*i+1];
+ size_t start = shared_blocks[2*i];
+ size_t stop = shared_blocks[2*i+1];
set(buf, start, stop, 42);
}
}
MPI_Barrier(MPI_COMM_WORLD);
// All processes check that their shared blocks have been written (at least partially)
for(int i = 0; i < nb_blocks; i++) {
- int start = shared_blocks[2*i];
- int stop = shared_blocks[2*i+1];
+ size_t start = shared_blocks[2*i];
+ size_t stop = shared_blocks[2*i+1];
int is_shared = check_enough(buf, start, stop, 42);
- printf("[%d] The result of the shared check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, is_shared);
+ printf("[%d] The result of the shared check for block (0x%lx, 0x%lx) is: %d\n", rank, start, stop, is_shared);
}
// Check the private blocks
MPI_Barrier(MPI_COMM_WORLD);
for(int i = 0; i < nb_blocks-1; i++) {
- int start = shared_blocks[2*i+1];
- int stop = shared_blocks[2*i+2];
+ size_t start = shared_blocks[2*i+1];
+ size_t stop = shared_blocks[2*i+2];
int is_private = check_all(buf, start, stop, 0);
- printf("[%d] The result of the private check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, is_private);
+ printf("[%d] The result of the private check for block (0x%lx, 0x%lx) is: %d\n", rank, start, stop, is_private);
}
SMPI_SHARED_FREE(buf);