From 4343434e9890543bac24758d71340c0b2d1ac7d9 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Fri, 7 Apr 2017 15:42:15 +0200 Subject: [PATCH 1/1] Some more fixes for the communication optimization. --- include/smpi/smpi_shared_malloc.hpp | 2 +- src/smpi/smpi_global.cpp | 28 +++++++++++++++++-- src/smpi/smpi_shared.cpp | 17 +++++------ .../macro-partial-shared-communication.c | 6 ++-- .../macro-partial-shared-communication.tesh | 4 +-- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/include/smpi/smpi_shared_malloc.hpp b/include/smpi/smpi_shared_malloc.hpp index 629d3f49ab..0f9f745001 100644 --- a/include/smpi/smpi_shared_malloc.hpp +++ b/include/smpi/smpi_shared_malloc.hpp @@ -12,7 +12,7 @@ XBT_PUBLIC(int) smpi_is_shared(void* ptr, std::vector> &private_blocks, int *offset); -std::vector> shift_private_blocks(const std::vector> vec, int offset); +std::vector> shift_and_frame_private_blocks(const std::vector> vec, int offset, int buff_size); std::vector> merge_private_blocks(std::vector> src, std::vector> dst); #endif diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index 1ad2a306b9..e2af2da3ed 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -117,17 +117,23 @@ void memcpy_private(void *dest, const void *src, size_t n, std::vector> &private_blocks, size_t buff_size) { + for(auto block : private_blocks) { + xbt_assert(block.first >= 0 && block.second <= buff_size, "Oops, bug in shared malloc."); + } +} + void smpi_comm_copy_buffer_callback(smx_activity_t synchro, void *buff, size_t buff_size) { simgrid::kernel::activity::Comm *comm = dynamic_cast(synchro); int src_shared=0, dst_shared=0; - int src_offset, dst_offset; + int src_offset=0, dst_offset=0; std::vector> src_private_blocks; std::vector> dst_private_blocks; XBT_DEBUG("Copy the data over"); if(src_shared=smpi_is_shared(buff, src_private_blocks, &src_offset)) { XBT_DEBUG("Sender %p is shared. Let's ignore it.", buff); - src_private_blocks = shift_private_blocks(src_private_blocks, src_offset); + src_private_blocks = shift_and_frame_private_blocks(src_private_blocks, src_offset, buff_size); } else { src_private_blocks.clear(); @@ -135,13 +141,29 @@ void smpi_comm_copy_buffer_callback(smx_activity_t synchro, void *buff, size_t b } if(dst_shared=smpi_is_shared((char*)comm->dst_buff, dst_private_blocks, &dst_offset)) { XBT_DEBUG("Receiver %p is shared. Let's ignore it.", (char*)comm->dst_buff); - dst_private_blocks = shift_private_blocks(dst_private_blocks, dst_offset); + dst_private_blocks = shift_and_frame_private_blocks(dst_private_blocks, dst_offset, buff_size); } else { dst_private_blocks.clear(); dst_private_blocks.push_back(std::make_pair(0, buff_size)); } +/* + fprintf(stderr, "size: 0x%x\n", buff_size); + fprintf(stderr, "src: "); + print(src_private_blocks); + fprintf(stderr, "src_offset = 0x%x\n", src_offset); + fprintf(stderr, "dst: "); + print(dst_private_blocks); + fprintf(stderr, "dst_offset = 0x%x\n", dst_offset); +*/ + check_blocks(src_private_blocks, buff_size); + check_blocks(dst_private_blocks, buff_size); auto private_blocks = merge_private_blocks(src_private_blocks, dst_private_blocks); +/* + fprintf(stderr, "Private blocks: "); + print(private_blocks); +*/ + check_blocks(private_blocks, buff_size); void* tmpbuff=buff; if((smpi_privatize_global_variables) && (static_cast(buff) >= smpi_start_data_exe) && (static_cast(buff) < smpi_start_data_exe + smpi_size_data_exe ) diff --git a/src/smpi/smpi_shared.cpp b/src/smpi/smpi_shared.cpp index e629da93ea..e4f5491209 100644 --- a/src/smpi/smpi_shared.cpp +++ b/src/smpi/smpi_shared.cpp @@ -364,14 +364,15 @@ int smpi_is_shared(void* ptr, std::vector> &private_blocks, } } -std::vector> shift_private_blocks(const std::vector> vec, int offset) { - std::vector> result; - for(auto block: vec) { - auto new_block = std::make_pair(std::max(0, block.first-offset), std::max(0, block.second-offset)); - if(new_block.second > 0) - result.push_back(new_block); - } - return result; +std::vector> shift_and_frame_private_blocks(const std::vector> vec, int offset, int buff_size) { + std::vector> result; + for(auto block: vec) { + auto new_block = std::make_pair(std::min(std::max(0, block.first-offset), buff_size), + std::min(std::max(0, block.second-offset), buff_size)); + if(new_block.second > 0 && new_block.first < buff_size) + result.push_back(new_block); + } + return result; } void append_or_merge_block(std::vector> &vec, std::pair &block) { diff --git a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c index 2a93238083..69841a8a11 100644 --- a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c +++ b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c @@ -99,12 +99,12 @@ int main(int argc, char *argv[]) MPI_Barrier(MPI_COMM_WORLD); // Then, even processes send a sub-part of their buffer their successor - // Note that the part (0, 0x10000) which is not sent is a shared part, so we do not care + // Note that the last block should not be copied entirely if(rank%2 == 0) { - MPI_Send(buf+0x10000, mem_size-0x10000, MPI_UINT8_T, rank+1, 0, MPI_COMM_WORLD); + MPI_Send(buf+0x10000, mem_size-0xa000000, MPI_UINT8_T, rank+1, 0, MPI_COMM_WORLD); } else { - MPI_Recv(buf+0x10000, mem_size-0x10000, MPI_UINT8_T, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(buf+0x10000, mem_size-0xa000000, MPI_UINT8_T, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } diff --git a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh index 2cf32cfed5..661eff770f 100644 --- a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh +++ b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh @@ -17,9 +17,9 @@ $ ${bindir:=.}/../../../bin/smpirun -hostfile ../hostfile -platform ../../../exa > [3] The result of the (shifted) communication check for block (0x1300010, 0x3456789) is: 1 > [3] The result of the (shifted) communication check for block (0x3457890, 0x4444444) is: 1 > [3] The result of the (shifted) communication check for block (0x5555555, 0x5555565) is: 1 -> [3] The result of the (shifted) communication check for block (0x5600000, 0x8000000) is: 1 +> [3] The result of the (shifted) communication check for block (0x5600000, 0x8000000) is: 0 > [1] The result of the (shifted) communication check for block (0x1234567, 0x1300000) is: 1 > [1] The result of the (shifted) communication check for block (0x1300010, 0x3456789) is: 1 > [1] The result of the (shifted) communication check for block (0x3457890, 0x4444444) is: 1 > [1] The result of the (shifted) communication check for block (0x5555555, 0x5555565) is: 1 -> [1] The result of the (shifted) communication check for block (0x5600000, 0x8000000) is: 1 +> [1] The result of the (shifted) communication check for block (0x5600000, 0x8000000) is: 0 -- 2.20.1