From 9710e7a45532ac1fc8302cf3e99077ecc92743ab Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Fri, 7 Apr 2017 11:40:31 +0200 Subject: [PATCH] Keep working on the communication optimization. --- include/smpi/smpi_shared_malloc.hpp | 4 +- src/smpi/smpi_global.cpp | 105 +++++++++++++--------------- src/smpi/smpi_shared.cpp | 47 ++++++++++++- 3 files changed, 96 insertions(+), 60 deletions(-) diff --git a/include/smpi/smpi_shared_malloc.hpp b/include/smpi/smpi_shared_malloc.hpp index cf554aea70..629d3f49ab 100644 --- a/include/smpi/smpi_shared_malloc.hpp +++ b/include/smpi/smpi_shared_malloc.hpp @@ -10,7 +10,9 @@ */ -XBT_PUBLIC(int) smpi_is_shared(void* ptr, std::vector> &private_blocks); +XBT_PUBLIC(int) smpi_is_shared(void* ptr, std::vector> &private_blocks, int *offset); +std::vector> shift_private_blocks(const std::vector> vec, int offset); +std::vector> merge_private_blocks(std::vector> src, std::vector> dst); #endif diff --git a/src/smpi/smpi_global.cpp b/src/smpi/smpi_global.cpp index a343824724..03b6153db9 100644 --- a/src/smpi/smpi_global.cpp +++ b/src/smpi/smpi_global.cpp @@ -104,79 +104,68 @@ void smpi_comm_set_copy_data_callback(void (*callback) (smx_activity_t, void*, s smpi_comm_copy_data_callback = callback; } -std::vector> merge_private_blocks(std::vector> src, std::vector> dst) { - std::vector> result; - int i_src=0, i_dst=0; - while(i_src < src.size() && i_dst < dst.size()) { - std::pair block; - if(src[i_src].first < dst[i_dst].first) { - block = src[i_src]; - i_src ++; - } - else { - block = dst[i_dst]; - i_dst ++; - } - if(block.first <= result.back().second) { // overlapping with the last block inserted - result.back().second = std::max(result.back().second, block.second); - } - else { // not overlapping, we insert a new block - result.push_back(block); - } +void memcpy_private(void *dest, const void *src, size_t n, std::vector> &private_blocks) { + for(auto block : private_blocks) { + memcpy((uint8_t*)src+block.first, (uint8_t*)dest+block.first, block.second-block.first); } - for(; i_src < src.size(); i_src++) { - result.push_back(src[i_src]); - } - for(; i_dst < dst.size(); i_dst++) { - result.push_back(dst[i_dst]); - } - return result; } void smpi_comm_copy_buffer_callback(smx_activity_t synchro, void *buff, size_t buff_size) { simgrid::kernel::activity::Comm *comm = dynamic_cast(synchro); int src_shared=0, dst_shared=0; + int src_offset, dst_offset; std::vector> src_private_blocks; std::vector> dst_private_blocks; XBT_DEBUG("Copy the data over"); - if(src_shared=smpi_is_shared(buff, src_private_blocks)) + if(src_shared=smpi_is_shared(buff, src_private_blocks, &src_offset)) { XBT_DEBUG("Sender %p is shared. Let's ignore it.", buff); - if(dst_shared=smpi_is_shared((char*)comm->dst_buff, src_private_blocks)) - XBT_DEBUG("Receiver %p is shared. Let's ignore it.", (char*)comm->dst_buff); - if(!src_shared && !dst_shared){ - void* tmpbuff=buff; - if((smpi_privatize_global_variables) && (static_cast(buff) >= smpi_start_data_exe) - && (static_cast(buff) < smpi_start_data_exe + smpi_size_data_exe ) - ){ - XBT_DEBUG("Privatization : We are copying from a zone inside global memory... Saving data to temp buffer !"); - - smpi_switch_data_segment( - (static_cast((static_cast(comm->src_proc->data)->data))->index())); - tmpbuff = static_cast(xbt_malloc(buff_size)); - memcpy(tmpbuff, buff, buff_size); - } + src_private_blocks = shift_private_blocks(src_private_blocks, src_offset); + } + else { + src_private_blocks.clear(); + src_private_blocks.push_back(std::make_pair(0, buff_size)); + } + if(dst_shared=smpi_is_shared((char*)comm->dst_buff, dst_private_blocks, &dst_offset)) { + XBT_DEBUG("Receiver %p is shared. Let's ignore it.", (char*)comm->dst_buff); + dst_private_blocks = shift_private_blocks(dst_private_blocks, dst_offset); + } + else { + dst_private_blocks.clear(); + dst_private_blocks.push_back(std::make_pair(0, buff_size)); + } + auto private_blocks = merge_private_blocks(src_private_blocks, dst_private_blocks); + void* tmpbuff=buff; + if((smpi_privatize_global_variables) && (static_cast(buff) >= smpi_start_data_exe) + && (static_cast(buff) < smpi_start_data_exe + smpi_size_data_exe ) + ){ + XBT_DEBUG("Privatization : We are copying from a zone inside global memory... Saving data to temp buffer !"); + + smpi_switch_data_segment( + (static_cast((static_cast(comm->src_proc->data)->data))->index())); + tmpbuff = static_cast(xbt_malloc(buff_size)); + memcpy_private(tmpbuff, buff, buff_size, private_blocks); + } - if((smpi_privatize_global_variables) && ((char*)comm->dst_buff >= smpi_start_data_exe) - && ((char*)comm->dst_buff < smpi_start_data_exe + smpi_size_data_exe )){ - XBT_DEBUG("Privatization : We are copying to a zone inside global memory - Switch data segment"); - smpi_switch_data_segment( - (static_cast((static_cast(comm->dst_proc->data)->data))->index())); - } + if((smpi_privatize_global_variables) && ((char*)comm->dst_buff >= smpi_start_data_exe) + && ((char*)comm->dst_buff < smpi_start_data_exe + smpi_size_data_exe )){ + XBT_DEBUG("Privatization : We are copying to a zone inside global memory - Switch data segment"); + smpi_switch_data_segment( + (static_cast((static_cast(comm->dst_proc->data)->data))->index())); + } - XBT_DEBUG("Copying %zu bytes from %p to %p", buff_size, tmpbuff,comm->dst_buff); - memcpy(comm->dst_buff, tmpbuff, buff_size); + XBT_DEBUG("Copying %zu bytes from %p to %p", buff_size, tmpbuff,comm->dst_buff); + memcpy_private(comm->dst_buff, tmpbuff, buff_size, private_blocks); - if (comm->detached) { - // if this is a detached send, the source buffer was duplicated by SMPI - // sender to make the original buffer available to the application ASAP - xbt_free(buff); - //It seems that the request is used after the call there this should be free somewhere else but where??? - //xbt_free(comm->comm.src_data);// inside SMPI the request is kept inside the user data and should be free - comm->src_buff = nullptr; - } - if(tmpbuff!=buff)xbt_free(tmpbuff); + if (comm->detached) { + // if this is a detached send, the source buffer was duplicated by SMPI + // sender to make the original buffer available to the application ASAP + xbt_free(buff); + //It seems that the request is used after the call there this should be free somewhere else but where??? + //xbt_free(comm->comm.src_data);// inside SMPI the request is kept inside the user data and should be free + comm->src_buff = nullptr; } + if(tmpbuff!=buff)xbt_free(tmpbuff); } diff --git a/src/smpi/smpi_shared.cpp b/src/smpi/smpi_shared.cpp index 731f4ae29d..1737ea7248 100644 --- a/src/smpi/smpi_shared.cpp +++ b/src/smpi/smpi_shared.cpp @@ -337,7 +337,7 @@ void *smpi_shared_malloc(size_t size, const char *file, int line) { return mem; } -int smpi_is_shared(void* ptr, std::vector> &private_blocks){ +int smpi_is_shared(void* ptr, std::vector> &private_blocks, int *offset){ private_blocks.clear(); // being paranoid if (allocs_metadata.empty()) return 0; @@ -345,12 +345,14 @@ int smpi_is_shared(void* ptr, std::vector> &private_blocks){ auto low = allocs_metadata.lower_bound(ptr); if (low->first==ptr) { private_blocks = low->second.private_blocks; + *offset = 0; return 1; } if (low == allocs_metadata.begin()) return 0; low --; if (ptr < (char*)low->first + low->second.size) { + *offset = ((uint8_t*) low->first) - ((uint8_t*)ptr); private_blocks = low->second.private_blocks; return 1; } @@ -360,6 +362,49 @@ int smpi_is_shared(void* ptr, std::vector> &private_blocks){ } } +std::vector> shift_private_blocks(const std::vector> vec, int offset) { + std::vector> result; + for(auto block: vec) { + auto new_block = std::make_pair(std::max(0, block.first-offset), std::max(0, block.second-offset)); + if(new_block.second > 0) + result.push_back(new_block); + } + return result; +} + +void append_or_merge_block(std::vector> &vec, std::pair &block) { + if(vec.size() > 0 && block.first <= vec.back().second) { // overlapping with the last block inserted + vec.back().second = std::max(vec.back().second, block.second); + } + else { // not overlapping, we insert a new block + vec.push_back(block); + } +} + +std::vector> merge_private_blocks(std::vector> src, std::vector> dst) { + std::vector> result; + unsigned i_src=0, i_dst=0; + while(i_src < src.size() && i_dst < dst.size()) { + std::pair block; + if(src[i_src].first < dst[i_dst].first) { + block = src[i_src]; + i_src ++; + } + else { + block = dst[i_dst]; + i_dst ++; + } + append_or_merge_block(result, block); + } + for(; i_src < src.size(); i_src++) { + append_or_merge_block(result, src[i_src]); + } + for(; i_dst < dst.size(); i_dst++) { + append_or_merge_block(result, dst[i_dst]); + } + return result; +} + void smpi_shared_free(void *ptr) { if (smpi_cfg_shared_malloc == shmalloc_local) { -- 2.20.1