From b16e6e644a2b398d2618a70c1b2f49c94c65d73c Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Fri, 7 Apr 2017 14:32:15 +0200 Subject: [PATCH] Fix the communication optimization. --- src/smpi/smpi_shared.cpp | 3 +- .../macro-partial-shared-communication.c | 25 +++++++++++++++- .../macro-partial-shared-communication.tesh | 30 ++++++++++++------- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/smpi/smpi_shared.cpp b/src/smpi/smpi_shared.cpp index 0aacb65c31..e629da93ea 100644 --- a/src/smpi/smpi_shared.cpp +++ b/src/smpi/smpi_shared.cpp @@ -353,7 +353,8 @@ int smpi_is_shared(void* ptr, std::vector> &private_blocks, return 0; low --; if (ptr < (char*)low->first + low->second.size) { - *offset = ((uint8_t*) low->first) - ((uint8_t*)ptr); + xbt_assert(ptr > (char*)low->first, "Oops, there seems to be a bug in the shared memory metadata."); + *offset = ((uint8_t*)ptr) - ((uint8_t*) low->first); private_blocks = low->second.private_blocks; return 1; } diff --git a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c index f5461c4208..2a93238083 100644 --- a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c +++ b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.c @@ -91,7 +91,30 @@ int main(int argc, char *argv[]) int start = shared_blocks[2*i+1]; int stop = shared_blocks[2*i+2]; int comm = check_all(buf, start, stop, rank-1); - printf("[%d] The result of the communication check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, comm); + printf("[%d] The result of the (normal) communication check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, comm); + } + memset(buf, rank, mem_size); + } + + MPI_Barrier(MPI_COMM_WORLD); + + // Then, even processes send a sub-part of their buffer to their successor + // Note that the part (0, 0x10000) which is not sent is
a shared part, so we do not care + if(rank%2 == 0) { + MPI_Send(buf+0x10000, mem_size-0x10000, MPI_UINT8_T, rank+1, 0, MPI_COMM_WORLD); + } + else { + MPI_Recv(buf+0x10000, mem_size-0x10000, MPI_UINT8_T, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + + // Odd processes verify that they successfully received the message + if(rank%2 == 1) { + for(int i = 0; i < nb_blocks-1; i++) { + int start = shared_blocks[2*i+1]; + int stop = shared_blocks[2*i+2]; + int comm = check_all(buf, start, stop, rank-1); + printf("[%d] The result of the (shifted) communication check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, comm); } } diff --git a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh index c129aca836..2cf32cfed5 100644 --- a/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh +++ b/teshsuite/smpi/macro-partial-shared-communication/macro-partial-shared-communication.tesh @@ -3,13 +3,23 @@ p Test compute ! output sort ! 
timeout 5 $ ${bindir:=.}/../../../bin/smpirun -hostfile ../hostfile -platform ../../../examples/platforms/small_platform.xml -np 4 ${bindir:=.}/macro-partial-shared-communication --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning -> [3] The result of the communication check for block (0x1234567, 0x1300000) is: 1 -> [3] The result of the communication check for block (0x1300010, 0x3456789) is: 1 -> [3] The result of the communication check for block (0x3457890, 0x4444444) is: 1 -> [3] The result of the communication check for block (0x5555555, 0x5555565) is: 1 -> [3] The result of the communication check for block (0x5600000, 0x8000000) is: 1 -> [1] The result of the communication check for block (0x1234567, 0x1300000) is: 1 -> [1] The result of the communication check for block (0x1300010, 0x3456789) is: 1 -> [1] The result of the communication check for block (0x3457890, 0x4444444) is: 1 -> [1] The result of the communication check for block (0x5555555, 0x5555565) is: 1 -> [1] The result of the communication check for block (0x5600000, 0x8000000) is: 1 +> [3] The result of the (normal) communication check for block (0x1234567, 0x1300000) is: 1 +> [3] The result of the (normal) communication check for block (0x1300010, 0x3456789) is: 1 +> [3] The result of the (normal) communication check for block (0x3457890, 0x4444444) is: 1 +> [3] The result of the (normal) communication check for block (0x5555555, 0x5555565) is: 1 +> [3] The result of the (normal) communication check for block (0x5600000, 0x8000000) is: 1 +> [1] The result of the (normal) communication check for block (0x1234567, 0x1300000) is: 1 +> [1] The result of the (normal) communication check for block (0x1300010, 0x3456789) is: 1 +> [1] The result of the (normal) communication check for block (0x3457890, 0x4444444) is: 1 +> [1] The result of the (normal) communication check for block (0x5555555, 0x5555565) is: 1 +> [1] The result of the (normal) communication check for block (0x5600000, 
0x8000000) is: 1 +> [3] The result of the (shifted) communication check for block (0x1234567, 0x1300000) is: 1 +> [3] The result of the (shifted) communication check for block (0x1300010, 0x3456789) is: 1 +> [3] The result of the (shifted) communication check for block (0x3457890, 0x4444444) is: 1 +> [3] The result of the (shifted) communication check for block (0x5555555, 0x5555565) is: 1 +> [3] The result of the (shifted) communication check for block (0x5600000, 0x8000000) is: 1 +> [1] The result of the (shifted) communication check for block (0x1234567, 0x1300000) is: 1 +> [1] The result of the (shifted) communication check for block (0x1300010, 0x3456789) is: 1 +> [1] The result of the (shifted) communication check for block (0x3457890, 0x4444444) is: 1 +> [1] The result of the (shifted) communication check for block (0x5555555, 0x5555565) is: 1 +> [1] The result of the (shifted) communication check for block (0x5600000, 0x8000000) is: 1 -- 2.20.1