Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix tests for shared and partial-shared.
authorTom Cornebize <tom.cornebize@ensimag.grenoble-inp.fr>
Wed, 5 Apr 2017 12:14:03 +0000 (14:14 +0200)
committerTom Cornebize <tom.cornebize@ensimag.grenoble-inp.fr>
Wed, 5 Apr 2017 12:14:03 +0000 (14:14 +0200)
.gitignore
src/smpi/smpi_shared.cpp
teshsuite/smpi/CMakeLists.txt
teshsuite/smpi/macro-partial-shared/macro-partial-shared.c [new file with mode: 0644]
teshsuite/smpi/macro-partial-shared/macro-partial-shared.tesh [new file with mode: 0644]

index 7bd9666..891f4ad 100644 (file)
@@ -986,6 +986,7 @@ teshsuite/smpi/coll-reduce/coll-reduce
 teshsuite/smpi/coll-reduce-scatter/coll-reduce-scatter
 teshsuite/smpi/coll-scatter/coll-scatter
 teshsuite/smpi/macro-shared/macro-shared
 teshsuite/smpi/coll-reduce-scatter/coll-reduce-scatter
 teshsuite/smpi/coll-scatter/coll-scatter
 teshsuite/smpi/macro-shared/macro-shared
+teshsuite/smpi/macro-partial-shared/macro-partial-shared
 teshsuite/smpi/type-struct/type-struct
 teshsuite/smpi/type-vector/type-vector
 teshsuite/surf/lmm_usage/lmm_usage
 teshsuite/smpi/type-struct/type-struct
 teshsuite/smpi/type-vector/type-vector
 teshsuite/surf/lmm_usage/lmm_usage
index 7d7cced..ec76492 100644 (file)
@@ -202,11 +202,13 @@ void *smpi_shared_malloc_local(size_t size, const char *file, int line)
 }
 
 // Align functions, from http://stackoverflow.com/questions/4840410/how-to-align-a-pointer-in-c
 }
 
 // Align functions, from http://stackoverflow.com/questions/4840410/how-to-align-a-pointer-in-c
+#define PAGE_SIZE 0x1000
 #define ALIGN_UP(n, align) (((n) + (align)-1) & -(align))
 #define ALIGN_DOWN(n, align) ((n) & -(align))
 
 void *smpi_shared_malloc_global__(size_t size, const char *file, int line, int *shared_block_offsets, int nb_shared_blocks) {
   void *mem;
 #define ALIGN_UP(n, align) (((n) + (align)-1) & -(align))
 #define ALIGN_DOWN(n, align) ((n) & -(align))
 
 void *smpi_shared_malloc_global__(size_t size, const char *file, int line, int *shared_block_offsets, int nb_shared_blocks) {
   void *mem;
+  xbt_assert(smpi_shared_malloc_blocksize % PAGE_SIZE == 0, "The block size of shared malloc should be a multiple of the page size.");
   /* First reserve memory area */
   mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 
   /* First reserve memory area */
   mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 
@@ -233,10 +235,12 @@ void *smpi_shared_malloc_global__(size_t size, const char *file, int line, int *
 
   /* Map the bogus file in place of the anonymous memory */
   for(int i_block = 0; i_block < nb_shared_blocks; i_block ++) {
 
   /* Map the bogus file in place of the anonymous memory */
   for(int i_block = 0; i_block < nb_shared_blocks; i_block ++) {
-    int start_offset = ALIGN_UP(shared_block_offsets[2*i_block], smpi_shared_malloc_blocksize);
-    int stop_offset = ALIGN_DOWN(shared_block_offsets[2*i_block+1], smpi_shared_malloc_blocksize);
+    int start_offset = shared_block_offsets[2*i_block];
+    int stop_offset = shared_block_offsets[2*i_block+1];
+    int start_block_offset = ALIGN_UP(start_offset, smpi_shared_malloc_blocksize);
+    int stop_block_offset = ALIGN_DOWN(stop_offset, smpi_shared_malloc_blocksize);
     unsigned int i;
     unsigned int i;
-    for (i = start_offset / smpi_shared_malloc_blocksize; i < stop_offset / smpi_shared_malloc_blocksize; i++) {
+    for (i = start_block_offset / smpi_shared_malloc_blocksize; i < stop_block_offset / smpi_shared_malloc_blocksize; i++) {
       void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
       void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
                        smpi_shared_malloc_bogusfile, 0);
       void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
       void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
                        smpi_shared_malloc_bogusfile, 0);
@@ -245,6 +249,29 @@ void *smpi_shared_malloc_global__(size_t size, const char *file, int line, int *
                              "You can also try using  the sysctl vm.max_map_count",
                  strerror(errno));
     }
                              "You can also try using  the sysctl vm.max_map_count",
                  strerror(errno));
     }
+    int low_page_start_offset = ALIGN_UP(start_offset, PAGE_SIZE);
+    int low_page_stop_offset = start_block_offset < ALIGN_DOWN(stop_offset, PAGE_SIZE) ? start_block_offset : ALIGN_DOWN(stop_offset, PAGE_SIZE);
+    if(low_page_start_offset < low_page_stop_offset) {
+      void* pos = (void*)((unsigned long)mem + low_page_start_offset);
+      void* res = mmap(pos, low_page_stop_offset-low_page_start_offset, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
+                       smpi_shared_malloc_bogusfile, 0);
+      xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
+                             "size of the mapped file using --cfg=smpi/shared-malloc-blocksize=newvalue (default 1048576) ?"
+                             "You can also try using  the sysctl vm.max_map_count",
+                 strerror(errno));
+    }
+    if(low_page_stop_offset <= stop_block_offset) {
+      int high_page_stop_offset = stop_offset == size ? size : ALIGN_DOWN(stop_offset, PAGE_SIZE);
+      if(high_page_stop_offset > stop_block_offset) {
+        void* pos = (void*)((unsigned long)mem + stop_block_offset);
+        void* res = mmap(pos, high_page_stop_offset-stop_block_offset, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
+                         smpi_shared_malloc_bogusfile, 0);
+        xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
+                               "size of the mapped file using --cfg=smpi/shared-malloc-blocksize=newvalue (default 1048576) ?"
+                               "You can also try using  the sysctl vm.max_map_count",
+                   strerror(errno));
+      }
+    }
   }
 
   if(nb_shared_blocks == 1 && shared_block_offsets[0] == 0 && shared_block_offsets[1] == size) {
   }
 
   if(nb_shared_blocks == 1 && shared_block_offsets[0] == 0 && shared_block_offsets[1] == size) {
index 30ff3c6..36d1546 100644 (file)
@@ -21,14 +21,20 @@ if(enable_smpi)
     add_executable       (macro-shared macro-shared/macro-shared.c)
     target_link_libraries(macro-shared simgrid)
     set_target_properties(macro-shared PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/macro-shared)
     add_executable       (macro-shared macro-shared/macro-shared.c)
     target_link_libraries(macro-shared simgrid)
     set_target_properties(macro-shared PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/macro-shared)
+
+    add_executable       (macro-partial-shared macro-partial-shared/macro-partial-shared.c)
+    target_link_libraries(macro-partial-shared simgrid)
+    set_target_properties(macro-partial-shared PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/macro-partial-shared)
   endif()
 endif()
 
 set(teshsuite_src ${teshsuite_src}  ${CMAKE_CURRENT_SOURCE_DIR}/macro-shared/macro-shared.c                PARENT_SCOPE)
   endif()
 endif()
 
 set(teshsuite_src ${teshsuite_src}  ${CMAKE_CURRENT_SOURCE_DIR}/macro-shared/macro-shared.c                PARENT_SCOPE)
+set(teshsuite_src ${teshsuite_src}  ${CMAKE_CURRENT_SOURCE_DIR}/macro-partial-shared/macro-partial-shared.c                PARENT_SCOPE)
 set(tesh_files    ${tesh_files}     ${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-large.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-automatic.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/coll-alltoall/clusters.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/macro-shared/macro-shared.tesh
 set(tesh_files    ${tesh_files}     ${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-large.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-automatic.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/coll-alltoall/clusters.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/macro-shared/macro-shared.tesh
+                                    ${CMAKE_CURRENT_SOURCE_DIR}/macro-partial-shared/macro-partial-shared.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/pt2pt-pingpong/broken_hostfiles.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/pt2pt-pingpong/TI_output.tesh              PARENT_SCOPE)
 set(bin_files       ${bin_files}    ${CMAKE_CURRENT_SOURCE_DIR}/hostfile
                                     ${CMAKE_CURRENT_SOURCE_DIR}/pt2pt-pingpong/broken_hostfiles.tesh
                                     ${CMAKE_CURRENT_SOURCE_DIR}/pt2pt-pingpong/TI_output.tesh              PARENT_SCOPE)
 set(bin_files       ${bin_files}    ${CMAKE_CURRENT_SOURCE_DIR}/hostfile
@@ -39,6 +45,7 @@ set(bin_files       ${bin_files}    ${CMAKE_CURRENT_SOURCE_DIR}/hostfile
 if(enable_smpi)
   if(NOT WIN32)
     ADD_TESH_FACTORIES(tesh-smpi-macro-shared "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-shared macro-shared.tesh)
 if(enable_smpi)
   if(NOT WIN32)
     ADD_TESH_FACTORIES(tesh-smpi-macro-shared "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-shared macro-shared.tesh)
+    ADD_TESH_FACTORIES(tesh-smpi-macro-partial-shared "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-partial-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-partial-shared macro-partial-shared.tesh)
   endif()
 
   foreach(x coll-allgather coll-allgatherv coll-allreduce coll-alltoall coll-alltoallv coll-barrier coll-bcast 
   endif()
 
   foreach(x coll-allgather coll-allgatherv coll-allreduce coll-alltoall coll-alltoallv coll-barrier coll-bcast 
diff --git a/teshsuite/smpi/macro-partial-shared/macro-partial-shared.c b/teshsuite/smpi/macro-partial-shared/macro-partial-shared.c
new file mode 100644 (file)
index 0000000..2daeb04
--- /dev/null
@@ -0,0 +1,93 @@
+/* Copyright (c) 2009-2015. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* This test exercises SMPI_PARTIAL_SHARED_MALLOC: it checks the shared blocks and the private gaps between them */
+
+#include <stdio.h>
+#include <mpi.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+// Return the number of occurrences of the given value between buf[start] and buf[stop-1] (stop is exclusive).
+int count_all(uint8_t *buf, int start, int stop, uint8_t value) {
+  int occ = 0; // running count of bytes equal to value
+  for(int i = start ; i < stop ; i++) {
+    if(buf[i] == value) {
+      occ ++;
+    }
+  }
+  return occ;
+}
+
+// Return true (1) iff every byte from buf[start] to buf[stop-1] equals value, i.e. count_all() matches the range length.
+int check_all(uint8_t *buf, int start, int stop, uint8_t value) {
+  int occ = count_all(buf, start, stop, value);
+  return occ == stop-start; // all stop-start bytes matched
+}
+
+// Return true iff "enough" occurrences of value lie in buf[start]..buf[stop-1]: at most two pages' worth of bytes may differ.
+int check_enough(uint8_t *buf, int start, int stop, uint8_t value) {
+  int page_size = 0x1000; // hard-coded 4 KiB page size used for the tolerance
+  int size = stop-start;
+  if(size <= 2*page_size) // we are not sure to have a whole page that is shared
+    return 1; // too small to conclude anything: accept unconditionally
+  int occ = count_all(buf, start, stop, value);
+  return occ >= size - 2*page_size; // tolerate up to two pages of non-matching bytes
+}
+
+int main(int argc, char *argv[])
+{
+  MPI_Init(&argc, &argv);
+  int rank;
+  int size;
+  int mem_size = 0x10000000; // 256 MiB buffer
+  int shared_blocks[] = { // flat list of (start, stop) byte offsets, used below as [start, stop) ranges
+    0,        0x1234567,
+    0x3456789, 0x3457890,
+    0x4444444, 0x5555555,
+    0x8000000, 0x10000000
+  };
+  int nb_blocks = (sizeof(shared_blocks)/sizeof(int))/2; // number of (start, stop) pairs
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  // Allocate a partially shared buffer, then zero it entirely before any rank writes the marker value
+  uint8_t *buf;
+  buf = SMPI_PARTIAL_SHARED_MALLOC(mem_size, shared_blocks, nb_blocks);
+  memset(buf, 0, mem_size); // NOTE(review): no <string.h> include is visible in this file for memset - confirm it arrives transitively
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  // Process 0 writes the marker value 42 into every shared block
+  if(rank == 0) {
+    for(int i = 0; i < nb_blocks; i++) {
+      int start = shared_blocks[2*i];
+      int stop = shared_blocks[2*i+1];
+      memset(buf+start, 42, stop-start);
+    }
+  }
+  MPI_Barrier(MPI_COMM_WORLD);
+  // All processes check that their shared blocks have been written (at least partially, see check_enough)
+  for(int i = 0; i < nb_blocks; i++) {
+    int start = shared_blocks[2*i];
+    int stop = shared_blocks[2*i+1];
+    int is_shared = check_enough(buf, start, stop, 42);
+    printf("[%d] The result of the shared check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, is_shared);
+  }
+
+
+  // Check the private blocks: the gaps between consecutive shared blocks must still hold only zeros
+  MPI_Barrier(MPI_COMM_WORLD);
+  for(int i = 0; i < nb_blocks-1; i++) {
+    int start = shared_blocks[2*i+1]; // end of shared block i
+    int stop = shared_blocks[2*i+2]; // start of shared block i+1
+    int is_private = check_all(buf, start, stop, 0);
+    printf("[%d] The result of the private check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, is_private);
+  }
+
+  SMPI_SHARED_FREE(buf);
+
+  MPI_Finalize();
+  return 0;
+}
diff --git a/teshsuite/smpi/macro-partial-shared/macro-partial-shared.tesh b/teshsuite/smpi/macro-partial-shared/macro-partial-shared.tesh
new file mode 100644 (file)
index 0000000..db11ea3
--- /dev/null
@@ -0,0 +1,33 @@
+p Test compute
+! setenv LD_LIBRARY_PATH=../../lib
+! output sort
+! timeout 5
+$ ${bindir:=.}/../../../bin/smpirun -hostfile ../hostfile -platform ../../../examples/platforms/small_platform.xml -np 4 ${bindir:=.}/macro-partial-shared --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning
+> [0] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [0] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [0] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [0] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [0] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [0] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [0] The result of the shared check for block (0x8000000, 0x10000000) is: 1
+> [1] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [1] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [1] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [1] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [1] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [1] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [1] The result of the shared check for block (0x8000000, 0x10000000) is: 1
+> [2] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [2] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [2] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [2] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [2] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [2] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [2] The result of the shared check for block (0x8000000, 0x10000000) is: 1
+> [3] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [3] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [3] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [3] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [3] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [3] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [3] The result of the shared check for block (0x8000000, 0x10000000) is: 1