teshsuite/smpi/coll-reduce-scatter/coll-reduce-scatter
teshsuite/smpi/coll-scatter/coll-scatter
teshsuite/smpi/macro-shared/macro-shared
+teshsuite/smpi/macro-partial-shared/macro-partial-shared
teshsuite/smpi/type-struct/type-struct
teshsuite/smpi/type-vector/type-vector
teshsuite/surf/lmm_usage/lmm_usage
}
// Align functions, from http://stackoverflow.com/questions/4840410/how-to-align-a-pointer-in-c
+#define PAGE_SIZE 0x1000
#define ALIGN_UP(n, align) (((n) + (align)-1) & -(align))
#define ALIGN_DOWN(n, align) ((n) & -(align))
void *smpi_shared_malloc_global__(size_t size, const char *file, int line, int *shared_block_offsets, int nb_shared_blocks) {
void *mem;
+ xbt_assert(smpi_shared_malloc_blocksize % PAGE_SIZE == 0, "The block size of shared malloc should be a multiple of the page size.");
/* First reserve memory area */
mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
/* Map the bogus file in place of the anonymous memory */
for(int i_block = 0; i_block < nb_shared_blocks; i_block ++) {
- int start_offset = ALIGN_UP(shared_block_offsets[2*i_block], smpi_shared_malloc_blocksize);
- int stop_offset = ALIGN_DOWN(shared_block_offsets[2*i_block+1], smpi_shared_malloc_blocksize);
+ int start_offset = shared_block_offsets[2*i_block];
+ int stop_offset = shared_block_offsets[2*i_block+1];
+ int start_block_offset = ALIGN_UP(start_offset, smpi_shared_malloc_blocksize);
+ int stop_block_offset = ALIGN_DOWN(stop_offset, smpi_shared_malloc_blocksize);
unsigned int i;
- for (i = start_offset / smpi_shared_malloc_blocksize; i < stop_offset / smpi_shared_malloc_blocksize; i++) {
+ for (i = start_block_offset / smpi_shared_malloc_blocksize; i < stop_block_offset / smpi_shared_malloc_blocksize; i++) {
void* pos = (void*)((unsigned long)mem + i * smpi_shared_malloc_blocksize);
void* res = mmap(pos, smpi_shared_malloc_blocksize, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
smpi_shared_malloc_bogusfile, 0);
"You can also try using the sysctl vm.max_map_count",
strerror(errno));
}
+ int low_page_start_offset = ALIGN_UP(start_offset, PAGE_SIZE);
+ int low_page_stop_offset = start_block_offset < ALIGN_DOWN(stop_offset, PAGE_SIZE) ? start_block_offset : ALIGN_DOWN(stop_offset, PAGE_SIZE);
+ if(low_page_start_offset < low_page_stop_offset) {
+ void* pos = (void*)((unsigned long)mem + low_page_start_offset);
+ void* res = mmap(pos, low_page_stop_offset-low_page_start_offset, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
+ smpi_shared_malloc_bogusfile, 0);
+ xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
+ "size of the mapped file using --cfg=smpi/shared-malloc-blocksize=newvalue (default 1048576) ?"
+ "You can also try using the sysctl vm.max_map_count",
+ strerror(errno));
+ }
+ if(low_page_stop_offset <= stop_block_offset) {
+ int high_page_stop_offset = stop_offset == size ? size : ALIGN_DOWN(stop_offset, PAGE_SIZE);
+ if(high_page_stop_offset > stop_block_offset) {
+ void* pos = (void*)((unsigned long)mem + stop_block_offset);
+ void* res = mmap(pos, high_page_stop_offset-stop_block_offset, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED | MAP_POPULATE,
+ smpi_shared_malloc_bogusfile, 0);
+ xbt_assert(res == pos, "Could not map folded virtual memory (%s). Do you perhaps need to increase the "
+ "size of the mapped file using --cfg=smpi/shared-malloc-blocksize=newvalue (default 1048576) ?"
+ "You can also try using the sysctl vm.max_map_count",
+ strerror(errno));
+ }
+ }
}
if(nb_shared_blocks == 1 && shared_block_offsets[0] == 0 && shared_block_offsets[1] == size) {
add_executable (macro-shared macro-shared/macro-shared.c)
target_link_libraries(macro-shared simgrid)
set_target_properties(macro-shared PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/macro-shared)
+
+ add_executable (macro-partial-shared macro-partial-shared/macro-partial-shared.c)
+ target_link_libraries(macro-partial-shared simgrid)
+ set_target_properties(macro-partial-shared PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/macro-partial-shared)
endif()
endif()
-set(teshsuite_src ${teshsuite_src} ${CMAKE_CURRENT_SOURCE_DIR}/macro-shared/macro-shared.c PARENT_SCOPE)
+# Export both sources with a single set(): PARENT_SCOPE leaves the local teshsuite_src
+# untouched, so a second PARENT_SCOPE set() would overwrite the first one in the parent.
+set(teshsuite_src ${teshsuite_src} ${CMAKE_CURRENT_SOURCE_DIR}/macro-shared/macro-shared.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/macro-partial-shared/macro-partial-shared.c PARENT_SCOPE)
set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-large.tesh
${CMAKE_CURRENT_SOURCE_DIR}/coll-allreduce/coll-allreduce-automatic.tesh
${CMAKE_CURRENT_SOURCE_DIR}/coll-alltoall/clusters.tesh
${CMAKE_CURRENT_SOURCE_DIR}/macro-shared/macro-shared.tesh
+ ${CMAKE_CURRENT_SOURCE_DIR}/macro-partial-shared/macro-partial-shared.tesh
${CMAKE_CURRENT_SOURCE_DIR}/pt2pt-pingpong/broken_hostfiles.tesh
${CMAKE_CURRENT_SOURCE_DIR}/pt2pt-pingpong/TI_output.tesh PARENT_SCOPE)
set(bin_files ${bin_files} ${CMAKE_CURRENT_SOURCE_DIR}/hostfile
if(enable_smpi)
if(NOT WIN32)
ADD_TESH_FACTORIES(tesh-smpi-macro-shared "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-shared macro-shared.tesh)
+ ADD_TESH_FACTORIES(tesh-smpi-macro-partial-shared "thread;ucontext;raw;boost" --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/macro-partial-shared --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/macro-partial-shared macro-partial-shared.tesh)
endif()
foreach(x coll-allgather coll-allgatherv coll-allreduce coll-alltoall coll-alltoallv coll-barrier coll-bcast
--- /dev/null
+/* Copyright (c) 2009-2015. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* This example exercises SMPI_PARTIAL_SHARED_MALLOC: only the listed blocks of the buffer are requested as shared. */
+
+#include <stdio.h>
+#include <mpi.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+
+// Count how many bytes in buf[start] .. buf[stop-1] are equal to value.
+int count_all(uint8_t *buf, int start, int stop, uint8_t value) {
+  int matches = 0;
+  int pos = start;
+  while(pos < stop) {
+    // A comparison yields 0 or 1, so it can be accumulated directly.
+    matches += (buf[pos] == value);
+    pos++;
+  }
+  return matches;
+}
+
+// Tell whether every byte in buf[start] .. buf[stop-1] equals value (1 if so, 0 otherwise).
+int check_all(uint8_t *buf, int start, int stop, uint8_t value) {
+  int expected = stop-start;
+  // The range is uniform exactly when the number of matches equals its length.
+  return count_all(buf, start, stop, value) == expected;
+}
+
+// Tell whether value occurs "often enough" in buf[start] .. buf[stop-1]: up to two pages'
+// worth of bytes may differ. Ranges of at most two pages are always accepted.
+int check_enough(uint8_t *buf, int start, int stop, uint8_t value) {
+  const int page_size = 0x1000;
+  const int slack = 2*page_size; // number of bytes allowed to differ from value
+  const int length = stop-start;
+  if(length > slack)
+    return count_all(buf, start, stop, value) >= length - slack;
+  // Too short: we are not sure to have a whole page that is shared.
+  return 1;
+}
+
+// Exercise a partially shared buffer: rank 0 writes into the shared blocks, then every
+// rank reports whether it sees that write in the shared blocks and zeros in the private gaps.
+int main(int argc, char *argv[])
+{
+  MPI_Init(&argc, &argv);
+  int rank;
+  int size;
+  int mem_size = 0x10000000;
+  // Consecutive (start, stop) byte offsets of the blocks requested as shared.
+  int shared_blocks[] = {
+    0, 0x1234567,
+    0x3456789, 0x3457890,
+    0x4444444, 0x5555555,
+    0x8000000, 0x10000000
+  };
+  // Two entries per block; sizeof the element stays right if the array type changes.
+  int nb_blocks = (sizeof(shared_blocks)/sizeof(shared_blocks[0]))/2;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  // Allocate the partially shared buffer and clear all of it
+  uint8_t *buf;
+  buf = SMPI_PARTIAL_SHARED_MALLOC(mem_size, shared_blocks, nb_blocks);
+  memset(buf, 0, mem_size);
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  // Process 0 writes into the shared blocks
+  if(rank == 0) {
+    for(int i = 0; i < nb_blocks; i++) {
+      int start = shared_blocks[2*i];
+      int stop = shared_blocks[2*i+1];
+      memset(buf+start, 42, stop-start);
+    }
+  }
+  MPI_Barrier(MPI_COMM_WORLD);
+  // All processes check that their shared blocks have been written (at least partially)
+  for(int i = 0; i < nb_blocks; i++) {
+    int start = shared_blocks[2*i];
+    int stop = shared_blocks[2*i+1];
+    int is_shared = check_enough(buf, start, stop, 42);
+    printf("[%d] The result of the shared check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, is_shared);
+  }
+
+  // Check the private blocks: each gap between two consecutive shared blocks must still hold zeros
+  MPI_Barrier(MPI_COMM_WORLD);
+  for(int i = 0; i < nb_blocks-1; i++) {
+    int start = shared_blocks[2*i+1]; // end of shared block i
+    int stop = shared_blocks[2*i+2];  // start of shared block i+1
+    int is_private = check_all(buf, start, stop, 0);
+    printf("[%d] The result of the private check for block (0x%x, 0x%x) is: %d\n", rank, start, stop, is_private);
+  }
+
+  SMPI_SHARED_FREE(buf);
+
+  MPI_Finalize();
+  return 0;
+}
--- /dev/null
+p Test compute
+! setenv LD_LIBRARY_PATH=../../lib
+! output sort
+! timeout 5
+$ ${bindir:=.}/../../../bin/smpirun -hostfile ../hostfile -platform ../../../examples/platforms/small_platform.xml -np 4 ${bindir:=.}/macro-partial-shared --log=smpi_kernel.thres:warning --log=xbt_cfg.thres:warning
+> [0] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [0] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [0] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [0] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [0] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [0] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [0] The result of the shared check for block (0x8000000, 0x10000000) is: 1
+> [1] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [1] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [1] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [1] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [1] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [1] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [1] The result of the shared check for block (0x8000000, 0x10000000) is: 1
+> [2] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [2] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [2] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [2] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [2] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [2] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [2] The result of the shared check for block (0x8000000, 0x10000000) is: 1
+> [3] The result of the private check for block (0x1234567, 0x3456789) is: 1
+> [3] The result of the private check for block (0x3457890, 0x4444444) is: 1
+> [3] The result of the private check for block (0x5555555, 0x8000000) is: 1
+> [3] The result of the shared check for block (0x0, 0x1234567) is: 1
+> [3] The result of the shared check for block (0x3456789, 0x3457890) is: 1
+> [3] The result of the shared check for block (0x4444444, 0x5555555) is: 1
+> [3] The result of the shared check for block (0x8000000, 0x10000000) is: 1