Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Add bug-17132 tesh suite
author Pierre Veyre <pierre.veyre@cc.in2p3.fr>
Tue, 1 Apr 2014 06:25:59 +0000 (08:25 +0200)
committer Pierre Veyre <pierre.veyre@cc.in2p3.fr>
Tue, 1 Apr 2014 06:25:59 +0000 (08:25 +0200)
teshsuite/bug-17132/CMakeLists.txt [new file with mode: 0644]
teshsuite/bug-17132/README [new file with mode: 0644]
teshsuite/bug-17132/bug-17132 [new file with mode: 0755]
teshsuite/bug-17132/bug-17132.c [new file with mode: 0644]
teshsuite/bug-17132/bug-17132.tesh [new file with mode: 0644]
teshsuite/bug-17132/hostfile.txt [new file with mode: 0644]
teshsuite/bug-17132/small_platform.xml [new file with mode: 0644]

diff --git a/teshsuite/bug-17132/CMakeLists.txt b/teshsuite/bug-17132/CMakeLists.txt
new file mode 100644 (file)
index 0000000..d2d8163
--- /dev/null
@@ -0,0 +1,37 @@
+cmake_minimum_required(VERSION 2.6)
+
+# Build description for the bug-17132 SMPI regression test.
+# Everything below is skipped unless enable_smpi is set.
+if(enable_smpi)
+  # Make the C source go through SMPI: outside Windows the C compiler is
+  # replaced by the smpicc script under the build directory; on Windows the
+  # compiler is kept and smpi_main.h is injected with -include instead.
+  if(NOT WIN32)
+    set(CMAKE_C_COMPILER "${CMAKE_BINARY_DIR}/smpi_script/bin/smpicc")
+  else()
+    # NOTE(review): this replaces CMAKE_C_FLAGS instead of appending to it, so
+    # any flags set earlier are dropped here -- confirm that this is intended.
+    set(CMAKE_C_FLAGS "-include ${CMAKE_HOME_DIRECTORY}/include/smpi/smpi_main.h")
+  endif()
+
+  # Emit the executable into this directory's build folder.
+  set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}")
+
+  add_executable(bug-17132 ${CMAKE_HOME_DIRECTORY}/teshsuite/bug-17132/bug-17132.c)
+  target_link_libraries(bug-17132 simgrid)
+
+  # Append this test's files to the file lists held by the parent scope.
+  # NOTE(review): presumably the parent uses these lists for packaging -- verify.
+  set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/bug-17132.tesh PARENT_SCOPE)
+  set(xml_files ${xml_files} ${CMAKE_CURRENT_SOURCE_DIR}/small_platform.xml PARENT_SCOPE)
+  set(teshsuite_src ${teshsuite_src} ${CMAKE_CURRENT_SOURCE_DIR}/bug-17132.c PARENT_SCOPE)
+  # Nothing is added to bin_files; the list is forwarded as it is.
+  set(bin_files ${bin_files} PARENT_SCOPE)
+  set(txt_files
+    ${txt_files}
+    ${CMAKE_CURRENT_SOURCE_DIR}/hostfile.txt
+    ${CMAKE_CURRENT_SOURCE_DIR}/README
+    PARENT_SCOPE)
+endif()
diff --git a/teshsuite/bug-17132/README b/teshsuite/bug-17132/README
new file mode 100644 (file)
index 0000000..22e35b9
--- /dev/null
@@ -0,0 +1,8 @@
+This is the bug #17132 described on gforge [1]. This small SMPI code
+triggers an issue in SURF, which is still to be debugged.
+
+The problem seems to be related to the order of events, as changing it
+(with another platform, another message size, or an MPI_Barrier in
+between) fixes the problem.
+
+[1] https://gforge.inria.fr/tracker/index.php?func=detail&aid=17132&group_id=12&atid=165
\ No newline at end of file
diff --git a/teshsuite/bug-17132/bug-17132 b/teshsuite/bug-17132/bug-17132
new file mode 100755 (executable)
index 0000000..66d4254
Binary files /dev/null and b/teshsuite/bug-17132/bug-17132 differ
diff --git a/teshsuite/bug-17132/bug-17132.c b/teshsuite/bug-17132/bug-17132.c
new file mode 100644 (file)
index 0000000..cecb3e4
--- /dev/null
@@ -0,0 +1,40 @@
+#include "xbt/log.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <mpi.h>
+
+/* Declares MM_mpi as the default XBT log category of this file.
+ * NOTE(review): no logging call using it is visible in this file. */
+XBT_LOG_NEW_DEFAULT_CATEGORY(MM_mpi, "Messages for this SMPI test");
+
+/* Report a failed step on stderr, shut MPI down and end the process. */
+static void die(const char *what)
+{
+  fprintf(stderr, "%s\n", what);
+  MPI_Finalize();
+  exit(-1);
+}
+
+/*
+ * Reproducer for bug #17132: two back-to-back broadcasts from rank 0 (of M,
+ * then N doubles) followed by a barrier. The exact sequence of MPI calls
+ * matters (see the two barrier remarks below), so it must not be reordered.
+ *
+ * Returns 0 on success; on any failure a message is printed, MPI is
+ * finalized and the process exits with status -1.
+ */
+int main(int argc, char ** argv)
+{
+  int err;                /* MPI routines report their status as an int */
+  const int M = 8*1024;   /* MPI_Bcast takes an int element count */
+  const int N = 32*1024;
+
+  MPI_Init(&argc, &argv);
+
+  double *a = malloc(sizeof(double) * (size_t)M);
+  double *b = malloc(sizeof(double) * (size_t)N);
+  if (a == NULL || b == NULL) {
+    free(a);
+    free(b);
+    die("Error allocating the broadcast buffers");
+  }
+
+  // A broadcast
+  err = MPI_Bcast(a, M, MPI_DOUBLE, 0, MPI_COMM_WORLD);
+  if (err != MPI_SUCCESS)
+    die("Error Bcast A");
+
+//  Uncommenting this barrier fixes it!
+//  MPI_Barrier(MPI_COMM_WORLD);
+
+  // Another broadcast
+  err = MPI_Bcast(b, N, MPI_DOUBLE, 0, MPI_COMM_WORLD );
+  if (err != MPI_SUCCESS)
+    die("Error Bcast B");
+
+  // Commenting out this barrier fixes it!!
+  MPI_Barrier(MPI_COMM_WORLD);
+
+  MPI_Finalize();
+  free(a);
+  free(b);
+  return 0;
+}
diff --git a/teshsuite/bug-17132/bug-17132.tesh b/teshsuite/bug-17132/bug-17132.tesh
new file mode 100644 (file)
index 0000000..d167f08
--- /dev/null
@@ -0,0 +1,8 @@
+# Runs the bug-17132 binary through smpirun with 16 processes, using
+# small_platform.xml as platform and hostfile.txt as hostfile (both looked up
+# in srcdir, which falls back to '.').
+# NOTE(review): the last expected line, "(some sensible output)", looks like a
+# placeholder rather than real program output -- confirm and replace.
+$ ../../smpi_script/bin/smpirun -np 16 -platform ${srcdir:=.}/small_platform.xml -hostfile ${srcdir:=.}/hostfile.txt ./bug-17132 --cfg=smpi/cpu_threshold:-1 --log=smpi_kernel.thres:warning
+> You requested to use 16 processes, but there is only 2 processes in your hostfile...
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'maxmin/precision' to '1e-9'
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'network/model' to 'SMPI'
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'network/TCP_gamma' to '4194304'
+> [0.000000] [xbt_cfg/INFO] Configuration change: Set 'smpi/cpu_threshold' to '-1'
+> [0.000000] [surf_config/INFO] Switching workstation model to compound since you changed the network and/or cpu model(s)
+> (some sensible output)
diff --git a/teshsuite/bug-17132/hostfile.txt b/teshsuite/bug-17132/hostfile.txt
new file mode 100644 (file)
index 0000000..994b3e2
--- /dev/null
@@ -0,0 +1,2 @@
+host1
+host2
diff --git a/teshsuite/bug-17132/small_platform.xml b/teshsuite/bug-17132/small_platform.xml
new file mode 100644 (file)
index 0000000..a2b420d
--- /dev/null
@@ -0,0 +1,15 @@
+<?xml version='1.0'?>
+<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
+<!-- Minimal platform for the bug-17132 test: two hosts joined by one link. -->
+<platform version='3'>
+ <AS id='AS0' routing='Full'>
+
+  <!-- The two hosts; their ids are the names listed in hostfile.txt. -->
+  <host id='host1' power='1.0'/>
+  <host id='host2' power='1.0'/>
+  <!-- The only link of the platform. -->
+  <link id='link' bandwidth='1e+10' latency='1e-09'/>
+
+  <!-- Single route between the hosts, declared symmetrical, over that link. -->
+  <route src='host1' dst='host2' symmetrical="YES">
+       <link_ctn id='link'/>
+  </route>
+
+ </AS>
+</platform>