Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Merge branch 'master' of scm.gforge.inria.fr:/gitroot/simgrid/simgrid
authorMartin Quinson <martin.quinson@loria.fr>
Thu, 27 Aug 2015 17:46:10 +0000 (19:46 +0200)
committerMartin Quinson <martin.quinson@loria.fr>
Thu, 27 Aug 2015 17:46:10 +0000 (19:46 +0200)
13 files changed:
.gitignore
examples/smpi/mc/only_send_deterministic.tesh
src/mc/mc_request.cpp
src/smpi/smpi_base.c
src/smpi/smpirun.in
teshsuite/mc/CMakeLists.txt
teshsuite/mc/mutex_handling.c [new file with mode: 0644]
teshsuite/mc/mutex_handling.tesh [new file with mode: 0644]
teshsuite/mc/mutex_handling.xml [new file with mode: 0644]
teshsuite/mc/no_mutex_handling.tesh [new file with mode: 0644]
tools/cmake/AddTests.cmake
tools/fix-paje-trace.sh
tools/tesh/tesh.pl

index 458db25..6dff2de 100644 (file)
@@ -17,6 +17,7 @@
 
 ### cmake
 CTestCustom.cmake
+CTestResults.xml
 
 ### Maintainer mode
 src/simdag/dax_dtd.l
index 6484cc9..c315df9 100644 (file)
@@ -7,8 +7,8 @@ $ ../../../smpi_script/bin/smpirun -wrapper "${bindir:=.}/../../../bin/simgrid-m
 > [0.000000] [mc_global/INFO] **** Only-send-deterministic communication pattern ****
 > [0.000000] [mc_global/INFO] ******************************************************
 > [0.000000] [mc_global/INFO] The recv communications pattern of the process 0 is different! Different source for communication #2
-> [0.000000] [mc_global/INFO] Expanded states = 1025
-> [0.000000] [mc_global/INFO] Visited states = 3640
-> [0.000000] [mc_global/INFO] Executed transitions = 3360
+> [0.000000] [mc_global/INFO] Expanded states = 520
+> [0.000000] [mc_global/INFO] Visited states = 1476
+> [0.000000] [mc_global/INFO] Executed transitions = 1312
 > [0.000000] [mc_global/INFO] Send-deterministic : Yes
 > [0.000000] [mc_global/INFO] Recv-deterministic : No
index 4dfc427..4503597 100644 (file)
@@ -650,7 +650,7 @@ char *MC_request_get_dot_output(smx_simcall_t req, int value)
     break;
 
   case SIMCALL_MUTEX_LOCK:
-    label = bprintf("[(%lu)] Mutex LOCK", req->issuer->pid);
+    label = bprintf("[(%lu)] Mutex LOCK", issuer->pid);
     break;
 
   case SIMCALL_MC_RANDOM:
index a61c0e5..da028de 100644 (file)
@@ -341,11 +341,17 @@ void smpi_mpi_start(MPI_Request request)
 
   if (request->flags & RECV) {
     print_request("New recv", request);
-        
-    xbt_mutex_t mut=smpi_process_mailboxes_mutex();
-    xbt_mutex_acquire(mut);
-    
-    if (request->flags & RMA || request->size < sg_cfg_get_int("smpi/async_small_thres")){
+
+    int async_small_thres = sg_cfg_get_int("smpi/async_small_thres");
+
+    xbt_mutex_t mut = smpi_process_mailboxes_mutex();
+    if (async_small_thres != 0 ||request->flags & RMA)
+      xbt_mutex_acquire(mut);
+
+    if (async_small_thres == 0 && !(request->flags & RMA)) {
+      mailbox = smpi_process_mailbox();
+    }
+    else if (request->flags & RMA || request->size < async_small_thres){
     //We have to check both mailboxes (because SSEND messages are sent to the large mbox). begin with the more appropriate one : the small one.
       mailbox = smpi_process_mailbox_small();
       XBT_DEBUG("Is there a corresponding send already posted in the small mailbox %p (in case of SSEND)?", mailbox);
@@ -393,7 +399,8 @@ void smpi_mpi_start(MPI_Request request)
                                          request, -1.0);
         XBT_DEBUG("recv simcall posted");
 
-    xbt_mutex_release(mut);
+    if (async_small_thres != 0 || request->flags & RMA)
+      xbt_mutex_release(mut);
   } else {
 
 
@@ -416,11 +423,18 @@ void smpi_mpi_start(MPI_Request request)
         simcall_process_sleep(sleeptime);
         XBT_DEBUG("sending size of %zu : sleep %f ", request->size, smpi_os(request->size));
     }
-    
+
+    int async_small_thres = sg_cfg_get_int("smpi/async_small_thres");
+
     xbt_mutex_t mut=smpi_process_remote_mailboxes_mutex(receiver);
-    xbt_mutex_acquire(mut);
-    
-    if (request->flags & RMA || request->size < sg_cfg_get_int("smpi/async_small_thres")) { // eager mode
+
+    if (async_small_thres != 0 || request->flags & RMA)
+      xbt_mutex_acquire(mut);
+
+    if (!(async_small_thres != 0 || request->flags & RMA)) {
+      mailbox = smpi_process_remote_mailbox(receiver);
+    }
+    else if (request->flags & RMA || request->size < async_small_thres) { // eager mode
       mailbox = smpi_process_remote_mailbox(receiver);
       XBT_DEBUG("Is there a corresponding recv already posted in the large mailbox %p?", mailbox);
       smx_synchro_t action = simcall_comm_iprobe(mailbox, 1,request->dst, request->tag, &match_send, (void*)request);
@@ -489,7 +503,8 @@ void smpi_mpi_start(MPI_Request request)
     if (request->action)
        simcall_set_category(request->action, TRACE_internal_smpi_get_category());
 
-    xbt_mutex_release(mut);
+    if (async_small_thres != 0 || request->flags & RMA)
+      xbt_mutex_release(mut);
   }
 
 }
index 812acdb..f27daa8 100755 (executable)
@@ -45,7 +45,11 @@ Options:
   -trace-viva                # generate configuration for Viva's GraphView
   -trace-file <tracefile>    # name of the tracefile (simgrid_smpi.trace)
   -ext <value>               # additional parameter (reserved)
-  
+  -foreground                # run the child process in the foreground:
+                             # - it can access the terminal;
+                             # - but we cannot stop it when we receive a signal.
+                             # This is useful with -wrapper "gdb --args".
+
   -version                   # Displays the SimGrid version (human readable)
   -git-version               # Displays the git hash of SimGrid
 
@@ -65,6 +69,58 @@ fi
 EXTOPT=""
 WRAPPER=""
 HOSTFILE=""
+HOSTFILETMP=0
+
+unset pid
+unset foreground ; foreground=0
+
+trapped_signals="HUP INT QUIT ILL ABRT SEGV FPE ALRM TERM USR1 USR2 BUS"
+
+smpirun_cleanup()
+{
+  if [ -z "${KEEP}" ] ; then
+      if [ -z "${PLATFORM}" -a -n "$PLATFORMTMP" ]; then
+        rm -f ${PLATFORMTMP}
+        PLATFORMTMP=""
+      fi
+      if [ ${HOSTFILETMP} = 1 -a -n "$HOSTFILE" ] ; then
+          rm -f ${HOSTFILE}
+          HOSTFILE=""
+      fi
+      if [ ${UNROLLEDHOSTFILETMP} = 1 -a -n "$UNROLLEDHOSTFILE" ] ; then
+          rm -f ${UNROLLEDHOSTFILE}
+          UNROLLEDHOSTFILE=""
+      fi
+      if [ -n ${APPLICATIONTMP} ]; then
+        rm -f ${APPLICATIONTMP}
+        APPLICATIONTMP=""
+      fi
+  fi
+}
+
+smpirun_trap() {
+  local sig
+  sig="$1"
+
+  # Cleanup and kill the child process:
+  smpirun_cleanup
+  if ! [ -z "$pid" ]; then
+    kill -TERM $pid
+  fi
+  unset pid
+
+  # Raise the same signal again (remove the traps first):
+  trap - $trapped_signals
+  kill -$sig $$
+
+  # This should never happen:
+  kill -ABRT $$
+  kill -TERM $$
+}
+
+for s in $trapped_signals; do
+  trap "smpirun_trap $s" $s
+done
 
 while true; do
     case "$1" in
@@ -189,6 +245,10 @@ while true; do
             done
             shift 1
             ;;
+        "-foreground")
+            foreground=1
+            shift 1
+            ;;
         *)
             break
             ;;
@@ -229,7 +289,6 @@ if [ -z "${HOSTFILE}" ] && [ -z "${PLATFORM}" ] ; then
     exit 1
 fi
 
-HOSTFILETMP=0
 if [ -z "${HOSTFILE}" ] ; then
     HOSTFILETMP=1
     HOSTFILE="$(mktemp tmphostXXXXXX)"
@@ -458,20 +517,17 @@ if [ -n "${KEEP}" ] ; then
         echo "Generated unrolled hostfile ${UNROLLEDHOSTFILE} keeped." 
     fi
 fi
-${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PLATFORMTMP} ${APPLICATIONTMP}
-status=$?
-
-if [ -z "${KEEP}" ] ; then
-    if [ -z "${PLATFORM}" ]; then
-       rm ${PLATFORMTMP}
-    fi
-    if [ ${HOSTFILETMP} = 1 ] ; then
-        rm ${HOSTFILE}
-    fi
-    if [ ${UNROLLEDHOSTFILETMP} = 1 ] ; then
-        rm ${UNROLLEDHOSTFILE}
-    fi
-    rm ${APPLICATIONTMP}
+if [ "$foreground" = 1 ]; then
+  ${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PLATFORMTMP} ${APPLICATIONTMP}
+  status=$?
+else
+  ${EXEC} ${TRACEOPTIONS} ${SIMOPTS} ${PLATFORMTMP} ${APPLICATIONTMP} &
+  pid=$!
+  wait $pid
+  status=$?
+  pid=""
 fi
 
+smpirun_cleanup
+
 exit $status
index 5c2f3d2..b6e1d81 100644 (file)
@@ -4,11 +4,26 @@ if(HAVE_MC)
   set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}")
 endif()
 
+add_executable(mutex_handling mutex_handling.c)
+target_link_libraries(mutex_handling simgrid)
+
+add_executable(no_mutex_handling mutex_handling.c)
+target_link_libraries(no_mutex_handling simgrid)
+set_target_properties(no_mutex_handling PROPERTIES COMPILE_FLAGS -DDISABLE_THE_MUTEX=1)
+
 set(tesh_files
   ${tesh_files}
+  ${CMAKE_CURRENT_SOURCE_DIR}/mutex_handling.tesh
+  ${CMAKE_CURRENT_SOURCE_DIR}/no_mutex_handling.tesh
   PARENT_SCOPE
   )
 set(testsuite_src
   ${testsuite_src}
+  ${CMAKE_CURRENT_SOURCE_DIR}/mutex_handling.c
+  PARENT_SCOPE
+  )
+set(xml_files
+  ${xml_files}
+  ${CMAKE_CURRENT_SOURCE_DIR}/mutex_handling.xml
   PARENT_SCOPE
   )
diff --git a/teshsuite/mc/mutex_handling.c b/teshsuite/mc/mutex_handling.c
new file mode 100644 (file)
index 0000000..4db2783
--- /dev/null
@@ -0,0 +1,93 @@
+/* Copyright (c) 2015. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+/* In this test, we have two senders sending one message to a common receiver.
+ * The receiver should be able to see any ordering between the two messages.
+ * If we model-check the application with assertions on a specific order of
+ * the messages (see the assertions in the receiver code), it should fail
+ * because both ordering are possible.
+ *
+ * If the senders sends the message directly, the current version of the MC
+ * finds that the ordering may differ and the MC find a counter-example.
+ *
+ * However, if the senders send the message in a mutex, the MC always let
+ * the first process take the mutex because it thinks that the effect of
+ * a mutex is purely local: the ordering of the messages is always the same
+ * and the MC does not find the counter-example.
+ */
+
+#include <stdio.h>
+#include "simgrid/msg.h"
+#include "xbt/log.h"
+#include "mc/mc.h"
+#include <xbt/synchro_core.h>
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(msg_test,
+                             "Messages specific for this msg example");
+
+#define BOX_NAME "box"
+
+#ifndef DISABLE_THE_MUTEX
+static xbt_mutex_t mutex = NULL;
+#endif
+
+static int receiver(int argc, char *argv[])
+{
+  msg_task_t task = NULL;
+
+  MSG_task_receive(&task, BOX_NAME);
+  MC_assert(strcmp(MSG_task_get_name(task), "X") == 0);
+  MSG_task_destroy(task);
+
+  MSG_task_receive(&task, BOX_NAME);
+  MC_assert(strcmp(MSG_task_get_name(task), "Y") == 0);
+  MSG_task_destroy(task);
+
+  return 0;
+}
+
+static int sender(int argc, char *argv[])
+{
+  char* message_name = argv[1];
+#ifndef DISABLE_THE_MUTEX
+  xbt_mutex_acquire(mutex);
+#endif
+  MSG_task_send(MSG_task_create(message_name, 0.0, 0.0, NULL), BOX_NAME);
+#ifndef DISABLE_THE_MUTEX
+  xbt_mutex_release(mutex);
+#endif
+  return 0;
+}
+
+int main(int argc, char *argv[])
+{
+  MSG_init(&argc, argv);
+  if (argc != 3) {
+    printf("Usage: %s platform_file deployment_file\n", argv[0]);
+    printf("example: %s msg_platform.xml msg_deployment.xml\n", argv[0]);
+    exit(1);
+  }
+  const char *platform_file = argv[1];
+  const char *application_file = argv[2];
+  MSG_create_environment(platform_file);
+  MSG_function_register("receiver", receiver);
+  MSG_function_register("sender", sender);
+
+  MSG_launch_application(application_file);
+#ifndef DISABLE_THE_MUTEX
+  mutex = xbt_mutex_init();
+#endif
+  msg_error_t res = MSG_main();
+#ifndef DISABLE_THE_MUTEX
+  xbt_mutex_destroy(mutex); mutex = NULL;
+#endif
+  XBT_INFO("Simulation time %g", MSG_get_clock());
+
+  if (res == MSG_OK)
+    return 0;
+  else
+    return 1;
+}
\ No newline at end of file
diff --git a/teshsuite/mc/mutex_handling.tesh b/teshsuite/mc/mutex_handling.tesh
new file mode 100644 (file)
index 0000000..4839f3f
--- /dev/null
@@ -0,0 +1,4 @@
+#!/usr/bin/env tesh
+! expect return 1
+! output ignore
+$ ${bindir:=.}/../../bin/simgrid-mc ${bindir:=.}/mutex_handling --cfg=model-check:1 ../../examples/platforms/platform.xml mutex_handling.xml
diff --git a/teshsuite/mc/mutex_handling.xml b/teshsuite/mc/mutex_handling.xml
new file mode 100644 (file)
index 0000000..73e165a
--- /dev/null
@@ -0,0 +1,16 @@
+<?xml version='1.0'?>
+<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid.dtd">
+<platform version="3"><!-- For using with ping_pong, platform_sendrecv.xml -->
+
+  <!-- receiver my_name -->
+  <process host="Disney" function="receiver"></process>
+
+  <!-- sender my_name receiver_name other_sender_name -->
+  <process host="Jill" function="sender">
+    <argument value="X"/>
+  </process>
+  <process host="UNIX" function="sender">
+    <argument value="Y"/>
+  </process>
+
+</platform>
diff --git a/teshsuite/mc/no_mutex_handling.tesh b/teshsuite/mc/no_mutex_handling.tesh
new file mode 100644 (file)
index 0000000..58f31e8
--- /dev/null
@@ -0,0 +1,4 @@
+#!/usr/bin/env tesh
+! expect return 1
+! output ignore
+$ ${bindir:=.}/../../bin/simgrid-mc ${bindir:=.}/no_mutex_handling --cfg=model-check:1 ../../examples/platforms/platform.xml mutex_handling.xml
index e433f10..ebdd393 100644 (file)
@@ -101,6 +101,11 @@ IF(NOT enable_memcheck)
     ADD_TESH(tesh-mc-dwarf                       --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/mc/dwarf --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/mc/dwarf dwarf.tesh)
     ADD_TESH(tesh-mc-dwarf-expression            --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/mc/dwarf_expression --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/mc/dwarf_expression dwarf_expression.tesh)
 
+    ADD_TESH(tesh-mc-mutex-handling              --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/mc --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/mc mutex_handling.tesh --cfg=model-check/reduction:none)
+    ADD_TESH(tesh-mc-mutex-handling-dpor         --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/mc --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/mc mutex_handling.tesh --cfg=model-check/reduction:dpor)
+    ADD_TESH(tesh-mc-no-mutex-handling           --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/mc --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/mc no_mutex_handling.tesh --cfg=model-check/reduction:none)
+    ADD_TESH(tesh-mc-no-mutex-handling-dpor      --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/mc --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/mc no_mutex_handling.tesh --cfg=model-check/reduction:dpor)
+
     ADD_TESH(mc-record-random-bug                --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/mc/replay --setenv srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/mc/replay --cd ${CMAKE_HOME_DIRECTORY}/teshsuite/mc/replay random_bug.tesh)
 
     ADD_TESH_FACTORIES(mc-bugged1                "ucontext;raw" --setenv bindir=${CMAKE_BINARY_DIR}/examples/msg/mc --cd ${CMAKE_HOME_DIRECTORY}/examples/msg/mc bugged1.tesh)
index 5305d78..211a727 100755 (executable)
@@ -26,7 +26,7 @@ do
   GREP="/^$i /d; $GREP"
   GREP2="-e '^$i ' $GREP2"
 done
-GREP="/^%\ /d; /^%E/d; $GREP"
+GREP="/^%\ /d; /^%     /d; /^%E/d; $GREP"
 
 cat $TRACE | eval grep $GREP2 > types
 /bin/sed -e "$GREP" $TRACE > events
index 4bbeb20..2dd685b 100755 (executable)
@@ -280,6 +280,8 @@ sub exec_cmd {
     die "fork() failed: $!" unless defined $forked;
     if ( $forked == 0 ) { # child
       sleep $time_to_wait;
+      kill(SIGTERM, $cmd{'pid'});
+      sleep 1;
       kill(SIGKILL, $cmd{'pid'});
       exit $time_to_wait;
     }
@@ -363,7 +365,7 @@ sub parse_out {
 
   # Did we timeout ? If yes, handle it. If not, kill the forked process.
 
-  if($timeout==-1 and $gotret eq "got signal SIGKILL"){
+  if($timeout==-1 and ($gotret eq "got signal SIGTERM" or $gotret eq "got signal SIGKILL")){
     $gotret="return code 0";
     $timeout=1;
     $gotret= "timeout after $time_to_wait sec";