Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix a bug in task exchange which broke MSG_task_get_sender()
author mquinson <mquinson@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
Mon, 11 May 2009 22:37:59 +0000 (22:37 +0000)
committer mquinson <mquinson@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
Mon, 11 May 2009 22:37:59 +0000 (22:37 +0000)
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@6277 48e7efb5-ca39-0410-a469-dd3cf9ba447f

.gitignore
ChangeLog
src/msg/msg_mailbox.c
teshsuite/Makefile.am
teshsuite/msg/get_sender.c [new file with mode: 0644]
teshsuite/msg/get_sender.tesh [new file with mode: 0644]
teshsuite/msg/get_sender.xml [new file with mode: 0644]

index e7d7fe7..57bae96 100644 (file)
@@ -82,8 +82,6 @@ examples/simdag/metaxml/sd_meta
 examples/simdag/properties/sd_prop
 examples/simdag/sd_test
 examples/simdag/sd_test2
 examples/simdag/properties/sd_prop
 examples/simdag/sd_test
 examples/simdag/sd_test2
-teshsuite/simdag/platforms/basic_parsing_test
-teshsuite/simdag/platforms/flatifier
 simgrid-3.3-svn/*
 src/.classes/*
 src/context_sysv_config.h
 simgrid-3.3-svn/*
 src/.classes/*
 src/context_sysv_config.h
@@ -101,6 +99,7 @@ teshsuite/gras/msg_handle/msg_handle_server
 teshsuite/gras/small_sleep/log.txt
 teshsuite/gras/small_sleep/small_sleep_function
 teshsuite/gras/modelcheck/modelcheck_checker
 teshsuite/gras/small_sleep/log.txt
 teshsuite/gras/small_sleep/small_sleep_function
 teshsuite/gras/modelcheck/modelcheck_checker
+teshsuite/msg/get_sender
 teshsuite/simdag/basic0
 teshsuite/simdag/basic1
 teshsuite/simdag/basic2
 teshsuite/simdag/basic0
 teshsuite/simdag/basic1
 teshsuite/simdag/basic2
@@ -116,6 +115,8 @@ teshsuite/simdag/network/p2p/test_latency2
 teshsuite/simdag/network/p2p/test_latency3
 teshsuite/simdag/network/p2p/test_latency_bound
 teshsuite/simdag/network/test_reinit_costs
 teshsuite/simdag/network/p2p/test_latency3
 teshsuite/simdag/network/p2p/test_latency_bound
 teshsuite/simdag/network/test_reinit_costs
+teshsuite/simdag/platforms/basic_parsing_test
+teshsuite/simdag/platforms/flatifier
 teshsuite/simdag/partask/test_comp_only_par
 teshsuite/simdag/partask/test_comp_only_seq
 teshsuite/xbt/log_large_test
 teshsuite/simdag/partask/test_comp_only_par
 teshsuite/simdag/partask/test_comp_only_seq
 teshsuite/xbt/log_large_test
index 8acec8d..e0f9f26 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -15,7 +15,11 @@ SimGrid (3.4-svn) unstable; urgency=high
      triggers the corresponding actions within the system.
     For now, only a toy example is provided in examples/msg/actions
   * Add an exemple of process migration in examples/msg/migration
      triggers the corresponding actions within the system.
     For now, only a toy example is provided in examples/msg/actions
   * Add an exemple of process migration in examples/msg/migration
-    
+  * Fix a bug in task exchange which broke MSG_task_get_sender()
+    Add a teshsuite regression test for that.
+    [Bug: if MSG_task_get_sender() is called after the sender exits,
+     bad things happen]
+     
  SIMIX:
   * Add SIMIX_process_set_name() to change the name of the current
     process in the log messages.
  SIMIX:
   * Add SIMIX_process_set_name() to change the name of the current
     process in the log messages.
index a6a4444..1b75fd6 100644 (file)
@@ -259,7 +259,7 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
   /* This is a hack. We know that both the receiver and the sender will
      need to look at the content of t_simdata->comm. And it needs to be
      destroyed. However, we don't known whether the receiver or the sender
   /* This is a hack. We know that both the receiver and the sender will
      need to look at the content of t_simdata->comm. And it needs to be
      destroyed. However, we don't known whether the receiver or the sender
-     will get to it first. So by setting whit refcount to 2 we can enforce
+     will get to it first. So by setting its refcount to 2 we can enforce
      that things happen correctly. An alternative would be to only do ++ and
      -- on this refcount and to sprinkle them judiciously throughout the code,
      which appears perhaps worse? Or perhaps the refcount field of
      that things happen correctly. An alternative would be to only do ++ and
      -- on this refcount and to sprinkle them judiciously throughout the code,
      which appears perhaps worse? Or perhaps the refcount field of
@@ -285,13 +285,12 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
   SIMIX_unregister_action_to_condition(t_simdata->comm, t_simdata->cond);
   process->simdata->waiting_task = NULL;
 
   SIMIX_unregister_action_to_condition(t_simdata->comm, t_simdata->cond);
   process->simdata->waiting_task = NULL;
 
-  /* the task has already finished and the pointer must be null */
-  if (t->simdata->sender) {
+  /* If the sender is still around (it has not freed the comm yet), note that it is not waiting anymore */
+  if (t_simdata->comm->refcount == 2) {
     t->simdata->sender->simdata->waiting_task = NULL;
   }
 
   /* for this process, don't need to change in get function */
     t->simdata->sender->simdata->waiting_task = NULL;
   }
 
   /* for this process, don't need to change in get function */
-  t->simdata->receiver = NULL;
   SIMIX_mutex_unlock(t_simdata->mutex);
 
 
   SIMIX_mutex_unlock(t_simdata->mutex);
 
 
@@ -360,10 +359,9 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
     THROW1(not_found_error, 0, "Host %s not fount", hostname);
 
 
     THROW1(not_found_error, 0, "Host %s not fount", hostname);
 
 
-  DEBUG4
-      ("Trying to send a task (%g kB) from %s to %s on the channel aliased by the alias %s",
-       t_simdata->message_size / 1000, local_host->name,
-       remote_host->name, MSG_mailbox_get_alias(mailbox));
+  DEBUG4("Trying to send a task (%g kB) from %s to %s on the channel %s",
+                 t_simdata->message_size / 1000, local_host->name,
+                 remote_host->name, MSG_mailbox_get_alias(mailbox));
 
   SIMIX_mutex_lock(remote_host->simdata->mutex);
 
 
   SIMIX_mutex_lock(remote_host->simdata->mutex);
 
@@ -410,12 +408,10 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
          /* remove the task from the mailbox */
          MSG_mailbox_remove(mailbox, task);
 
          /* remove the task from the mailbox */
          MSG_mailbox_remove(mailbox, task);
 
-         if (t_simdata->receiver) {
+         if (t_simdata->receiver && t_simdata->receiver->simdata) { /* receiver still around */
            t_simdata->receiver->simdata->waiting_task = NULL;
          }
 
            t_simdata->receiver->simdata->waiting_task = NULL;
          }
 
-         t_simdata->sender = NULL;
-
          SIMIX_mutex_unlock(t_simdata->mutex);
          MSG_RETURN(MSG_TRANSFER_FAILURE);
        }
          SIMIX_mutex_unlock(t_simdata->mutex);
          MSG_RETURN(MSG_TRANSFER_FAILURE);
        }
@@ -436,12 +432,10 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
   DEBUG1("Action terminated %s", task->name);
   process->simdata->waiting_task = NULL;
 
   DEBUG1("Action terminated %s", task->name);
   process->simdata->waiting_task = NULL;
 
-  /* the task has already finished and the pointer must be null */
-  if (t_simdata->receiver) {
+  if (t_simdata->comm->refcount == 2) { //receiver didn't free it yet: he's still around
     t_simdata->receiver->simdata->waiting_task = NULL;
   }
 
     t_simdata->receiver->simdata->waiting_task = NULL;
   }
 
-  t_simdata->sender = NULL;
   SIMIX_mutex_unlock(task->simdata->mutex);
 
   if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
   SIMIX_mutex_unlock(task->simdata->mutex);
 
   if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
index f5dd4bc..3f601ad 100644 (file)
@@ -151,6 +151,17 @@ else
   TESTS += gras/small_sleep/test_sg_64
 endif
 
   TESTS += gras/small_sleep/test_sg_64
 endif
 
+#############
+# MSG tests #
+#############
+
+noinst_PROGRAMS += msg/get_sender
+EXTRA_DIST      += msg/get_sender.xml \
+                  msg/get_sender.tesh
+msg_get_sender_SOURCES = msg/get_sender.c
+msg_get_sender_LDADD   = $(LDADD_SG)
+TESTS += msg/get_sender.tesh
+
 ###################################
 # network model test via SimDag API
 ###################################
 ###################################
 # network model test via SimDag API
 ###################################
diff --git a/teshsuite/msg/get_sender.c b/teshsuite/msg/get_sender.c
new file mode 100644 (file)
index 0000000..308d41d
--- /dev/null
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include "msg/msg.h"
+#include <float.h>
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(test,"Messages specific to this example");
+
+
+static int send(int argc, char *argv[]){
+  INFO0("Sending");
+  MSG_task_put(MSG_task_create("Blah", 0.0, 0.0, NULL),
+              MSG_host_self(), 0);
+  MSG_process_sleep(1.); /* FIXME: if the sender exits before the receiver calls get_sender(), bad things happen */
+  INFO0("Exiting");
+  return 0;
+}
+
+static int receive(int argc, char *argv[]) {
+  INFO0("Receiving");
+  m_task_t task = NULL;
+  MSG_task_get_with_timeout(&task, 0, DBL_MAX);
+  xbt_assert0(MSG_task_get_sender(task), "No sender received");
+  INFO1("Got a message sent by '%s'", MSG_process_get_name(MSG_task_get_sender(task)));
+  return 0;
+}
+
+/** Main function */
+int main(int argc, char *argv[]) {
+  MSG_error_t res = MSG_OK;
+
+  MSG_global_init(&argc,argv);
+  MSG_set_channel_number(100);
+
+  /*   Application deployment */
+  MSG_function_register("send", &send);
+  MSG_function_register("receive", &receive);
+
+  MSG_create_environment(argv[1]);
+  MSG_launch_application(argv[1]);
+  res = MSG_main();
+  MSG_clean();
+  if(res==MSG_OK) return 0;
+  else return 1;
+}
+
diff --git a/teshsuite/msg/get_sender.tesh b/teshsuite/msg/get_sender.tesh
new file mode 100644 (file)
index 0000000..eecff1c
--- /dev/null
@@ -0,0 +1,5 @@
+$ msg/get_sender msg/get_sender.xml
+> [toto:send:(1) 0.000000] [test/INFO] Sending
+> [toto:receive:(2) 0.000000] [test/INFO] Receiving
+> [toto:receive:(2) 0.000000] [test/INFO] Got a message sent by 'send'
+> [toto:send:(1) 1.000000] [test/INFO] Exiting
diff --git a/teshsuite/msg/get_sender.xml b/teshsuite/msg/get_sender.xml
new file mode 100644 (file)
index 0000000..86ea2a9
--- /dev/null
@@ -0,0 +1,13 @@
+<?xml version='1.0'?>
+<!DOCTYPE platform SYSTEM "simgrid.dtd">
+<platform version="2">
+  <!-- Platform -->
+  
+  <host id="toto" power="1000000"/>
+  <link id="lb" bandwidth="100000000" latency="0.000000"/>
+  <route src="toto" dst="toto"><link:ctn id="lb"/></route>
+  
+  <!-- Deployment -->
+  <process host="toto" function="send"/>
+  <process host="toto" function="receive"/>
+</platform>