Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Fix a bug in task exchange which broke MSG_task_get_sender()
author mquinson <mquinson@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
Mon, 11 May 2009 22:37:59 +0000 (22:37 +0000)
committer mquinson <mquinson@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
Mon, 11 May 2009 22:37:59 +0000 (22:37 +0000)
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@6277 48e7efb5-ca39-0410-a469-dd3cf9ba447f

.gitignore
ChangeLog
src/msg/msg_mailbox.c
teshsuite/Makefile.am
teshsuite/msg/get_sender.c [new file with mode: 0644]
teshsuite/msg/get_sender.tesh [new file with mode: 0644]
teshsuite/msg/get_sender.xml [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
index e7d7fe7..57bae96 100644 (file)
@@ -82,8 +82,6 @@ examples/simdag/metaxml/sd_meta
 examples/simdag/properties/sd_prop
 examples/simdag/sd_test
 examples/simdag/sd_test2
-teshsuite/simdag/platforms/basic_parsing_test
-teshsuite/simdag/platforms/flatifier
 simgrid-3.3-svn/*
 src/.classes/*
 src/context_sysv_config.h
@@ -101,6 +99,7 @@ teshsuite/gras/msg_handle/msg_handle_server
 teshsuite/gras/small_sleep/log.txt
 teshsuite/gras/small_sleep/small_sleep_function
 teshsuite/gras/modelcheck/modelcheck_checker
+teshsuite/msg/get_sender
 teshsuite/simdag/basic0
 teshsuite/simdag/basic1
 teshsuite/simdag/basic2
@@ -116,6 +115,8 @@ teshsuite/simdag/network/p2p/test_latency2
 teshsuite/simdag/network/p2p/test_latency3
 teshsuite/simdag/network/p2p/test_latency_bound
 teshsuite/simdag/network/test_reinit_costs
+teshsuite/simdag/platforms/basic_parsing_test
+teshsuite/simdag/platforms/flatifier
 teshsuite/simdag/partask/test_comp_only_par
 teshsuite/simdag/partask/test_comp_only_seq
 teshsuite/xbt/log_large_test
index 8acec8d..e0f9f26 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -15,7 +15,11 @@ SimGrid (3.4-svn) unstable; urgency=high
      triggers the corresponding actions within the system.
     For now, only a toy example is provided in examples/msg/actions
   * Add an exemple of process migration in examples/msg/migration
-    
+  * Fix a bug in task exchange which broke MSG_task_get_sender()
+    Add a teshsuite regression test for that.
+    [Bug: if MSG_task_get_sender() is called after sender exit,
+     bad things happen]
+     
  SIMIX:
   * Add SIMIX_process_set_name() to change the name of the current
     process in the log messages.
diff --git a/src/msg/msg_mailbox.c b/src/msg/msg_mailbox.c
index a6a4444..1b75fd6 100644 (file)
@@ -259,7 +259,7 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
   /* This is a hack. We know that both the receiver and the sender will
      need to look at the content of t_simdata->comm. And it needs to be
      destroyed. However, we don't known whether the receiver or the sender
-     will get to it first. So by setting whit refcount to 2 we can enforce
+     will get to it first. So by setting with refcount to 2 we can enforce
      that things happen correctly. An alternative would be to only do ++ and
      -- on this refcount and to sprinkle them judiciously throughout the code,
      which appears perhaps worse? Or perhaps the refcount field of
@@ -285,13 +285,12 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task,
   SIMIX_unregister_action_to_condition(t_simdata->comm, t_simdata->cond);
   process->simdata->waiting_task = NULL;
 
-  /* the task has already finished and the pointer must be null */
-  if (t->simdata->sender) {
+  /* If sender still around (it didn't free the comm yet), note that it's not waiting anymore */
+  if (t_simdata->comm->refcount == 2) {
     t->simdata->sender->simdata->waiting_task = NULL;
   }
 
   /* for this process, don't need to change in get function */
-  t->simdata->receiver = NULL;
   SIMIX_mutex_unlock(t_simdata->mutex);
 
 
@@ -360,10 +359,9 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
     THROW1(not_found_error, 0, "Host %s not fount", hostname);
 
 
-  DEBUG4
-      ("Trying to send a task (%g kB) from %s to %s on the channel aliased by the alias %s",
-       t_simdata->message_size / 1000, local_host->name,
-       remote_host->name, MSG_mailbox_get_alias(mailbox));
+  DEBUG4("Trying to send a task (%g kB) from %s to %s on the channel %s",
+                 t_simdata->message_size / 1000, local_host->name,
+                 remote_host->name, MSG_mailbox_get_alias(mailbox));
 
   SIMIX_mutex_lock(remote_host->simdata->mutex);
 
@@ -410,12 +408,10 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
          /* remove the task from the mailbox */
          MSG_mailbox_remove(mailbox, task);
 
-         if (t_simdata->receiver) {
+         if (t_simdata->receiver && t_simdata->receiver->simdata) { /* receiver still around */
            t_simdata->receiver->simdata->waiting_task = NULL;
          }
 
-         t_simdata->sender = NULL;
-
          SIMIX_mutex_unlock(t_simdata->mutex);
          MSG_RETURN(MSG_TRANSFER_FAILURE);
        }
@@ -436,12 +432,10 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task,
   DEBUG1("Action terminated %s", task->name);
   process->simdata->waiting_task = NULL;
 
-  /* the task has already finished and the pointer must be null */
-  if (t_simdata->receiver) {
+  if (t_simdata->comm->refcount == 2) { //receiver didn't free it yet: he's still around
     t_simdata->receiver->simdata->waiting_task = NULL;
   }
 
-  t_simdata->sender = NULL;
   SIMIX_mutex_unlock(task->simdata->mutex);
 
   if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) {
diff --git a/teshsuite/Makefile.am b/teshsuite/Makefile.am
index f5dd4bc..3f601ad 100644 (file)
@@ -151,6 +151,17 @@ else
   TESTS += gras/small_sleep/test_sg_64
 endif
 
+#############
+# MSG tests #
+#############
+
+noinst_PROGRAMS += msg/get_sender
+EXTRA_DIST      += msg/get_sender.xml \
+                  msg/get_sender.tesh
+msg_get_sender_SOURCES = msg/get_sender.c
+msg_get_sender_LDADD   = $(LDADD_SG)
+TESTS += msg/get_sender.tesh
+
 ###################################
 # network model test via SimDag API
 ###################################
diff --git a/teshsuite/msg/get_sender.c b/teshsuite/msg/get_sender.c
new file mode 100644 (file)
index 0000000..308d41d
--- /dev/null
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include "msg/msg.h"
+#include <float.h>
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(test,"Messages specific to this example");
+
+
+static int send(int argc, char *argv[]){
+  INFO0("Sending");
+  MSG_task_put(MSG_task_create("Blah", 0.0, 0.0, NULL),
+              MSG_host_self(), 0);
+  MSG_process_sleep(1.); /* FIXME: if the sender exits before the receiver calls get_sender(), bad thing happens */
+  INFO0("Exiting");
+  return 0;
+}
+
+static int receive(int argc, char *argv[]) {
+  INFO0("Receiving");
+  m_task_t task = NULL;
+  MSG_task_get_with_timeout(&task, 0, DBL_MAX);
+  xbt_assert0(MSG_task_get_sender(task), "No sender received");
+  INFO1("Got a message sent by '%s'", MSG_process_get_name(MSG_task_get_sender(task)));
+  return 0;
+}
+
+/** Main function */
+int main(int argc, char *argv[]) {
+  MSG_error_t res = MSG_OK;
+
+  MSG_global_init(&argc,argv);
+  MSG_set_channel_number(100);
+
+  /*   Application deployment */
+  MSG_function_register("send", &send);
+  MSG_function_register("receive", &receive);
+
+  MSG_create_environment(argv[1]);
+  MSG_launch_application(argv[1]);
+  res = MSG_main();
+  MSG_clean();
+  if(res==MSG_OK) return 0;
+  else return 1;
+}
+
diff --git a/teshsuite/msg/get_sender.tesh b/teshsuite/msg/get_sender.tesh
new file mode 100644 (file)
index 0000000..eecff1c
--- /dev/null
@@ -0,0 +1,5 @@
+$ msg/get_sender msg/get_sender.xml
+> [toto:send:(1) 0.000000] [test/INFO] Sending
+> [toto:receive:(2) 0.000000] [test/INFO] Receiving
+> [toto:receive:(2) 0.000000] [test/INFO] Got a message sent by 'send'
+> [toto:send:(1) 1.000000] [test/INFO] Exiting
diff --git a/teshsuite/msg/get_sender.xml b/teshsuite/msg/get_sender.xml
new file mode 100644 (file)
index 0000000..86ea2a9
--- /dev/null
@@ -0,0 +1,13 @@
+<?xml version='1.0'?>
+<!DOCTYPE platform SYSTEM "simgrid.dtd">
+<platform version="2">
+  <!-- Platform -->
+  
+  <host id="toto" power="1000000"/>
+  <link id="lb" bandwidth="100000000" latency="0.000000"/>
+  <route src="toto" dst="toto"><link:ctn id="lb"/></route>
+  
+  <!-- Deployment -->
+  <process host="toto" function="send"/>
+  <process host="toto" function="receive"/>
+</platform>