From: mquinson Date: Mon, 11 May 2009 22:37:59 +0000 (+0000) Subject: Fix a bug in task exchange which broke MSG_task_get_sender() X-Git-Tag: SVN~1358 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/bc6d7585229ccb9b293a722a3fa9691f3ce16360 Fix a bug in task exchange which broke MSG_task_get_sender() git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@6277 48e7efb5-ca39-0410-a469-dd3cf9ba447f --- diff --git a/.gitignore b/.gitignore index e7d7fe793c..57bae96dd4 100644 --- a/.gitignore +++ b/.gitignore @@ -82,8 +82,6 @@ examples/simdag/metaxml/sd_meta examples/simdag/properties/sd_prop examples/simdag/sd_test examples/simdag/sd_test2 -teshsuite/simdag/platforms/basic_parsing_test -teshsuite/simdag/platforms/flatifier simgrid-3.3-svn/* src/.classes/* src/context_sysv_config.h @@ -101,6 +99,7 @@ teshsuite/gras/msg_handle/msg_handle_server teshsuite/gras/small_sleep/log.txt teshsuite/gras/small_sleep/small_sleep_function teshsuite/gras/modelcheck/modelcheck_checker +teshsuite/msg/get_sender teshsuite/simdag/basic0 teshsuite/simdag/basic1 teshsuite/simdag/basic2 @@ -116,6 +115,8 @@ teshsuite/simdag/network/p2p/test_latency2 teshsuite/simdag/network/p2p/test_latency3 teshsuite/simdag/network/p2p/test_latency_bound teshsuite/simdag/network/test_reinit_costs +teshsuite/simdag/platforms/basic_parsing_test +teshsuite/simdag/platforms/flatifier teshsuite/simdag/partask/test_comp_only_par teshsuite/simdag/partask/test_comp_only_seq teshsuite/xbt/log_large_test diff --git a/ChangeLog b/ChangeLog index 8acec8db26..e0f9f266dd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -15,7 +15,11 @@ SimGrid (3.4-svn) unstable; urgency=high triggers the corresponding actions within the system. For now, only a toy example is provided in examples/msg/actions * Add an exemple of process migration in examples/msg/migration - + * Fix a bug in task exchange which broke MSG_task_get_sender() + Add a teshsuite regression test for that. + [Bug: if MSG_task_get_sender() is called after sender exit, + bad things happen] + SIMIX: * Add SIMIX_process_set_name() to change the name of the current process in the log messages. diff --git a/src/msg/msg_mailbox.c b/src/msg/msg_mailbox.c index a6a4444fdf..1b75fd6ac4 100644 --- a/src/msg/msg_mailbox.c +++ b/src/msg/msg_mailbox.c @@ -259,7 +259,7 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task, /* This is a hack. We know that both the receiver and the sender will need to look at the content of t_simdata->comm. And it needs to be destroyed. However, we don't known whether the receiver or the sender - will get to it first. So by setting whit refcount to 2 we can enforce + will get to it first. So by setting with refcount to 2 we can enforce that things happen correctly. An alternative would be to only do ++ and -- on this refcount and to sprinkle them judiciously throughout the code, which appears perhaps worse? Or perhaps the refcount field of @@ -285,13 +285,12 @@ MSG_mailbox_get_task_ext(msg_mailbox_t mailbox, m_task_t * task, SIMIX_unregister_action_to_condition(t_simdata->comm, t_simdata->cond); process->simdata->waiting_task = NULL; - /* the task has already finished and the pointer must be null */ - if (t->simdata->sender) { + /* If sender still around (it didn't free the comm yet), note that it's not waiting anymore */ + if (t_simdata->comm->refcount == 2) { t->simdata->sender->simdata->waiting_task = NULL; } /* for this process, don't need to change in get function */ - t->simdata->receiver = NULL; SIMIX_mutex_unlock(t_simdata->mutex); @@ -360,10 +359,9 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, THROW1(not_found_error, 0, "Host %s not fount", hostname); - DEBUG4 - ("Trying to send a task (%g kB) from %s to %s on the channel aliased by the alias %s", - t_simdata->message_size / 1000, local_host->name, - remote_host->name, MSG_mailbox_get_alias(mailbox)); + DEBUG4("Trying to send a task (%g kB) from %s to %s on the channel %s", + t_simdata->message_size / 1000, local_host->name, + remote_host->name, MSG_mailbox_get_alias(mailbox)); SIMIX_mutex_lock(remote_host->simdata->mutex); @@ -410,12 +408,10 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, /* remove the task from the mailbox */ MSG_mailbox_remove(mailbox, task); - if (t_simdata->receiver) { + if (t_simdata->receiver && t_simdata->receiver->simdata) { /* receiver still around */ t_simdata->receiver->simdata->waiting_task = NULL; } - t_simdata->sender = NULL; - SIMIX_mutex_unlock(t_simdata->mutex); MSG_RETURN(MSG_TRANSFER_FAILURE); } @@ -436,12 +432,10 @@ MSG_mailbox_put_with_timeout(msg_mailbox_t mailbox, m_task_t task, DEBUG1("Action terminated %s", task->name); process->simdata->waiting_task = NULL; - /* the task has already finished and the pointer must be null */ - if (t_simdata->receiver) { + if (t_simdata->comm->refcount == 2) { //receiver didn't free it yet: he's still around t_simdata->receiver->simdata->waiting_task = NULL; } - t_simdata->sender = NULL; SIMIX_mutex_unlock(task->simdata->mutex); if (SIMIX_action_get_state(t_simdata->comm) == SURF_ACTION_DONE) { diff --git a/teshsuite/Makefile.am b/teshsuite/Makefile.am index f5dd4bc68c..3f601ad745 100644 --- a/teshsuite/Makefile.am +++ b/teshsuite/Makefile.am @@ -151,6 +151,17 @@ else TESTS += gras/small_sleep/test_sg_64 endif +############# +# MSG tests # +############# + +noinst_PROGRAMS += msg/get_sender +EXTRA_DIST += msg/get_sender.xml \ + msg/get_sender.tesh +msg_get_sender_SOURCES = msg/get_sender.c +msg_get_sender_LDADD = $(LDADD_SG) +TESTS += msg/get_sender.tesh + ################################### # network model test via SimDag API ################################### diff --git a/teshsuite/msg/get_sender.c b/teshsuite/msg/get_sender.c new file mode 100644 index 0000000000..308d41dcbe --- /dev/null +++ b/teshsuite/msg/get_sender.c @@ -0,0 +1,44 @@ +#include +#include "msg/msg.h" +#include + +XBT_LOG_NEW_DEFAULT_CATEGORY(test,"Messages specific to this example"); + + +static int send(int argc, char *argv[]){ + INFO0("Sending"); + MSG_task_put(MSG_task_create("Blah", 0.0, 0.0, NULL), + MSG_host_self(), 0); + MSG_process_sleep(1.); /* FIXME: if the sender exits before the receiver calls get_sender(), bad thing happens */ + INFO0("Exiting"); + return 0; +} + +static int receive(int argc, char *argv[]) { + INFO0("Receiving"); + m_task_t task = NULL; + MSG_task_get_with_timeout(&task, 0, DBL_MAX); + xbt_assert0(MSG_task_get_sender(task), "No sender received"); + INFO1("Got a message sent by '%s'", MSG_process_get_name(MSG_task_get_sender(task))); + return 0; +} + +/** Main function */ +int main(int argc, char *argv[]) { + MSG_error_t res = MSG_OK; + + MSG_global_init(&argc,argv); + MSG_set_channel_number(100); + + /* Application deployment */ + MSG_function_register("send", &send); + MSG_function_register("receive", &receive); + + MSG_create_environment(argv[1]); + MSG_launch_application(argv[1]); + res = MSG_main(); + MSG_clean(); + if(res==MSG_OK) return 0; + else return 1; +} + diff --git a/teshsuite/msg/get_sender.tesh b/teshsuite/msg/get_sender.tesh new file mode 100644 index 0000000000..eecff1c1f5 --- /dev/null +++ b/teshsuite/msg/get_sender.tesh @@ -0,0 +1,5 @@ +$ msg/get_sender msg/get_sender.xml +> [toto:send:(1) 0.000000] [test/INFO] Sending +> [toto:receive:(2) 0.000000] [test/INFO] Receiving +> [toto:receive:(2) 0.000000] [test/INFO] Got a message sent by 'send' +> [toto:send:(1) 1.000000] [test/INFO] Exiting diff --git a/teshsuite/msg/get_sender.xml b/teshsuite/msg/get_sender.xml new file mode 100644 index 0000000000..86ea2a93c3 --- /dev/null +++ b/teshsuite/msg/get_sender.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + +