From 93ff1e377b7880b5410f82d1c0fbaad0afb68f84 Mon Sep 17 00:00:00 2001
From: alegrand
Date: Tue, 14 Aug 2007 07:52:22 +0000
Subject: [PATCH] Adding a masterslave example with failures.

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@4082 48e7efb5-ca39-0410-a469-dd3cf9ba447f
---
 examples/msg/Makefile.am                      |  11 +-
 examples/msg/jupiter_state.trace              |   3 +
 examples/msg/link5_state.trace                |   3 +
 .../masterslave/deployment_masterslave.xml    |   4 +-
 .../msg/masterslave/masterslave_failure.c     | 252 ++++++++++++++++++
 examples/msg/small_platform_with_failures.xml |  90 ++++++++
 6 files changed, 358 insertions(+), 5 deletions(-)
 create mode 100644 examples/msg/jupiter_state.trace
 create mode 100644 examples/msg/link5_state.trace
 create mode 100644 examples/msg/masterslave/masterslave_failure.c
 create mode 100644 examples/msg/small_platform_with_failures.xml

diff --git a/examples/msg/Makefile.am b/examples/msg/Makefile.am
index a90bc2fa6e..ad5163ad05 100644
--- a/examples/msg/Makefile.am
+++ b/examples/msg/Makefile.am
@@ -52,7 +52,8 @@ endif
 bin_PROGRAMS = sendrecv/sendrecv \
                suspend/suspend \
                masterslave/masterslave \
-               masterslave/masterslave_forwarder
+               masterslave/masterslave_forwarder \
+               masterslave/masterslave_failure
 #masterslave/masterslave_bypass
 
 if HAVE_GTNETS
@@ -69,14 +70,18 @@ sendrecv_sendrecv_LDADD = $(top_builddir)/src/libsimgrid.la
 suspend_suspend_SOURCES = suspend/suspend.c
 suspend_suspend_LDADD = $(top_builddir)/src/libsimgrid.la
 
-# master/slave application example using a forwarder module
+# master/slave application example
 masterslave_masterslave_SOURCES = masterslave/masterslave.c
 masterslave_masterslave_LDADD = $(top_builddir)/src/libsimgrid.la
 
-# verify the communication time of a simple simulation
+# master/slave application example using a forwarder module
 masterslave_masterslave_forwarder_SOURCES = masterslave/masterslave_forwarder.c
 masterslave_masterslave_forwarder_LDADD = $(top_builddir)/src/libsimgrid.la
 
+# master/slave application example with failures
+masterslave_masterslave_failure_SOURCES = masterslave/masterslave_failure.c
+masterslave_masterslave_failure_LDADD = $(top_builddir)/src/libsimgrid.la
+
 # bypass the surf xml parser
 #masterslave_masterslave_bypass_SOURCES = masterslave/masterslave_bypass.c
 #masterslave_masterslave_bypass_LDADD = $(top_builddir)/src/libsimgrid.la
diff --git a/examples/msg/jupiter_state.trace b/examples/msg/jupiter_state.trace
new file mode 100644
index 0000000000..bd24c43d8d
--- /dev/null
+++ b/examples/msg/jupiter_state.trace
@@ -0,0 +1,3 @@
+0 1
+1.1 0
+2 1
diff --git a/examples/msg/link5_state.trace b/examples/msg/link5_state.trace
new file mode 100644
index 0000000000..bc5ff5c43c
--- /dev/null
+++ b/examples/msg/link5_state.trace
@@ -0,0 +1,3 @@
+0 1
+1.0 0
+10 1
diff --git a/examples/msg/masterslave/deployment_masterslave.xml b/examples/msg/masterslave/deployment_masterslave.xml
index 102c3bb13d..c00e29dfc7 100644
--- a/examples/msg/masterslave/deployment_masterslave.xml
+++ b/examples/msg/masterslave/deployment_masterslave.xml
@@ -4,8 +4,8 @@
-
-
+
+
diff --git a/examples/msg/masterslave/masterslave_failure.c b/examples/msg/masterslave/masterslave_failure.c
new file mode 100644
index 0000000000..ee1a615f80
--- /dev/null
+++ b/examples/msg/masterslave/masterslave_failure.c
@@ -0,0 +1,252 @@
+/* $Id$ */
+
+/* Copyright (c) 2002,2003,2004 Arnaud Legrand. All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "msg/msg.h" /* Yeah! If you want to use msg, you need to include msg/msg.h */
+#include "xbt/sysdep.h" /* calloc, printf */
+
+/* Create a log channel to have nice outputs. */
+#include "xbt/log.h"
+#include "xbt/asserts.h"
+XBT_LOG_NEW_DEFAULT_CATEGORY(msg_test,"Messages specific for this msg example");
+
+int master(int argc, char *argv[]);
+int slave(int argc, char *argv[]);
+int forwarder(int argc, char *argv[]);
+MSG_error_t test_all(const char *platform_file, const char *application_file);
+
+typedef enum {
+  PORT_22 = 0,
+  MAX_CHANNEL
+} channel_t;
+
+#define FINALIZE ((void*)221297) /* a magic number to tell people to stop working */
+
+/** Emitter function
+ *
+ * Sends argv[1] tasks (computation size argv[2], communication size argv[3])
+ * round-robin to the hosts named in argv[4..], then sends each of them a
+ * FINALIZE task. A transfer failure only drops the current task; a failure
+ * of the local host ends the process. Always returns 0.
+ */
+int master(int argc, char *argv[])
+{
+  int slaves_count = 0;
+  m_host_t *slaves = NULL;
+  int number_of_tasks = 0;
+  double task_comp_size = 0;
+  double task_comm_size = 0;
+  int parsed;
+  int i;
+
+  if (argc < 4) {
+    INFO0("Missing argument(s). Stopping Now! ");
+    abort();
+  }
+
+  /* The conversions are kept out of the assertions: they must still happen
+   * if assertions are compiled out, and sscanf may return EOF (non-zero) on
+   * failure, so its result is compared with the number of expected items. */
+  parsed = sscanf(argv[1],"%d", &number_of_tasks);
+  xbt_assert1(parsed == 1, "Invalid argument %s\n",argv[1]);
+  parsed = sscanf(argv[2],"%lg", &task_comp_size);
+  xbt_assert1(parsed == 1, "Invalid argument %s\n",argv[2]);
+  parsed = sscanf(argv[3],"%lg", &task_comm_size);
+  xbt_assert1(parsed == 1, "Invalid argument %s\n",argv[3]);
+
+  { /* Process organisation */
+    slaves_count = argc - 4;
+    if (slaves_count == 0 && number_of_tasks > 0) {
+      /* "i % slaves_count" below would be a division by zero */
+      INFO0("No slave to send the tasks to. Stopping Now! ");
+      abort();
+    }
+    slaves = calloc(slaves_count, sizeof *slaves);
+    if (slaves == NULL && slaves_count > 0) {
+      INFO0("Out of memory. Stopping Now! ");
+      abort();
+    }
+
+    for (i = 4; i < argc; i++) {
+      slaves[i-4] = MSG_get_host_by_name(argv[i]);
+      if(slaves[i-4]==NULL) {
+        INFO1("Unknown host %s. Stopping Now! ", argv[i]);
+        abort();
+      }
+    }
+  }
+
+  INFO1("Got %d slave(s) :", slaves_count);
+  for (i = 0; i < slaves_count; i++)
+    INFO1("%s", slaves[i]->name);
+
+  INFO1("Got %d task to process :", number_of_tasks);
+
+  for (i = 0; i < number_of_tasks; i++) {
+    m_host_t dest = slaves[i % slaves_count];
+    double *creation_date = calloc(1, sizeof *creation_date);
+    m_task_t task;
+    int a;
+
+    if (creation_date == NULL) {
+      INFO0("Out of memory. Stopping Now! ");
+      abort();
+    }
+    /* The payload carries the creation date of the task; the slave reads it
+     * to compute the communication time. */
+    task = MSG_task_create("Task", task_comp_size, task_comm_size,
+                           creation_date);
+    *creation_date = MSG_get_clock();
+
+    a = MSG_task_put_with_timeout(task, dest, PORT_22,10.0);
+    if (a == MSG_OK) {
+      INFO0("Send completed");
+    } else if (a == MSG_HOST_FAILURE) {
+      INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+      free(slaves);
+      return 0;
+    } else if (a == MSG_TRANSFER_FAILURE) {
+      INFO1("Mmh. Something went wrong with '%s'. Nevermind. Let's keep going!",
+            dest->name);
+      /* The master still owns the task here (it destroys it right below), so
+       * it has to release the payload too. */
+      free(creation_date);
+      MSG_task_destroy(task);
+    } else {
+      INFO0("Hey ?! What's up ? ");
+      xbt_assert0(0,"Unexpected behavior");
+    }
+  }
+
+  INFO0("All tasks have been dispatched. Let's tell everybody the computation is over.");
+  for (i = 0; i < slaves_count; i++) {
+    m_task_t task = MSG_task_create("finalize", 0, 0, FINALIZE);
+    int a = MSG_task_put_with_timeout(task, slaves[i], PORT_22,1.0);
+    if (a == MSG_OK) continue;
+    if (a == MSG_HOST_FAILURE) {
+      INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+      free(slaves); /* this exit path used to leak the host array */
+      return 0;
+    } else if (a == MSG_TRANSFER_FAILURE) {
+      INFO1("Mmh. Can't reach '%s'! Nevermind. Let's keep going!",
+            slaves[i]->name);
+      MSG_task_destroy(task);
+    } else {
+      INFO0("Hey ?! What's up ? ");
+      xbt_assert2(0,"Unexpected behavior with '%s': %d",slaves[i]->name,a);
+    }
+  }
+
+  INFO0("Goodbye now!");
+  free(slaves);
+  return 0;
+} /* end_of_master */
+
+/** Receiver function
+ *
+ * Receives tasks on PORT_22 and executes them until a FINALIZE task arrives
+ * or the local host fails. Always returns 0.
+ */
+int slave(int argc, char *argv[])
+{
+  while(1) {
+    m_task_t task = NULL;
+    int a;
+    double time1,time2;
+
+    time1 = MSG_get_clock();
+    a = MSG_task_get(&(task), PORT_22);
+    time2 = MSG_get_clock();
+    if (a == MSG_OK) {
+      INFO1("Received \"%s\"", MSG_task_get_name(task));
+      if(MSG_task_get_data(task)==FINALIZE) {
+        MSG_task_destroy(task);
+        break;
+      }
+      /* Start the measure at the later of "began waiting" and "task created". */
+      if(time1<*((double *)task->data))
+        time1 = *((double *) task->data);
+      INFO1("Communication time : \"%f\"", time2-time1);
+      INFO1("Processing \"%s\"", MSG_task_get_name(task));
+      a = MSG_task_execute(task);
+      if (a == MSG_OK) {
+        INFO1("\"%s\" done", MSG_task_get_name(task));
+        free(task->data);
+        MSG_task_destroy(task);
+      } else if (a == MSG_HOST_FAILURE) {
+        INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+        return 0;
+      } else {
+        INFO0("Hey ?! What's up ? ");
+        xbt_assert0(0,"Unexpected behavior");
+      }
+    } else if (a == MSG_HOST_FAILURE) {
+      INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+      return 0;
+    } else if (a == MSG_TRANSFER_FAILURE) {
+      INFO0("Mmh. Something went wrong. Nevermind. Let's keep going!");
+    } else {
+      INFO0("Hey ?! What's up ? ");
+      xbt_assert0(0,"Unexpected behavior");
+    }
+  }
+  INFO0("I'm done. See you!");
+  return 0;
+} /* end_of_slave */
+
+/** Test function
+ *
+ * Builds the platform, registers and deploys the master and slave functions,
+ * runs the simulation and returns the result of MSG_main().
+ */
+MSG_error_t test_all(const char *platform_file,
+                     const char *application_file)
+{
+  MSG_error_t res = MSG_OK;
+
+  /* MSG_config("workstation_model","KCCFLN05"); */
+  { /* Simulation setting */
+    MSG_set_channel_number(MAX_CHANNEL);
+    MSG_paje_output("msg_test.trace");
+    MSG_create_environment(platform_file);
+  }
+  { /* Application deployment */
+    MSG_function_register("master", master);
+    MSG_function_register("slave", slave);
+    MSG_launch_application(application_file);
+  }
+  res = MSG_main();
+
+  INFO1("Simulation time %g",MSG_get_clock());
+  return res;
+} /* end_of_test_all */
+
+
+/** Main function
+ *
+ * Expects a platform file and a deployment file on the command line.
+ * Returns 0 if the simulation ended with MSG_OK, 1 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+  MSG_error_t res = MSG_OK;
+
+  MSG_global_init(&argc,argv);
+  if (argc < 3) {
+    printf ("Usage: %s platform_file deployment_file\n",argv[0]);
+    printf ("example: %s msg_platform.xml msg_deployment.xml\n",argv[0]);
+    exit(1);
+  }
+  res = test_all(argv[1],argv[2]);
+  MSG_clean();
+
+  if(res==MSG_OK)
+    return 0;
+  else
+    return 1;
+} /* end_of_main */
diff --git a/examples/msg/small_platform_with_failures.xml b/examples/msg/small_platform_with_failures.xml
new file mode 100644
index 0000000000..a5ed1c81fc
--- /dev/null
+++ b/examples/msg/small_platform_with_failures.xml
@@ -0,0 +1,90 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-- 
2.20.1