Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Adding a masterslave example with failures.
author: alegrand <alegrand@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
Tue, 14 Aug 2007 07:52:22 +0000 (07:52 +0000)
committer: alegrand <alegrand@48e7efb5-ca39-0410-a469-dd3cf9ba447f>
Tue, 14 Aug 2007 07:52:22 +0000 (07:52 +0000)
git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/simgrid/simgrid/trunk@4082 48e7efb5-ca39-0410-a469-dd3cf9ba447f

examples/msg/Makefile.am
examples/msg/jupiter_state.trace [new file with mode: 0644]
examples/msg/link5_state.trace [new file with mode: 0644]
examples/msg/masterslave/deployment_masterslave.xml
examples/msg/masterslave/masterslave_failure.c [new file with mode: 0644]
examples/msg/small_platform_with_failures.xml [new file with mode: 0644]

index a90bc2f..ad5163a 100644 (file)
@@ -52,7 +52,8 @@ endif
 bin_PROGRAMS = sendrecv/sendrecv \
                suspend/suspend \
                masterslave/masterslave \
-               masterslave/masterslave_forwarder
+               masterslave/masterslave_forwarder \
+               masterslave/masterslave_failure
                #masterslave/masterslave_bypass
 
 if HAVE_GTNETS
@@ -69,14 +70,18 @@ sendrecv_sendrecv_LDADD   = $(top_builddir)/src/libsimgrid.la
 suspend_suspend_SOURCES = suspend/suspend.c
 suspend_suspend_LDADD   = $(top_builddir)/src/libsimgrid.la
 
-# master/slave application example using a forwarder module
+# master/slave application example
 masterslave_masterslave_SOURCES = masterslave/masterslave.c
 masterslave_masterslave_LDADD   = $(top_builddir)/src/libsimgrid.la
 
-# verify the communication time of a simple simulation
+# master/slave application example using a forwarder module
 masterslave_masterslave_forwarder_SOURCES = masterslave/masterslave_forwarder.c
 masterslave_masterslave_forwarder_LDADD   = $(top_builddir)/src/libsimgrid.la
 
+# master/slave application example with failures
+masterslave_masterslave_failure_SOURCES = masterslave/masterslave_failure.c
+masterslave_masterslave_failure_LDADD   = $(top_builddir)/src/libsimgrid.la
+
 # bypass the surf xml parser
 #masterslave_masterslave_bypass_SOURCES = masterslave/masterslave_bypass.c
 #masterslave_masterslave_bypass_LDADD   = $(top_builddir)/src/libsimgrid.la
diff --git a/examples/msg/jupiter_state.trace b/examples/msg/jupiter_state.trace
new file mode 100644 (file)
index 0000000..bd24c43
--- /dev/null
@@ -0,0 +1,3 @@
+0 1
+1.1 0
+2 1
diff --git a/examples/msg/link5_state.trace b/examples/msg/link5_state.trace
new file mode 100644 (file)
index 0000000..bc5ff5c
--- /dev/null
@@ -0,0 +1,3 @@
+0 1
+1.0 0
+10 1
index 102c3bb..c00e29d 100644 (file)
@@ -4,8 +4,8 @@
   <!-- The master process (with some arguments) -->
   <process host="Tremblay" function="master">
      <argument value="20"/>       <!-- Number of tasks -->
-     <argument value="5000000"/>  <!-- Computation size of tasks -->
-     <argument value="100000"/>   <!-- Communication size of tasks -->
+     <argument value="50000000"/>  <!-- Computation size of tasks -->
+     <argument value="1000000"/>   <!-- Communication size of tasks -->
      <argument value="Jupiter"/>  <!-- First slave -->
      <argument value="Fafard"/>   <!-- Second slave -->
      <argument value="Ginette"/>  <!-- Third slave -->
diff --git a/examples/msg/masterslave/masterslave_failure.c b/examples/msg/masterslave/masterslave_failure.c
new file mode 100644 (file)
index 0000000..ee1a615
--- /dev/null
@@ -0,0 +1,202 @@
+/*     $Id$     */
+
+/* Copyright (c) 2002,2003,2004 Arnaud Legrand. All rights reserved.        */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include <stdio.h>
+#include "msg/msg.h" /* Yeah! If you want to use msg, you need to include msg/msg.h */
+#include "xbt/sysdep.h" /* calloc, printf */
+
+/* Create a log channel to have nice outputs. */
+#include "xbt/log.h"
+#include "xbt/asserts.h"
+XBT_LOG_NEW_DEFAULT_CATEGORY(msg_test,"Messages specific for this msg example");
+
+/* Entry points of the simulated processes (registered by name in test_all())
+ * and the driver that sets up and runs one simulation. */
+int master(int argc, char *argv[]);
+int slave(int argc, char *argv[]);
+MSG_error_t test_all(const char *platform_file, const char *application_file);
+
+/* Communication channels used by this example. PORT_22 is the only channel
+ * the master and the slaves exchange tasks on; MAX_CHANNEL is the number of
+ * channels and is what test_all() passes to MSG_set_channel_number(). */
+typedef enum {
+  PORT_22 = 0,
+  MAX_CHANNEL
+} channel_t;
+
+/* Sentinel stored as the data pointer of "finalize" tasks. It is only ever
+ * compared against, never dereferenced or freed. */
+#define FINALIZE ((void*)221297) /* a magic number to tell people to stop working */
+
+/** Emitter function.
+ *
+ * Expected arguments (argv[0] is the process name):
+ *   argv[1]   number of tasks to dispatch (int)
+ *   argv[2]   computation size of each task (double)
+ *   argv[3]   communication size of each task (double)
+ *   argv[4..] names of the hosts running a slave
+ *
+ * Tasks are sent to the slaves in round-robin order on PORT_22 with a 10s
+ * timeout; each task carries a heap-allocated double holding the simulated
+ * time at which it was created. Once every task has been tried, a "finalize"
+ * task (data == FINALIZE) is sent to each slave with a 1s timeout.
+ *
+ * Ownership: a task that was successfully put belongs to the receiving slave.
+ * When a put fails, the task (and its timestamp payload) still belongs to the
+ * master, which releases both here.
+ *
+ * Returns 0 in all cases; aborts on invalid arguments, unknown host or
+ * allocation failure.
+ */
+int master(int argc, char *argv[])
+{
+  int slaves_count = 0;
+  m_host_t *slaves = NULL;
+  int number_of_tasks = 0;
+  double task_comp_size = 0;
+  double task_comm_size = 0;
+  int i;
+
+  /* argv[1..3] are read unconditionally below. */
+  if (argc < 4) {
+    INFO0("Usage: master number_of_tasks comp_size comm_size [slave ...]. Stopping Now! ");
+    abort();
+  }
+
+  /* Parse outside of the assertion macro so the conversion still happens if
+   * assertions are compiled out, and require exactly one converted item
+   * (sscanf returns EOF, which is non-zero, on empty input). */
+  if (sscanf(argv[1],"%d", &number_of_tasks) != 1) {
+    xbt_assert1(0,"Invalid argument %s\n",argv[1]);
+  }
+  if (sscanf(argv[2],"%lg", &task_comp_size) != 1) {
+    xbt_assert1(0,"Invalid argument %s\n",argv[2]);
+  }
+  if (sscanf(argv[3],"%lg", &task_comm_size) != 1) {
+    xbt_assert1(0,"Invalid argument %s\n",argv[3]);
+  }
+
+  {                  /* Process organisation */
+    slaves_count = argc - 4;
+    slaves = calloc(slaves_count, sizeof(m_host_t));
+    /* calloc(0, ...) may legitimately return NULL, hence the count test. */
+    if (slaves_count > 0 && slaves == NULL) {
+      INFO0("Out of memory. Stopping Now! ");
+      abort();
+    }
+
+    for (i = 4; i < argc; i++) {
+      slaves[i-4] = MSG_get_host_by_name(argv[i]);
+      if(slaves[i-4]==NULL) {
+        INFO1("Unknown host %s. Stopping Now! ", argv[i]);
+        abort();
+      }
+    }
+  }
+
+  INFO1("Got %d slave(s) :", slaves_count);
+  for (i = 0; i < slaves_count; i++)
+    INFO1("%s", slaves[i]->name);
+
+  INFO1("Got %d task to process :", number_of_tasks);
+
+  /* The round-robin below computes i % slaves_count. */
+  if (number_of_tasks > 0 && slaves_count == 0) {
+    INFO0("No slave to send the tasks to. Stopping Now! ");
+    abort();
+  }
+
+  for (i = 0; i < number_of_tasks; i++) {
+    m_task_t task = NULL;
+    double *creation_time = calloc(1,sizeof(double));
+    int a;
+
+    if (creation_time == NULL) {
+      INFO0("Out of memory. Stopping Now! ");
+      abort();
+    }
+    /* The slave uses this timestamp to compute the communication time. */
+    *creation_time = MSG_get_clock();
+    task = MSG_task_create("Task", task_comp_size, task_comm_size,
+                           creation_time);
+
+    a = MSG_task_put_with_timeout(task, slaves[i % slaves_count], PORT_22,10.0);
+    if (a == MSG_OK) {
+      INFO0("Send completed");
+    } else if (a == MSG_HOST_FAILURE) {
+      INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+      /* The task never left: release its payload, the task and our table. */
+      free(creation_time);
+      MSG_task_destroy(task);
+      free(slaves);
+      return 0;
+    } else if (a == MSG_TRANSFER_FAILURE) {
+      INFO1("Mmh. Something went wrong with '%s'. Nevermind. Let's keep going!",
+            slaves[i % slaves_count]->name);
+      /* The payload is not released by MSG_task_destroy (the slave frees it
+       * explicitly on its side), so do it here before dropping the task. */
+      free(creation_time);
+      MSG_task_destroy(task);
+    } else {
+      INFO0("Hey ?! What's up ? ");
+      xbt_assert0(0,"Unexpected behavior");
+    }
+  }
+
+  INFO0("All tasks have been dispatched. Let's tell everybody the computation is over.");
+  for (i = 0; i < slaves_count; i++) {
+    /* FINALIZE is a sentinel value, not heap memory: never free it. */
+    m_task_t task = MSG_task_create("finalize", 0, 0, FINALIZE);
+    int a = MSG_task_put_with_timeout(task, slaves[i], PORT_22,1.0);
+    if (a == MSG_OK) continue;
+    if (a == MSG_HOST_FAILURE) {
+      INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+      MSG_task_destroy(task);
+      free(slaves);
+      return 0;
+    } else if (a == MSG_TRANSFER_FAILURE) {
+      INFO1("Mmh. Can't reach '%s'! Nevermind. Let's keep going!",
+            slaves[i]->name);
+      MSG_task_destroy(task);
+    } else {
+      INFO0("Hey ?! What's up ? ");
+      xbt_assert2(0,"Unexpected behavior with '%s': %d",slaves[i]->name,a);
+    }
+  }
+
+  INFO0("Goodbye now!");
+  free(slaves);
+  return 0;
+} /* end_of_master */
+
+/** Receiver function.
+ *
+ * Loops forever receiving tasks on PORT_22. A task whose data pointer is
+ * FINALIZE ends the loop; any other task is expected to carry a heap-allocated
+ * double holding its creation time, which is used to log the communication
+ * time before the task is executed. The slave owns every task it receives and
+ * releases both the payload and the task once it is done with them, whether
+ * the execution succeeded or the host failed.
+ *
+ * A failed reception (MSG_TRANSFER_FAILURE) is logged and the slave simply
+ * waits for the next task.
+ *
+ * argc/argv are unused. Returns 0 in all cases.
+ */
+int slave(int argc, char *argv[])
+{
+  while(1) {
+    m_task_t task = NULL;
+    int a;
+    double time1,time2;
+
+    time1 = MSG_get_clock();
+    a = MSG_task_get(&(task), PORT_22);
+    time2 = MSG_get_clock();
+    if (a == MSG_OK) {
+      INFO1("Received \"%s\"", MSG_task_get_name(task));
+      if(MSG_task_get_data(task)==FINALIZE) {
+        /* FINALIZE is a sentinel, not heap memory: only the task is freed. */
+        MSG_task_destroy(task);
+        break;
+      }
+      /* Measure from whichever is later: the moment we started waiting or
+       * the moment the master created the task. */
+      if(time1<*((double *)task->data))
+        time1 = *((double *) task->data);
+      INFO1("Communication time : \"%f\"", time2-time1);
+      INFO1("Processing \"%s\"", MSG_task_get_name(task));
+      a = MSG_task_execute(task);
+      if (a == MSG_OK) {
+        INFO1("\"%s\" done", MSG_task_get_name(task));
+        free(task->data);
+        MSG_task_destroy(task);
+      } else if (a == MSG_HOST_FAILURE) {
+        INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+        /* We still own the task we were executing: release it on the way out. */
+        free(task->data);
+        MSG_task_destroy(task);
+        return 0;
+      } else {
+        INFO0("Hey ?! What's up ? ");
+        xbt_assert0(0,"Unexpected behavior");
+      }
+    } else if (a == MSG_HOST_FAILURE) {
+      INFO0("Gloups. The cpu on which I'm running just turned off!. See you!");
+      return 0;
+    } else if (a == MSG_TRANSFER_FAILURE) {
+      INFO0("Mmh. Something went wrong. Nevermind. Let's keep going!");
+    } else {
+      INFO0("Hey ?! What's up ? ");
+      xbt_assert0(0,"Unexpected behavior");
+    }
+  }
+  INFO0("I'm done. See you!");
+  return 0;
+} /* end_of_slave */
+
+/** Test function.
+ *
+ * Configures the simulation (channel count, Paje trace output, platform),
+ * registers the "master" and "slave" process functions, deploys the
+ * application described by application_file, runs the simulation and logs the
+ * final simulated time.
+ *
+ * Returns whatever MSG_main() returned.
+ */
+MSG_error_t test_all(const char *platform_file,
+                     const char *application_file)
+{
+  MSG_error_t outcome;
+
+  /* MSG_config("workstation_model","KCCFLN05"); */
+
+  /* Simulation setting */
+  MSG_set_channel_number(MAX_CHANNEL);
+  MSG_paje_output("msg_test.trace");
+  MSG_create_environment(platform_file);
+
+  /* Application deployment */
+  MSG_function_register("master", master);
+  MSG_function_register("slave", slave);
+  MSG_launch_application(application_file);
+
+  outcome = MSG_main();
+
+  INFO1("Simulation time %g",MSG_get_clock());
+  return outcome;
+} /* end_of_test_all */
+
+
+/** Main function.
+ *
+ * Initialises MSG (which is handed argc/argv first), then expects two
+ * remaining arguments: the platform file and the deployment file.
+ * Exits with status 1 after printing a usage message when they are missing;
+ * otherwise returns 0 if the simulation ended with MSG_OK and 1 if not.
+ */
+int main(int argc, char *argv[])
+{
+  MSG_error_t status;
+
+  MSG_global_init(&argc,argv);
+
+  if (!(argc >= 3)) {
+    printf ("Usage: %s platform_file deployment_file\n",argv[0]);
+    printf ("example: %s msg_platform.xml msg_deployment.xml\n",argv[0]);
+    exit(1);
+  }
+
+  status = test_all(argv[1],argv[2]);
+  MSG_clean();
+
+  return (status == MSG_OK) ? 0 : 1;
+} /* end_of_main */
diff --git a/examples/msg/small_platform_with_failures.xml b/examples/msg/small_platform_with_failures.xml
new file mode 100644 (file)
index 0000000..a5ed1c8
--- /dev/null
@@ -0,0 +1,90 @@
+<?xml version='1.0'?>
+<!DOCTYPE platform_description SYSTEM "surfxml.dtd">
+<platform_description version="1">
+  <!-- Small platform with failures: cpu "Jupiter" and network link "5" each have a state_file trace attached -->
+  <cpu name="Tremblay" power="98095000"/>
+  <cpu name="Jupiter" power="76296000" state_file="jupiter_state.trace"/>
+  <cpu name="Fafard" power="76296000"/>
+  <cpu name="Ginette" power="48492000"/>
+  <cpu name="Bourassa" power="48492000"/>
+  <network_link name="6" bandwidth="41279125" latency="5.9904e-05"/>
+  <network_link name="11" bandwidth="252750" latency="0.00570455"/>
+  <network_link name="3" bandwidth="34285625" latency="0.000514433"/>
+  <network_link name="7" bandwidth="11618875" latency="0.00018998"/>
+  <network_link name="9" bandwidth="7209750" latency="0.001461517"/>
+  <network_link name="12" bandwidth="1792625" latency="0.007877863"/>
+  <network_link name="2" bandwidth="118682500" latency="0.000136931"/>
+  <network_link name="8" bandwidth="8158000" latency="0.000270544"/>
+  <network_link name="1" bandwidth="34285625" latency="0.000514433"/>
+  <network_link name="4" bandwidth="10099625" latency="0.00047978"/>
+  <network_link name="0" bandwidth="41279125" latency="5.9904e-05"/>
+  <network_link name="10" bandwidth="4679750" latency="0.000848712"/>
+  <network_link name="5" bandwidth="27946250" latency="0.000278066" state_file="link5_state.trace"/>
+  <network_link name="loopback_FATPIPE" bandwidth="10000000" latency="0.1" sharing_policy="FATPIPE"/>
+  <network_link name="loopback" bandwidth="498000000" latency="0.000015"/>
+  <route src="Tremblay" dst="Tremblay"><route_element name="loopback"/></route>
+  <route src="Jupiter" dst="Jupiter"><route_element name="loopback"/></route>
+  <route src="Fafard" dst="Fafard"><route_element name="loopback"/></route>
+  <route src="Ginette" dst="Ginette"><route_element name="loopback"/></route>
+  <route src="Bourassa" dst="Bourassa"><route_element name="loopback"/></route>
+  <route src="Tremblay" dst="Jupiter">
+    <route_element name="9"/>
+  </route>
+  <route src="Tremblay" dst="Fafard">
+    <route_element name="4"/><route_element name="3"/><route_element name="2"/><route_element name="0"/><route_element name="1"/><route_element name="8"/>
+  </route>
+  <route src="Tremblay" dst="Ginette">
+    <route_element name="4"/><route_element name="3"/><route_element name="5"/>
+  </route>
+  <route src="Tremblay" dst="Bourassa">
+    <route_element name="4"/><route_element name="3"/><route_element name="2"/><route_element name="0"/><route_element name="1"/><route_element name="6"/><route_element name="7"/>
+  </route>
+  <route src="Jupiter" dst="Tremblay">
+    <route_element name="9"/>
+  </route>
+  <route src="Jupiter" dst="Fafard">
+    <route_element name="9"/><route_element name="4"/><route_element name="3"/><route_element name="2"/><route_element name="0"/><route_element name="1"/><route_element name="8"/>
+  </route>
+  <route src="Jupiter" dst="Ginette">
+    <route_element name="9"/><route_element name="4"/><route_element name="3"/><route_element name="5"/>
+  </route>
+  <route src="Jupiter" dst="Bourassa">
+    <route_element name="9"/><route_element name="4"/><route_element name="3"/><route_element name="2"/><route_element name="0"/><route_element name="1"/><route_element name="6"/><route_element name="7"/>
+  </route>
+  <route src="Fafard" dst="Tremblay">
+    <route_element name="8"/><route_element name="1"/><route_element name="0"/><route_element name="2"/><route_element name="3"/><route_element name="4"/>
+  </route>
+  <route src="Fafard" dst="Jupiter">
+    <route_element name="8"/><route_element name="1"/><route_element name="0"/><route_element name="2"/><route_element name="3"/><route_element name="4"/><route_element name="9"/>
+  </route>
+  <route src="Fafard" dst="Ginette">
+    <route_element name="8"/><route_element name="1"/><route_element name="0"/><route_element name="2"/><route_element name="5"/>
+  </route>
+  <route src="Fafard" dst="Bourassa">
+    <route_element name="8"/><route_element name="6"/><route_element name="7"/>
+  </route>
+  <route src="Ginette" dst="Tremblay">
+    <route_element name="5"/><route_element name="3"/><route_element name="4"/>
+  </route>
+  <route src="Ginette" dst="Jupiter">
+    <route_element name="5"/><route_element name="3"/><route_element name="4"/><route_element name="9"/>
+  </route>
+  <route src="Ginette" dst="Fafard">
+    <route_element name="5"/><route_element name="2"/><route_element name="0"/><route_element name="1"/><route_element name="8"/>
+  </route>
+  <route src="Ginette" dst="Bourassa">
+    <route_element name="5"/><route_element name="2"/><route_element name="0"/><route_element name="1"/><route_element name="6"/><route_element name="7"/>
+  </route>
+  <route src="Bourassa" dst="Tremblay">
+    <route_element name="7"/><route_element name="6"/><route_element name="1"/><route_element name="0"/><route_element name="2"/><route_element name="3"/><route_element name="4"/>
+  </route>
+  <route src="Bourassa" dst="Jupiter">
+    <route_element name="7"/><route_element name="6"/><route_element name="1"/><route_element name="0"/><route_element name="2"/><route_element name="3"/><route_element name="4"/><route_element name="9"/>
+  </route>
+  <route src="Bourassa" dst="Fafard">
+    <route_element name="7"/><route_element name="6"/><route_element name="8"/>
+  </route>
+  <route src="Bourassa" dst="Ginette">
+    <route_element name="7"/><route_element name="6"/><route_element name="1"/><route_element name="0"/><route_element name="2"/><route_element name="5"/>
+  </route>
+</platform_description>