Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
replace old simdag-fail example by new s4u-dag-failure one
author SUTER Frederic <frederic.suter@cc.in2p3.fr>
Mon, 20 Dec 2021 20:13:28 +0000 (21:13 +0100)
committer SUTER Frederic <frederic.suter@cc.in2p3.fr>
Mon, 20 Dec 2021 20:40:19 +0000 (21:40 +0100)
MANIFEST.in
examples/cpp/CMakeLists.txt
examples/cpp/dag-failure/s4u-dag-failure.cpp [new file with mode: 0644]
examples/cpp/dag-failure/s4u-dag-failure.tesh [new file with mode: 0644]
examples/deprecated/simdag/CMakeLists.txt
examples/deprecated/simdag/fail/sd_fail.c [deleted file]
examples/deprecated/simdag/fail/sd_fail.tesh [deleted file]
examples/platforms/profiles/faulty_host.profile

diff --git a/MANIFEST.in b/MANIFEST.in
index 7171bbc..76442aa 100644 (file)
@@ -200,6 +200,8 @@ include examples/cpp/comm-waitany/s4u-comm-waitany_d.xml
 include examples/cpp/comm-waituntil/s4u-comm-waituntil.cpp
 include examples/cpp/comm-waituntil/s4u-comm-waituntil.tesh
 include examples/cpp/comm-waituntil/s4u-comm-waituntil_d.xml
+include examples/cpp/dag-failure/s4u-dag-failure.cpp
+include examples/cpp/dag-failure/s4u-dag-failure.tesh
 include examples/cpp/dag-simple/s4u-dag-simple.cpp
 include examples/cpp/dag-simple/s4u-dag-simple.tesh
 include examples/cpp/dht-chord/s4u-dht-chord-node.cpp
@@ -488,8 +490,6 @@ include examples/deprecated/simdag/daxload/sd_daxload.c
 include examples/deprecated/simdag/daxload/sd_daxload.tesh
 include examples/deprecated/simdag/daxload/simple_dax_with_cycle.xml
 include examples/deprecated/simdag/daxload/smalldax.xml
-include examples/deprecated/simdag/fail/sd_fail.c
-include examples/deprecated/simdag/fail/sd_fail.tesh
 include examples/deprecated/simdag/ptg-dotload/ptg.dot
 include examples/deprecated/simdag/ptg-dotload/sd_ptg-dotload.c
 include examples/deprecated/simdag/ptg-dotload/sd_ptg-dotload.tesh
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 3253bf4..af65846 100644 (file)
@@ -66,7 +66,7 @@ foreach (example actor-create actor-daemon actor-exiting actor-join actor-kill
                  comm-pingpong comm-ready comm-serialize comm-suspend comm-wait comm-waitany comm-waitall comm-waituntil
                  comm-dependent comm-host2host comm-failure
                  cloud-capping cloud-migration cloud-simple
-                 dag-simple
+                 dag-failure dag-simple
                  dht-chord dht-kademlia
                  energy-exec energy-boot energy-link energy-vm energy-exec-ptask energy-wifi
                  engine-filtering engine-run-partial
diff --git a/examples/cpp/dag-failure/s4u-dag-failure.cpp b/examples/cpp/dag-failure/s4u-dag-failure.cpp
new file mode 100644 (file)
index 0000000..9d0b77a
--- /dev/null
@@ -0,0 +1,76 @@
+/* Copyright (c) 2006-2021. The SimGrid Team.
+ * All rights reserved.                                                     */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "simgrid/s4u.hpp"
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(dag_failure, "Logging specific to this example");
+
+int main(int argc, char** argv)
+{
+  simgrid::s4u::Engine e(&argc, argv);
+  simgrid::s4u::Engine::set_config("host/model:ptask_L07"); // L07 host model: the one that handles parallel tasks
+  e.load_platform(argv[1]); // argv[1] is the platform file; NOTE(review): argc is not checked before this access
+
+  auto* faulty = e.host_by_name("Faulty Host");
+  auto* safe   = e.host_by_name("Safe Host");
+  simgrid::s4u::Exec::on_completion.connect([](simgrid::s4u::Exec const& exec) { // logs each FINISHED or FAILED Exec
+    if (exec.get_state() == simgrid::s4u::Activity::State::FINISHED)
+      XBT_INFO("Activity '%s' is complete (start time: %f, finish time: %f)", exec.get_cname(), exec.get_start_time(),
+               exec.get_finish_time());
+    if (exec.get_state() == simgrid::s4u::Activity::State::FAILED)
+      if (exec.is_parallel())
+        XBT_INFO("Activity '%s' has failed. %.f %% remain to be done", exec.get_cname(),
+                 100 * exec.get_remaining_ratio());
+      else // dangling else: pairs with the inner 'if (exec.is_parallel())', i.e. the failed sequential case
+        XBT_INFO("Activity '%s' has failed. %.f flops remain to be done", exec.get_cname(), exec.get_remaining());
+  });
+
+  /* First test: create a single sequential Exec that is expected to fail when the host it runs on stops */
+  XBT_INFO("First test: sequential Exec activity");
+  simgrid::s4u::ExecPtr exec =
+      simgrid::s4u::Exec::init()->set_name("Poor task")->set_flops_amount(2e10)->vetoable_start();
+
+  XBT_INFO("Schedule Activity '%s' on 'Faulty Host'", exec->get_cname());
+  exec->set_host(faulty);
+
+  /* Add a child Exec, placed on the safe host, that depends on 'Poor task' */
+  simgrid::s4u::ExecPtr child = simgrid::s4u::Exec::init()->set_name("Child")->set_flops_amount(2e10)->set_host(safe);
+  exec->add_successor(child);
+  child->vetoable_start();
+
+  XBT_INFO("Run the simulation");
+  e.run();
+
+  XBT_INFO("let's unschedule Activity '%s' and reschedule it on the 'Safe Host'", exec->get_cname());
+  exec->unset_host();
+  exec->set_host(safe);
+
+  XBT_INFO("Run the simulation again");
+  e.run();
+
+  XBT_INFO("Second test: parallel Exec activity");
+  exec = simgrid::s4u::Exec::init()->set_name("Poor parallel task")->set_flops_amounts({2e10, 2e10})->vetoable_start();
+
+  XBT_INFO("Schedule Activity '%s' on 'Safe Host' and 'Faulty Host'", exec->get_cname());
+  exec->set_hosts({safe, faulty});
+
+  /* Add a child Exec, placed on the safe host, that depends on 'Poor parallel task' */
+  child = simgrid::s4u::Exec::init()->set_name("Child")->set_flops_amount(2e10)->set_host(safe);
+  exec->add_successor(child);
+  child->vetoable_start();
+
+  XBT_INFO("Run the simulation");
+  e.run();
+
+  XBT_INFO("let's unschedule Activity '%s' and reschedule it only on the 'Safe Host'", exec->get_cname());
+  exec->unset_host();
+  exec->set_flops_amount(4e10)->set_host(safe); // 4e10 = 2e10 + 2e10: presumably the whole parallel load, on one host
+
+  XBT_INFO("Run the simulation again");
+  e.run();
+
+  return 0;
+}
diff --git a/examples/cpp/dag-failure/s4u-dag-failure.tesh b/examples/cpp/dag-failure/s4u-dag-failure.tesh
new file mode 100644 (file)
index 0000000..b47537d
--- /dev/null
@@ -0,0 +1,32 @@
+#!/usr/bin/env tesh
+
+p Test of the management of failed tasks simdag
+
+$ ${bindir:=.}/s4u-dag-failure ${platfdir}/faulty_host.xml --log=s4u_activity.t:verbose "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
+> [  0.000000] (0:maestro@) Configuration change: Set 'host/model' to 'ptask_L07'
+> [  0.000000] (0:maestro@) Switching to the L07 model to handle parallel tasks.
+> [  0.000000] (0:maestro@) First test: sequential Exec activity
+> [  0.000000] (0:maestro@) Schedule Activity 'Poor task' on 'Faulty Host'
+> [  0.000000] (0:maestro@) 'Poor task' is assigned to a resource and all dependencies are solved. Let's start
+> [  0.000000] (0:maestro@) Run the simulation
+> [ 10.000000] (0:maestro@) Activity 'Poor task' has failed. 20000000000 flops remain to be done
+> [ 10.000000] (0:maestro@) let's unschedule Activity 'Poor task' and reschedule it on the 'Safe Host'
+> [ 10.000000] (0:maestro@) 'Poor task' is assigned to a resource and all dependencies are solved. Let's start
+> [ 10.000000] (0:maestro@) Run the simulation again
+> [ 50.000000] (0:maestro@) Remove a dependency from 'Poor task' on 'Child'
+> [ 50.000000] (0:maestro@) 'Child' is assigned to a resource and all dependencies are solved. Let's start
+> [ 50.000000] (0:maestro@) Activity 'Poor task' is complete (start time: 10.000000, finish time: 50.000000)
+> [ 90.000000] (0:maestro@) Activity 'Child' is complete (start time: 50.000000, finish time: 90.000000)
+> [ 90.000000] (0:maestro@) Second test: parallel Exec activity
+> [ 90.000000] (0:maestro@) Schedule Activity 'Poor parallel task' on 'Safe Host' and 'Faulty Host'
+> [ 90.000000] (0:maestro@) 'Poor parallel task' is assigned to a resource and all dependencies are solved. Let's start
+> [ 90.000000] (0:maestro@) Run the simulation
+> [100.000000] (0:maestro@) Activity 'Poor parallel task' has failed. 100 % remain to be done
+> [100.000000] (0:maestro@) let's unschedule Activity 'Poor parallel task' and reschedule it only on the 'Safe Host'
+> [100.000000] (0:maestro@) 'Poor parallel task' is assigned to a resource and all dependencies are solved. Let's start
+> [100.000000] (0:maestro@) Run the simulation again
+> [180.000000] (0:maestro@) Remove a dependency from 'Poor parallel task' on 'Child'
+> [180.000000] (0:maestro@) 'Child' is assigned to a resource and all dependencies are solved. Let's start
+> [180.000000] (0:maestro@) Activity 'Poor parallel task' is complete (start time: 100.000000, finish time: 180.000000)
+> [220.000000] (0:maestro@) Activity 'Child' is complete (start time: 180.000000, finish time: 220.000000)
+
diff --git a/examples/deprecated/simdag/CMakeLists.txt b/examples/deprecated/simdag/CMakeLists.txt
index 7c47790..59c59a6 100644 (file)
@@ -1,4 +1,4 @@
-foreach(x daxload fail typed_tasks throttling scheduling)
+foreach(x daxload typed_tasks throttling scheduling)
   add_executable       (sd_${x}  EXCLUDE_FROM_ALL  ${x}/sd_${x}.c)
   target_link_libraries(sd_${x}     simgrid)
   set_target_properties(sd_${x}  PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x})
@@ -39,7 +39,7 @@ set(txt_files    ${txt_files}     ${CMAKE_CURRENT_SOURCE_DIR}/dag-dotload/dag_wi
                                   ${CMAKE_CURRENT_SOURCE_DIR}/schedule-dotload/dag_with_good_schedule.dot
                                   ${CMAKE_CURRENT_SOURCE_DIR}/scheduling/expected_output.jed               PARENT_SCOPE)
 
-foreach(x daxload fail typed_tasks throttling scheduling test)
+foreach(x daxload typed_tasks throttling scheduling test)
   ADD_TESH(simdag-${x} --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/deprecated/simdag --cd ${CMAKE_BINARY_DIR}/examples/deprecated/simdag ${CMAKE_HOME_DIRECTORY}/examples/deprecated/simdag/${x}/sd_${x}.tesh)
 endforeach()
 
diff --git a/examples/deprecated/simdag/fail/sd_fail.c b/examples/deprecated/simdag/fail/sd_fail.c
deleted file mode 100644 (file)
index 9146805..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Copyright (c) 2006-2021. The SimGrid Team.
- * All rights reserved.                                                     */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-
-#include "simgrid/simdag.h"
-
-XBT_LOG_NEW_DEFAULT_CATEGORY(sd_fail, "Logging specific to this SimDag example");
-
-int main(int argc, char **argv)
-{
-  double computation_amount[1];
-  double communication_amount[2] = { 0 };
-  sg_host_t hosts[1];
-
-  /* initialization of SD */
-  SD_init(&argc, argv);
-
-  /* creation of the environment */
-  SD_create_environment(argv[1]);
-
-  /* creation of a single task that will poorly fail when the workstation will stop */
-  XBT_INFO("First test: COMP_SEQ task");
-  SD_task_t task = SD_task_create_comp_seq("Poor task", NULL, 2e10);
-  SD_task_watch(task, SD_FAILED);
-  SD_task_watch(task, SD_DONE);
-
-  XBT_INFO("Schedule task '%s' on 'Faulty Host'", SD_task_get_name(task));
-
-  SD_task_schedulel(task, 1, sg_host_by_name("Faulty Host"));
-
-  SD_simulate(-1.0);
-
-  SD_task_dump(task);
-
-  XBT_INFO("Task '%s' has failed. %.f flops remain to be done", SD_task_get_name(task),
-           SD_task_get_remaining_amount(task));
-
-  XBT_INFO("let's unschedule task '%s' and reschedule it on the 'Safe Host'", SD_task_get_name(task));
-  SD_task_unschedule(task);
-  SD_task_schedulel(task, 1, sg_host_by_name("Safe Host"));
-
-  XBT_INFO("Run the simulation again");
-  SD_simulate(-1.0);
-
-  SD_task_dump(task);
-  XBT_INFO("Task '%s' start time: %f, finish time: %f", SD_task_get_name(task), SD_task_get_start_time(task),
-           SD_task_get_finish_time(task));
-
-  SD_task_destroy(task);
-
-  XBT_INFO("Second test: NON TYPED task");
-
-  task = SD_task_create("Poor parallel task", NULL, 2e10);
-  SD_task_watch(task, SD_FAILED);
-  SD_task_watch(task, SD_DONE);
-
-  computation_amount[0] = 2e10;
-
-  XBT_INFO("Schedule task '%s' on 'Faulty Host'", SD_task_get_name(task));
-
-  hosts[0] = sg_host_by_name("Faulty Host");
-  SD_task_schedule(task, 1, hosts, computation_amount, communication_amount,-1);
-
-  SD_simulate(-1.0);
-
-  SD_task_dump(task);
-
-  XBT_INFO("Task '%s' has failed. %.f flops remain to be done", SD_task_get_name(task),
-            SD_task_get_remaining_amount(task));
-
-  XBT_INFO("let's unschedule task '%s' and reschedule it on the 'Safe Host'", SD_task_get_name(task));
-  SD_task_unschedule(task);
-
-  hosts[0] = sg_host_by_name("Safe Host");
-
-  SD_task_schedule(task, 1, hosts, computation_amount, communication_amount,-1);
-
-  XBT_INFO("Run the simulation again");
-  SD_simulate(-1.0);
-
-  SD_task_dump(task);
-  XBT_INFO("Task '%s' start time: %f, finish time: %f", SD_task_get_name(task), SD_task_get_start_time(task),
-           SD_task_get_finish_time(task));
-
-  SD_task_destroy(task);
-  return 0;
-}
diff --git a/examples/deprecated/simdag/fail/sd_fail.tesh b/examples/deprecated/simdag/fail/sd_fail.tesh
deleted file mode 100644 (file)
index 965cace..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env tesh
-
-p Test of the management of failed tasks simdag
-
-$ ${bindir:=.}/fail/sd_fail ${srcdir:=.}/../../platforms/faulty_host.xml
-> [0.000000] [xbt_cfg/INFO] Switching to the L07 model to handle parallel tasks.
-> [0.000000] [sd_fail/INFO] First test: COMP_SEQ task
-> [0.000000] [sd_fail/INFO] Schedule task 'Poor task' on 'Faulty Host'
-> [10.000000] [sd_task/INFO] Displaying task Poor task
-> [10.000000] [sd_task/INFO]   - state: not runnable failed
-> [10.000000] [sd_task/INFO]   - kind: sequential computation
-> [10.000000] [sd_task/INFO]   - amount: 20000000000
-> [10.000000] [sd_task/INFO]   - Dependencies to satisfy: 0
-> [10.000000] [sd_fail/INFO] Task 'Poor task' has failed. 20000000000 flops remain to be done
-> [10.000000] [sd_fail/INFO] let's unschedule task 'Poor task' and reschedule it on the 'Safe Host'
-> [10.000000] [sd_fail/INFO] Run the simulation again
-> [50.000000] [sd_task/INFO] Displaying task Poor task
-> [50.000000] [sd_task/INFO]   - state: not runnable done
-> [50.000000] [sd_task/INFO]   - kind: sequential computation
-> [50.000000] [sd_task/INFO]   - amount: 20000000000
-> [50.000000] [sd_task/INFO]   - Dependencies to satisfy: 0
-> [50.000000] [sd_fail/INFO] Task 'Poor task' start time: 10.000000, finish time: 50.000000
-> [50.000000] [sd_fail/INFO] Second test: NON TYPED task
-> [50.000000] [sd_fail/INFO] Schedule task 'Poor parallel task' on 'Faulty Host'
-> [60.000000] [sd_task/INFO] Displaying task Poor parallel task
-> [60.000000] [sd_task/INFO]   - state: not runnable failed
-> [60.000000] [sd_task/INFO]   - amount: 20000000000
-> [60.000000] [sd_task/INFO]   - Dependencies to satisfy: 0
-> [60.000000] [sd_fail/INFO] Task 'Poor parallel task' has failed. 20000000000 flops remain to be done
-> [60.000000] [sd_fail/INFO] let's unschedule task 'Poor parallel task' and reschedule it on the 'Safe Host'
-> [60.000000] [sd_fail/INFO] Run the simulation again
-> [100.000000] [sd_task/INFO] Displaying task Poor parallel task
-> [100.000000] [sd_task/INFO]   - state: not runnable done
-> [100.000000] [sd_task/INFO]   - amount: 20000000000
-> [100.000000] [sd_task/INFO]   - Dependencies to satisfy: 0
-> [100.000000] [sd_fail/INFO] Task 'Poor parallel task' start time: 60.000000, finish time: 100.000000
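diff --git a/examples/platforms/profiles/faulty_host.profile b/examples/platforms/profiles/faulty_host.profile
index 18ffe77..f301177 100644 (file)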
@@ -3,4 +3,5 @@
 11 1
 60 0
 61 1
-111 0
\ No newline at end of file
+100 0
+101 1
\ No newline at end of file