include examples/cpp/comm-waituntil/s4u-comm-waituntil.cpp
include examples/cpp/comm-waituntil/s4u-comm-waituntil.tesh
include examples/cpp/comm-waituntil/s4u-comm-waituntil_d.xml
+include examples/cpp/dag-failure/s4u-dag-failure.cpp
+include examples/cpp/dag-failure/s4u-dag-failure.tesh
include examples/cpp/dag-simple/s4u-dag-simple.cpp
include examples/cpp/dag-simple/s4u-dag-simple.tesh
include examples/cpp/dht-chord/s4u-dht-chord-node.cpp
include examples/deprecated/simdag/daxload/sd_daxload.tesh
include examples/deprecated/simdag/daxload/simple_dax_with_cycle.xml
include examples/deprecated/simdag/daxload/smalldax.xml
-include examples/deprecated/simdag/fail/sd_fail.c
-include examples/deprecated/simdag/fail/sd_fail.tesh
include examples/deprecated/simdag/ptg-dotload/ptg.dot
include examples/deprecated/simdag/ptg-dotload/sd_ptg-dotload.c
include examples/deprecated/simdag/ptg-dotload/sd_ptg-dotload.tesh
comm-pingpong comm-ready comm-serialize comm-suspend comm-wait comm-waitany comm-waitall comm-waituntil
comm-dependent comm-host2host comm-failure
cloud-capping cloud-migration cloud-simple
- dag-simple
+ dag-failure dag-simple
dht-chord dht-kademlia
energy-exec energy-boot energy-link energy-vm energy-exec-ptask energy-wifi
engine-filtering engine-run-partial
--- /dev/null
+/* Copyright (c) 2006-2021. The SimGrid Team.
+ * All rights reserved. */
+
+/* This program is free software; you can redistribute it and/or modify it
+ * under the terms of the license (GNU LGPL) which comes with this package. */
+
+#include "simgrid/s4u.hpp"
+
+XBT_LOG_NEW_DEFAULT_CATEGORY(dag_failure, "Logging specific to this example");
+
+/* Entry point of the dag-failure example.
+ *
+ * Two scenarios are played one after the other on the platform given as first command-line argument:
+ *  1. a sequential Exec ("Poor task") is scheduled on 'Faulty Host', fails, and is rescheduled on 'Safe Host';
+ *  2. a parallel Exec ("Poor parallel task") is scheduled on both hosts, fails, and is rescheduled as a
+ *     sequential Exec on 'Safe Host' only.
+ * In both scenarios a "Child" Exec is registered as successor of the failing activity.
+ *
+ * Returns 0 once both scenarios have run.
+ */
+int main(int argc, char** argv)
+{
+  simgrid::s4u::Engine e(&argc, argv);
+  /* argv[1] is dereferenced below: refuse to go on without a platform file */
+  xbt_assert(argc > 1, "Usage: %s platform_file\n", argv[0]);
+  simgrid::s4u::Engine::set_config("host/model:ptask_L07");
+  e.load_platform(argv[1]);
+
+  auto* faulty = e.host_by_name("Faulty Host");
+  auto* safe   = e.host_by_name("Safe Host");
+
+  /* Report every Exec that reaches the FINISHED or FAILED state */
+  simgrid::s4u::Exec::on_completion.connect([](simgrid::s4u::Exec const& exec) {
+    if (exec.get_state() == simgrid::s4u::Activity::State::FINISHED)
+      XBT_INFO("Activity '%s' is complete (start time: %f, finish time: %f)", exec.get_cname(), exec.get_start_time(),
+               exec.get_finish_time());
+    if (exec.get_state() == simgrid::s4u::Activity::State::FAILED) {
+      /* Explicit braces: the else below pairs with the is_parallel() test, not with the FAILED test */
+      if (exec.is_parallel())
+        XBT_INFO("Activity '%s' has failed. %.f %% remain to be done", exec.get_cname(),
+                 100 * exec.get_remaining_ratio());
+      else
+        XBT_INFO("Activity '%s' has failed. %.f flops remain to be done", exec.get_cname(), exec.get_remaining());
+    }
+  });
+
+  /* Create a single Exec that will fail when its host stops */
+  XBT_INFO("First test: sequential Exec activity");
+  simgrid::s4u::ExecPtr exec =
+      simgrid::s4u::Exec::init()->set_name("Poor task")->set_flops_amount(2e10)->vetoable_start();
+
+  XBT_INFO("Schedule Activity '%s' on 'Faulty Host'", exec->get_cname());
+  exec->set_host(faulty);
+
+  /* Add a child Exec that depends on the 'Poor task' */
+  simgrid::s4u::ExecPtr child = simgrid::s4u::Exec::init()->set_name("Child")->set_flops_amount(2e10)->set_host(safe);
+  exec->add_successor(child);
+  child->vetoable_start();
+
+  XBT_INFO("Run the simulation");
+  e.run();
+
+  XBT_INFO("let's unschedule Activity '%s' and reschedule it on the 'Safe Host'", exec->get_cname());
+  exec->unset_host();
+  exec->set_host(safe);
+
+  XBT_INFO("Run the simulation again");
+  e.run();
+
+  XBT_INFO("Second test: parallel Exec activity");
+  exec = simgrid::s4u::Exec::init()->set_name("Poor parallel task")->set_flops_amounts({2e10, 2e10})->vetoable_start();
+
+  XBT_INFO("Schedule Activity '%s' on 'Safe Host' and 'Faulty Host'", exec->get_cname());
+  exec->set_hosts({safe, faulty});
+
+  /* Add a child Exec that depends on the 'Poor parallel task' */
+  child = simgrid::s4u::Exec::init()->set_name("Child")->set_flops_amount(2e10)->set_host(safe);
+  exec->add_successor(child);
+  child->vetoable_start();
+
+  XBT_INFO("Run the simulation");
+  e.run();
+
+  XBT_INFO("let's unschedule Activity '%s' and reschedule it only on the 'Safe Host'", exec->get_cname());
+  exec->unset_host();
+  /* The whole work (2e10 + 2e10 flops) now has to be done by the single remaining host */
+  exec->set_flops_amount(4e10)->set_host(safe);
+
+  XBT_INFO("Run the simulation again");
+  e.run();
+
+  return 0;
+}
--- /dev/null
+#!/usr/bin/env tesh
+
+p Test of the management of failed activities in S4U DAGs
+
+$ ${bindir:=.}/s4u-dag-failure ${platfdir}/faulty_host.xml --log=s4u_activity.t:verbose "--log=root.fmt:[%10.6r]%e(%i:%a@%h)%e%m%n"
+> [ 0.000000] (0:maestro@) Configuration change: Set 'host/model' to 'ptask_L07'
+> [ 0.000000] (0:maestro@) Switching to the L07 model to handle parallel tasks.
+> [ 0.000000] (0:maestro@) First test: sequential Exec activity
+> [ 0.000000] (0:maestro@) Schedule Activity 'Poor task' on 'Faulty Host'
+> [ 0.000000] (0:maestro@) 'Poor task' is assigned to a resource and all dependencies are solved. Let's start
+> [ 0.000000] (0:maestro@) Run the simulation
+> [ 10.000000] (0:maestro@) Activity 'Poor task' has failed. 20000000000 flops remain to be done
+> [ 10.000000] (0:maestro@) let's unschedule Activity 'Poor task' and reschedule it on the 'Safe Host'
+> [ 10.000000] (0:maestro@) 'Poor task' is assigned to a resource and all dependencies are solved. Let's start
+> [ 10.000000] (0:maestro@) Run the simulation again
+> [ 50.000000] (0:maestro@) Remove a dependency from 'Poor task' on 'Child'
+> [ 50.000000] (0:maestro@) 'Child' is assigned to a resource and all dependencies are solved. Let's start
+> [ 50.000000] (0:maestro@) Activity 'Poor task' is complete (start time: 10.000000, finish time: 50.000000)
+> [ 90.000000] (0:maestro@) Activity 'Child' is complete (start time: 50.000000, finish time: 90.000000)
+> [ 90.000000] (0:maestro@) Second test: parallel Exec activity
+> [ 90.000000] (0:maestro@) Schedule Activity 'Poor parallel task' on 'Safe Host' and 'Faulty Host'
+> [ 90.000000] (0:maestro@) 'Poor parallel task' is assigned to a resource and all dependencies are solved. Let's start
+> [ 90.000000] (0:maestro@) Run the simulation
+> [100.000000] (0:maestro@) Activity 'Poor parallel task' has failed. 100 % remain to be done
+> [100.000000] (0:maestro@) let's unschedule Activity 'Poor parallel task' and reschedule it only on the 'Safe Host'
+> [100.000000] (0:maestro@) 'Poor parallel task' is assigned to a resource and all dependencies are solved. Let's start
+> [100.000000] (0:maestro@) Run the simulation again
+> [180.000000] (0:maestro@) Remove a dependency from 'Poor parallel task' on 'Child'
+> [180.000000] (0:maestro@) 'Child' is assigned to a resource and all dependencies are solved. Let's start
+> [180.000000] (0:maestro@) Activity 'Poor parallel task' is complete (start time: 100.000000, finish time: 180.000000)
+> [220.000000] (0:maestro@) Activity 'Child' is complete (start time: 180.000000, finish time: 220.000000)
+
-foreach(x daxload fail typed_tasks throttling scheduling)
+foreach(x daxload typed_tasks throttling scheduling)
add_executable (sd_${x} EXCLUDE_FROM_ALL ${x}/sd_${x}.c)
target_link_libraries(sd_${x} simgrid)
set_target_properties(sd_${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x})
${CMAKE_CURRENT_SOURCE_DIR}/schedule-dotload/dag_with_good_schedule.dot
${CMAKE_CURRENT_SOURCE_DIR}/scheduling/expected_output.jed PARENT_SCOPE)
-foreach(x daxload fail typed_tasks throttling scheduling test)
+foreach(x daxload typed_tasks throttling scheduling test)
ADD_TESH(simdag-${x} --setenv srcdir=${CMAKE_HOME_DIRECTORY}/examples/deprecated/simdag --cd ${CMAKE_BINARY_DIR}/examples/deprecated/simdag ${CMAKE_HOME_DIRECTORY}/examples/deprecated/simdag/${x}/sd_${x}.tesh)
endforeach()
+++ /dev/null
-/* Copyright (c) 2006-2021. The SimGrid Team.
- * All rights reserved. */
-
-/* This program is free software; you can redistribute it and/or modify it
- * under the terms of the license (GNU LGPL) which comes with this package. */
-
-#include "simgrid/simdag.h"
-
-XBT_LOG_NEW_DEFAULT_CATEGORY(sd_fail, "Logging specific to this SimDag example");
-
-int main(int argc, char **argv)
-{
- double computation_amount[1];
- double communication_amount[2] = { 0 };
- sg_host_t hosts[1];
-
- /* initialization of SD */
- SD_init(&argc, argv);
-
- /* creation of the environment */
- SD_create_environment(argv[1]);
-
- /* creation of a single task that will poorly fail when the workstation will stop */
- XBT_INFO("First test: COMP_SEQ task");
- SD_task_t task = SD_task_create_comp_seq("Poor task", NULL, 2e10);
- SD_task_watch(task, SD_FAILED);
- SD_task_watch(task, SD_DONE);
-
- XBT_INFO("Schedule task '%s' on 'Faulty Host'", SD_task_get_name(task));
-
- SD_task_schedulel(task, 1, sg_host_by_name("Faulty Host"));
-
- SD_simulate(-1.0);
-
- SD_task_dump(task);
-
- XBT_INFO("Task '%s' has failed. %.f flops remain to be done", SD_task_get_name(task),
- SD_task_get_remaining_amount(task));
-
- XBT_INFO("let's unschedule task '%s' and reschedule it on the 'Safe Host'", SD_task_get_name(task));
- SD_task_unschedule(task);
- SD_task_schedulel(task, 1, sg_host_by_name("Safe Host"));
-
- XBT_INFO("Run the simulation again");
- SD_simulate(-1.0);
-
- SD_task_dump(task);
- XBT_INFO("Task '%s' start time: %f, finish time: %f", SD_task_get_name(task), SD_task_get_start_time(task),
- SD_task_get_finish_time(task));
-
- SD_task_destroy(task);
-
- XBT_INFO("Second test: NON TYPED task");
-
- task = SD_task_create("Poor parallel task", NULL, 2e10);
- SD_task_watch(task, SD_FAILED);
- SD_task_watch(task, SD_DONE);
-
- computation_amount[0] = 2e10;
-
- XBT_INFO("Schedule task '%s' on 'Faulty Host'", SD_task_get_name(task));
-
- hosts[0] = sg_host_by_name("Faulty Host");
- SD_task_schedule(task, 1, hosts, computation_amount, communication_amount,-1);
-
- SD_simulate(-1.0);
-
- SD_task_dump(task);
-
- XBT_INFO("Task '%s' has failed. %.f flops remain to be done", SD_task_get_name(task),
- SD_task_get_remaining_amount(task));
-
- XBT_INFO("let's unschedule task '%s' and reschedule it on the 'Safe Host'", SD_task_get_name(task));
- SD_task_unschedule(task);
-
- hosts[0] = sg_host_by_name("Safe Host");
-
- SD_task_schedule(task, 1, hosts, computation_amount, communication_amount,-1);
-
- XBT_INFO("Run the simulation again");
- SD_simulate(-1.0);
-
- SD_task_dump(task);
- XBT_INFO("Task '%s' start time: %f, finish time: %f", SD_task_get_name(task), SD_task_get_start_time(task),
- SD_task_get_finish_time(task));
-
- SD_task_destroy(task);
- return 0;
-}
+++ /dev/null
-#!/usr/bin/env tesh
-
-p Test of the management of failed tasks simdag
-
-$ ${bindir:=.}/fail/sd_fail ${srcdir:=.}/../../platforms/faulty_host.xml
-> [0.000000] [xbt_cfg/INFO] Switching to the L07 model to handle parallel tasks.
-> [0.000000] [sd_fail/INFO] First test: COMP_SEQ task
-> [0.000000] [sd_fail/INFO] Schedule task 'Poor task' on 'Faulty Host'
-> [10.000000] [sd_task/INFO] Displaying task Poor task
-> [10.000000] [sd_task/INFO] - state: not runnable failed
-> [10.000000] [sd_task/INFO] - kind: sequential computation
-> [10.000000] [sd_task/INFO] - amount: 20000000000
-> [10.000000] [sd_task/INFO] - Dependencies to satisfy: 0
-> [10.000000] [sd_fail/INFO] Task 'Poor task' has failed. 20000000000 flops remain to be done
-> [10.000000] [sd_fail/INFO] let's unschedule task 'Poor task' and reschedule it on the 'Safe Host'
-> [10.000000] [sd_fail/INFO] Run the simulation again
-> [50.000000] [sd_task/INFO] Displaying task Poor task
-> [50.000000] [sd_task/INFO] - state: not runnable done
-> [50.000000] [sd_task/INFO] - kind: sequential computation
-> [50.000000] [sd_task/INFO] - amount: 20000000000
-> [50.000000] [sd_task/INFO] - Dependencies to satisfy: 0
-> [50.000000] [sd_fail/INFO] Task 'Poor task' start time: 10.000000, finish time: 50.000000
-> [50.000000] [sd_fail/INFO] Second test: NON TYPED task
-> [50.000000] [sd_fail/INFO] Schedule task 'Poor parallel task' on 'Faulty Host'
-> [60.000000] [sd_task/INFO] Displaying task Poor parallel task
-> [60.000000] [sd_task/INFO] - state: not runnable failed
-> [60.000000] [sd_task/INFO] - amount: 20000000000
-> [60.000000] [sd_task/INFO] - Dependencies to satisfy: 0
-> [60.000000] [sd_fail/INFO] Task 'Poor parallel task' has failed. 20000000000 flops remain to be done
-> [60.000000] [sd_fail/INFO] let's unschedule task 'Poor parallel task' and reschedule it on the 'Safe Host'
-> [60.000000] [sd_fail/INFO] Run the simulation again
-> [100.000000] [sd_task/INFO] Displaying task Poor parallel task
-> [100.000000] [sd_task/INFO] - state: not runnable done
-> [100.000000] [sd_task/INFO] - amount: 20000000000
-> [100.000000] [sd_task/INFO] - Dependencies to satisfy: 0
-> [100.000000] [sd_fail/INFO] Task 'Poor parallel task' start time: 60.000000, finish time: 100.000000
11 1
60 0
61 1
-111 0
\ No newline at end of file
+100 0
+101 1
\ No newline at end of file