examples/python/exec-ptask/exec-ptask.py

   1 # Copyright (c) 2018-2022. The SimGrid Team. All rights reserved.
   2 #
   3 # This program is free software; you can redistribute it and/or modify it
   4 # under the terms of the license (GNU LGPL) which comes with this package.
   5
   6
   7 # This script does exactly the same thing as file s4u-exec-ptask.cpp
   8
   9 import sys
  10 from simgrid import Actor, Engine, Host, this_actor, TimeoutException
  11
  12 def runner():
  13     hosts = Engine.instance.all_hosts
  14     hosts_count = len(hosts)
  15
  16     # Test 1
  17     this_actor.info("First, build a classical parallel activity, with 1 Gflop to execute on each node, "
  18                "and 10MB to exchange between each pair")
  19     computation_amounts = [1e9]*hosts_count
  20     communication_amounts = [0]*hosts_count*hosts_count
  21     for i in range(hosts_count):
  22         for j in range(i+1, hosts_count):
  23             communication_amounts[i * hosts_count + j] = 1e7
  24     this_actor.parallel_execute(hosts, computation_amounts, communication_amounts)
  25
  26     # Test 2
  27     this_actor.info("We can do the same with a timeout of 10 seconds enabled.")
  28     activity = this_actor.exec_init(hosts, computation_amounts, communication_amounts)
  29     try:
  30         activity.wait_for(10.0)
  31         sys.exit("Woops, this did not timeout as expected... Please report that bug.")
  32     except TimeoutException:
  33         this_actor.info("Caught the expected timeout exception.")
  34         activity.cancel()
  35
  36     # Test 3
  37     this_actor.info("Then, build a parallel activity involving only computations (of different amounts) and no communication")
  38     computation_amounts = [3e8, 6e8, 1e9]
  39     communication_amounts = []
  40     this_actor.parallel_execute(hosts, computation_amounts, communication_amounts)
  41
  42     # Test 4
  43     this_actor.info("Then, build a parallel activity with no computation nor communication (synchro only)")
  44     computation_amounts = []
  45     this_actor.parallel_execute(hosts, computation_amounts, communication_amounts)
  46
  47     # Test 5
  48     this_actor.info("Then, Monitor the execution of a parallel activity")
  49     computation_amounts = [1e6]*hosts_count
  50     communication_amounts = [0, 1e6, 0, 0, 0, 1e6, 1e6, 0, 0]
  51     activity = this_actor.exec_init(hosts, computation_amounts, communication_amounts)
  52     activity.start()
  53     while not activity.test():
  54         ratio = activity.remaining_ratio * 100
  55         this_actor.info(f"Remaining flop ratio: {ratio:.0f}%")
  56         this_actor.sleep_for(5)
  57     activity.wait()
  58
  59     # Test 6
  60     this_actor.info("Finally, simulate a malleable task (a parallel execution that gets reconfigured after its start).")
  61     this_actor.info("  - Start a regular parallel execution, with both comm and computation")
  62     computation_amounts = [1e6]*hosts_count
  63     communication_amounts = [0, 1e6, 0, 0, 1e6, 0, 1e6, 0, 0]
  64     activity = this_actor.exec_init(hosts, computation_amounts, communication_amounts)
  65     activity.start()
  66     this_actor.sleep_for(10)
  67     remaining_ratio = activity.remaining_ratio
  68     this_actor.info(f"  - After 10 seconds, {remaining_ratio*100:.2f}% remains to be done. Change it from 3 hosts to 2 hosts only.")
  69     this_actor.info("    Let's first suspend the task.")
  70     activity.suspend()
  71     this_actor.info("  - Now, simulate the reconfiguration (modeled as a comm from the removed host to the remaining ones).")
  72     rescheduling_comp = [0, 0, 0]
  73     rescheduling_comm = [0, 0, 0, 0, 0, 0, 25000, 25000, 0]
  74     this_actor.parallel_execute(hosts, rescheduling_comp, rescheduling_comm)
  75     this_actor.info("  - Now, let's cancel the old task and create a new task with modified comm and computation vectors:")
  76     this_actor.info("    What was already done is removed, and the load of the removed host is shared between remaining ones.")
  77     for i in range(2):
  78         # remove what we've done so far, for both comm and compute load
  79         computation_amounts[i]   *= remaining_ratio
  80         communication_amounts[i] *= remaining_ratio
  81         # The work from 1 must be shared between 2 remaining ones. 1/2=50% of extra work for each
  82         computation_amounts[i]   *= 1.5;
  83         communication_amounts[i] *= 1.5;
  84     hosts = hosts[:2]
  85     computation_amounts = computation_amounts[:2]
  86     remaining_comm = communication_amounts[1]
  87     communication_amounts = [0, remaining_comm, remaining_comm, 0]
  88     activity.cancel()
  89     activity = this_actor.exec_init(hosts, computation_amounts, communication_amounts)
  90     this_actor.info("  - Done, let's wait for the task completion")
  91     activity.wait()
  92     this_actor.info("Goodbye now!")
  93
  94
  95 if __name__ == "__main__":
  96     if len(sys.argv) != 2:
  97         sys.exit(f"Syntax: {sys.argv[0]} <platform_file>")
  98     platform = sys.argv[1]
  99     engine = Engine.instance
 100     Engine.set_config("host/model:ptask_L07")  # /!\ this is required for running ptasks
 101     engine.load_platform(platform)
 102     Actor.create("foo", engine.host_by_name("MyHost1"), runner)
 103     engine.run()