Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
actions-mpi becomes actions-comm
author Frederic Suter <frederic.suter@cc.in2p3.fr>
Tue, 5 Apr 2016 12:19:29 +0000 (14:19 +0200)
committer Frederic Suter <frederic.suter@cc.in2p3.fr>
Tue, 5 Apr 2016 12:19:29 +0000 (14:19 +0200)
  + allow us to keep an example of such trace replay
  + stop misleading MPI users, this is not MPI replay, just comm replay

12 files changed:
.gitignore
examples/msg/CMakeLists.txt
examples/msg/actions-comm/actions-comm.c [moved from examples/msg/actions-mpi/actions-mpi.c with 69% similarity]
examples/msg/actions-comm/actions-comm.tesh [new file with mode: 0644]
examples/msg/actions-comm/actions-comm.txt [moved from examples/msg/actions-mpi/actions-mpi.txt with 65% similarity]
examples/msg/actions-comm/actions-comm_d.xml [moved from examples/msg/actions-mpi/actions-mpi_d.xml with 100% similarity]
examples/msg/actions-comm/actions-comm_split_d.xml [moved from examples/msg/actions-mpi/actions-mpi_split_d.xml with 59% similarity]
examples/msg/actions-comm/actions-comm_split_p0.txt [new file with mode: 0644]
examples/msg/actions-comm/actions-comm_split_p1.txt [new file with mode: 0644]
examples/msg/actions-mpi/actions-mpi.tesh [deleted file]
examples/msg/actions-mpi/actions-mpi_split_p0.txt [deleted file]
examples/msg/actions-mpi/actions-mpi_split_p1.txt [deleted file]

index 99d820d..596cbb4 100644 (file)
@@ -116,7 +116,7 @@ tags
 callgrind.out.*
 ### Examples and traces
 *.exe
-examples/msg/actions-mpi/actions-mpi
+examples/msg/actions-comm/actions-comm
 examples/msg/actions-storage/actions-storage
 examples/msg/async-wait/async-wait
 examples/msg/async-waitall/async-waitall
index 99fd79d..6afeded 100644 (file)
@@ -1,4 +1,4 @@
-foreach(x actions-mpi actions-storage async-wait async-waitall async-waitany cloud-capping cloud-masterworker 
+foreach(x actions-comm actions-storage async-wait async-waitall async-waitany cloud-capping cloud-masterworker 
           cloud-migration cloud-multicore cloud-scale cloud-simple cloud-two-tasks dht-chord dht-pastry exception 
           energy-consumption energy-onoff energy-pstate energy-ptask energy-vm failures io-file io-file-unlink io-remote
           io-storage masterworker masterworker-mailbox pmm task-priority process-kill process-migration process-suspend 
@@ -39,16 +39,16 @@ foreach (file answer dht-kademlia node routing_table task)
   set(examples_src  ${examples_src}  ${CMAKE_CURRENT_SOURCE_DIR}/dht-kademlia/${file}.c  ${CMAKE_CURRENT_SOURCE_DIR}/dht-kademlia/${file}.h)
 endforeach()
 
-foreach (file actions-mpi actions-storage async-wait async-waitall async-waitany bittorrent chainsend dht-chord 
+foreach (file actions-comm actions-storage async-wait async-waitall async-waitany bittorrent chainsend dht-chord 
          dht-kademlia dht-pastry io-remote masterworker masterworker-mailbox properties sendrecv set-maestro
          task-priority)
   set(xml_files    ${xml_files}     ${CMAKE_CURRENT_SOURCE_DIR}/${file}/${file}_d.xml)
 endforeach()
 
 set(txt_files    ${txt_files}     ${CMAKE_CURRENT_SOURCE_DIR}/README
-                                  ${CMAKE_CURRENT_SOURCE_DIR}/actions-mpi/actions-mpi.txt
-                                  ${CMAKE_CURRENT_SOURCE_DIR}/actions-mpi/actions-mpi_split_p0.txt
-                                  ${CMAKE_CURRENT_SOURCE_DIR}/actions-mpi/actions-mpi_split_p1.txt
+                                  ${CMAKE_CURRENT_SOURCE_DIR}/actions-comm/actions-comm.txt
+                                  ${CMAKE_CURRENT_SOURCE_DIR}/actions-comm/actions-comm_split_p0.txt
+                                  ${CMAKE_CURRENT_SOURCE_DIR}/actions-comm/actions-comm_split_p1.txt
                                   ${CMAKE_CURRENT_SOURCE_DIR}/actions-storage/actions-storage.txt          PARENT_SCOPE)
 set(bin_files    ${bin_files}     ${CMAKE_CURRENT_SOURCE_DIR}/bittorrent/generate.py
                                   ${CMAKE_CURRENT_SOURCE_DIR}/dht-chord/generate.py
@@ -83,7 +83,7 @@ set(xml_files    ${xml_files}     ${CMAKE_CURRENT_SOURCE_DIR}/actions-mpi/action
                                   ${CMAKE_CURRENT_SOURCE_DIR}/process-startkilltime/start_d.xml
                                   ${CMAKE_CURRENT_SOURCE_DIR}/process-startkilltime/start_kill_d.xml       PARENT_SCOPE)
 
-foreach(x actions-mpi actions-storage async-wait async-waitall async-waitany bittorrent chainsend cloud-capping 
+foreach(x actions-comm actions-storage async-wait async-waitall async-waitany bittorrent chainsend cloud-capping 
         cloud-masterworker cloud-migration cloud-scale cloud-simple cloud-two-tasks dht-chord dht-kademlia
         failures io-file io-file-unlink io-remote io-storage masterworker masterworker-mailbox task-priority process-kill 
         process-migration process-suspend properties sendrecv synchro process-startkilltime token_ring)
similarity index 69%
rename from examples/msg/actions-mpi/actions-mpi.c
rename to examples/msg/actions-comm/actions-comm.c
index df86b80..0d2d646 100644 (file)
@@ -210,58 +210,6 @@ static void action_barrier(const char *const *action)
   }
 }
 
-static void action_reduce(const char *const *action)
-{
-  int i;
-  char *reduce_identifier;
-  char mailbox[80];
-  double comm_size = parse_double(action[2]);
-  double comp_size = parse_double(action[3]);
-  msg_task_t comp_task = NULL;
-  const char *process_name;
-  double clock = MSG_get_clock();
-
-  process_globals_t counters = (process_globals_t) MSG_process_get_data(MSG_process_self());
-
-  xbt_assert(communicator_size, "Size of Communicator is not defined can't use collective operations");
-
-  process_name = MSG_process_get_name(MSG_process_self());
-
-  reduce_identifier = bprintf("reduce_%d", counters->reduce_counter++);
-
-  if (!strcmp(process_name, "p0")) {
-    XBT_DEBUG("%s: %s is the Root", reduce_identifier, process_name);
-
-    msg_comm_t *comms = xbt_new0(msg_comm_t, communicator_size - 1);
-    msg_task_t *tasks = xbt_new0(msg_task_t, communicator_size - 1);
-    for (i = 1; i < communicator_size; i++) {
-      sprintf(mailbox, "%s_p%d_p0", reduce_identifier, i);
-      comms[i - 1] = MSG_task_irecv(&(tasks[i - 1]), mailbox);
-    }
-    MSG_comm_waitall(comms, communicator_size - 1, -1);
-    for (i = 1; i < communicator_size; i++) {
-      MSG_comm_destroy(comms[i - 1]);
-      MSG_task_destroy(tasks[i - 1]);
-    }
-    xbt_free(comms);
-    xbt_free(tasks);
-
-    comp_task = MSG_task_create("reduce_comp", comp_size, 0, NULL);
-    XBT_DEBUG("%s: computing 'reduce_comp'", reduce_identifier);
-    MSG_task_execute(comp_task);
-    MSG_task_destroy(comp_task);
-    XBT_DEBUG("%s: computed", reduce_identifier);
-  } else {
-    XBT_DEBUG("%s: %s sends", reduce_identifier, process_name);
-    sprintf(mailbox, "%s_%s_p0", reduce_identifier, process_name);
-    XBT_DEBUG("put on %s", mailbox);
-    MSG_task_send(MSG_task_create(reduce_identifier, 0, comm_size, NULL), mailbox);
-  }
-
-  log_action(action, MSG_get_clock() - clock);
-  xbt_free(reduce_identifier);
-}
-
 static void action_bcast(const char *const *action)
 {
   int i;
@@ -306,83 +254,6 @@ static void action_bcast(const char *const *action)
   xbt_free(bcast_identifier);
 }
 
-static void action_sleep(const char *const *action)
-{
-  const char *duration = action[2];
-  double clock = MSG_get_clock();
-
-  ACT_DEBUG("Entering %s", NAME);
-  MSG_process_sleep(parse_double(duration));
-  log_action(action, MSG_get_clock() - clock);
-}
-
-static void action_allReduce(const char *const *action)
-{
-  int i;
-  char *allreduce_identifier;
-  char mailbox[80];
-  double comm_size = parse_double(action[2]);
-  double comp_size = parse_double(action[3]);
-  msg_task_t task = NULL, comp_task = NULL;
-  const char *process_name;
-  double clock = MSG_get_clock();
-
-  process_globals_t counters = (process_globals_t) MSG_process_get_data(MSG_process_self());
-
-  xbt_assert(communicator_size, "Size of Communicator is not defined, can't use collective operations");
-
-  process_name = MSG_process_get_name(MSG_process_self());
-
-  allreduce_identifier = bprintf("allReduce_%d", counters->allReduce_counter++);
-
-  if (!strcmp(process_name, "p0")) {
-    XBT_DEBUG("%s: %s is the Root", allreduce_identifier, process_name);
-
-    msg_comm_t *comms = xbt_new0(msg_comm_t, communicator_size - 1);
-    msg_task_t *tasks = xbt_new0(msg_task_t, communicator_size - 1);
-    for (i = 1; i < communicator_size; i++) {
-      sprintf(mailbox, "%s_p%d_p0", allreduce_identifier, i);
-      comms[i - 1] = MSG_task_irecv(&(tasks[i - 1]), mailbox);
-    }
-    MSG_comm_waitall(comms, communicator_size - 1, -1);
-    for (i = 1; i < communicator_size; i++) {
-      MSG_comm_destroy(comms[i - 1]);
-      MSG_task_destroy(tasks[i - 1]);
-    }
-    xbt_free(tasks);
-
-    comp_task = MSG_task_create("allReduce_comp", comp_size, 0, NULL);
-    XBT_DEBUG("%s: computing 'reduce_comp'", allreduce_identifier);
-    MSG_task_execute(comp_task);
-    MSG_task_destroy(comp_task);
-    XBT_DEBUG("%s: computed", allreduce_identifier);
-
-    for (i = 1; i < communicator_size; i++) {
-      sprintf(mailbox, "%s_p0_p%d", allreduce_identifier, i);
-      comms[i - 1] = MSG_task_isend(MSG_task_create(mailbox, 0, comm_size, NULL), mailbox);
-    }
-    MSG_comm_waitall(comms, communicator_size - 1, -1);
-    for (i = 1; i < communicator_size; i++)
-      MSG_comm_destroy(comms[i - 1]);
-    xbt_free(comms);
-
-    XBT_DEBUG("%s: all messages sent by %s have been received", allreduce_identifier, process_name);
-  } else {
-    XBT_DEBUG("%s: %s sends", allreduce_identifier, process_name);
-    sprintf(mailbox, "%s_%s_p0", allreduce_identifier, process_name);
-    XBT_DEBUG("put on %s", mailbox);
-    MSG_task_send(MSG_task_create(allreduce_identifier, 0, comm_size, NULL), mailbox);
-
-    sprintf(mailbox, "%s_p0_%s", allreduce_identifier, process_name);
-    MSG_task_receive(&task, mailbox);
-    MSG_task_destroy(task);
-    XBT_DEBUG("%s: %s has received", allreduce_identifier, process_name);
-  }
-
-  log_action(action, MSG_get_clock() - clock);
-  xbt_free(allreduce_identifier);
-}
-
 static void action_comm_size(const char *const *action)
 {
   const char *size = action[2];
@@ -444,10 +315,6 @@ int main(int argc, char *argv[])
        "\tExample: %s msg_platform.xml msg_deployment.xml ",
        argv[0],argv[0],argv[0]);
 
-  printf("WARNING: THIS BINARY IS KINDA DEPRECATED\n"
-   "This example is still relevant if you want to learn about MSG-based trace replay, but if you want to simulate "
-   "MPI-like traces, you should use the newer version that is in the examples/smpi/replay directory instead.\n");
-   
   MSG_create_environment(argv[1]);
   MSG_launch_application(argv[2]);
 
@@ -462,9 +329,6 @@ int main(int argc, char *argv[])
   xbt_replay_action_register("wait", action_wait);
   xbt_replay_action_register("barrier", action_barrier);
   xbt_replay_action_register("bcast", action_bcast);
-  xbt_replay_action_register("reduce", action_reduce);
-  xbt_replay_action_register("allReduce", action_allReduce);
-  xbt_replay_action_register("sleep", action_sleep);
   xbt_replay_action_register("compute", action_compute);
 
   /* Actually do the simulation using MSG_action_trace_run */
diff --git a/examples/msg/actions-comm/actions-comm.tesh b/examples/msg/actions-comm/actions-comm.tesh
new file mode 100644 (file)
index 0000000..d2833bb
--- /dev/null
@@ -0,0 +1,27 @@
+! output sort 19
+$ ${bindir:=.}/actions-comm --log=actions.thres=verbose ${srcdir:=.}/small_platform_fatpipe.xml actions-comm_split_d.xml "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n"
+> [ 20.703314] (1:p0@Tremblay) p0 recv p1 20.703314
+> [ 20.703314] (2:p1@Ruby) p1 send p0 1e10 20.703314
+> [ 30.897513] (0:maestro@) Simulation time 30.8975
+> [ 30.897513] (1:p0@Tremblay) p0 compute 1e9 10.194200
+> [ 30.897513] (2:p1@Ruby) p1 compute 1e9 10.194200
+
+! output sort 19
+$ ${bindir:=.}/actions-comm --log=actions.thres=verbose ${srcdir:=.}/small_platform_fatpipe.xml actions-comm_d.xml actions-comm.txt "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n"
+> [  0.000000] (1:p0@Tremblay) p0 comm_size 3 0.000000
+> [  1.037020] (1:p0@Tremblay) p0 bcast 5e8 1.037020
+> [  1.037020] (2:p1@Ruby) p1 bcast 5e8 1.037020
+> [  1.037020] (3:p2@Perl) p2 bcast 5e8 1.037020
+> [  1.082894] (1:p0@Tremblay) p0 compute 4.5E6 0.045874
+> [  1.123670] (1:p0@Tremblay) p0 compute 4E6 0.040777
+> [  1.149156] (1:p0@Tremblay) p0 compute 2.5E6 0.025485
+> [  1.149156] (2:p1@Ruby) p1 Irecv p0 0.000000
+> [  1.149156] (3:p2@Perl) p2 Irecv p1 0.000000
+> [  3.221244] (1:p0@Tremblay) p0 send p1 1e9 2.072088
+> [  6.246256] (3:p2@Perl) p2 compute 5e8 5.097100
+> [ 11.343355] (2:p1@Ruby) p1 compute 1e9 10.194200
+> [ 11.343355] (2:p1@Ruby) p1 wait 0.000000
+> [ 11.343355] (2:p1@Ruby) p1 Isend p2 1e9 0.000000
+> [ 13.415443] (0:maestro@) Simulation time 13.4154
+> [ 13.415443] (1:p0@Tremblay) p0 compute 1e9 10.194200
+> [ 13.415443] (3:p2@Perl) p2 wait 7.169187
similarity index 65%
rename from examples/msg/actions-mpi/actions-mpi.txt
rename to examples/msg/actions-comm/actions-comm.txt
index 088c23b..94054f2 100644 (file)
@@ -32,22 +32,6 @@ p0 barrier
 p1 barrier
 p2 barrier
 
-p0 reduce 5e8 5e8
-p1 reduce 5e8 5e8
-p2 reduce 5e8 5e8
-
-p0 compute 5e8
-p1 compute 5e8
-p2 compute 5e8
-
-p0 barrier
-p1 barrier
-p2 barrier
-
-p0 allReduce 5e8 5e8
-p1 allReduce 5e8 5e8
-p2 allReduce 5e8 5e8
-
 p0 finalize
 p1 finalize
 p2 finalize
@@ -3,8 +3,8 @@
 <platform version="4">
 <!-- Example file of how to use trace replay, with actions split in separate files, one per process.
      Launch it like this:
-         ./mpi_actions homogeneous_3_hosts.xml mpi_deployment_split.xml  -->
+         ./actions-comm ../../platforms/platform.xml actions-comm_split_d.xml  -->
 
-  <process host="Tremblay" function="p0">    <argument value="actions-mpi_split_p0.txt"/>  </process>
-  <process host="Ruby" function="p1">        <argument value="actions-mpi_split_p1.txt"/>  </process>
+  <process host="Tremblay" function="p0">    <argument value="actions-comm_split_p0.txt"/>  </process>
+  <process host="Ruby"     function="p1">    <argument value="actions-comm_split_p1.txt"/>  </process>
 </platform>
diff --git a/examples/msg/actions-comm/actions-comm_split_p0.txt b/examples/msg/actions-comm/actions-comm_split_p0.txt
new file mode 100644 (file)
index 0000000..dd16140
--- /dev/null
@@ -0,0 +1,6 @@
+# sample action file (with only the actions for p0, to be launched by deployment file)
+p0 init
+p0 recv p1
+p0 compute 1e9
+p0 finalize
+
diff --git a/examples/msg/actions-comm/actions-comm_split_p1.txt b/examples/msg/actions-comm/actions-comm_split_p1.txt
new file mode 100644 (file)
index 0000000..d7e9dfe
--- /dev/null
@@ -0,0 +1,5 @@
+# sample action file (with only the actions for p1, to be launched by deployment file)
+p1 init
+p1 send p0 1e10
+p1 compute 1e9
+p1 finalize
diff --git a/examples/msg/actions-mpi/actions-mpi.tesh b/examples/msg/actions-mpi/actions-mpi.tesh
deleted file mode 100644 (file)
index cc49d0a..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-# A little tesh file testing most MPI-related actions
-
-! output sort 19
-$ ${bindir:=.}/actions-mpi --log=actions.thres=verbose ${srcdir:=.}/small_platform_fatpipe.xml actions-mpi_split_d.xml "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n"
-> WARNING: THIS BINARY IS KINDA DEPRECATED
-> This example is still relevant if you want to learn about MSG-based trace replay, but if you want to simulate MPI-like traces, you should use the newer version that is in the examples/smpi/replay directory instead.
-> [ 20.703314] (1:p0@Tremblay) p0 recv p1 20.703314
-> [ 20.703314] (2:p1@Ruby) p1 send p0 1e10 20.703314
-> [ 20.703314] (1:p0@Tremblay) p0 compute 12 0.000000
-> [ 32.703314] (2:p1@Ruby) p1 sleep 12 12.000000
-> [ 32.703314] (0:maestro@) Simulation time 32.7033
-
-! output sort 19
-$ ${bindir:=.}/actions-mpi --log=actions.thres=verbose ${srcdir:=.}/small_platform_fatpipe.xml actions-mpi_d.xml actions-mpi.txt "--log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n"
-> WARNING: THIS BINARY IS KINDA DEPRECATED
-> This example is still relevant if you want to learn about MSG-based trace replay, but if you want to simulate MPI-like traces, you should use the newer version that is in the examples/smpi/replay directory instead.
-> [  0.000000] (1:p0@Tremblay) p0 comm_size 3 0.000000
-> [  1.037020] (1:p0@Tremblay) p0 bcast 5e8 1.037020
-> [  1.037020] (2:p1@Ruby) p1 bcast 5e8 1.037020
-> [  1.037020] (3:p2@Perl) p2 bcast 5e8 1.037020
-> [  1.082894] (1:p0@Tremblay) p0 compute 4.5E6 0.045874
-> [  1.123670] (1:p0@Tremblay) p0 compute 4E6 0.040777
-> [  1.149156] (1:p0@Tremblay) p0 compute 2.5E6 0.025485
-> [  1.149156] (2:p1@Ruby) p1 Irecv p0 0.000000
-> [  1.149156] (3:p2@Perl) p2 Irecv p1 0.000000
-> [  3.221244] (1:p0@Tremblay) p0 send p1 1e9 2.072088
-> [  6.246256] (3:p2@Perl) p2 compute 5e8 5.097100
-> [ 11.343355] (2:p1@Ruby) p1 compute 1e9 10.194200
-> [ 11.343355] (2:p1@Ruby) p1 wait 0.000000
-> [ 11.343355] (2:p1@Ruby) p1 Isend p2 1e9 0.000000
-> [ 13.415443] (1:p0@Tremblay) p0 compute 1e9 10.194200
-> [ 13.415443] (3:p2@Perl) p2 wait 7.169187
-> [ 14.452463] (2:p1@Ruby) p1 reduce 5e8 5e8 1.037020
-> [ 14.452463] (3:p2@Perl) p2 reduce 5e8 5e8 1.037020
-> [ 19.549562] (1:p0@Tremblay) p0 reduce 5e8 5e8 6.134119
-> [ 19.549562] (2:p1@Ruby) p1 compute 5e8 5.097100
-> [ 19.549562] (3:p2@Perl) p2 compute 5e8 5.097100
-> [ 24.646662] (1:p0@Tremblay) p0 compute 5e8 5.097100
-> [ 31.817801] (0:maestro@) Simulation time 31.8178
-> [ 31.817801] (1:p0@Tremblay) p0 allReduce 5e8 5e8 7.171139
-> [ 31.817801] (2:p1@Ruby) p1 allReduce 5e8 5e8 7.171139
-> [ 31.817801] (3:p2@Perl) p2 allReduce 5e8 5e8 7.171139
diff --git a/examples/msg/actions-mpi/actions-mpi_split_p0.txt b/examples/msg/actions-mpi/actions-mpi_split_p0.txt
deleted file mode 100644 (file)
index b873353..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# sample action file (with only the actions for p0,
-#                     to be launched by deployment file)
-p0 init
-p0 recv p1
-p0 compute 12
-p0 finalize
-
diff --git a/examples/msg/actions-mpi/actions-mpi_split_p1.txt b/examples/msg/actions-mpi/actions-mpi_split_p1.txt
deleted file mode 100644 (file)
index 2c38b29..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# sample action file (with only the actions for p1,
-#                     to be launched by deployment file)
-p1 init
-p1 send p0 1e10
-p1 sleep 12
-p1 finalize