From 92a8bcd9439a127da04ada7593f00397b0342c94 Mon Sep 17 00:00:00 2001 From: Millian Poquet Date: Tue, 27 Nov 2018 18:45:55 +0100 Subject: [PATCH] [examples] smpi-replay-mm: trace w/ collectives --- .../CMakeLists.txt | 7 ++++ .../replay_multiple_manual_deploy/coll.txt | 2 + .../coll/actions0.txt | 7 ++++ .../coll/actions1.txt | 6 +++ .../replay_multiple_manual_coll1.tesh | 38 +++++++++++++++++++ ...lay_multiple_manual_coll2_st_sr_noise.tesh | 31 +++++++++++++++ 6 files changed, 91 insertions(+) create mode 100644 examples/smpi/replay_multiple_manual_deploy/coll.txt create mode 100644 examples/smpi/replay_multiple_manual_deploy/coll/actions0.txt create mode 100644 examples/smpi/replay_multiple_manual_deploy/coll/actions1.txt create mode 100644 examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll1.tesh create mode 100644 examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll2_st_sr_noise.tesh diff --git a/examples/smpi/replay_multiple_manual_deploy/CMakeLists.txt b/examples/smpi/replay_multiple_manual_deploy/CMakeLists.txt index 61c42a1e06..47301372d5 100644 --- a/examples/smpi/replay_multiple_manual_deploy/CMakeLists.txt +++ b/examples/smpi/replay_multiple_manual_deploy/CMakeLists.txt @@ -6,6 +6,8 @@ if(enable_smpi) # Define a list of a tesh files list(APPEND tesh_filename_list replay_multiple_manual_nojob) + list(APPEND tesh_filename_list replay_multiple_manual_coll1) + list(APPEND tesh_filename_list replay_multiple_manual_coll2_st_sr_noise) list(APPEND tesh_filename_list replay_multiple_manual_empty1) list(APPEND tesh_filename_list replay_multiple_manual_empty2) list(APPEND tesh_filename_list replay_multiple_manual_mixed1) @@ -30,6 +32,9 @@ endif() set(txt_files ${txt_files} ${CMAKE_CURRENT_SOURCE_DIR}/compute_only.txt ${CMAKE_CURRENT_SOURCE_DIR}/compute_only/actions0.txt ${CMAKE_CURRENT_SOURCE_DIR}/compute_only/actions1.txt + ${CMAKE_CURRENT_SOURCE_DIR}/coll.txt + ${CMAKE_CURRENT_SOURCE_DIR}/coll/actions0.txt + ${CMAKE_CURRENT_SOURCE_DIR}/coll/actions1.txt ${CMAKE_CURRENT_SOURCE_DIR}/empty.txt ${CMAKE_CURRENT_SOURCE_DIR}/empty/actions0.txt ${CMAKE_CURRENT_SOURCE_DIR}/empty/actions1.txt @@ -43,6 +48,8 @@ set(txt_files ${txt_files} ${CMAKE_CURRENT_SOURCE_DIR}/compute_only.txt ${CMAKE_CURRENT_SOURCE_DIR}/workload_mixed2_same_time ${CMAKE_CURRENT_SOURCE_DIR}/workload_mixed2_same_time_and_resources PARENT_SCOPE) set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_nojob.tesh + ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_coll1.tesh + ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_coll2_st_sr_noise.tesh ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_empty1.tesh ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_empty2.tesh ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_mixed1.tesh diff --git a/examples/smpi/replay_multiple_manual_deploy/coll.txt b/examples/smpi/replay_multiple_manual_deploy/coll.txt new file mode 100644 index 0000000000..5fb71bb3cd --- /dev/null +++ b/examples/smpi/replay_multiple_manual_deploy/coll.txt @@ -0,0 +1,2 @@ +coll/actions0.txt +coll/actions1.txt diff --git a/examples/smpi/replay_multiple_manual_deploy/coll/actions0.txt b/examples/smpi/replay_multiple_manual_deploy/coll/actions0.txt new file mode 100644 index 0000000000..53c4fd516f --- /dev/null +++ b/examples/smpi/replay_multiple_manual_deploy/coll/actions0.txt @@ -0,0 +1,7 @@ +0 init +0 compute 1e9 +0 bcast 1 0 1 +0 alltoall 134217728 134217728 26 26 +0 barrier +0 reduce 16 0 0 26 +0 finalize diff --git a/examples/smpi/replay_multiple_manual_deploy/coll/actions1.txt b/examples/smpi/replay_multiple_manual_deploy/coll/actions1.txt new file mode 100644 index 0000000000..315e201bc9 --- /dev/null +++ b/examples/smpi/replay_multiple_manual_deploy/coll/actions1.txt @@ -0,0 +1,6 @@ +1 init +1 bcast 1 0 1 +1 alltoall 134217728 134217728 26 26 +1 barrier +1 reduce 16 0 0 26 +1 finalize diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll1.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll1.tesh new file mode 100644 index 0000000000..72d3cd408a --- /dev/null +++ b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll1.tesh @@ -0,0 +1,38 @@ +p Workload with one coll job + +< One app alone +< alone coll.txt 2 0 0,1 +$ mkfile ${srcdir:=.}/workload_coll1 + +! timeout 120 +! output sort 25 +$ ./replay_multiple_manual ${platfdir}/small_platform_with_routers.xml ${srcdir:=.}/workload_coll1 --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 0 0 +> [ 0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100' +> [ 0.000000] (maestro@) Job read: app='alone', file='coll.txt', size=2, start=0, alloc='0,1' +> [ 0.000000] (workload@Bourassa) Launching the job executor of job 0 (app 'alone') +> [ 0.000000] (job_0000@Bourassa) Executing job 0 (smpi_app 'alone') +> [ 0.000000] (rank_0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'alone') +> [ 0.000000] (rank_0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'alone') +> [ 590.549375] (rank_0_0@Bourassa) Simulation time 590.549375 +> [ 590.549375] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'alone') +> [ 590.549375] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'alone') +> [ 591.549375] (job_0000@Bourassa) Finished job 0 (smpi_app 'alone') +> [ 591.549375] (maestro@) Simulation finished! Final time: 591.549 + +p Workload with one coll job (with noise) +! timeout 120 +! output sort 25 +$ ./replay_multiple_manual ${platfdir}/small_platform_with_routers.xml ${srcdir:=.}/workload_coll1 --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 7 13 +> [ 0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100' +> [ 0.000000] (maestro@) Job read: app='alone', file='coll.txt', size=2, start=0, alloc='0,1' +> [ 0.000000] (workload@Bourassa) Launching the job executor of job 0 (app 'alone') +> [ 0.000000] (job_0000@Bourassa) Executing job 0 (smpi_app 'alone') +> [ 0.000000] (rank_0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'alone') +> [ 0.000000] (rank_0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'alone') +> [ 590.549375] (rank_0_0@Bourassa) Simulation time 590.549375 +> [ 590.549375] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'alone') +> [ 590.549375] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'alone') +> [ 591.549375] (job_0000@Bourassa) Finished job 0 (smpi_app 'alone') +> [ 591.549375] (maestro@) Simulation finished! Final time: 591.549 + +$ rm -f ${srcdir:=.}/workload_coll1 diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll2_st_sr_noise.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll2_st_sr_noise.tesh new file mode 100644 index 0000000000..3b996edb89 --- /dev/null +++ b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll2_st_sr_noise.tesh @@ -0,0 +1,31 @@ +p Workload with two coll jobs (at the same time and on the same resources) + +< Two jobs at the same time and on the same resources +< job0 coll.txt 2 0 0,1 +< job1 coll.txt 2 0 0,1 +$ mkfile ${srcdir:=.}/workload_coll2_same_time_and_resources + +! timeout 120 +! output sort 25 +$ ./replay_multiple_manual ${srcdir:=.}/../../platforms/small_platform_with_routers.xml ${srcdir:=.}/workload_coll2_same_time_and_resources --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 7 13 +> [ 0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100' +> [ 0.000000] (maestro@) Job read: app='job0', file='coll.txt', size=2, start=0, alloc='0,1' +> [ 0.000000] (maestro@) Job read: app='job1', file='coll.txt', size=2, start=0, alloc='0,1' +> [ 0.000000] (workload@Bourassa) Launching the job executor of job 0 (app 'job0') +> [ 0.000000] (job_0000@Bourassa) Executing job 0 (smpi_app 'job0') +> [ 0.000000] (rank_0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'job0') +> [ 0.000000] (rank_0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'job0') +> [ 0.000000] (workload@Bourassa) Launching the job executor of job 1 (app 'job1') +> [ 0.000000] (job_0001@Bourassa) Executing job 1 (smpi_app 'job1') +> [ 0.000000] (rank_1_0@Bourassa) Replaying rank 0 of job 1 (smpi_app 'job1') +> [ 0.000000] (rank_1_1@Fafard) Replaying rank 1 of job 1 (smpi_app 'job1') +> [1181.064896] (rank_0_0@Bourassa) Simulation time 1181.064896 +> [1181.064896] (rank_1_0@Bourassa) Finished replaying rank 0 of job 1 (smpi_app 'job1') +> [1181.064896] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0') +> [1181.064896] (rank_1_1@Fafard) Finished replaying rank 1 of job 1 (smpi_app 'job1') +> [1181.064896] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0') +> [1182.064896] (job_0000@Bourassa) Finished job 0 (smpi_app 'job0') +> [1182.064896] (job_0001@Bourassa) Finished job 1 (smpi_app 'job1') +> [1182.064896] (maestro@) Simulation finished! Final time: 1182.06 + +$ rm -f ${srcdir:=.}/workload_coll2_same_time_and_resources -- 2.20.1