Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
[examples] smpi-replay-mm: trace w/ collectives
author Millian Poquet <millian.poquet@inria.fr>
Tue, 27 Nov 2018 17:45:55 +0000 (18:45 +0100)
committer Millian Poquet <millian.poquet@inria.fr>
Tue, 27 Nov 2018 17:45:55 +0000 (18:45 +0100)
examples/smpi/replay_multiple_manual_deploy/CMakeLists.txt
examples/smpi/replay_multiple_manual_deploy/coll.txt [new file with mode: 0644]
examples/smpi/replay_multiple_manual_deploy/coll/actions0.txt [new file with mode: 0644]
examples/smpi/replay_multiple_manual_deploy/coll/actions1.txt [new file with mode: 0644]
examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll1.tesh [new file with mode: 0644]
examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll2_st_sr_noise.tesh [new file with mode: 0644]

index 61c42a1..4730137 100644 (file)
@@ -6,6 +6,8 @@ if(enable_smpi)
 
   # Define a list of a tesh files
   list(APPEND tesh_filename_list replay_multiple_manual_nojob)
 
   # Define a list of a tesh files
   list(APPEND tesh_filename_list replay_multiple_manual_nojob)
+  list(APPEND tesh_filename_list replay_multiple_manual_coll1)
+  list(APPEND tesh_filename_list replay_multiple_manual_coll2_st_sr_noise)
   list(APPEND tesh_filename_list replay_multiple_manual_empty1)
   list(APPEND tesh_filename_list replay_multiple_manual_empty2)
   list(APPEND tesh_filename_list replay_multiple_manual_mixed1)
   list(APPEND tesh_filename_list replay_multiple_manual_empty1)
   list(APPEND tesh_filename_list replay_multiple_manual_empty2)
   list(APPEND tesh_filename_list replay_multiple_manual_mixed1)
@@ -30,6 +32,9 @@ endif()
 set(txt_files     ${txt_files}     ${CMAKE_CURRENT_SOURCE_DIR}/compute_only.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/compute_only/actions0.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/compute_only/actions1.txt
 set(txt_files     ${txt_files}     ${CMAKE_CURRENT_SOURCE_DIR}/compute_only.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/compute_only/actions0.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/compute_only/actions1.txt
+                                   ${CMAKE_CURRENT_SOURCE_DIR}/coll.txt
+                                   ${CMAKE_CURRENT_SOURCE_DIR}/coll/actions0.txt
+                                   ${CMAKE_CURRENT_SOURCE_DIR}/coll/actions1.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/empty.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/empty/actions0.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/empty/actions1.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/empty.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/empty/actions0.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/empty/actions1.txt
@@ -43,6 +48,8 @@ set(txt_files     ${txt_files}     ${CMAKE_CURRENT_SOURCE_DIR}/compute_only.txt
                                    ${CMAKE_CURRENT_SOURCE_DIR}/workload_mixed2_same_time
                                    ${CMAKE_CURRENT_SOURCE_DIR}/workload_mixed2_same_time_and_resources PARENT_SCOPE)
 set(tesh_files  ${tesh_files}   ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_nojob.tesh
                                    ${CMAKE_CURRENT_SOURCE_DIR}/workload_mixed2_same_time
                                    ${CMAKE_CURRENT_SOURCE_DIR}/workload_mixed2_same_time_and_resources PARENT_SCOPE)
 set(tesh_files  ${tesh_files}   ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_nojob.tesh
+                                ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_coll1.tesh
+                                ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_coll2_st_sr_noise.tesh
                                 ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_empty1.tesh
                                 ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_empty2.tesh
                                 ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_mixed1.tesh
                                 ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_empty1.tesh
                                 ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_empty2.tesh
                                 ${CMAKE_CURRENT_SOURCE_DIR}/replay_multiple_manual_mixed1.tesh
diff --git a/examples/smpi/replay_multiple_manual_deploy/coll.txt b/examples/smpi/replay_multiple_manual_deploy/coll.txt
new file mode 100644 (file)
index 0000000..5fb71bb
--- /dev/null
@@ -0,0 +1,2 @@
+coll/actions0.txt
+coll/actions1.txt
diff --git a/examples/smpi/replay_multiple_manual_deploy/coll/actions0.txt b/examples/smpi/replay_multiple_manual_deploy/coll/actions0.txt
new file mode 100644 (file)
index 0000000..53c4fd5
--- /dev/null
@@ -0,0 +1,7 @@
+0 init
+0 compute 1e9
+0 bcast 1 0 1
+0 alltoall 134217728 134217728 26 26
+0 barrier
+0 reduce 16 0 0 26
+0 finalize
diff --git a/examples/smpi/replay_multiple_manual_deploy/coll/actions1.txt b/examples/smpi/replay_multiple_manual_deploy/coll/actions1.txt
new file mode 100644 (file)
index 0000000..315e201
--- /dev/null
@@ -0,0 +1,6 @@
+1 init
+1 bcast 1 0 1
+1 alltoall 134217728 134217728 26 26
+1 barrier
+1 reduce 16 0 0 26
+1 finalize
diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll1.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll1.tesh
new file mode 100644 (file)
index 0000000..72d3cd4
--- /dev/null
@@ -0,0 +1,38 @@
+p Workload with one coll job
+
+< One app alone
+< alone coll.txt 2 0 0,1
+$ mkfile ${srcdir:=.}/workload_coll1
+
+! timeout 120
+! output sort 25
+$ ./replay_multiple_manual ${platfdir}/small_platform_with_routers.xml  ${srcdir:=.}/workload_coll1 --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 0 0
+> [   0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100'
+> [   0.000000] (maestro@) Job read: app='alone', file='coll.txt', size=2, start=0, alloc='0,1'
+> [   0.000000] (workload@Bourassa) Launching the job executor of job 0 (app 'alone')
+> [   0.000000] (job_0000@Bourassa) Executing job 0 (smpi_app 'alone')
+> [   0.000000] (rank_0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'alone')
+> [   0.000000] (rank_0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'alone')
+> [ 590.549375] (rank_0_0@Bourassa) Simulation time 590.549375
+> [ 590.549375] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'alone')
+> [ 590.549375] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'alone')
+> [ 591.549375] (job_0000@Bourassa) Finished job 0 (smpi_app 'alone')
+> [ 591.549375] (maestro@) Simulation finished! Final time: 591.549
+
+p Workload with one coll job (with noise)
+! timeout 120
+! output sort 25
+$ ./replay_multiple_manual ${platfdir}/small_platform_with_routers.xml  ${srcdir:=.}/workload_coll1 --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 7 13
+> [   0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100'
+> [   0.000000] (maestro@) Job read: app='alone', file='coll.txt', size=2, start=0, alloc='0,1'
+> [   0.000000] (workload@Bourassa) Launching the job executor of job 0 (app 'alone')
+> [   0.000000] (job_0000@Bourassa) Executing job 0 (smpi_app 'alone')
+> [   0.000000] (rank_0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'alone')
+> [   0.000000] (rank_0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'alone')
+> [ 590.549375] (rank_0_0@Bourassa) Simulation time 590.549375
+> [ 590.549375] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'alone')
+> [ 590.549375] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'alone')
+> [ 591.549375] (job_0000@Bourassa) Finished job 0 (smpi_app 'alone')
+> [ 591.549375] (maestro@) Simulation finished! Final time: 591.549
+
+$ rm -f ${srcdir:=.}/workload_coll1
diff --git a/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll2_st_sr_noise.tesh b/examples/smpi/replay_multiple_manual_deploy/replay_multiple_manual_coll2_st_sr_noise.tesh
new file mode 100644 (file)
index 0000000..3b996ed
--- /dev/null
@@ -0,0 +1,31 @@
+p Workload with two coll jobs (at the same time and on the same resources)
+
+< Two jobs at the same time and on the same resources
+< job0 coll.txt 2 0 0,1
+< job1 coll.txt 2 0 0,1
+$ mkfile ${srcdir:=.}/workload_coll2_same_time_and_resources
+
+! timeout 120
+! output sort 25
+$ ./replay_multiple_manual ${srcdir:=.}/../../platforms/small_platform_with_routers.xml ${srcdir:=.}/workload_coll2_same_time_and_resources --log=smpi.:info --cfg=smpi/host-speed:100 "--log=root.fmt:[%11.6r]%e(%P@%h)%e%m%n" 7 13
+> [   0.000000] (maestro@) Configuration change: Set 'smpi/host-speed' to '100'
+> [   0.000000] (maestro@) Job read: app='job0', file='coll.txt', size=2, start=0, alloc='0,1'
+> [   0.000000] (maestro@) Job read: app='job1', file='coll.txt', size=2, start=0, alloc='0,1'
+> [   0.000000] (workload@Bourassa) Launching the job executor of job 0 (app 'job0')
+> [   0.000000] (job_0000@Bourassa) Executing job 0 (smpi_app 'job0')
+> [   0.000000] (rank_0_0@Bourassa) Replaying rank 0 of job 0 (smpi_app 'job0')
+> [   0.000000] (rank_0_1@Fafard) Replaying rank 1 of job 0 (smpi_app 'job0')
+> [   0.000000] (workload@Bourassa) Launching the job executor of job 1 (app 'job1')
+> [   0.000000] (job_0001@Bourassa) Executing job 1 (smpi_app 'job1')
+> [   0.000000] (rank_1_0@Bourassa) Replaying rank 0 of job 1 (smpi_app 'job1')
+> [   0.000000] (rank_1_1@Fafard) Replaying rank 1 of job 1 (smpi_app 'job1')
+> [1181.064896] (rank_0_0@Bourassa) Simulation time 1181.064896
+> [1181.064896] (rank_1_0@Bourassa) Finished replaying rank 0 of job 1 (smpi_app 'job1')
+> [1181.064896] (rank_0_0@Bourassa) Finished replaying rank 0 of job 0 (smpi_app 'job0')
+> [1181.064896] (rank_1_1@Fafard) Finished replaying rank 1 of job 1 (smpi_app 'job1')
+> [1181.064896] (rank_0_1@Fafard) Finished replaying rank 1 of job 0 (smpi_app 'job0')
+> [1182.064896] (job_0000@Bourassa) Finished job 0 (smpi_app 'job0')
+> [1182.064896] (job_0001@Bourassa) Finished job 1 (smpi_app 'job1')
+> [1182.064896] (maestro@) Simulation finished! Final time: 1182.06
+
+$ rm -f ${srcdir:=.}/workload_coll2_same_time_and_resources