From f00adec93f7c6103d515abf334d4ba63e6e4f8a0 Mon Sep 17 00:00:00 2001
From: Augustin Degomme
Date: Sun, 8 Aug 2021 00:29:14 +0200
Subject: [PATCH] Add a new ti-tracing/replay test (in teshsuite) to improve coverage.

It tests all collectives, generates a trace, then replays it while tracing
again, and checks that both traces are identical.
---
 teshsuite/smpi/CMakeLists.txt     |  6 +-
 teshsuite/smpi/replay/replay.c    | 99 +++++++++++++++++++++++++++++++
 teshsuite/smpi/replay/replay.tesh | 15 +++++
 3 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 teshsuite/smpi/replay/replay.c
 create mode 100644 teshsuite/smpi/replay/replay.tesh

diff --git a/teshsuite/smpi/CMakeLists.txt b/teshsuite/smpi/CMakeLists.txt
index 7a517910f7..f577b74242 100644
--- a/teshsuite/smpi/CMakeLists.txt
+++ b/teshsuite/smpi/CMakeLists.txt
@@ -9,7 +9,7 @@ if(enable_smpi)
   foreach(x coll-allgather coll-allgatherv coll-allreduce coll-allreduce-with-leaks coll-alltoall coll-alltoallv coll-barrier coll-bcast
             coll-gather coll-reduce coll-reduce-scatter coll-scatter macro-sample pt2pt-dsend pt2pt-pingpong
             type-hvector type-indexed type-struct type-vector bug-17132 gh-139 timers privatization
-            io-simple io-simple-at io-all io-all-at io-shared io-ordered topo-cart-sub)
+            io-simple io-simple-at io-all io-all-at io-shared io-ordered topo-cart-sub replay)
     add_executable (${x} EXCLUDE_FROM_ALL ${x}/${x}.c)
     target_link_libraries(${x} simgrid)
     set_target_properties(${x} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${x})
@@ -39,7 +39,7 @@ foreach(x coll-allgather coll-allgatherv coll-allreduce coll-allreduce-with-leak
         coll-gather coll-reduce coll-reduce-scatter coll-scatter macro-sample pt2pt-dsend pt2pt-pingpong
         type-hvector type-indexed type-struct type-vector bug-17132 gh-139 timers privatization
         macro-shared auto-shared macro-partial-shared macro-partial-shared-communication
-        io-simple io-simple-at io-all io-all-at io-shared io-ordered topo-cart-sub)
+        io-simple io-simple-at io-all io-all-at io-shared io-ordered topo-cart-sub replay)
   set(tesh_files ${tesh_files} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.tesh)
   set(teshsuite_src ${teshsuite_src} ${CMAKE_CURRENT_SOURCE_DIR}/${x}/${x}.c)
 endforeach()
@@ -159,6 +159,8 @@ if(enable_smpi)
   # Extra pt2pt pingpong test: broken usage ti-tracing
   ADD_TESH_FACTORIES(tesh-smpi-broken "thread" --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/pt2pt-pingpong --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/pt2pt-pingpong ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/pt2pt-pingpong/broken_hostfiles.tesh)
   ADD_TESH(tesh-smpi-replay-ti-tracing --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/pt2pt-pingpong --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/pt2pt-pingpong ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/pt2pt-pingpong/TI_output.tesh)
+  ADD_TESH(tesh-smpi-replay-ti-tracing-coll --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv srcdir=${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/replay --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/replay ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/replay/replay.tesh)
+
   ADD_TESH_FACTORIES(tesh-smpi-gh-139 "thread" --setenv platfdir=${CMAKE_HOME_DIRECTORY}/examples/platforms --setenv bindir=${CMAKE_BINARY_DIR}/teshsuite/smpi/gh-139 --cd ${CMAKE_BINARY_DIR}/teshsuite/smpi/gh-139 ${CMAKE_HOME_DIRECTORY}/teshsuite/smpi/gh-139/gh-139.tesh)
 
   # Simple privatization tests
diff --git a/teshsuite/smpi/replay/replay.c b/teshsuite/smpi/replay/replay.c
new file mode 100644
index 0000000000..aac46105e0
--- /dev/null
+++ b/teshsuite/smpi/replay/replay.c
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi.h"
+
+/* Size, in ints, of the slice of the send/receive buffers owned by each rank. */
+#define BUFSIZE (1024*1024)
+
+int
+main (int argc, char **argv){
+  int i, nprocs = -1;
+  int rank = -1;
+  int *sendbuf, *recvbuf = NULL, *displs = NULL, *counts = NULL, *rcounts = NULL, *alltoallvcounts;
+
+  /* init: start MPI, then fetch the communicator size and this process's rank */
+  MPI_Init (&argc, &argv);
+  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
+  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
+
+  sendbuf = (int *) malloc (BUFSIZE * nprocs * sizeof(int));
+  for (i = 0; i < BUFSIZE * nprocs; i++)
+    sendbuf[i] = rank;
+
+  alltoallvcounts = (int *) malloc (nprocs * sizeof(int));
+  for (i = 0; i < nprocs; i++)
+    if ((i + rank) < BUFSIZE)
+      alltoallvcounts[i] = i + rank;
+    else
+      alltoallvcounts[i] = BUFSIZE;
+
+  if (rank == 0) {
+    recvbuf = (int *) malloc (BUFSIZE * nprocs * sizeof(int));
+    for (i = 0; i < BUFSIZE * nprocs; i++)
+      recvbuf[i] = i;
+
+    displs = (int *) malloc (nprocs * sizeof(int));
+    counts = (int *) malloc (nprocs * sizeof(int));
+    rcounts = (int *) malloc (nprocs * sizeof(int));
+    for (i = 0; i < nprocs; i++) {
+      displs[i] = i * BUFSIZE;
+      if (i < BUFSIZE)
+        rcounts[i] = counts[i] = i;
+      else
+        rcounts[i] = counts[i] = BUFSIZE;
+    }
+  }
+
+  // first test: only rank 0 owns recvbuf/displs/counts/rcounts, the other ranks pass them unallocated (NULL)
+  MPI_Barrier (MPI_COMM_WORLD);
+  MPI_Bcast (sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Gather (&sendbuf[rank*BUFSIZE], BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Scatter (recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Gatherv (&sendbuf[rank*BUFSIZE], (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, recvbuf, rcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Scatterv (recvbuf, counts, displs, MPI_INT, sendbuf, (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Reduce (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+
+  if (rank != 0) {
+    recvbuf = (int *) malloc (BUFSIZE * nprocs * sizeof(int));
+    for (i = 0; i < BUFSIZE * nprocs; i++)
+      recvbuf[i] = i;
+
+    displs = (int *) malloc (nprocs * sizeof(int));
+    counts = (int *) malloc (nprocs * sizeof(int));
+    rcounts = (int *) malloc (nprocs * sizeof(int));
+    for (i = 0; i < nprocs; i++) {
+      displs[i] = i * BUFSIZE;
+      if (i < BUFSIZE)
+        rcounts[i] = counts[i] = i;
+      else
+        rcounts[i] = counts[i] = BUFSIZE;
+    }
+  }
+  // second test: every rank now owns allocated buffers, so the all-to-all style collectives can run too
+  MPI_Barrier (MPI_COMM_WORLD);
+  MPI_Bcast (sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Gather (&sendbuf[rank*BUFSIZE], BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Scatter (recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Gatherv (&sendbuf[rank*BUFSIZE], (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, recvbuf, rcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Scatterv (recvbuf, counts, displs, MPI_INT, sendbuf, (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Reduce (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
+  MPI_Allgather (sendbuf, BUFSIZE, MPI_INT, recvbuf, BUFSIZE, MPI_INT, MPI_COMM_WORLD);
+  MPI_Alltoall (recvbuf, BUFSIZE, MPI_INT, sendbuf, BUFSIZE, MPI_INT, MPI_COMM_WORLD);
+  MPI_Allgatherv (sendbuf, (rank < BUFSIZE) ? rank : BUFSIZE, MPI_INT, recvbuf, rcounts, displs, MPI_INT, MPI_COMM_WORLD);
+  MPI_Alltoallv (recvbuf, alltoallvcounts, displs, MPI_INT, sendbuf, alltoallvcounts, displs, MPI_INT, MPI_COMM_WORLD);
+  MPI_Allreduce (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Reduce_scatter (sendbuf, recvbuf, rcounts, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Scan (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Exscan (sendbuf, recvbuf, BUFSIZE, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Barrier (MPI_COMM_WORLD);
+
+  free (alltoallvcounts);
+  free (sendbuf);
+  free (recvbuf);
+  free (displs);
+  free (counts);
+  free (rcounts);
+
+  MPI_Finalize ();
+  return 0;
+}
diff --git a/teshsuite/smpi/replay/replay.tesh b/teshsuite/smpi/replay/replay.tesh
new file mode 100644
index 0000000000..c10da29837
--- /dev/null
+++ b/teshsuite/smpi/replay/replay.tesh
@@ -0,0 +1,15 @@
+# Replay test with all collectives, and two runs to check that both generated traces are identical
+! output sort
+
+p First test
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ${bindir:=.}/../hostfile_coll -platform ${platfdir}/small_platform.xml -np 4 --log=xbt_cfg.thres:critical --log=smpi_config.thres:warning --log=smpi_coll.thres:error --log=smpi_mpi.thres:error --log=smpi_pmpi.thres:error --cfg=smpi/simulate-computation:false -trace-ti --cfg=tracing/filename:trace1 ${bindir:=.}/replay
+
+p Replay
+$ ${bindir:=.}/../../../smpi_script/bin/smpirun -hostfile ${bindir:=.}/../hostfile_coll -platform ${platfdir}/small_platform.xml -np 4 --log=xbt_cfg.thres:critical --log=smpi_config.thres:warning --log=smpi_coll.thres:error --log=smpi_mpi.thres:error --log=smpi_pmpi.thres:error --cfg=smpi/simulate-computation:false -trace-ti --cfg=tracing/filename:trace2 -replay ${bindir:=.}/trace1
+> [Tremblay:1:(2) 0.173855] [smpi_replay/INFO] Simulation time 0.173855
+
+p Compare traces
+$ sh -c "diff trace1_files/*1.txt trace2_files/*1.txt"
+
+$ sh -c "rm -rf trace1* trace2*"
+
-- 
2.20.1