From d4fcd27038f98736bb4e88690a058ae343b99d2c Mon Sep 17 00:00:00 2001
From: Martin Quinson <martin.quinson@loria.fr>
Date: Thu, 1 Oct 2015 04:07:07 +0200
Subject: [PATCH] [tesh] centralize and rework the documentation

---
 tools/cmake/Documentation.cmake |   3 +-
 tools/tesh/CMakeLists.txt       |   2 -
 tools/tesh/README               | 149 ----------------------
 tools/tesh/set-timeout.tesh     |   3 +
 tools/tesh/tesh.1               | 148 ----------------------
 tools/tesh/tesh.pl              | 212 +++++++++++++++++++++++++++++++-
 6 files changed, 214 insertions(+), 303 deletions(-)
 delete mode 100644 tools/tesh/README
 delete mode 100644 tools/tesh/tesh.1
diff --git a/tools/cmake/Documentation.cmake b/tools/cmake/Documentation.cmake
index 5c432429cb..038edc33bb 100644
--- a/tools/cmake/Documentation.cmake
+++ b/tools/cmake/Documentation.cmake
@@ -141,11 +141,12 @@ endif()
 add_custom_target(manpages ALL
   COMMAND ${CMAKE_COMMAND} -E make_directory ${MANPAGE_DIR}
   COMMAND pod2man ${CMAKE_HOME_DIRECTORY}/tools/simgrid_update_xml.pl > ${MANPAGE_DIR}/simgrid_update_xml.1
+  COMMAND pod2man ${CMAKE_HOME_DIRECTORY}/tools/tesh/tesh.pl > ${MANPAGE_DIR}/tesh.1
   COMMENT "Generating manpages"
   )
 install(FILES
   ${MANPAGE_DIR}/simgrid_update_xml.1
-  ${CMAKE_HOME_DIRECTORY}/tools/tesh/tesh.1
+  ${MANPAGE_DIR}/tesh.1
   ${CMAKE_HOME_DIRECTORY}/doc/manpage/smpicc.1
   ${CMAKE_HOME_DIRECTORY}/doc/manpage/smpicxx.1
   ${CMAKE_HOME_DIRECTORY}/doc/manpage/smpif90.1
diff --git a/tools/tesh/CMakeLists.txt b/tools/tesh/CMakeLists.txt
index 5d6d6fffd4..b0a018cde8 100644
--- a/tools/tesh/CMakeLists.txt
+++ b/tools/tesh/CMakeLists.txt
@@ -42,7 +42,5 @@ set(bin_files
   )
 set(txt_files
   ${txt_files}
-  ${CMAKE_CURRENT_SOURCE_DIR}/README
-  ${CMAKE_CURRENT_SOURCE_DIR}/tesh.1
   PARENT_SCOPE
   )
diff --git a/tools/tesh/README b/tools/tesh/README
deleted file mode 100644
index 5298b49c7c..0000000000
--- a/tools/tesh/README
+++ /dev/null
@@ -1,149 +0,0 @@
-This is the TESH tool. It constitutes a testing shell, ie a sort of shell
-specialized to run tests. The list of actions to take is parsed from files
-files called testsuite.
-
-Testsuites syntax
------------------
-Here is the syntax of these files:
-
-The kind of each line is given by the first char (the second char should be
-blank and is ignored):
-
- `$' command to run in foreground
- `&' command to run in background
- `<' input to pass to the command
- `>' output expected from the command
- `!' metacommand, which can be one of:
-     `timeout' <integer>|no
-     `expect signal' <signal name>
-     `expect return' <integer>
-     `output' <ignore|display>
-     `setenv <key>=<val>'
- `p' a string to print
- `P' a string to print at the CRITICAL level (ease logging grepping)
-
-If the expected output do not match what the command spits, TESH will produce
-an error showing the diff (see OUTPUT below).
-
-Command line arguments
-----------------------
-Tesh accepts several command line arguments:
-  --cd some/directory : ask tesh to switch the working directory before
-                        launching the tests
-  --setenv var=value  : set a specific environment variable
-  --cfg arg           : add parameter --cfg=arg to each command line
-  --enable-coverage   : ignore output lines starting with "profiling:"
-
-IO orders
----------
-
-The < and > lines add IO to the command defined in the current block (blocks
-are separated by blank lines). It is possible to place these lines either after
-the command or before. The difference between the two following chunks is
-mainly cosmetic in your testsuites, TESH don't care. (cf IO-orders.tesh)
-
- $ cat
- < TOTO
- > TOTO
-
- > TOTO
- $ cat
- < TOTO
-
-Nevertheless, it is possible to have several commands in the same block, but
-none of them can have any output. It may seem a bit restrictive, as one could
-say that a command gets all the IO until the next command, but I'm afraid of
-errors such as the following:
-
- $ cd toto
- > TOTO
- $ mkfile file
-
-TOTO will be passed to the cd command, where the user clearly want to pass it
-to the mkfile built-in command (see below).
-
-Stream redirection
-------------------
-Stream redirections (">", "<" and "|" constructs in sh) are not
-implemented yet in tesh. This is a bit restrictive, but well, patch
-welcome...
-
-The situation in which it is mainly problematic is to create a
-temporary file. The solution is to use the "mkfile" built-in command,
-as in the following example:
-$ mkfile myFile
-> some content
-> to the file
-
-This will create a file called myFile (first argument of the mkfile
-command). Its content will be all the input provided to the command.
-
-RETURN CODE
------------
-
-TESH spits an appropriate error message when the child do not return 0 as
-return code (cf. catch-return.tesh), and returns code+40 itself.
-
-It is also possible to specify that a given command must return another
-value. For this, use the "expect return" metacommand, which takes an integer as
-argument. The change only apply to the next command (cf. set-return.tesh).
-
-SIGNALS
--------
-
-TESH detects when the child is killed by a signal (like on segfaults), and
-spits an appropriate error message (cf. catch-signal.tesh).
-
-It is also possible to specify that a given command must raise a given
-signal. For this, use the "expect signal" metacommand. It takes the signal name
-as argument. The change only apply to the next command (cf. set-signal.tesh).
-
-TIMEOUTS
---------
-
-By default, all commands are given 5 seconds to execute
-(cf. catch-timeout.tesh). You can change this with the "timeout", which
-takes an integer as argument. The change only apply to the next command
-(cf. set-timeout.tesh). If you pass "no" as argument, the command
-cannot timeout.
-
-OUTPUT
-------
-
-By default, the commands output is matched against the one expected,
-and an error is raised on discrepancy. Metacommands to change this:
- "output ignore"  -> output completely discarded
- "output display" -> output displayed (but not verified)
- "output sort"    -> sorts the display before verifying it (see below)
-
-SORTING OUTPUT
---------------
-Sorting the output seems to be a strange idea, but it is mandatory in
-SimGrid since the processes run out of order at any scheduling point
-(ie, every processes ready to run at simulated time t run in
-parallel). To ensure that the simulator outputs still match, we have
-to sort the output back before comparing it.
-
-We expect the simulators to run with that log formatting argument:
-   -log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n
-Then, tesh sorts string on the 19 first chars only, and is stable when
-line beginnings are equal. This should ensure that:
- (1) tesh is effective (no false positive, no false negative)
- (2) scheduling points are separated from each other
- (3) at each scheduling point, processes are separated from each other
- (4) the order of what a given process says at a given scheduling
-     point is preserved.
-
-This is of course very SimGrid oriented, breaking the generality of
-tesh, but who cares, actually?
-
-If you want to change the length of the prefix used for the sort,
-simply specify it after the output sort directive, like this:
-
-! output sort 22
-
-ENVIRONMENT
------------
-You can add some content to the tested processes environment with the
-setenv metacommand. It works as expected. For example:
-  "setenv PATH=/bin"
\ No newline at end of file
diff --git a/tools/tesh/set-timeout.tesh b/tools/tesh/set-timeout.tesh
index 7f94ed9e0d..1ba153f70a 100644
--- a/tools/tesh/set-timeout.tesh
+++ b/tools/tesh/set-timeout.tesh
@@ -5,3 +5,6 @@
 
 ! timeout 10
 $ sleep 6
+
+! timeout no
+$ sleep 1
\ No newline at end of file
diff --git a/tools/tesh/tesh.1 b/tools/tesh/tesh.1
deleted file mode 100644
index 3c55b70808..0000000000
--- a/tools/tesh/tesh.1
+++ /dev/null
@@ -1,148 +0,0 @@
-.\" Manpage for tesh, the TEsting SHell.
-.\" 
-.\" To read it locally (when not installed): 
-.\"   man ./tesh.1
-.\"
-.TH tesh 1 "10 Oct 2012" "1.0" "tesh man page"
-.SH NAME
-tesh \- testing shell
-.SH SYNOPSIS
-.B tesh
-[\fIOPTION\fR]... [\fIFILE\fR]...
-.SH DESCRIPTION
-This is the TESH tool. It constitutes a testing shell, ie a sort of shell specialized to run tests. The list of actions to take is parsed from files files called testsuite.
-.SH OPTIONS
-  --cd some/directory : ask tesh to switch the working directory before
-                        launching the tests
-  --setenv var=value  : set a specific environment variable
-  --cfg arg           : add parameter --cfg=arg to each command line
-  --enable-coverage   : ignore output lines starting with "profiling:"
-.SH TESH FILE SYNTAX
-Here is the syntax of these files:
-
-The kind of each line is given by the first char (the second char should be
-blank and is ignored):
-
- `$' command to run in foreground
- `&' command to run in background
- `<' input to pass to the command
- `>' output expected from the command
- `!' metacommand, which can be one of:
-     `timeout' <integer>|no
-     `expect signal' <signal name>
-     `expect return' <integer>
-     `output' <ignore|display>
-     `setenv <key>=<val>'
- `p' a string to print
- `P' a string to print at the CRITICAL level (ease logging grepping)
-
-If the expected output do not match what the command spits, TESH will produce
-an error showing the diff (see OUTPUT below).
-.SH IO ORDERS
-The < and > lines add IO to the command defined in the current block (blocks
-are separated by blank lines). It is possible to place these lines either after
-the command or before. The difference between the two following chunks is
-mainly cosmetic in your testsuites, TESH don't care. (cf IO-orders.tesh)
-
- $ cat
- < TOTO
- > TOTO
-
- > TOTO
- $ cat
- < TOTO
-
-Nevertheless, it is possible to have several commands in the same block, but
-none of them can have any output. It may seem a bit restrictive, as one could
-say that a command gets all the IO until the next command, but I'm afraid of
-errors such as the following:
-
- $ cd toto
- > TOTO
- $ mkfile file
-
-TOTO will be passed to the cd command, where the user clearly want to pass it
-to the mkfile built-in command (see below).
-.SH STREAM REDIRECTION
-Stream redirections (">", "<" and "|" constructs in sh) are not
-implemented yet in tesh. This is a bit restrictive, but well, patch
-welcome...
-
-The situation in which it is mainly problematic is to create a
-temporary file. The solution is to use the "mkfile" built-in command,
-as in the following example:
-$ mkfile myFile
-> some content
-> to the file
-
-This will create a file called myFile (first argument of the mkfile
-command). Its content will be all the input provided to the command.
-.SH RETURN CODE
-TESH spits an appropriate error message when the child do not return 0 as
-return code (cf. catch-return.tesh), and returns code+40 itself.
-
-It is also possible to specify that a given command must return another
-value. For this, use the "expect return" metacommand, which takes an integer as
-argument. The change only apply to the next command (cf. set-return.tesh).
-.SH SIGNALS
-TESH detects when the child is killed by a signal (like on segfaults), and
-spits an appropriate error message (cf. catch-signal.tesh).
-
-It is also possible to specify that a given command must raise a given
-signal. For this, use the "expect signal" metacommand. It takes the signal name
-as argument. The change only apply to the next command (cf. set-signal.tesh).
-.SH TIMEOUTS
-By default, all commands are given 5 seconds to execute
-(cf. catch-timeout.tesh). You can change this with the "timeout", which
-takes an integer as argument. The change only apply to the next command
-(cf. set-timeout.tesh). If you pass "no" as argument, the command
-cannot timeout.
-.SH OUTPUT
-By default, the commands output is matched against the one expected,
-and an error is raised on discrepancy. Metacommands to change this:
- "output ignore"  -> output completely discarded
- "output display" -> output displayed (but not verified)
- "output sort"    -> sorts the display before verifying it (see below)
-.SH SORTING OUTPUT
-
-SimGrid is designed to produce perfectly reproducible results, so its
-output can usualy be compared without any preprocessing. This is not
-true anymore when the user activates parallel execution: User
-processes are run in parallel at each timestamp, and the output is not
-reproducible anymore. Until you sort the lines.
-
-If you ask for 
-.B ! output sort
-then tesh will sort the whole lines. But it really complicates the
-analysis of the error detected: the logical order of the output is
-defeated by the lexicographical sort. 
-
-The solution is to ask for
-.B ! output sort 19
-instead to sort only on the prefix of the line. Indeed, we run our simulation
-tests with the flag: 
-  --log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n
-
-Then, the previous command sorts  lines on the first 19 chars, that is
-exactly the length of the prefix indicating the timestamp and the
-process. That's exactly what we need:
- - Every timestamps remain separated, as it should; 
- - In each timestamp, the output order of processes become
-   reproducible: that's the lexicographical order of their name;
- - For each process, the order of its execution is preserved: its
-   messages within a given timestamp are not reordered.
-
-That way, tesh can do its job (no false positive, no false negative)
-despite the unpredictable order of executions of processes within a
-timestamp, and reported errors remain easy to analyze (execution of a
-given process preserved).
-
-This is of course very SimGrid oriented, but could even be usable by
-others, who knows?
-
-.SH ENVIRONMENT
-You can add some content to the tested processes environment with the
-setenv metacommand. It works as expected. For example:
-  "setenv PATH=/bin"
-.SH BUGS
-No known bugs. 
diff --git a/tools/tesh/tesh.pl b/tools/tesh/tesh.pl
index 287a90fd28..f25be34489 100755
--- a/tools/tesh/tesh.pl
+++ b/tools/tesh/tesh.pl
@@ -19,7 +19,212 @@ tesh -- testing shell
 
 =head1 SYNOPSIS
 
-B<tesh> [I<options>] I<tesh_file>
+B<tesh> [I<options>]... I<testsuite>
+
+=head1 DESCRIPTION
+
+Tesh is the testing shell, a specialized shell for running tests. It
+provides the specified input to the tested commands, and checks that
+they produce the expected output and return the expected value.
+
+=head1 OPTIONS
+
+  --cd some/directory : ask tesh to switch the working directory before
+                        launching the tests
+  --setenv var=value  : set a specific environment variable
+  --cfg arg           : add parameter --cfg=arg to each command line
+  --enable-coverage   : ignore output lines starting with "profiling:"
+
+=head1 TEST SUITE FILE SYNTAX
+
+A test suite is composed of one or several I<command blocks> separated
+by empty lines, each of them being composed of a command to run, its
+input text and the expected output.
+
+The first char of each line specifies the type of line according to
+the following list. The second char of each line is ignored.
+
+ `$' command to run in foreground
+ `&' command to run in background
+
+ `<' input to pass to the command
+ `>' output expected from the command
+
+ `!' metacommand, which can be one of:
+     `timeout' <integer>|no
+     `expect signal' <signal name>
+     `expect return' <integer>
+     `output' <ignore|display|sort>
+     `setenv <key>=<val>'
+
+ `p' an informative message to print
+
+If the expected output does not match the produced output, or if the
+command did not end as expected, Tesh provides an error message (see
+"Not enforcing the expected output" below) and stops.
+
+=head2 Command blocks examples
+
+In a given command block, you can declare the command, its input and
+its expected output in the order that you see fit.
+
+    $ cat
+    < TOTO
+    > TOTO
+
+    > TOTO
+    $ cat
+    < TOTO
+
+    > TOTO
+    < TOTO
+    $ cat
+
+You can group several commands together, provided that they don't have
+any input or output.
+
+    $ mkdir testdir
+    $ cd testdir
+
+=head2 Enforcing the command return code
+
+By default, Tesh enforces that the tested command returns 0. If not,
+it fails with an appropriate message and returns I<code+40> itself.
+
+You specify that a given command block is expected to return another
+code as follows:
+
+    # This command MUST return 42
+    ! expect return 42
+    $ sh -c "exit 42"
+
+The I<expect return> construct applies only to the next command block.
+
+=head2 Commands that are expected to raise signals
+
+By default, Tesh detects when the command is killed by a signal (such
+as SEGV on segfaults). This is usually unexpected and unfortunate. But
+if not, you can specify that a given command block is expected to fail
+with a signal as follows:
+
+    # This command MUST raise a segfault
+    ! expect signal SIGSEGV
+    $ ./some_failing_code
+
+The I<expect signal> construct applies only to the next command block.
+
+=head2 Timeouts
+
+By default, no command is allowed to run for more than 5 seconds. You
+can change this value as follows:
+
+    # Allow some more time to the command
+    ! timeout 60
+    $ ./some_longer_command
+
+You can also disable the timeout completely by passing "no" as a value:
+
+    # This command will never time out
+    ! timeout no
+    $ ./some_very_long_but_safe_command
+
+=head2 Setting environment variables
+
+You can modify the environment of the tested commands as follows:
+
+    ! setenv PATH=/bin
+    $ my_command
+
+=head2 Not enforcing the expected output 
+
+By default, the command's output is matched against the expected one,
+and an error is raised on discrepancy. Metacommands to change this:
+
+=over 4
+
+=item output ignore
+
+The output is completely discarded.
+
+=item output display
+
+The output is displayed, but no error is issued if it differs from the
+expected output.
+
+=item output sort
+
+The output is sorted before comparison (see next section).
+
+=back
+
+=head2 Sorting output
+
+If the order of the command output changes between runs, you want to
+sort it before enforcing that it is exactly what you expect. In
+SimGrid for example, this happens when parallel execution is
+activated: User processes are run in parallel at each timestamp, and
+the output is not reproducible anymore. Until you sort the lines.
+
+You can sort the command output as follows:
+
+    ! output sort
+    $ ./some_multithreaded_command
+
+Sorting lines this way often makes the tesh output very intricate,
+complicating the error analysis: the process logical order is defeated
+by the lexicographical sort.
+
+The solution is to prefix each line of your output with temporal
+information so that lines can be grouped by timestamps. The
+lexicographical sort then only applies to lines that occurred at the
+same timestamp. Here is a SimGrid example:
+
+    # Sort only lines depending on the first 19 chars
+    ! output sort 19
+    $ ./some_simgrid_simulator --log=root.fmt:[%10.6r]%e(%i:%P@%h)%e%m%n
+
+This approach may seem surprising at first glance, but it does its job:
+
+=over 4
+
+=item Every timestamp remains separated, as it should;
+
+=item In each timestamp, the output order of processes becomes
+   reproducible: that's the lexicographical order of their name;
+
+=item For each process, the order of its execution is preserved: its
+   messages within a given timestamp are not reordered.
+
+=back
+
+That way, tesh can do its job (no false positive, no false negative)
+despite the unpredictable order of executions of processes within a
+timestamp, and reported errors remain easy to analyze (execution of a
+given process preserved).
+
+This example is very SimGrid oriented, but the feature could even be
+usable by others, who knows?
+
+
+=head1 BUILTIN COMMANDS
+
+=head2 mkfile: creating a file
+
+This command creates a file of the name provided as argument, and adds
+the content it gets as input.
+
+  $ mkfile myFile
+  > some content
+  > to the file
+
+It is not possible to use the cat command, as one would expect,
+because stream redirections are currently not implemented in Tesh.
+
+=head1 BUGS AND LIMITATIONS
+
+The main limitation is the lack of stream redirections in the commands
+(">", "<" and "|" shell constructs and friends). The B<mkfile> builtin
+command makes this situation bearable.
 
 =cut
 
@@ -80,7 +285,7 @@ BEGIN {
 ####
 
 my %opts = ( "debug" => 0,
-             "timeout" => 120, # No command should run any longer than 2 minutes by default
+             "timeout" => 5, # No command should run any longer than 5 seconds by default
            );
 
 Getopt::Long::config( 'bundling', 'no_getopt_compat', 'no_auto_abbrev' );
@@ -184,7 +389,8 @@ sub exec_cmd {
     my $input = defined($cmd{'in'})? join("\n",@{$cmd{'in'}}) : "";
     my $output = " " x 10240; $output = ""; # Preallocate 10kB, and reset length to 0
     $cmd{'got'} = \$output;
-    $cmd{'job'} = start \@cmdline, '<', \$input, '>&', \$output, timeout($cmd{'timeout'});
+    $cmd{'job'} = start \@cmdline, '<', \$input, '>&', \$output, 
+                  ($cmd{'timeout'} eq 'no' ? () : timeout($cmd{'timeout'}));
 
     if ( $cmd{'background'} ) {
 	# Just enqueue the job. It will be dealed with at the end
-- 
2.20.1