From 926f866bd5d08f749df619f78ef2ed77a5cddea7 Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Tue, 26 Apr 2011 16:12:29 +0200 Subject: [PATCH] reimplement tesh.pl. Still not working in parallel (WIP) --- buildtools/Cmake/Diff.pm | 1713 ++++++++++++++++++++++++++++++++++++++ buildtools/Cmake/tesh.pl | 643 ++++++-------- 2 files changed, 1979 insertions(+), 377 deletions(-) create mode 100644 buildtools/Cmake/Diff.pm diff --git a/buildtools/Cmake/Diff.pm b/buildtools/Cmake/Diff.pm new file mode 100644 index 0000000000..5069c220dc --- /dev/null +++ b/buildtools/Cmake/Diff.pm @@ -0,0 +1,1713 @@ +package Diff; +# Skip to first "=head" line for documentation. +use strict; + +use integer; # see below in _replaceNextLargerWith() for mod to make + # if you don't use this +use vars qw( $VERSION @EXPORT_OK ); +$VERSION = 1.19_02; +# ^ ^^ ^^-- Incremented at will +# | \+----- Incremented for non-trivial changes to features +# \-------- Incremented for fundamental changes +require Exporter; +*import = \&Exporter::import; +@EXPORT_OK = qw( + prepare LCS LCSidx LCS_length + diff sdiff compact_diff + traverse_sequences traverse_balanced +); + +# McIlroy-Hunt diff algorithm +# Adapted from the Smalltalk code of Mario I. Wolczko, +# by Ned Konz, perl@bike-nomad.com +# Updates by Tye McQueen, http://perlmonks.org/?node=tye + +# Create a hash that maps each element of $aCollection to the set of +# positions it occupies in $aCollection, restricted to the elements +# within the range of indexes specified by $start and $end. +# The fourth parameter is a subroutine reference that will be called to +# generate a string to use as a key. +# Additional parameters, if any, will be passed to this subroutine. +# +# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen ); + +sub _withPositionsOfInInterval +{ + my $aCollection = shift; # array ref + my $start = shift; + my $end = shift; + my $keyGen = shift; + my %d; + my $index; + for ( $index = $start ; $index <= $end ; $index++ ) + { + my $element = $aCollection->[$index]; + my $key = &$keyGen( $element, @_ ); + if ( exists( $d{$key} ) ) + { + unshift ( @{ $d{$key} }, $index ); + } + else + { + $d{$key} = [$index]; + } + } + return wantarray ? %d : \%d; +} + +# Find the place at which aValue would normally be inserted into the +# array. If that place is already occupied by aValue, do nothing, and +# return undef. If the place does not exist (i.e., it is off the end of +# the array), add it to the end, otherwise replace the element at that +# point with aValue. It is assumed that the array's values are numeric. +# This is where the bulk (75%) of the time is spent in this module, so +# try to make it fast! + +sub _replaceNextLargerWith +{ + my ( $array, $aValue, $high ) = @_; + $high ||= $#$array; + + # off the end? + if ( $high == -1 || $aValue > $array->[-1] ) + { + push ( @$array, $aValue ); + return $high + 1; + } + + # binary search for insertion point... + my $low = 0; + my $index; + my $found; + while ( $low <= $high ) + { + $index = ( $high + $low ) / 2; + + # $index = int(( $high + $low ) / 2); # without 'use integer' + $found = $array->[$index]; + + if ( $aValue == $found ) + { + return undef; + } + elsif ( $aValue > $found ) + { + $low = $index + 1; + } + else + { + $high = $index - 1; + } + } + + # now insertion point is in $low. + $array->[$low] = $aValue; # overwrite next larger + return $low; +} + +# This method computes the longest common subsequence in $a and $b. + +# Result is array or ref, whose contents is such that +# $a->[ $i ] == $b->[ $result[ $i ] ] +# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined. + +# An additional argument may be passed; this is a hash or key generating +# function that should return a string that uniquely identifies the given +# element. It should be the case that if the key is the same, the elements +# will compare the same. If this parameter is undef or missing, the key +# will be the element as a string. + +# By default, comparisons will use "eq" and elements will be turned into keys +# using the default stringizing operator '""'. + +# Additional parameters, if any, will be passed to the key generation +# routine. + +sub _longestCommonSubsequence +{ + my $a = shift; # array ref or hash ref + my $b = shift; # array ref or hash ref + my $counting = shift; # scalar + my $keyGen = shift; # code ref + my $compare; # code ref + + if ( ref($a) eq 'HASH' ) + { # prepared hash must be in $b + my $tmp = $b; + $b = $a; + $a = $tmp; + } + + # Check for bogus (non-ref) argument values + if ( !ref($a) || !ref($b) ) + { + my @callerInfo = caller(1); + die 'error: must pass array or hash references to ' . $callerInfo[3]; + } + + # set up code refs + # Note that these are optimized. + if ( !defined($keyGen) ) # optimize for strings + { + $keyGen = sub { $_[0] }; + $compare = sub { my ( $a, $b ) = @_; $a eq $b }; + } + else + { + $compare = sub { + my $a = shift; + my $b = shift; + &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ ); + }; + } + + my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] ); + my ( $prunedCount, $bMatches ) = ( 0, {} ); + + if ( ref($b) eq 'HASH' ) # was $bMatches prepared for us? + { + $bMatches = $b; + } + else + { + my ( $bStart, $bFinish ) = ( 0, $#$b ); + + # First we prune off any common elements at the beginning + while ( $aStart <= $aFinish + and $bStart <= $bFinish + and &$compare( $a->[$aStart], $b->[$bStart], @_ ) ) + { + $matchVector->[ $aStart++ ] = $bStart++; + $prunedCount++; + } + + # now the end + while ( $aStart <= $aFinish + and $bStart <= $bFinish + and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) ) + { + $matchVector->[ $aFinish-- ] = $bFinish--; + $prunedCount++; + } + + # Now compute the equivalence classes of positions of elements + $bMatches = + _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ ); + } + my $thresh = []; + my $links = []; + + my ( $i, $ai, $j, $k ); + for ( $i = $aStart ; $i <= $aFinish ; $i++ ) + { + $ai = &$keyGen( $a->[$i], @_ ); + if ( exists( $bMatches->{$ai} ) ) + { + $k = 0; + for $j ( @{ $bMatches->{$ai} } ) + { + + # optimization: most of the time this will be true + if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j ) + { + $thresh->[$k] = $j; + } + else + { + $k = _replaceNextLargerWith( $thresh, $j, $k ); + } + + # oddly, it's faster to always test this (CPU cache?). + if ( defined($k) ) + { + $links->[$k] = + [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ]; + } + } + } + } + + if (@$thresh) + { + return $prunedCount + @$thresh if $counting; + for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] ) + { + $matchVector->[ $link->[1] ] = $link->[2]; + } + } + elsif ($counting) + { + return $prunedCount; + } + + return wantarray ? @$matchVector : $matchVector; +} + +sub traverse_sequences +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $callbacks = shift || {}; + my $keyGen = shift; + my $matchCallback = $callbacks->{'MATCH'} || sub { }; + my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; + my $finishedACallback = $callbacks->{'A_FINISHED'}; + my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; + my $finishedBCallback = $callbacks->{'B_FINISHED'}; + my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); + + # Process all the lines in @$matchVector + my $lastA = $#$a; + my $lastB = $#$b; + my $bi = 0; + my $ai; + + for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ ) + { + my $bLine = $matchVector->[$ai]; + if ( defined($bLine) ) # matched + { + &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine; + &$matchCallback( $ai, $bi++, @_ ); + } + else + { + &$discardACallback( $ai, $bi, @_ ); + } + } + + # The last entry (if any) processed was a match. + # $ai and $bi point just past the last matching lines in their sequences. + + while ( $ai <= $lastA or $bi <= $lastB ) + { + + # last A? + if ( $ai == $lastA + 1 and $bi <= $lastB ) + { + if ( defined($finishedACallback) ) + { + &$finishedACallback( $lastA, @_ ); + $finishedACallback = undef; + } + else + { + &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB; + } + } + + # last B? + if ( $bi == $lastB + 1 and $ai <= $lastA ) + { + if ( defined($finishedBCallback) ) + { + &$finishedBCallback( $lastB, @_ ); + $finishedBCallback = undef; + } + else + { + &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA; + } + } + + &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA; + &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB; + } + + return 1; +} + +sub traverse_balanced +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $callbacks = shift || {}; + my $keyGen = shift; + my $matchCallback = $callbacks->{'MATCH'} || sub { }; + my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; + my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; + my $changeCallback = $callbacks->{'CHANGE'}; + my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); + + # Process all the lines in match vector + my $lastA = $#$a; + my $lastB = $#$b; + my $bi = 0; + my $ai = 0; + my $ma = -1; + my $mb; + + while (1) + { + + # Find next match indices $ma and $mb + do { + $ma++; + } while( + $ma <= $#$matchVector + && !defined $matchVector->[$ma] + ); + + last if $ma > $#$matchVector; # end of matchVector? + $mb = $matchVector->[$ma]; + + # Proceed with discard a/b or change events until + # next match + while ( $ai < $ma || $bi < $mb ) + { + + if ( $ai < $ma && $bi < $mb ) + { + + # Change + if ( defined $changeCallback ) + { + &$changeCallback( $ai++, $bi++, @_ ); + } + else + { + &$discardACallback( $ai++, $bi, @_ ); + &$discardBCallback( $ai, $bi++, @_ ); + } + } + elsif ( $ai < $ma ) + { + &$discardACallback( $ai++, $bi, @_ ); + } + else + { + + # $bi < $mb + &$discardBCallback( $ai, $bi++, @_ ); + } + } + + # Match + &$matchCallback( $ai++, $bi++, @_ ); + } + + while ( $ai <= $lastA || $bi <= $lastB ) + { + if ( $ai <= $lastA && $bi <= $lastB ) + { + + # Change + if ( defined $changeCallback ) + { + &$changeCallback( $ai++, $bi++, @_ ); + } + else + { + &$discardACallback( $ai++, $bi, @_ ); + &$discardBCallback( $ai, $bi++, @_ ); + } + } + elsif ( $ai <= $lastA ) + { + &$discardACallback( $ai++, $bi, @_ ); + } + else + { + + # $bi <= $lastB + &$discardBCallback( $ai, $bi++, @_ ); + } + } + + return 1; +} + +sub prepare +{ + my $a = shift; # array ref + my $keyGen = shift; # code ref + + # set up code ref + $keyGen = sub { $_[0] } unless defined($keyGen); + + return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ ); +} + +sub LCS +{ + my $a = shift; # array ref + my $b = shift; # array ref or hash ref + my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ ); + my @retval; + my $i; + for ( $i = 0 ; $i <= $#$matchVector ; $i++ ) + { + if ( defined( $matchVector->[$i] ) ) + { + push ( @retval, $a->[$i] ); + } + } + return wantarray ? @retval : \@retval; +} + +sub LCS_length +{ + my $a = shift; # array ref + my $b = shift; # array ref or hash ref + return _longestCommonSubsequence( $a, $b, 1, @_ ); +} + +sub LCSidx +{ + my $a= shift @_; + my $b= shift @_; + my $match= _longestCommonSubsequence( $a, $b, 0, @_ ); + my @am= grep defined $match->[$_], 0..$#$match; + my @bm= @{$match}[@am]; + return \@am, \@bm; +} + +sub compact_diff +{ + my $a= shift @_; + my $b= shift @_; + my( $am, $bm )= LCSidx( $a, $b, @_ ); + my @cdiff; + my( $ai, $bi )= ( 0, 0 ); + push @cdiff, $ai, $bi; + while( 1 ) { + while( @$am && $ai == $am->[0] && $bi == $bm->[0] ) { + shift @$am; + shift @$bm; + ++$ai, ++$bi; + } + push @cdiff, $ai, $bi; + last if ! @$am; + $ai = $am->[0]; + $bi = $bm->[0]; + push @cdiff, $ai, $bi; + } + push @cdiff, 0+@$a, 0+@$b + if $ai < @$a || $bi < @$b; + return wantarray ? @cdiff : \@cdiff; +} + +sub diff +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $retval = []; + my $hunk = []; + my $discard = sub { + push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ]; + }; + my $add = sub { + push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ]; + }; + my $match = sub { + push @$retval, $hunk + if 0 < @$hunk; + $hunk = [] + }; + traverse_sequences( $a, $b, + { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ ); + &$match(); + return wantarray ? @$retval : $retval; +} + +sub sdiff +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $retval = []; + my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) }; + my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) }; + my $change = sub { + push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] ); + }; + my $match = sub { + push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] ); + }; + traverse_balanced( + $a, + $b, + { + MATCH => $match, + DISCARD_A => $discard, + DISCARD_B => $add, + CHANGE => $change, + }, + @_ + ); + return wantarray ? @$retval : $retval; +} + +######################################## +my $Root= __PACKAGE__; +package Algorithm::Diff::_impl; +use strict; + +sub _Idx() { 0 } # $me->[_Idx]: Ref to array of hunk indices + # 1 # $me->[1]: Ref to first sequence + # 2 # $me->[2]: Ref to second sequence +sub _End() { 3 } # $me->[_End]: Diff between forward and reverse pos +sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items +sub _Base() { 5 } # $me->[_Base]: Added to range's min and max +sub _Pos() { 6 } # $me->[_Pos]: Which hunk is currently selected +sub _Off() { 7 } # $me->[_Off]: Offset into _Idx for current position +sub _Min() { -2 } # Added to _Off to get min instead of max+1 + +sub Die +{ + require Carp; + Carp::confess( @_ ); +} + +sub _ChkPos +{ + my( $me )= @_; + return if $me->[_Pos]; + my $meth= ( caller(1) )[3]; + Die( "Called $meth on 'reset' object" ); +} + +sub _ChkSeq +{ + my( $me, $seq )= @_; + return $seq + $me->[_Off] + if 1 == $seq || 2 == $seq; + my $meth= ( caller(1) )[3]; + Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" ); +} + +sub getObjPkg +{ + my( $us )= @_; + return ref $us if ref $us; + return $us . "::_obj"; +} + +sub new +{ + my( $us, $seq1, $seq2, $opts ) = @_; + my @args; + for( $opts->{keyGen} ) { + push @args, $_ if $_; + } + for( $opts->{keyGenArgs} ) { + push @args, @$_ if $_; + } + my $cdif= Diff::compact_diff( $seq1, $seq2, @args ); + my $same= 1; + if( 0 == $cdif->[2] && 0 == $cdif->[3] ) { + $same= 0; + splice @$cdif, 0, 2; + } + my @obj= ( $cdif, $seq1, $seq2 ); + $obj[_End] = (1+@$cdif)/2; + $obj[_Same] = $same; + $obj[_Base] = 0; + my $me = bless \@obj, $us->getObjPkg(); + $me->Reset( 0 ); + return $me; +} + +sub Reset +{ + my( $me, $pos )= @_; + $pos= int( $pos || 0 ); + $pos += $me->[_End] + if $pos < 0; + $pos= 0 + if $pos < 0 || $me->[_End] <= $pos; + $me->[_Pos]= $pos || !1; + $me->[_Off]= 2*$pos - 1; + return $me; +} + +sub Base +{ + my( $me, $base )= @_; + my $oldBase= $me->[_Base]; + $me->[_Base]= 0+$base if defined $base; + return $oldBase; +} + +sub Copy +{ + my( $me, $pos, $base )= @_; + my @obj= @$me; + my $you= bless \@obj, ref($me); + $you->Reset( $pos ) if defined $pos; + $you->Base( $base ); + return $you; +} + +sub Next { + my( $me, $steps )= @_; + $steps= 1 if ! defined $steps; + if( $steps ) { + my $pos= $me->[_Pos]; + my $new= $pos + $steps; + $new= 0 if $pos && $new < 0; + $me->Reset( $new ) + } + return $me->[_Pos]; +} + +sub Prev { + my( $me, $steps )= @_; + $steps= 1 if ! defined $steps; + my $pos= $me->Next(-$steps); + $pos -= $me->[_End] if $pos; + return $pos; +} + +sub Diff { + my( $me )= @_; + $me->_ChkPos(); + return 0 if $me->[_Same] == ( 1 & $me->[_Pos] ); + my $ret= 0; + my $off= $me->[_Off]; + for my $seq ( 1, 2 ) { + $ret |= $seq + if $me->[_Idx][ $off + $seq + _Min ] + < $me->[_Idx][ $off + $seq ]; + } + return $ret; +} + +sub Min { + my( $me, $seq, $base )= @_; + $me->_ChkPos(); + my $off= $me->_ChkSeq($seq); + $base= $me->[_Base] if !defined $base; + return $base + $me->[_Idx][ $off + _Min ]; +} + +sub Max { + my( $me, $seq, $base )= @_; + $me->_ChkPos(); + my $off= $me->_ChkSeq($seq); + $base= $me->[_Base] if !defined $base; + return $base + $me->[_Idx][ $off ] -1; +} + +sub Range { + my( $me, $seq, $base )= @_; + $me->_ChkPos(); + my $off = $me->_ChkSeq($seq); + if( !wantarray ) { + return $me->[_Idx][ $off ] + - $me->[_Idx][ $off + _Min ]; + } + $base= $me->[_Base] if !defined $base; + return ( $base + $me->[_Idx][ $off + _Min ] ) + .. ( $base + $me->[_Idx][ $off ] - 1 ); +} + +sub Items { + my( $me, $seq )= @_; + $me->_ChkPos(); + my $off = $me->_ChkSeq($seq); + if( !wantarray ) { + return $me->[_Idx][ $off ] + - $me->[_Idx][ $off + _Min ]; + } + return + @{$me->[$seq]}[ + $me->[_Idx][ $off + _Min ] + .. ( $me->[_Idx][ $off ] - 1 ) + ]; +} + +sub Same { + my( $me )= @_; + $me->_ChkPos(); + return wantarray ? () : 0 + if $me->[_Same] != ( 1 & $me->[_Pos] ); + return $me->Items(1); +} + +my %getName; +BEGIN { + %getName= ( + same => \&Same, + diff => \&Diff, + base => \&Base, + min => \&Min, + max => \&Max, + range=> \&Range, + items=> \&Items, # same thing + ); +} + +sub Get +{ + my $me= shift @_; + $me->_ChkPos(); + my @value; + for my $arg ( @_ ) { + for my $word ( split ' ', $arg ) { + my $meth; + if( $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/ + || not $meth= $getName{ lc $2 } + ) { + Die( $Root, ", Get: Invalid request ($word)" ); + } + my( $base, $name, $seq )= ( $1, $2, $3 ); + push @value, scalar( + 4 == length($name) + ? $meth->( $me ) + : $meth->( $me, $seq, $base ) + ); + } + } + if( wantarray ) { + return @value; + } elsif( 1 == @value ) { + return $value[0]; + } + Die( 0+@value, " values requested from ", + $Root, "'s Get in scalar context" ); +} + + +my $Obj= getObjPkg($Root); +no strict 'refs'; + +for my $meth ( qw( new getObjPkg ) ) { + *{$Root."::".$meth} = \&{$meth}; + *{$Obj ."::".$meth} = \&{$meth}; +} +for my $meth ( qw( + Next Prev Reset Copy Base Diff + Same Items Range Min Max Get + _ChkPos _ChkSeq +) ) { + *{$Obj."::".$meth} = \&{$meth}; +} + +1; +__END__ + +=head1 NAME + +Algorithm::Diff - Compute `intelligent' differences between two files / lists + +=head1 SYNOPSIS + + require Algorithm::Diff; + + # This example produces traditional 'diff' output: + + my $diff = Algorithm::Diff->new( \@seq1, \@seq2 ); + + $diff->Base( 1 ); # Return line numbers, not indices + while( $diff->Next() ) { + next if $diff->Same(); + my $sep = ''; + if( ! $diff->Items(2) ) { + printf "%d,%dd%d\n", + $diff->Get(qw( Min1 Max1 Max2 )); + } elsif( ! $diff->Items(1) ) { + printf "%da%d,%d\n", + $diff->Get(qw( Max1 Min2 Max2 )); + } else { + $sep = "---\n"; + printf "%d,%dc%d,%d\n", + $diff->Get(qw( Min1 Max1 Min2 Max2 )); + } + print "< $_" for $diff->Items(1); + print $sep; + print "> $_" for $diff->Items(2); + } + + + # Alternate interfaces: + + use Algorithm::Diff qw( + LCS LCS_length LCSidx + diff sdiff compact_diff + traverse_sequences traverse_balanced ); + + @lcs = LCS( \@seq1, \@seq2 ); + $lcsref = LCS( \@seq1, \@seq2 ); + $count = LCS_length( \@seq1, \@seq2 ); + + ( $seq1idxref, $seq2idxref ) = LCSidx( \@seq1, \@seq2 ); + + + # Complicated interfaces: + + @diffs = diff( \@seq1, \@seq2 ); + + @sdiffs = sdiff( \@seq1, \@seq2 ); + + @cdiffs = compact_diff( \@seq1, \@seq2 ); + + traverse_sequences( + \@seq1, + \@seq2, + { MATCH => \&callback1, + DISCARD_A => \&callback2, + DISCARD_B => \&callback3, + }, + \&key_generator, + @extra_args, + ); + + traverse_balanced( + \@seq1, + \@seq2, + { MATCH => \&callback1, + DISCARD_A => \&callback2, + DISCARD_B => \&callback3, + CHANGE => \&callback4, + }, + \&key_generator, + @extra_args, + ); + + +=head1 INTRODUCTION + +(by Mark-Jason Dominus) + +I once read an article written by the authors of C; they said +that they worked very hard on the algorithm until they found the +right one. + +I think what they ended up using (and I hope someone will correct me, +because I am not very confident about this) was the `longest common +subsequence' method. In the LCS problem, you have two sequences of +items: + + a b c d f g h j q z + + a b c d e f g i j k r x y z + +and you want to find the longest sequence of items that is present in +both original sequences in the same order. That is, you want to find +a new sequence I which can be obtained from the first sequence by +deleting some items, and from the secend sequence by deleting other +items. You also want I to be as long as possible. In this case I +is + + a b c d f g j z + +From there it's only a small step to get diff-like output: + + e h i k q r x y + + - + + - + + + + +This module solves the LCS problem. It also includes a canned function +to generate C-like output. + +It might seem from the example above that the LCS of two sequences is +always pretty obvious, but that's not always the case, especially when +the two sequences have many repeated elements. For example, consider + + a x b y c z p d q + a b c a x b y c z + +A naive approach might start by matching up the C and C that +appear at the beginning of each sequence, like this: + + a x b y c z p d q + a b c a b y c z + +This finds the common subsequence C. But actually, the LCS +is C: + + a x b y c z p d q + a b c a x b y c z + +or + + a x b y c z p d q + a b c a x b y c z + +=head1 USAGE + +(See also the README file and several example +scripts include with this module.) + +This module now provides an object-oriented interface that uses less +memory and is easier to use than most of the previous procedural +interfaces. It also still provides several exportable functions. We'll +deal with these in ascending order of difficulty: C, +C, C, OO interface, C, C, C, +C, and C. + +=head2 C + +Given references to two lists of items, LCS returns an array containing +their longest common subsequence. In scalar context, it returns a +reference to such a list. + + @lcs = LCS( \@seq1, \@seq2 ); + $lcsref = LCS( \@seq1, \@seq2 ); + +C may be passed an optional third parameter; this is a CODE +reference to a key generation function. See L. + + @lcs = LCS( \@seq1, \@seq2, \&keyGen, @args ); + $lcsref = LCS( \@seq1, \@seq2, \&keyGen, @args ); + +Additional parameters, if any, will be passed to the key generation +routine. + +=head2 C + +This is just like C except it only returns the length of the +longest common subsequence. This provides a performance gain of about +9% compared to C. + +=head2 C + +Like C except it returns references to two arrays. The first array +contains the indices into @seq1 where the LCS items are located. The +second array contains the indices into @seq2 where the LCS items are located. + +Therefore, the following three lists will contain the same values: + + my( $idx1, $idx2 ) = LCSidx( \@seq1, \@seq2 ); + my @list1 = @seq1[ @$idx1 ]; + my @list2 = @seq2[ @$idx2 ]; + my @list3 = LCS( \@seq1, \@seq2 ); + +=head2 C + + $diff = Algorithm::Diffs->new( \@seq1, \@seq2 ); + $diff = Algorithm::Diffs->new( \@seq1, \@seq2, \%opts ); + +C computes the smallest set of additions and deletions necessary +to turn the first sequence into the second and compactly records them +in the object. + +You use the object to iterate over I, where each hunk represents +a contiguous section of items which should be added, deleted, replaced, +or left unchanged. + +=over 4 + +The following summary of all of the methods looks a lot like Perl code +but some of the symbols have different meanings: + + [ ] Encloses optional arguments + : Is followed by the default value for an optional argument + | Separates alternate return results + +Method summary: + + $obj = Algorithm::Diff->new( \@seq1, \@seq2, [ \%opts ] ); + $pos = $obj->Next( [ $count : 1 ] ); + $revPos = $obj->Prev( [ $count : 1 ] ); + $obj = $obj->Reset( [ $pos : 0 ] ); + $copy = $obj->Copy( [ $pos, [ $newBase ] ] ); + $oldBase = $obj->Base( [ $newBase ] ); + +Note that all of the following methods C if used on an object that +is "reset" (not currently pointing at any hunk). + + $bits = $obj->Diff( ); + @items|$cnt = $obj->Same( ); + @items|$cnt = $obj->Items( $seqNum ); + @idxs |$cnt = $obj->Range( $seqNum, [ $base ] ); + $minIdx = $obj->Min( $seqNum, [ $base ] ); + $maxIdx = $obj->Max( $seqNum, [ $base ] ); + @values = $obj->Get( @names ); + +Passing in C for an optional argument is always treated the same +as if no argument were passed in. + +=item C + + $pos = $diff->Next(); # Move forward 1 hunk + $pos = $diff->Next( 2 ); # Move forward 2 hunks + $pos = $diff->Next(-5); # Move backward 5 hunks + +C moves the object to point at the next hunk. The object starts +out "reset", which means it isn't pointing at any hunk. If the object +is reset, then C moves to the first hunk. + +C returns a true value iff the move didn't go past the last hunk. +So C will return true iff the object is not reset. + +Actually, C returns the object's new position, which is a number +between 1 and the number of hunks (inclusive), or returns a false value. + +=item C + +C is almost identical to C; it moves to the $Nth +previous hunk. On a 'reset' object, C [and C] move +to the last hunk. + +The position returned by C is relative to the I of the +hunks; -1 for the last hunk, -2 for the second-to-last, etc. + +=item C + + $diff->Reset(); # Reset the object's position + $diff->Reset($pos); # Move to the specified hunk + $diff->Reset(1); # Move to the first hunk + $diff->Reset(-1); # Move to the last hunk + +C returns the object, so, for example, you could use +C<< $diff->Reset()->Next(-1) >> to get the number of hunks. + +=item C + + $copy = $diff->Copy( $newPos, $newBase ); + +C returns a copy of the object. The copy and the orignal object +share most of their data, so making copies takes very little memory. +The copy maintains its own position (separate from the original), which +is the main purpose of copies. It also maintains its own base. + +By default, the copy's position starts out the same as the original +object's position. But C takes an optional first argument to set the +new position, so the following three snippets are equivalent: + + $copy = $diff->Copy($pos); + + $copy = $diff->Copy(); + $copy->Reset($pos); + + $copy = $diff->Copy()->Reset($pos); + +C takes an optional second argument to set the base for +the copy. If you wish to change the base of the copy but leave +the position the same as in the original, here are two +equivalent ways: + + $copy = $diff->Copy(); + $copy->Base( 0 ); + + $copy = $diff->Copy(undef,0); + +Here are two equivalent way to get a "reset" copy: + + $copy = $diff->Copy(0); + + $copy = $diff->Copy()->Reset(); + +=item C + + $bits = $obj->Diff(); + +C returns a true value iff the current hunk contains items that are +different between the two sequences. It actually returns one of the +follow 4 values: + +=over 4 + +=item 3 + +C<3==(1|2)>. This hunk contains items from @seq1 and the items +from @seq2 that should replace them. Both sequence 1 and 2 +contain changed items so both the 1 and 2 bits are set. + +=item 2 + +This hunk only contains items from @seq2 that should be inserted (not +items from @seq1). Only sequence 2 contains changed items so only the 2 +bit is set. + +=item 1 + +This hunk only contains items from @seq1 that should be deleted (not +items from @seq2). Only sequence 1 contains changed items so only the 1 +bit is set. + +=item 0 + +This means that the items in this hunk are the same in both sequences. +Neither sequence 1 nor 2 contain changed items so neither the 1 nor the +2 bits are set. + +=back + +=item C + +C returns a true value iff the current hunk contains items that +are the same in both sequences. It actually returns the list of items +if they are the same or an emty list if they aren't. In a scalar +context, it returns the size of the list. + +=item C + + $count = $diff->Items(2); + @items = $diff->Items($seqNum); + +C returns the (number of) items from the specified sequence that +are part of the current hunk. + +If the current hunk contains only insertions, then +C<< $diff->Items(1) >> will return an empty list (0 in a scalar conext). +If the current hunk contains only deletions, then C<< $diff->Items(2) >> +will return an empty list (0 in a scalar conext). + +If the hunk contains replacements, then both C<< $diff->Items(1) >> and +C<< $diff->Items(2) >> will return different, non-empty lists. + +Otherwise, the hunk contains identical items and all of the following +will return the same lists: + + @items = $diff->Items(1); + @items = $diff->Items(2); + @items = $diff->Same(); + +=item C + + $count = $diff->Range( $seqNum ); + @indices = $diff->Range( $seqNum ); + @indices = $diff->Range( $seqNum, $base ); + +C is like C except that it returns a list of I to +the items rather than the items themselves. By default, the index of +the first item (in each sequence) is 0 but this can be changed by +calling the C method. So, by default, the following two snippets +return the same lists: + + @list = $diff->Items(2); + @list = @seq2[ $diff->Range(2) ]; + +You can also specify the base to use as the second argument. So the +following two snippets I return the same lists: + + @list = $diff->Items(1); + @list = @seq1[ $diff->Range(1,0) ]; + +=item C + + $curBase = $diff->Base(); + $oldBase = $diff->Base($newBase); + +C sets and/or returns the current base (usually 0 or 1) that is +used when you request range information. The base defaults to 0 so +that range information is returned as array indices. You can set the +base to 1 if you want to report traditional line numbers instead. + +=item C + + $min1 = $diff->Min(1); + $min = $diff->Min( $seqNum, $base ); + +C returns the first value that C would return (given the +same arguments) or returns C if C would return an empty +list. + +=item C + +C returns the last value that C would return or C. + +=item C + + ( $n, $x, $r ) = $diff->Get(qw( min1 max1 range1 )); + @values = $diff->Get(qw( 0min2 1max2 range2 same base )); + +C returns one or more scalar values. You pass in a list of the +names of the values you want returned. Each name must match one of the +following regexes: + + /^(-?\d+)?(min|max)[12]$/i + /^(range[12]|same|diff|base)$/i + +The 1 or 2 after a name says which sequence you want the information +for (and where allowed, it is required). The optional number before +"min" or "max" is the base to use. So the following equalities hold: + + $diff->Get('min1') == $diff->Min(1) + $diff->Get('0min2') == $diff->Min(2,0) + +Using C in a scalar context when you've passed in more than one +name is a fatal error (C is called). + +=back + +=head2 C + +Given a reference to a list of items, C returns a reference +to a hash which can be used when comparing this sequence to other +sequences with C or C. + + $prep = prepare( \@seq1 ); + for $i ( 0 .. 10_000 ) + { + @lcs = LCS( $prep, $seq[$i] ); + # do something useful with @lcs + } + +C may be passed an optional third parameter; this is a CODE +reference to a key generation function. See L. + + $prep = prepare( \@seq1, \&keyGen ); + for $i ( 0 .. 10_000 ) + { + @lcs = LCS( $seq[$i], $prep, \&keyGen ); + # do something useful with @lcs + } + +Using C provides a performance gain of about 50% when calling LCS +many times compared with not preparing. + +=head2 C + + @diffs = diff( \@seq1, \@seq2 ); + $diffs_ref = diff( \@seq1, \@seq2 ); + +C computes the smallest set of additions and deletions necessary +to turn the first sequence into the second, and returns a description +of these changes. The description is a list of I; each hunk +represents a contiguous section of items which should be added, +deleted, or replaced. (Hunks containing unchanged items are not +included.) + +The return value of C is a list of hunks, or, in scalar context, a +reference to such a list. If there are no differences, the list will be +empty. + +Here is an example. Calling C for the following two sequences: + + a b c e h j l m n p + b c d e f j k l m r s t + +would produce the following list: + + ( + [ [ '-', 0, 'a' ] ], + + [ [ '+', 2, 'd' ] ], + + [ [ '-', 4, 'h' ], + [ '+', 4, 'f' ] ], + + [ [ '+', 6, 'k' ] ], + + [ [ '-', 8, 'n' ], + [ '-', 9, 'p' ], + [ '+', 9, 'r' ], + [ '+', 10, 's' ], + [ '+', 11, 't' ] ], + ) + +There are five hunks here. The first hunk says that the C at +position 0 of the first sequence should be deleted (C<->). The second +hunk says that the C at position 2 of the second sequence should +be inserted (C<+>). The third hunk says that the C at position 4 +of the first sequence should be removed and replaced with the C +from position 4 of the second sequence. And so on. + +C may be passed an optional third parameter; this is a CODE +reference to a key generation function. See L. + +Additional parameters, if any, will be passed to the key generation +routine. + +=head2 C + + @sdiffs = sdiff( \@seq1, \@seq2 ); + $sdiffs_ref = sdiff( \@seq1, \@seq2 ); + +C computes all necessary components to show two sequences +and their minimized differences side by side, just like the +Unix-utility I does: + + same same + before | after + old < - + - > new + +It returns a list of array refs, each pointing to an array of +display instructions. In scalar context it returns a reference +to such a list. If there are no differences, the list will have one +entry per item, each indicating that the item was unchanged. + +Display instructions consist of three elements: A modifier indicator +(C<+>: Element added, C<->: Element removed, C: Element unmodified, +C: Element changed) and the value of the old and new elements, to +be displayed side-by-side. + +An C of the following two sequences: + + a b c e h j l m n p + b c d e f j k l m r s t + +results in + + ( [ '-', 'a', '' ], + [ 'u', 'b', 'b' ], + [ 'u', 'c', 'c' ], + [ '+', '', 'd' ], + [ 'u', 'e', 'e' ], + [ 'c', 'h', 'f' ], + [ 'u', 'j', 'j' ], + [ '+', '', 'k' ], + [ 'u', 'l', 'l' ], + [ 'u', 'm', 'm' ], + [ 'c', 'n', 'r' ], + [ 'c', 'p', 's' ], + [ '+', '', 't' ], + ) + +C may be passed an optional third parameter; this is a CODE +reference to a key generation function. See L. + +Additional parameters, if any, will be passed to the key generation +routine. + +=head2 C + +C is much like C except it returns a much more +compact description consisting of just one flat list of indices. An +example helps explain the format: + + my @a = qw( a b c e h j l m n p ); + my @b = qw( b c d e f j k l m r s t ); + @cdiff = compact_diff( \@a, \@b ); + # Returns: + # @a @b @a @b + # start start values values + ( 0, 0, # = + 0, 0, # a ! + 1, 0, # b c = b c + 3, 2, # ! d + 3, 3, # e = e + 4, 4, # f ! h + 5, 5, # j = j + 6, 6, # ! k + 6, 7, # l m = l m + 8, 9, # n p ! r s t + 10, 12, # + ); + +The 0th, 2nd, 4th, etc. entries are all indices into @seq1 (@a in the +above example) indicating where a hunk begins. The 1st, 3rd, 5th, etc. +entries are all indices into @seq2 (@b in the above example) indicating +where the same hunk begins. + +So each pair of indices (except the last pair) describes where a hunk +begins (in each sequence). Since each hunk must end at the item just +before the item that starts the next hunk, the next pair of indices can +be used to determine where the hunk ends. + +So, the first 4 entries (0..3) describe the first hunk. Entries 0 and 1 +describe where the first hunk begins (and so are always both 0). +Entries 2 and 3 describe where the next hunk begins, so subtracting 1 +from each tells us where the first hunk ends. That is, the first hunk +contains items C<$diff[0]> through C<$diff[2] - 1> of the first sequence +and contains items C<$diff[1]> through C<$diff[3] - 1> of the second +sequence. + +In other words, the first hunk consists of the following two lists of items: + + # 1st pair 2nd pair + # of indices of indices + @list1 = @a[ $cdiff[0] .. $cdiff[2]-1 ]; + @list2 = @b[ $cdiff[1] .. $cdiff[3]-1 ]; + # Hunk start Hunk end + +Note that the hunks will always alternate between those that are part of +the LCS (those that contain unchanged items) and those that contain +changes. This means that all we need to be told is whether the first +hunk is a 'same' or 'diff' hunk and we can determine which of the other +hunks contain 'same' items or 'diff' items. + +By convention, we always make the first hunk contain unchanged items. +So the 1st, 3rd, 5th, etc. hunks (all odd-numbered hunks if you start +counting from 1) all contain unchanged items. And the 2nd, 4th, 6th, +etc. hunks (all even-numbered hunks if you start counting from 1) all +contain changed items. + +Since @a and @b don't begin with the same value, the first hunk in our +example is empty (otherwise we'd violate the above convention). Note +that the first 4 index values in our example are all zero. Plug these +values into our previous code block and we get: + + @hunk1a = @a[ 0 .. 0-1 ]; + @hunk1b = @b[ 0 .. 0-1 ]; + +And C<0..-1> returns the empty list. + +Move down one pair of indices (2..5) and we get the offset ranges for +the second hunk, which contains changed items. + +Since C<@diff[2..5]> contains (0,0,1,0) in our example, the second hunk +consists of these two lists of items: + + @hunk2a = @a[ $cdiff[2] .. $cdiff[4]-1 ]; + @hunk2b = @b[ $cdiff[3] .. $cdiff[5]-1 ]; + # or + @hunk2a = @a[ 0 .. 1-1 ]; + @hunk2b = @b[ 0 .. 0-1 ]; + # or + @hunk2a = @a[ 0 .. 0 ]; + @hunk2b = @b[ 0 .. -1 ]; + # or + @hunk2a = ( 'a' ); + @hunk2b = ( ); + +That is, we would delete item 0 ('a') from @a. + +Since C<@diff[4..7]> contains (1,0,3,2) in our example, the third hunk +consists of these two lists of items: + + @hunk3a = @a[ $cdiff[4] .. $cdiff[6]-1 ]; + @hunk3a = @b[ $cdiff[5] .. $cdiff[7]-1 ]; + # or + @hunk3a = @a[ 1 .. 3-1 ]; + @hunk3a = @b[ 0 .. 2-1 ]; + # or + @hunk3a = @a[ 1 .. 2 ]; + @hunk3a = @b[ 0 .. 1 ]; + # or + @hunk3a = qw( b c ); + @hunk3a = qw( b c ); + +Note that this third hunk contains unchanged items as our convention demands. + +You can continue this process until you reach the last two indices, +which will always be the number of items in each sequence. This is +required so that subtracting one from each will give you the indices to +the last items in each sequence. + +=head2 C + +C used to be the most general facility provided by +this module (the new OO interface is more powerful and much easier to +use). + +Imagine that there are two arrows. Arrow A points to an element of +sequence A, and arrow B points to an element of the sequence B. +Initially, the arrows point to the first elements of the respective +sequences. C will advance the arrows through the +sequences one element at a time, calling an appropriate user-specified +callback function before each advance. It willadvance the arrows in +such a way that if there are equal elements C<$A[$i]> and C<$B[$j]> +which are equal and which are part of the LCS, there will be some moment +during the execution of C when arrow A is pointing +to C<$A[$i]> and arrow B is pointing to C<$B[$j]>. When this happens, +C will call the C callback function and then +it will advance both arrows. + +Otherwise, one of the arrows is pointing to an element of its sequence +that is not part of the LCS. C will advance that +arrow and will call the C or the C callback, +depending on which arrow it advanced. If both arrows point to elements +that are not part of the LCS, then C will advance +one of them and call the appropriate callback, but it is not specified +which it will call. + +The arguments to C are the two sequences to +traverse, and a hash which specifies the callback functions, like this: + + traverse_sequences( + \@seq1, \@seq2, + { MATCH => $callback_1, + DISCARD_A => $callback_2, + DISCARD_B => $callback_3, + } + ); + +Callbacks for MATCH, DISCARD_A, and DISCARD_B are invoked with at least +the indices of the two arrows as their arguments. They are not expected +to return any values. If a callback is omitted from the table, it is +not called. + +Callbacks for A_FINISHED and B_FINISHED are invoked with at least the +corresponding index in A or B. + +If arrow A reaches the end of its sequence, before arrow B does, +C will call the C callback when it +advances arrow B, if there is such a function; if not it will call +C instead. Similarly if arrow B finishes first. +C returns when both arrows are at the ends of their +respective sequences. It returns true on success and false on failure. +At present there is no way to fail. + +C may be passed an optional fourth parameter; this +is a CODE reference to a key generation function. See L. + +Additional parameters, if any, will be passed to the key generation function. + +If you want to pass additional parameters to your callbacks, but don't +need a custom key generation function, you can get the default by +passing undef: + + traverse_sequences( + \@seq1, \@seq2, + { MATCH => $callback_1, + DISCARD_A => $callback_2, + DISCARD_B => $callback_3, + }, + undef, # default key-gen + $myArgument1, + $myArgument2, + $myArgument3, + ); + +C does not have a useful return value; you are +expected to plug in the appropriate behavior with the callback +functions. + +=head2 C + +C is an alternative to C. It +uses a different algorithm to iterate through the entries in the +computed LCS. Instead of sticking to one side and showing element changes +as insertions and deletions only, it will jump back and forth between +the two sequences and report I occurring as deletions on one +side followed immediatly by an insertion on the other side. + +In addition to the C, C, and C callbacks +supported by C, C supports +a C callback indicating that one element got C by another: + + traverse_balanced( + \@seq1, \@seq2, + { MATCH => $callback_1, + DISCARD_A => $callback_2, + DISCARD_B => $callback_3, + CHANGE => $callback_4, + } + ); + +If no C callback is specified, C +will map C events to C and C actions, +therefore resulting in a similar behaviour as C +with different order of events. + +C might be a bit slower than C, +noticable only while processing huge amounts of data. + +The C function of this module +is implemented as call to C. + +C does not have a useful return value; you are expected to +plug in the appropriate behavior with the callback functions. + +=head1 KEY GENERATION FUNCTIONS + +Most of the functions accept an optional extra parameter. This is a +CODE reference to a key generating (hashing) function that should return +a string that uniquely identifies a given element. It should be the +case that if two elements are to be considered equal, their keys should +be the same (and the other way around). If no key generation function +is provided, the key will be the element as a string. + +By default, comparisons will use "eq" and elements will be turned into keys +using the default stringizing operator '""'. + +Where this is important is when you're comparing something other than +strings. If it is the case that you have multiple different objects +that should be considered to be equal, you should supply a key +generation function. Otherwise, you have to make sure that your arrays +contain unique references. + +For instance, consider this example: + + package Person; + + sub new + { + my $package = shift; + return bless { name => '', ssn => '', @_ }, $package; + } + + sub clone + { + my $old = shift; + my $new = bless { %$old }, ref($old); + } + + sub hash + { + return shift()->{'ssn'}; + } + + my $person1 = Person->new( name => 'Joe', ssn => '123-45-6789' ); + my $person2 = Person->new( name => 'Mary', ssn => '123-47-0000' ); + my $person3 = Person->new( name => 'Pete', ssn => '999-45-2222' ); + my $person4 = Person->new( name => 'Peggy', ssn => '123-45-9999' ); + my $person5 = Person->new( name => 'Frank', ssn => '000-45-9999' ); + +If you did this: + + my $array1 = [ $person1, $person2, $person4 ]; + my $array2 = [ $person1, $person3, $person4, $person5 ]; + Algorithm::Diff::diff( $array1, $array2 ); + +everything would work out OK (each of the objects would be converted +into a string like "Person=HASH(0x82425b0)" for comparison). + +But if you did this: + + my $array1 = [ $person1, $person2, $person4 ]; + my $array2 = [ $person1, $person3, $person4->clone(), $person5 ]; + Algorithm::Diff::diff( $array1, $array2 ); + +$person4 and $person4->clone() (which have the same name and SSN) +would be seen as different objects. If you wanted them to be considered +equivalent, you would have to pass in a key generation function: + + my $array1 = [ $person1, $person2, $person4 ]; + my $array2 = [ $person1, $person3, $person4->clone(), $person5 ]; + Algorithm::Diff::diff( $array1, $array2, \&Person::hash ); + +This would use the 'ssn' field in each Person as a comparison key, and +so would consider $person4 and $person4->clone() as equal. + +You may also pass additional parameters to the key generation function +if you wish. + +=head1 ERROR CHECKING + +If you pass these routines a non-reference and they expect a reference, +they will die with a message. + +=head1 AUTHOR + +This version released by Tye McQueen (http://perlmonks.org/?node=tye). + +=head1 LICENSE + +Parts Copyright (c) 2000-2004 Ned Konz. All rights reserved. +Parts by Tye McQueen. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl. + +=head1 MAILING LIST + +Mark-Jason still maintains a mailing list. To join a low-volume mailing +list for announcements related to diff and Algorithm::Diff, send an +empty mail message to mjd-perl-diff-request@plover.com. + +=head1 CREDITS + +Versions through 0.59 (and much of this documentation) were written by: + +Mark-Jason Dominus, mjd-perl-diff@plover.com + +This version borrows some documentation and routine names from +Mark-Jason's, but Diff.pm's code was completely replaced. + +This code was adapted from the Smalltalk code of Mario Wolczko +, which is available at +ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st + +C and C were written by Mike Schilli +. + +The algorithm is that described in +I, +CACM, vol.20, no.5, pp.350-353, May 1977, with a few +minor improvements to improve the speed. + +Much work was done by Ned Konz (perl@bike-nomad.com). + +The OO interface and some other changes are by Tye McQueen. + +=cut diff --git a/buildtools/Cmake/tesh.pl b/buildtools/Cmake/tesh.pl index 907995d530..0e63b590c6 100755 --- a/buildtools/Cmake/tesh.pl +++ b/buildtools/Cmake/tesh.pl @@ -1,4 +1,4 @@ -#! /usr/bin/perl -w +#! /usr/bin/perl eval 'exec perl -S $0 ${1+"$@"}' if $running_under_some_shell; @@ -14,6 +14,10 @@ B [I] I =cut +my $path = $0; +$path =~ s|[^/]*$||; +push @INC,$path; + use Pod::Usage qw(pod2usage); use Getopt::Long qw(GetOptions); use strict; @@ -35,11 +39,13 @@ $ENV{PATH} = "$ENV{PATH}:."; # option handling helper subs sub cd_cmd { my $directory=$_[1]; - if (-e $directory) { + if (-e $directory && -d $directory) { chdir("$directory"); print "[Tesh/INFO] change directory to $directory\n"; + } elsif (-e $directory) { + die "[Tesh/CRITICAL] Cannot change directory to '$directory': it is not a directory\n"; } else { - die "[Tesh/CRITICAL] Directory not found : \"$directory\"\n"; + die "[Tesh/CRITICAL] Cannot change directory to '$directory': no such directory\n"; } } @@ -63,6 +69,7 @@ sub get_options { # temporary arrays for GetOption my @verbose = (); my @cfg; + my $log; # ignored my %opt = ( "help" => 0, @@ -81,6 +88,7 @@ sub get_options { 'cd=s' => \&cd_cmd, 'setenv=s' => \&setenv_cmd, 'cfg=s' => \@cfg, + 'log=s' => \$log, ); $opt{'verbose'} = scalar @verbose; @@ -95,15 +103,9 @@ my %opts = get_options(@ARGV); ## ## File parsing ## -my($line2,$execline,$command,$command_tesh); -my($command_executed)=0; -my($expected_result_line)=0; my($sort)=0; my($nb_arg)=0; -my(@list1,@list2,@list3,@list_of_commands)=(); -my(@buffer)=(); my($timeout)=0; -my($encore)=0; my($old_buffer); my($linebis); my($SIGABRT)=0; @@ -113,404 +115,291 @@ my($return)=-1; my($pid); my($result); my($result_err); -my($fich_name); my($forked); my($config)=""; my($tesh_command)=0; my(@buffer_tesh)=(); -# parse tesh file -open TESH_FILE, $tesh_file or die "[Tesh/CRITICAL] Unable to open $tesh_file $!\n"; - -my %cmd; # everything about the next command to run -while (defined(my $line1=)) { - chomp $line1; - if($line1 =~ /^\< \$ /){ # arg command line - $line1 =~ s/\${EXEEXT:=}//g; - $line1 =~ s/^\< \$\ *//g; - $line1 =~ s|^./lua|lua|g; - $line1 =~ s|^./ruby|ruby|g; - $line1 =~ s|^./||g; - $line1 =~ s|^tesh|./tesh|g; - $line1 =~ s/\(%i:%P@%h\)/\\\(%i:%P@%h\\\)/g; - $command_tesh = $line1; - print "[Tesh/INFO] arg_exec_line : $command_tesh\n"; +eval { + use POSIX; + sub exit_status { + my $status = shift; + if (POSIX::WIFEXITED($status)) { + return "returned code ".POSIX::WEXITSTATUS($status); + } elsif (POSIX::WIFSIGNALED($status)) { + return "got signal ".$SIG{POSIX::WTERMSIG($status)}; + } + return "Unparsable status. Is the process stopped?"; } - elsif($line1 =~ /^\< \< /){ # arg buffer line - $line1 =~ s/^\< \<\ *//g; - print "[Tesh/INFO] arg_buffer_line : $line1\n"; - push @buffer_tesh, "$line1\n"; +}; +if ($@) { # no POSIX available? + warn "POSIX not usable to parse the return value of forked child: $@\n"; + sub exit_status { + return "returned code 0"; } - elsif($line1 =~ /^\< \> /){ # arg output line - $line1 =~ s/^\< \>\ *//g; - $line1 =~ s/\r//g; - push @list2, $line1; - print "[Tesh/INFO] arg_output_line : $line1\n"; - $expected_result_line = 1; +} + +sub exec_cmd { + my %cmd = %{$_[0]}; + if ($opts{'debug'}) { + print "IN BEGIN\n"; + map {print " $_"} @{$cmd{'in'}}; + print "IN END\n"; + print "OUT BEGIN\n"; + map {print " $_"} @{$cmd{'out'}}; + print "OUT END\n"; + print "CMD: $cmd{'cmd'}\n"; } - elsif($line1 =~ /^\$ mkfile/){ # "mkfile" command line - $line1 =~ s/\$ //g; - $line1 =~ s/mkfile//g; - $fich_name = $line1; - $line1 =(); - print "[Tesh/INFO] exec_line : mkfile $fich_name\n"; - `rm -f $fich_name`; - open(FILE,">$fich_name") or die "[Tesh/CRITICAL] Unable to make file : $fich_name. $!\n"; - print FILE @buffer; - close(FILE); - @buffer = (); + + # cleanup the command line + $cmd{'cmd'} =~ s/\${EXEEXT:=}//g; + $cmd{'cmd'} =~ s|^\./||g; +# $cmd{'cmd'} =~ s|tesh|tesh.pl|g; + $cmd{'cmd'} =~ s/\(%i:%P@%h\)/\\\(%i:%P@%h\\\)/g; + $cmd{'cmd'} .= " $opts{'cfg'}" if (defined($opts{'cfg'}) && length($opts{'cfg'})); + + print "[$cmd{'file'}:$cmd{'line'}] $cmd{'cmd'}\n"; + + ### + # exec the command line + ### + $pid = open3(\*IN, \*OUT, \*OUT, $cmd{'cmd'} ); + + # if timeout specified, fork and kill executing child at the end of timeout + if ($timeout){ + $forked = fork(); + die "fork() failed: $!" unless defined $forked; + if ( $forked == 0 ) { # child + sleep $timeout; + kill(9, $pid); + exit; + } } - elsif($line1 =~ /^\$ cd/){ # "cd" command line - $line1 =~ s/\$ //g; - print "[Tesh/INFO] exec_line : $line1\n"; - $line1 =~ s/cd //g; - chdir("$line1") or die "[Tesh/CRITICAL] Unable to open $line1. $!\n"; + + # push all provided input to executing child + map { print IN "$_\n" } $cmd{'in'}; + close IN; + + # pop all output from executing child + my @got; + while(defined(my $got=)) { + $got =~ s/\r//g; + $got =~ s/^( )*//g; + chomp $got; + push @got, "$got"; + } + close OUT; + + # Cleanup the executing child, and kill the timeouter brother on need + $cmd{'return'} = 0 unless defined($cmd{'return'}); + my $wantret = "returned code ".(defined($cmd{'return'})? $cmd{'return'} : 0); + waitpid ($pid, 0); + my $gotret = exit_status($?); + if($gotret ne $wantret) { + my $msg = "Test suite `$cmd{'file'}': NOK (<$cmd{'file'}:$cmd{'line'}> $gotret)\n". + "Output of <$cmd{'file'}:$cmd{'line'}> so far:\n"; + map {$msg .= "|| $_\n"} @got; + print STDERR "$msg"; + exit(1); } - elsif($line1 =~ /^\$ /){ #command line - if($line1 =~ m|^\$ ./tesh|){ # tesh command line - $tesh_command = 1; - @buffer = @buffer_tesh; - @buffer_tesh=(); - } - $line1 =~ s/\${EXEEXT:=}//g; - $line1 =~ s/^\$\ *//g; - $line1 =~ s|^./lua|lua|g; - $line1 =~ s|^./ruby|ruby|g; - $line1 =~ s|^./||g; - $line1 =~ s|^tesh|./tesh|g; - $line1 =~ s/\(%i:%P@%h\)/\\\(%i:%P@%h\\\)/g; - $line1 = "$line1 $config"; - - if(@list1){ - print color("red"); - print "[Tesh/CRITICAL] -\n"; - print "[Tesh/CRITICAL] + @list1"; - print color("reset"), "\n"; - die;} - if(@list_of_commands){ # need parallel execution - push @list_of_commands, $line1; - print "[Tesh/INFO] exec_line : $line1\n"; - } - else{ - print "[Tesh/INFO] exec_line : $line1\n"; - if($tesh_command) - { $execline = $command_tesh; - $tesh_command=0;} - else - { $execline = $line1;} - $pid = open3(\*IN, \*OUT, \*OUT, $execline ); - if( $timeout){ - $forked = fork();die "fork() failed: $!" unless defined $forked; - if ( $forked == 0 ) - { - sleep $timeout; - kill(9, $pid); - exit; - } - } - - while(@buffer) - { - $line1 = shift (@buffer); - print IN $line1; - } - close IN ; - - waitpid( $pid, 0 ); - if($timeout){kill(9, $forked);$timeout=0;} - $timeout = 0; + if($timeout){kill(9, $forked);$timeout=0;} + $timeout = 0; - while(defined($linebis=)) - { - $linebis =~ s/\r//g; - $linebis =~ s/^( )*//g; - chomp $linebis; - push @list1,"$linebis"; - } - close OUT; - $command_executed = 1; - } + ### + # Check the result of execution + ### + my $diff = build_diff(\@{$cmd{'out'}}, \@got); + if (length $diff) { + print color("red")."[TESH/CRITICAL$$] Output mismatch\n"; + map { print "[TESH/CRITICAL] $_\n" } split(/\n/,$diff); + print color("reset"); + die "Tesh failed\n"; } - elsif($line1 =~ /^\& /){ # parallel command line - $command_executed = 0; - $expected_result_line = 0; - $line1 =~ s/\${EXEEXT:=}//g; - $line1 =~ s/^\& //g; - $line1 =~ s|^./lua|lua|g; - $line1 =~ s|^./ruby|ruby|g; - $line1 =~ s|^./||g; - $line1 =~ s/\(%i:%P@%h\)/\\\(%i:%P@%h\\\)/g; - $line1 = "$line1 $config"; - - $execline = "$line1"; - print "[Tesh/INFO] exec_line : $execline\n"; - push @list_of_commands, $execline; - } - elsif($line1 =~ /^\>/){ #expected result line - $line1 =~ s/^\>( )*//g; - $line1 =~ s/\r//g; - push @list2, $line1; - $expected_result_line = 1; +} + +sub mkfile_cmd { + my %cmd = %{$_[0]}; + my $file = $cmd{'arg'}; + print "[Tesh/INFO] mkfile $file\n"; + + die "[TESH/CRITICAL] no input provided to mkfile\n" unless defined($cmd{'in'}) && scalar @{$cmd{'in'}}; + unlink($file); + open(FILE,">$file") or die "[Tesh/CRITICAL] Unable to create file $file: $!\n"; + print FILE join("\n", @{$cmd{'in'}}); + print FILE "\n" if (scalar @{$cmd{'in'}} > 0); + close(FILE); +} + +# parse tesh file +print "Test suite $tesh_file\n"; +open TESH_FILE, $tesh_file or die "[Tesh/CRITICAL] Unable to open $tesh_file $!\n"; + + +my %cmd; # everything about the next command to run +my $line_num=0; +LINE: while (defined(my $line=)) { + $line_num++; + chomp $line; + print "[TESH/debug] $line_num: $line\n" if $opts{'debug'}; + + # deal with line continuations + while ($line =~ /^(.*?)\\$/) { + my $next=; + die "[TESH/CRITICAL] Continued line at end of file\n" + unless defined($next); + chomp $next; + print "[TESH/debug] $line_num: $next\n" if $opts{'debug'}; + $line = $1.$next; } - elsif($line1 =~ /^\ '){ #expected result line + print "[TESH/debug] push expected result\n" if $opts{'debug'}; + push @{$cmd{'out'}}, $arg; + + } elsif ($cmd eq '< ') { # provided input + print "[TESH/debug] push provided input\n" if $opts{'debug'}; + push @{$cmd{'in'}}, $arg; + + } elsif ($cmd eq 'p ') { # comment + print "[Tesh/INFO] $arg\n"; + + } elsif ($cmd eq '$ ') { # Command + # if we have something buffered, run it now + if (defined($cmd{'cmd'})) { + exec_cmd(\%cmd); + %cmd = (); } - else - { - if($encore == 1){push @buffer, "$old_buffer$line1";} - else{push @buffer, "$line1\n";} - $old_buffer = (); - $encore = 0; + if ($arg =~ /^ *mkfile /){ # "mkfile" command line + die "[TESH/CRITICAL] Output expected from mkfile command!\n" if scalar @{cmd{'out'}}; + + $cmd{'arg'} = $arg; + $cmd{'arg'} =~ s/ *mkfile //; + mkfile_cmd(\%cmd); + %cmd = (); + + } elsif ($arg =~ /^ *cd /) { + die "[TESH/CRITICAL] Input provided to cd command!\n" if scalar @{cmd{'in'}}; + die "[TESH/CRITICAL] Output expected from cd command!\n" if scalar @{cmd{'out'}}; + + $arg =~ s/^ *cd //; + cd_cmd("",$arg); + %cmd = (); + + } else { # regular command + $cmd{'cmd'} = $arg; + $cmd{'file'} = $tesh_file; + $cmd{'line'} = $line_num; } } - elsif($line1 =~ /^p/){ #comment - $line1 =~ s/^p //g; - $line1 =~ s/\r//g; - print "[Tesh/INFO] comment_line :$line1\n"; - } - elsif($line1 =~ /^! output sort/){ #output sort - $sort=1; + elsif($cmd eq '& '){ # parallel command line + $cmd{'background'} = 1; + $cmd{'cmd'} = $arg; + } + elsif($line =~ /^! output sort/){ #output sort + $cmd{'sort'} = 1; } - elsif($line1 =~ /^! output ignore/){ #output ignore - $no_output_ignore=0; + elsif($line =~ /^! output ignore/){ #output ignore + $cmd{'output ignore'} = 1; } - elsif($line1 =~ /^! expect signal SIGABRT$/) #expect signal SIGABRT - { - $SIGABRT=1; + elsif($line =~ /^! expect signal SIGABRT$/) {#expect signal SIGABRT + $cmd{'expect'} = "SIGABRT"; } - elsif($line1 =~ /^! expect return/){ #expect return - $line1 =~ s/^! expect return //g; - $line1 =~ s/\r//g; - $return = $line1; - print color("red"), "[Tesh/INFO] expect return $return"; - print color("reset"), "\n"; - die; + elsif($line =~ /^! expect return/){ #expect return + $line =~ s/^! expect return //g; + $line =~ s/\r//g; + $cmd{'return'} = $line; } - elsif($line1 =~ /^! setenv/){ #setenv - $line1 =~ s/^! setenv //g; - $line1 =~ s/\r//g; - $line1 =~ /^(.*)=(.*)$/; - $ENV{$1} = $2; - print "[Tesh/INFO] setenv: $1=$2\n"; + elsif($line =~ /^! setenv/){ #setenv + $line =~ s/^! setenv //g; + $line =~ s/\r//g; + setenv_cmd($line); } - elsif($line1 =~ /^! include/){ #output sort + elsif($line =~ /^! include/){ #output sort print color("red"), "[Tesh/CRITICAL] need include"; print color("reset"), "\n"; die; } - elsif($line1 =~ /^! timeout/){ #timeout - $line1 =~ s/^! timeout //g; - $line1 =~ s/\r//g; - if($timeout != $line1){ - $timeout = $line1; - print "[Tesh/INFO] timeout : $timeout\n";} - } - elsif($line1 =~ /^! need execute/){ #need execute // commands - $execline = (); - $sort = 1; # need sort output - while(@list_of_commands) - { - $command = shift (@list_of_commands); - if($execline){$execline = "$command & $execline";} - else{$execline = "$command";} - } - $pid = open3(\*IN, \*OUT, \*OUT, $execline); - if( $timeout){ - $forked = fork();die "fork() failed: $!" unless defined $forked; - if ( $forked == 0 ) - { - sleep $timeout; - kill(9, $pid); - exit; - } - } - - while(@buffer) - { - $line1 = shift (@buffer); - print IN $line1; - } - close IN ; - waitpid( $pid, 0 ); - if($timeout){kill(9, $forked);$timeout=0;} - $timeout = 0; - - @list1=(); - while(defined($linebis=)) - { - $linebis =~ s/\r//g; - $linebis =~ s/^( )*//g; - chomp $linebis; - push @list1,"$linebis"; - } - close OUT; - $command_executed = 1; - } - elsif($command_executed and $expected_result_line) - { - if($no_output_ignore){ - @buffer = (); - if($sort == 1) - { - @list3 = sort @list1; - @list1=(); - @list1=@list3; - @list3=(); - - @list3 = sort @list2; - @list2=(); - @list2=@list3; - @list3=(); - - $sort=0; - } - if($SIGABRT) - { - push @list2,"Aborted"; - $SIGABRT = 0; - } - - while(@list1 or @list2) - { - if(@list1){$line1 = shift (@list1);$expected_result_line = "x$line1";} - if(@list2){$line2 = shift (@list2);$command_executed = "x$line2";} - if($command_executed and $expected_result_line) - { - - if($line1 eq $line2){ - if($verbose == 1){print color("green"),"[Tesh/VERB] $line1\n",color("reset");} - else{push @buffer, "[Tesh/CRITICAL] $line1\n";} - - } - else - { if($verbose == 0){print color("green"),@buffer,color("reset");} - if($line2) {print color("red"), "[Tesh/CRITICAL] - $line2",color("reset"),"\n";} - if($line1) {print color("red"), "[Tesh/CRITICAL] + $line1",color("reset"),"\n";} - die; - } - } - else - { if($verbose == 0){print color("green"),@buffer,color("reset");} - if($line2) {print color("red"), "[Tesh/CRITICAL] - $line2",color("reset"),"\n";} - if($line1) {print color("red"), "[Tesh/CRITICAL] + $line1",color("reset"),"\n";} - die; - } - } - }else{$no_output_ignore = 1;} - $command_executed = 0; - $expected_result_line = 0; - @list1=(); - @list2=(); - @buffer = (); - $tesh_command=0; - @buffer_tesh=(); + elsif($line =~ /^! timeout/){ #timeout + $line =~ s/^! timeout //; + $line =~ s/\r//g; + $cmd{'timeout'} = $line; + } else { + die "[TESH/CRITICAL] parse error: $line\n"; } - } -if(@list_of_commands){ # need parallel execution - $execline = (); - $sort = 1; # need sort output - while(@list_of_commands) - { - $command = shift (@list_of_commands); - if($execline){$execline = "$command & $execline";} - else{$execline = "$command";} - } - print "[Tesh/INFO] exec_line : $execline\n"; - $pid = open3(\*IN, \*OUT, \*OUT,"$execline"); - - if( $timeout){ - $forked = fork();die "fork() failed: $!" unless defined $forked; - if ( $forked == 0 ) - { - sleep $timeout; - kill(9, $pid); - exit; - } - } +# Deal with last command +if (defined($cmd{'cmd'})) { + exec_cmd(\%cmd); + %cmd = (); +} + +#my (@a,@b); +#push @a,"bl1"; push @b,"bl1"; +#push @a,"bl2"; push @b,"bl2"; +#push @a,"bl3"; push @b,"bl3"; +#push @a,"bl4"; push @b,"bl4"; +#push @a,"bl5"; push @b,"bl5"; +#push @a,"bl6"; push @b,"bl6"; +#push @a,"bl7"; push @b,"bl7"; +##push @a,"Perl"; push @b,"ruby"; +#push @a,"END1"; push @b,"END1"; +#push @a,"END2"; push @b,"END2"; +#push @a,"END3"; push @b,"END3"; +#push @a,"END4"; push @b,"END4"; +#push @a,"END5"; push @b,"END5"; +#push @a,"END6"; push @b,"END6"; +#push @a,"END7"; push @b,"END7"; +#print "Identical:\n". build_diff(\@a,\@b); + +#@a = (); @b =(); +#push @a,"AZE"; push @b,"EZA"; +#print "Different:\n".build_diff(\@a,\@b); + +use Diff qw(diff); # postpone a bit to have time to change INC + +sub build_diff { + my $res; + my $diff = Diff->new(@_); - while(@buffer) - { - $line1 = shift (@buffer); - print IN $line1; + $diff->Base( 1 ); # Return line numbers, not indices + my $chunk_count = $diff->Next(-1); # Compute the amount of chuncks + return "" if ($chunk_count == 1 && $diff->Same()); + $diff->Reset(); + while( $diff->Next() ) { + my @same = $diff->Same(); + if ($diff->Same() ) { + if ($diff->Next(0) > 1) { # not first chunk: print 2 first lines + $res .= ' '.$same[0]."\n" ; + $res .= ' '.$same[1]."\n" if (scalar @same>1); + } + $res .= "...\n" if (scalar @same>2); +# $res .= $diff->Next(0)."/$chunk_count\n"; + if ($diff->Next(0) < $chunk_count) { # not last chunk: print 2 last lines + $res .= ' '.$same[scalar @same -2]."\n" if (scalar @same>1); + $res .= ' '.$same[scalar @same -1]."\n"; + } + } + next if $diff->Same(); + map { $res .= "- $_\n" } $diff->Items(1); + map { $res .= "+ $_\n" } $diff->Items(2); } - close IN ; - waitpid( $pid, 0 ); - if($timeout){kill(9, $forked);$timeout=0;} - $timeout = 0; - - @list1=(); - while(defined($linebis=)) - { - $linebis =~ s/\r//g; - $linebis =~ s/^( )*//g; - chomp $linebis; - push @list1,"$linebis"; - } - close OUT; - $command_executed = 1; + return $res; } -if($command_executed and $expected_result_line ){ - if($no_output_ignore){ - @buffer = (); - if($sort == 1) - { - @list3 = sort @list1; - @list1=(); - @list1=@list3; - @list3=(); - - @list3 = sort @list2; - @list2=(); - @list2=@list3; - @list3=(); - - $sort=0; - } - if($SIGABRT) - { - push @list2,"Aborted"; - $SIGABRT = 0; - } - - while(@list1 or @list2) - { - if(@list1){$line1 = shift (@list1);$expected_result_line = "x$line1";} - if(@list2){$line2 = shift (@list2);$command_executed = "x$line2";} - if($command_executed and $expected_result_line) - { - if($line1 eq $line2){ - if($verbose == 1){print color("green"),"[Tesh/VERB] $line1\n",color("reset");} - else{push @buffer, "[Tesh/CRITICAL] $line1\n";} - - } - else - { if($verbose == 0){print color("green"),@buffer,color("reset");} - if($line2) {print color("red"), "[Tesh/CRITICAL] - $line2",color("reset"),"\n";} - if($line1) {print color("red"), "[Tesh/CRITICAL] + $line1",color("reset"),"\n";} - die; - } - } - else - { if($verbose == 0){print color("green"),@buffer,color("reset");} - if($line2) {print color("red"), "[Tesh/CRITICAL] - $line2",color("reset"),"\n";} - if($line1) {print color("red"), "[Tesh/CRITICAL] + $line1",color("reset"),"\n";} - die; - } - } - }else{$no_output_ignore = 1;} - $command_executed = 0; - $expected_result_line= 0; - @list1=(); - @list2=(); - @buffer = (); -} + -- 2.20.1