From 789fda2379b532c01a06401d26ad2e4d9725c789 Mon Sep 17 00:00:00 2001 From: Martin Quinson Date: Fri, 2 Sep 2016 14:51:42 +0200 Subject: [PATCH] remove the dependencies of tesh.pl --- appveyor.yml | 2 - tools/cmake/DefinePackages.cmake | 8 - tools/cmake/scripts/Diff.pm | 1713 -------- tools/cmake/scripts/IPC/Run.pm | 4418 -------------------- tools/cmake/scripts/IPC/Run/Debug.pm | 312 -- tools/cmake/scripts/IPC/Run/IO.pm | 584 --- tools/cmake/scripts/IPC/Run/Timer.pm | 690 --- tools/cmake/scripts/IPC/Run/Win32Helper.pm | 489 --- tools/cmake/scripts/IPC/Run/Win32IO.pm | 576 --- tools/cmake/scripts/IPC/Run/Win32Pump.pm | 170 - 10 files changed, 8962 deletions(-) delete mode 100644 tools/cmake/scripts/Diff.pm delete mode 100644 tools/cmake/scripts/IPC/Run.pm delete mode 100644 tools/cmake/scripts/IPC/Run/Debug.pm delete mode 100644 tools/cmake/scripts/IPC/Run/IO.pm delete mode 100644 tools/cmake/scripts/IPC/Run/Timer.pm delete mode 100644 tools/cmake/scripts/IPC/Run/Win32Helper.pm delete mode 100644 tools/cmake/scripts/IPC/Run/Win32IO.pm delete mode 100644 tools/cmake/scripts/IPC/Run/Win32Pump.pm diff --git a/appveyor.yml b/appveyor.yml index 0e7ad9518d..80247d4825 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -21,8 +21,6 @@ install: # Strawberry perl is the one to work with gcc - choco install --limit-output strawberryperl -version 5.20.1.1 - SET PATH=C:\strawberry\c\bin;C:\strawberry\perl\site\bin;C:\strawberry\perl\bin;%PATH% -- cpanm --notest --quiet Win32API::File -- cpanm --notest --quiet IPC::Run # Lua #- choco install lua53 # Basic dependencies: gcc and Cmake (must be placed before Perl in the path) diff --git a/tools/cmake/DefinePackages.cmake b/tools/cmake/DefinePackages.cmake index 6abafa3d5e..ba6c3c026e 100644 --- a/tools/cmake/DefinePackages.cmake +++ b/tools/cmake/DefinePackages.cmake @@ -1010,14 +1010,6 @@ set(CMAKE_SOURCE_FILES tools/cmake/Modules/FindSimGrid.cmake tools/cmake/Modules/FindValgrind.cmake tools/cmake/Option.cmake - tools/cmake/scripts/IPC/Run.pm - tools/cmake/scripts/IPC/Run/Debug.pm - tools/cmake/scripts/IPC/Run/IO.pm - tools/cmake/scripts/IPC/Run/Timer.pm - tools/cmake/scripts/IPC/Run/Win32Helper.pm - tools/cmake/scripts/IPC/Run/Win32IO.pm - tools/cmake/scripts/IPC/Run/Win32Pump.pm - tools/cmake/scripts/Diff.pm tools/cmake/scripts/my_valgrind.pl tools/cmake/scripts/update_tesh.pl tools/cmake/UnitTesting.cmake diff --git a/tools/cmake/scripts/Diff.pm b/tools/cmake/scripts/Diff.pm deleted file mode 100644 index 5069c220dc..0000000000 --- a/tools/cmake/scripts/Diff.pm +++ /dev/null @@ -1,1713 +0,0 @@ -package Diff; -# Skip to first "=head" line for documentation. -use strict; - -use integer; # see below in _replaceNextLargerWith() for mod to make - # if you don't use this -use vars qw( $VERSION @EXPORT_OK ); -$VERSION = 1.19_02; -# ^ ^^ ^^-- Incremented at will -# | \+----- Incremented for non-trivial changes to features -# \-------- Incremented for fundamental changes -require Exporter; -*import = \&Exporter::import; -@EXPORT_OK = qw( - prepare LCS LCSidx LCS_length - diff sdiff compact_diff - traverse_sequences traverse_balanced -); - -# McIlroy-Hunt diff algorithm -# Adapted from the Smalltalk code of Mario I. Wolczko, -# by Ned Konz, perl@bike-nomad.com -# Updates by Tye McQueen, http://perlmonks.org/?node=tye - -# Create a hash that maps each element of $aCollection to the set of -# positions it occupies in $aCollection, restricted to the elements -# within the range of indexes specified by $start and $end. -# The fourth parameter is a subroutine reference that will be called to -# generate a string to use as a key. -# Additional parameters, if any, will be passed to this subroutine. -# -# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen ); - -sub _withPositionsOfInInterval -{ - my $aCollection = shift; # array ref - my $start = shift; - my $end = shift; - my $keyGen = shift; - my %d; - my $index; - for ( $index = $start ; $index <= $end ; $index++ ) - { - my $element = $aCollection->[$index]; - my $key = &$keyGen( $element, @_ ); - if ( exists( $d{$key} ) ) - { - unshift ( @{ $d{$key} }, $index ); - } - else - { - $d{$key} = [$index]; - } - } - return wantarray ? %d : \%d; -} - -# Find the place at which aValue would normally be inserted into the -# array. If that place is already occupied by aValue, do nothing, and -# return undef. If the place does not exist (i.e., it is off the end of -# the array), add it to the end, otherwise replace the element at that -# point with aValue. It is assumed that the array's values are numeric. -# This is where the bulk (75%) of the time is spent in this module, so -# try to make it fast! - -sub _replaceNextLargerWith -{ - my ( $array, $aValue, $high ) = @_; - $high ||= $#$array; - - # off the end? - if ( $high == -1 || $aValue > $array->[-1] ) - { - push ( @$array, $aValue ); - return $high + 1; - } - - # binary search for insertion point... - my $low = 0; - my $index; - my $found; - while ( $low <= $high ) - { - $index = ( $high + $low ) / 2; - - # $index = int(( $high + $low ) / 2); # without 'use integer' - $found = $array->[$index]; - - if ( $aValue == $found ) - { - return undef; - } - elsif ( $aValue > $found ) - { - $low = $index + 1; - } - else - { - $high = $index - 1; - } - } - - # now insertion point is in $low. - $array->[$low] = $aValue; # overwrite next larger - return $low; -} - -# This method computes the longest common subsequence in $a and $b. - -# Result is array or ref, whose contents is such that -# $a->[ $i ] == $b->[ $result[ $i ] ] -# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined. - -# An additional argument may be passed; this is a hash or key generating -# function that should return a string that uniquely identifies the given -# element. It should be the case that if the key is the same, the elements -# will compare the same. If this parameter is undef or missing, the key -# will be the element as a string. - -# By default, comparisons will use "eq" and elements will be turned into keys -# using the default stringizing operator '""'. - -# Additional parameters, if any, will be passed to the key generation -# routine. - -sub _longestCommonSubsequence -{ - my $a = shift; # array ref or hash ref - my $b = shift; # array ref or hash ref - my $counting = shift; # scalar - my $keyGen = shift; # code ref - my $compare; # code ref - - if ( ref($a) eq 'HASH' ) - { # prepared hash must be in $b - my $tmp = $b; - $b = $a; - $a = $tmp; - } - - # Check for bogus (non-ref) argument values - if ( !ref($a) || !ref($b) ) - { - my @callerInfo = caller(1); - die 'error: must pass array or hash references to ' . $callerInfo[3]; - } - - # set up code refs - # Note that these are optimized. - if ( !defined($keyGen) ) # optimize for strings - { - $keyGen = sub { $_[0] }; - $compare = sub { my ( $a, $b ) = @_; $a eq $b }; - } - else - { - $compare = sub { - my $a = shift; - my $b = shift; - &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ ); - }; - } - - my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] ); - my ( $prunedCount, $bMatches ) = ( 0, {} ); - - if ( ref($b) eq 'HASH' ) # was $bMatches prepared for us? - { - $bMatches = $b; - } - else - { - my ( $bStart, $bFinish ) = ( 0, $#$b ); - - # First we prune off any common elements at the beginning - while ( $aStart <= $aFinish - and $bStart <= $bFinish - and &$compare( $a->[$aStart], $b->[$bStart], @_ ) ) - { - $matchVector->[ $aStart++ ] = $bStart++; - $prunedCount++; - } - - # now the end - while ( $aStart <= $aFinish - and $bStart <= $bFinish - and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) ) - { - $matchVector->[ $aFinish-- ] = $bFinish--; - $prunedCount++; - } - - # Now compute the equivalence classes of positions of elements - $bMatches = - _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ ); - } - my $thresh = []; - my $links = []; - - my ( $i, $ai, $j, $k ); - for ( $i = $aStart ; $i <= $aFinish ; $i++ ) - { - $ai = &$keyGen( $a->[$i], @_ ); - if ( exists( $bMatches->{$ai} ) ) - { - $k = 0; - for $j ( @{ $bMatches->{$ai} } ) - { - - # optimization: most of the time this will be true - if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j ) - { - $thresh->[$k] = $j; - } - else - { - $k = _replaceNextLargerWith( $thresh, $j, $k ); - } - - # oddly, it's faster to always test this (CPU cache?). - if ( defined($k) ) - { - $links->[$k] = - [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ]; - } - } - } - } - - if (@$thresh) - { - return $prunedCount + @$thresh if $counting; - for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] ) - { - $matchVector->[ $link->[1] ] = $link->[2]; - } - } - elsif ($counting) - { - return $prunedCount; - } - - return wantarray ? @$matchVector : $matchVector; -} - -sub traverse_sequences -{ - my $a = shift; # array ref - my $b = shift; # array ref - my $callbacks = shift || {}; - my $keyGen = shift; - my $matchCallback = $callbacks->{'MATCH'} || sub { }; - my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; - my $finishedACallback = $callbacks->{'A_FINISHED'}; - my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; - my $finishedBCallback = $callbacks->{'B_FINISHED'}; - my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); - - # Process all the lines in @$matchVector - my $lastA = $#$a; - my $lastB = $#$b; - my $bi = 0; - my $ai; - - for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ ) - { - my $bLine = $matchVector->[$ai]; - if ( defined($bLine) ) # matched - { - &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine; - &$matchCallback( $ai, $bi++, @_ ); - } - else - { - &$discardACallback( $ai, $bi, @_ ); - } - } - - # The last entry (if any) processed was a match. - # $ai and $bi point just past the last matching lines in their sequences. - - while ( $ai <= $lastA or $bi <= $lastB ) - { - - # last A? - if ( $ai == $lastA + 1 and $bi <= $lastB ) - { - if ( defined($finishedACallback) ) - { - &$finishedACallback( $lastA, @_ ); - $finishedACallback = undef; - } - else - { - &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB; - } - } - - # last B? - if ( $bi == $lastB + 1 and $ai <= $lastA ) - { - if ( defined($finishedBCallback) ) - { - &$finishedBCallback( $lastB, @_ ); - $finishedBCallback = undef; - } - else - { - &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA; - } - } - - &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA; - &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB; - } - - return 1; -} - -sub traverse_balanced -{ - my $a = shift; # array ref - my $b = shift; # array ref - my $callbacks = shift || {}; - my $keyGen = shift; - my $matchCallback = $callbacks->{'MATCH'} || sub { }; - my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; - my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; - my $changeCallback = $callbacks->{'CHANGE'}; - my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); - - # Process all the lines in match vector - my $lastA = $#$a; - my $lastB = $#$b; - my $bi = 0; - my $ai = 0; - my $ma = -1; - my $mb; - - while (1) - { - - # Find next match indices $ma and $mb - do { - $ma++; - } while( - $ma <= $#$matchVector - && !defined $matchVector->[$ma] - ); - - last if $ma > $#$matchVector; # end of matchVector? - $mb = $matchVector->[$ma]; - - # Proceed with discard a/b or change events until - # next match - while ( $ai < $ma || $bi < $mb ) - { - - if ( $ai < $ma && $bi < $mb ) - { - - # Change - if ( defined $changeCallback ) - { - &$changeCallback( $ai++, $bi++, @_ ); - } - else - { - &$discardACallback( $ai++, $bi, @_ ); - &$discardBCallback( $ai, $bi++, @_ ); - } - } - elsif ( $ai < $ma ) - { - &$discardACallback( $ai++, $bi, @_ ); - } - else - { - - # $bi < $mb - &$discardBCallback( $ai, $bi++, @_ ); - } - } - - # Match - &$matchCallback( $ai++, $bi++, @_ ); - } - - while ( $ai <= $lastA || $bi <= $lastB ) - { - if ( $ai <= $lastA && $bi <= $lastB ) - { - - # Change - if ( defined $changeCallback ) - { - &$changeCallback( $ai++, $bi++, @_ ); - } - else - { - &$discardACallback( $ai++, $bi, @_ ); - &$discardBCallback( $ai, $bi++, @_ ); - } - } - elsif ( $ai <= $lastA ) - { - &$discardACallback( $ai++, $bi, @_ ); - } - else - { - - # $bi <= $lastB - &$discardBCallback( $ai, $bi++, @_ ); - } - } - - return 1; -} - -sub prepare -{ - my $a = shift; # array ref - my $keyGen = shift; # code ref - - # set up code ref - $keyGen = sub { $_[0] } unless defined($keyGen); - - return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ ); -} - -sub LCS -{ - my $a = shift; # array ref - my $b = shift; # array ref or hash ref - my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ ); - my @retval; - my $i; - for ( $i = 0 ; $i <= $#$matchVector ; $i++ ) - { - if ( defined( $matchVector->[$i] ) ) - { - push ( @retval, $a->[$i] ); - } - } - return wantarray ? @retval : \@retval; -} - -sub LCS_length -{ - my $a = shift; # array ref - my $b = shift; # array ref or hash ref - return _longestCommonSubsequence( $a, $b, 1, @_ ); -} - -sub LCSidx -{ - my $a= shift @_; - my $b= shift @_; - my $match= _longestCommonSubsequence( $a, $b, 0, @_ ); - my @am= grep defined $match->[$_], 0..$#$match; - my @bm= @{$match}[@am]; - return \@am, \@bm; -} - -sub compact_diff -{ - my $a= shift @_; - my $b= shift @_; - my( $am, $bm )= LCSidx( $a, $b, @_ ); - my @cdiff; - my( $ai, $bi )= ( 0, 0 ); - push @cdiff, $ai, $bi; - while( 1 ) { - while( @$am && $ai == $am->[0] && $bi == $bm->[0] ) { - shift @$am; - shift @$bm; - ++$ai, ++$bi; - } - push @cdiff, $ai, $bi; - last if ! @$am; - $ai = $am->[0]; - $bi = $bm->[0]; - push @cdiff, $ai, $bi; - } - push @cdiff, 0+@$a, 0+@$b - if $ai < @$a || $bi < @$b; - return wantarray ? @cdiff : \@cdiff; -} - -sub diff -{ - my $a = shift; # array ref - my $b = shift; # array ref - my $retval = []; - my $hunk = []; - my $discard = sub { - push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ]; - }; - my $add = sub { - push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ]; - }; - my $match = sub { - push @$retval, $hunk - if 0 < @$hunk; - $hunk = [] - }; - traverse_sequences( $a, $b, - { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ ); - &$match(); - return wantarray ? @$retval : $retval; -} - -sub sdiff -{ - my $a = shift; # array ref - my $b = shift; # array ref - my $retval = []; - my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) }; - my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) }; - my $change = sub { - push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] ); - }; - my $match = sub { - push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] ); - }; - traverse_balanced( - $a, - $b, - { - MATCH => $match, - DISCARD_A => $discard, - DISCARD_B => $add, - CHANGE => $change, - }, - @_ - ); - return wantarray ? @$retval : $retval; -} - -######################################## -my $Root= __PACKAGE__; -package Algorithm::Diff::_impl; -use strict; - -sub _Idx() { 0 } # $me->[_Idx]: Ref to array of hunk indices - # 1 # $me->[1]: Ref to first sequence - # 2 # $me->[2]: Ref to second sequence -sub _End() { 3 } # $me->[_End]: Diff between forward and reverse pos -sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items -sub _Base() { 5 } # $me->[_Base]: Added to range's min and max -sub _Pos() { 6 } # $me->[_Pos]: Which hunk is currently selected -sub _Off() { 7 } # $me->[_Off]: Offset into _Idx for current position -sub _Min() { -2 } # Added to _Off to get min instead of max+1 - -sub Die -{ - require Carp; - Carp::confess( @_ ); -} - -sub _ChkPos -{ - my( $me )= @_; - return if $me->[_Pos]; - my $meth= ( caller(1) )[3]; - Die( "Called $meth on 'reset' object" ); -} - -sub _ChkSeq -{ - my( $me, $seq )= @_; - return $seq + $me->[_Off] - if 1 == $seq || 2 == $seq; - my $meth= ( caller(1) )[3]; - Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" ); -} - -sub getObjPkg -{ - my( $us )= @_; - return ref $us if ref $us; - return $us . "::_obj"; -} - -sub new -{ - my( $us, $seq1, $seq2, $opts ) = @_; - my @args; - for( $opts->{keyGen} ) { - push @args, $_ if $_; - } - for( $opts->{keyGenArgs} ) { - push @args, @$_ if $_; - } - my $cdif= Diff::compact_diff( $seq1, $seq2, @args ); - my $same= 1; - if( 0 == $cdif->[2] && 0 == $cdif->[3] ) { - $same= 0; - splice @$cdif, 0, 2; - } - my @obj= ( $cdif, $seq1, $seq2 ); - $obj[_End] = (1+@$cdif)/2; - $obj[_Same] = $same; - $obj[_Base] = 0; - my $me = bless \@obj, $us->getObjPkg(); - $me->Reset( 0 ); - return $me; -} - -sub Reset -{ - my( $me, $pos )= @_; - $pos= int( $pos || 0 ); - $pos += $me->[_End] - if $pos < 0; - $pos= 0 - if $pos < 0 || $me->[_End] <= $pos; - $me->[_Pos]= $pos || !1; - $me->[_Off]= 2*$pos - 1; - return $me; -} - -sub Base -{ - my( $me, $base )= @_; - my $oldBase= $me->[_Base]; - $me->[_Base]= 0+$base if defined $base; - return $oldBase; -} - -sub Copy -{ - my( $me, $pos, $base )= @_; - my @obj= @$me; - my $you= bless \@obj, ref($me); - $you->Reset( $pos ) if defined $pos; - $you->Base( $base ); - return $you; -} - -sub Next { - my( $me, $steps )= @_; - $steps= 1 if ! defined $steps; - if( $steps ) { - my $pos= $me->[_Pos]; - my $new= $pos + $steps; - $new= 0 if $pos && $new < 0; - $me->Reset( $new ) - } - return $me->[_Pos]; -} - -sub Prev { - my( $me, $steps )= @_; - $steps= 1 if ! defined $steps; - my $pos= $me->Next(-$steps); - $pos -= $me->[_End] if $pos; - return $pos; -} - -sub Diff { - my( $me )= @_; - $me->_ChkPos(); - return 0 if $me->[_Same] == ( 1 & $me->[_Pos] ); - my $ret= 0; - my $off= $me->[_Off]; - for my $seq ( 1, 2 ) { - $ret |= $seq - if $me->[_Idx][ $off + $seq + _Min ] - < $me->[_Idx][ $off + $seq ]; - } - return $ret; -} - -sub Min { - my( $me, $seq, $base )= @_; - $me->_ChkPos(); - my $off= $me->_ChkSeq($seq); - $base= $me->[_Base] if !defined $base; - return $base + $me->[_Idx][ $off + _Min ]; -} - -sub Max { - my( $me, $seq, $base )= @_; - $me->_ChkPos(); - my $off= $me->_ChkSeq($seq); - $base= $me->[_Base] if !defined $base; - return $base + $me->[_Idx][ $off ] -1; -} - -sub Range { - my( $me, $seq, $base )= @_; - $me->_ChkPos(); - my $off = $me->_ChkSeq($seq); - if( !wantarray ) { - return $me->[_Idx][ $off ] - - $me->[_Idx][ $off + _Min ]; - } - $base= $me->[_Base] if !defined $base; - return ( $base + $me->[_Idx][ $off + _Min ] ) - .. ( $base + $me->[_Idx][ $off ] - 1 ); -} - -sub Items { - my( $me, $seq )= @_; - $me->_ChkPos(); - my $off = $me->_ChkSeq($seq); - if( !wantarray ) { - return $me->[_Idx][ $off ] - - $me->[_Idx][ $off + _Min ]; - } - return - @{$me->[$seq]}[ - $me->[_Idx][ $off + _Min ] - .. ( $me->[_Idx][ $off ] - 1 ) - ]; -} - -sub Same { - my( $me )= @_; - $me->_ChkPos(); - return wantarray ? () : 0 - if $me->[_Same] != ( 1 & $me->[_Pos] ); - return $me->Items(1); -} - -my %getName; -BEGIN { - %getName= ( - same => \&Same, - diff => \&Diff, - base => \&Base, - min => \&Min, - max => \&Max, - range=> \&Range, - items=> \&Items, # same thing - ); -} - -sub Get -{ - my $me= shift @_; - $me->_ChkPos(); - my @value; - for my $arg ( @_ ) { - for my $word ( split ' ', $arg ) { - my $meth; - if( $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/ - || not $meth= $getName{ lc $2 } - ) { - Die( $Root, ", Get: Invalid request ($word)" ); - } - my( $base, $name, $seq )= ( $1, $2, $3 ); - push @value, scalar( - 4 == length($name) - ? $meth->( $me ) - : $meth->( $me, $seq, $base ) - ); - } - } - if( wantarray ) { - return @value; - } elsif( 1 == @value ) { - return $value[0]; - } - Die( 0+@value, " values requested from ", - $Root, "'s Get in scalar context" ); -} - - -my $Obj= getObjPkg($Root); -no strict 'refs'; - -for my $meth ( qw( new getObjPkg ) ) { - *{$Root."::".$meth} = \&{$meth}; - *{$Obj ."::".$meth} = \&{$meth}; -} -for my $meth ( qw( - Next Prev Reset Copy Base Diff - Same Items Range Min Max Get - _ChkPos _ChkSeq -) ) { - *{$Obj."::".$meth} = \&{$meth}; -} - -1; -__END__ - -=head1 NAME - -Algorithm::Diff - Compute `intelligent' differences between two files / lists - -=head1 SYNOPSIS - - require Algorithm::Diff; - - # This example produces traditional 'diff' output: - - my $diff = Algorithm::Diff->new( \@seq1, \@seq2 ); - - $diff->Base( 1 ); # Return line numbers, not indices - while( $diff->Next() ) { - next if $diff->Same(); - my $sep = ''; - if( ! $diff->Items(2) ) { - printf "%d,%dd%d\n", - $diff->Get(qw( Min1 Max1 Max2 )); - } elsif( ! $diff->Items(1) ) { - printf "%da%d,%d\n", - $diff->Get(qw( Max1 Min2 Max2 )); - } else { - $sep = "---\n"; - printf "%d,%dc%d,%d\n", - $diff->Get(qw( Min1 Max1 Min2 Max2 )); - } - print "< $_" for $diff->Items(1); - print $sep; - print "> $_" for $diff->Items(2); - } - - - # Alternate interfaces: - - use Algorithm::Diff qw( - LCS LCS_length LCSidx - diff sdiff compact_diff - traverse_sequences traverse_balanced ); - - @lcs = LCS( \@seq1, \@seq2 ); - $lcsref = LCS( \@seq1, \@seq2 ); - $count = LCS_length( \@seq1, \@seq2 ); - - ( $seq1idxref, $seq2idxref ) = LCSidx( \@seq1, \@seq2 ); - - - # Complicated interfaces: - - @diffs = diff( \@seq1, \@seq2 ); - - @sdiffs = sdiff( \@seq1, \@seq2 ); - - @cdiffs = compact_diff( \@seq1, \@seq2 ); - - traverse_sequences( - \@seq1, - \@seq2, - { MATCH => \&callback1, - DISCARD_A => \&callback2, - DISCARD_B => \&callback3, - }, - \&key_generator, - @extra_args, - ); - - traverse_balanced( - \@seq1, - \@seq2, - { MATCH => \&callback1, - DISCARD_A => \&callback2, - DISCARD_B => \&callback3, - CHANGE => \&callback4, - }, - \&key_generator, - @extra_args, - ); - - -=head1 INTRODUCTION - -(by Mark-Jason Dominus) - -I once read an article written by the authors of C; they said -that they worked very hard on the algorithm until they found the -right one. - -I think what they ended up using (and I hope someone will correct me, -because I am not very confident about this) was the `longest common -subsequence' method. In the LCS problem, you have two sequences of -items: - - a b c d f g h j q z - - a b c d e f g i j k r x y z - -and you want to find the longest sequence of items that is present in -both original sequences in the same order. That is, you want to find -a new sequence I which can be obtained from the first sequence by -deleting some items, and from the secend sequence by deleting other -items. You also want I to be as long as possible. In this case I -is - - a b c d f g j z - -From there it's only a small step to get diff-like output: - - e h i k q r x y - + - + + - + + + - -This module solves the LCS problem. It also includes a canned function -to generate C-like output. - -It might seem from the example above that the LCS of two sequences is -always pretty obvious, but that's not always the case, especially when -the two sequences have many repeated elements. For example, consider - - a x b y c z p d q - a b c a x b y c z - -A naive approach might start by matching up the C and C that -appear at the beginning of each sequence, like this: - - a x b y c z p d q - a b c a b y c z - -This finds the common subsequence C. But actually, the LCS -is C: - - a x b y c z p d q - a b c a x b y c z - -or - - a x b y c z p d q - a b c a x b y c z - -=head1 USAGE - -(See also the README file and several example -scripts include with this module.) - -This module now provides an object-oriented interface that uses less -memory and is easier to use than most of the previous procedural -interfaces. It also still provides several exportable functions. We'll -deal with these in ascending order of difficulty: C, -C, C, OO interface, C, C, C, -C, and C. - -=head2 C - -Given references to two lists of items, LCS returns an array containing -their longest common subsequence. In scalar context, it returns a -reference to such a list. - - @lcs = LCS( \@seq1, \@seq2 ); - $lcsref = LCS( \@seq1, \@seq2 ); - -C may be passed an optional third parameter; this is a CODE -reference to a key generation function. See L. - - @lcs = LCS( \@seq1, \@seq2, \&keyGen, @args ); - $lcsref = LCS( \@seq1, \@seq2, \&keyGen, @args ); - -Additional parameters, if any, will be passed to the key generation -routine. - -=head2 C - -This is just like C except it only returns the length of the -longest common subsequence. This provides a performance gain of about -9% compared to C. - -=head2 C - -Like C except it returns references to two arrays. The first array -contains the indices into @seq1 where the LCS items are located. The -second array contains the indices into @seq2 where the LCS items are located. - -Therefore, the following three lists will contain the same values: - - my( $idx1, $idx2 ) = LCSidx( \@seq1, \@seq2 ); - my @list1 = @seq1[ @$idx1 ]; - my @list2 = @seq2[ @$idx2 ]; - my @list3 = LCS( \@seq1, \@seq2 ); - -=head2 C - - $diff = Algorithm::Diffs->new( \@seq1, \@seq2 ); - $diff = Algorithm::Diffs->new( \@seq1, \@seq2, \%opts ); - -C computes the smallest set of additions and deletions necessary -to turn the first sequence into the second and compactly records them -in the object. - -You use the object to iterate over I, where each hunk represents -a contiguous section of items which should be added, deleted, replaced, -or left unchanged. - -=over 4 - -The following summary of all of the methods looks a lot like Perl code -but some of the symbols have different meanings: - - [ ] Encloses optional arguments - : Is followed by the default value for an optional argument - | Separates alternate return results - -Method summary: - - $obj = Algorithm::Diff->new( \@seq1, \@seq2, [ \%opts ] ); - $pos = $obj->Next( [ $count : 1 ] ); - $revPos = $obj->Prev( [ $count : 1 ] ); - $obj = $obj->Reset( [ $pos : 0 ] ); - $copy = $obj->Copy( [ $pos, [ $newBase ] ] ); - $oldBase = $obj->Base( [ $newBase ] ); - -Note that all of the following methods C if used on an object that -is "reset" (not currently pointing at any hunk). - - $bits = $obj->Diff( ); - @items|$cnt = $obj->Same( ); - @items|$cnt = $obj->Items( $seqNum ); - @idxs |$cnt = $obj->Range( $seqNum, [ $base ] ); - $minIdx = $obj->Min( $seqNum, [ $base ] ); - $maxIdx = $obj->Max( $seqNum, [ $base ] ); - @values = $obj->Get( @names ); - -Passing in C for an optional argument is always treated the same -as if no argument were passed in. - -=item C - - $pos = $diff->Next(); # Move forward 1 hunk - $pos = $diff->Next( 2 ); # Move forward 2 hunks - $pos = $diff->Next(-5); # Move backward 5 hunks - -C moves the object to point at the next hunk. The object starts -out "reset", which means it isn't pointing at any hunk. If the object -is reset, then C moves to the first hunk. - -C returns a true value iff the move didn't go past the last hunk. -So C will return true iff the object is not reset. - -Actually, C returns the object's new position, which is a number -between 1 and the number of hunks (inclusive), or returns a false value. - -=item C - -C is almost identical to C; it moves to the $Nth -previous hunk. On a 'reset' object, C [and C] move -to the last hunk. - -The position returned by C is relative to the I of the -hunks; -1 for the last hunk, -2 for the second-to-last, etc. - -=item C - - $diff->Reset(); # Reset the object's position - $diff->Reset($pos); # Move to the specified hunk - $diff->Reset(1); # Move to the first hunk - $diff->Reset(-1); # Move to the last hunk - -C returns the object, so, for example, you could use -C<< $diff->Reset()->Next(-1) >> to get the number of hunks. - -=item C - - $copy = $diff->Copy( $newPos, $newBase ); - -C returns a copy of the object. The copy and the orignal object -share most of their data, so making copies takes very little memory. -The copy maintains its own position (separate from the original), which -is the main purpose of copies. It also maintains its own base. - -By default, the copy's position starts out the same as the original -object's position. But C takes an optional first argument to set the -new position, so the following three snippets are equivalent: - - $copy = $diff->Copy($pos); - - $copy = $diff->Copy(); - $copy->Reset($pos); - - $copy = $diff->Copy()->Reset($pos); - -C takes an optional second argument to set the base for -the copy. If you wish to change the base of the copy but leave -the position the same as in the original, here are two -equivalent ways: - - $copy = $diff->Copy(); - $copy->Base( 0 ); - - $copy = $diff->Copy(undef,0); - -Here are two equivalent way to get a "reset" copy: - - $copy = $diff->Copy(0); - - $copy = $diff->Copy()->Reset(); - -=item C - - $bits = $obj->Diff(); - -C returns a true value iff the current hunk contains items that are -different between the two sequences. It actually returns one of the -follow 4 values: - -=over 4 - -=item 3 - -C<3==(1|2)>. This hunk contains items from @seq1 and the items -from @seq2 that should replace them. Both sequence 1 and 2 -contain changed items so both the 1 and 2 bits are set. - -=item 2 - -This hunk only contains items from @seq2 that should be inserted (not -items from @seq1). Only sequence 2 contains changed items so only the 2 -bit is set. - -=item 1 - -This hunk only contains items from @seq1 that should be deleted (not -items from @seq2). Only sequence 1 contains changed items so only the 1 -bit is set. - -=item 0 - -This means that the items in this hunk are the same in both sequences. -Neither sequence 1 nor 2 contain changed items so neither the 1 nor the -2 bits are set. - -=back - -=item C - -C returns a true value iff the current hunk contains items that -are the same in both sequences. It actually returns the list of items -if they are the same or an emty list if they aren't. In a scalar -context, it returns the size of the list. - -=item C - - $count = $diff->Items(2); - @items = $diff->Items($seqNum); - -C returns the (number of) items from the specified sequence that -are part of the current hunk. - -If the current hunk contains only insertions, then -C<< $diff->Items(1) >> will return an empty list (0 in a scalar conext). -If the current hunk contains only deletions, then C<< $diff->Items(2) >> -will return an empty list (0 in a scalar conext). - -If the hunk contains replacements, then both C<< $diff->Items(1) >> and -C<< $diff->Items(2) >> will return different, non-empty lists. - -Otherwise, the hunk contains identical items and all of the following -will return the same lists: - - @items = $diff->Items(1); - @items = $diff->Items(2); - @items = $diff->Same(); - -=item C - - $count = $diff->Range( $seqNum ); - @indices = $diff->Range( $seqNum ); - @indices = $diff->Range( $seqNum, $base ); - -C is like C except that it returns a list of I to -the items rather than the items themselves. By default, the index of -the first item (in each sequence) is 0 but this can be changed by -calling the C method. So, by default, the following two snippets -return the same lists: - - @list = $diff->Items(2); - @list = @seq2[ $diff->Range(2) ]; - -You can also specify the base to use as the second argument. So the -following two snippets I return the same lists: - - @list = $diff->Items(1); - @list = @seq1[ $diff->Range(1,0) ]; - -=item C - - $curBase = $diff->Base(); - $oldBase = $diff->Base($newBase); - -C sets and/or returns the current base (usually 0 or 1) that is -used when you request range information. The base defaults to 0 so -that range information is returned as array indices. You can set the -base to 1 if you want to report traditional line numbers instead. - -=item C - - $min1 = $diff->Min(1); - $min = $diff->Min( $seqNum, $base ); - -C returns the first value that C would return (given the -same arguments) or returns C if C would return an empty -list. - -=item C - -C returns the last value that C would return or C. - -=item C - - ( $n, $x, $r ) = $diff->Get(qw( min1 max1 range1 )); - @values = $diff->Get(qw( 0min2 1max2 range2 same base )); - -C returns one or more scalar values. You pass in a list of the -names of the values you want returned. Each name must match one of the -following regexes: - - /^(-?\d+)?(min|max)[12]$/i - /^(range[12]|same|diff|base)$/i - -The 1 or 2 after a name says which sequence you want the information -for (and where allowed, it is required). The optional number before -"min" or "max" is the base to use. So the following equalities hold: - - $diff->Get('min1') == $diff->Min(1) - $diff->Get('0min2') == $diff->Min(2,0) - -Using C in a scalar context when you've passed in more than one -name is a fatal error (C is called). - -=back - -=head2 C - -Given a reference to a list of items, C returns a reference -to a hash which can be used when comparing this sequence to other -sequences with C or C. - - $prep = prepare( \@seq1 ); - for $i ( 0 .. 10_000 ) - { - @lcs = LCS( $prep, $seq[$i] ); - # do something useful with @lcs - } - -C may be passed an optional third parameter; this is a CODE -reference to a key generation function. See L. - - $prep = prepare( \@seq1, \&keyGen ); - for $i ( 0 .. 10_000 ) - { - @lcs = LCS( $seq[$i], $prep, \&keyGen ); - # do something useful with @lcs - } - -Using C provides a performance gain of about 50% when calling LCS -many times compared with not preparing. - -=head2 C - - @diffs = diff( \@seq1, \@seq2 ); - $diffs_ref = diff( \@seq1, \@seq2 ); - -C computes the smallest set of additions and deletions necessary -to turn the first sequence into the second, and returns a description -of these changes. The description is a list of I; each hunk -represents a contiguous section of items which should be added, -deleted, or replaced. (Hunks containing unchanged items are not -included.) - -The return value of C is a list of hunks, or, in scalar context, a -reference to such a list. If there are no differences, the list will be -empty. - -Here is an example. Calling C for the following two sequences: - - a b c e h j l m n p - b c d e f j k l m r s t - -would produce the following list: - - ( - [ [ '-', 0, 'a' ] ], - - [ [ '+', 2, 'd' ] ], - - [ [ '-', 4, 'h' ], - [ '+', 4, 'f' ] ], - - [ [ '+', 6, 'k' ] ], - - [ [ '-', 8, 'n' ], - [ '-', 9, 'p' ], - [ '+', 9, 'r' ], - [ '+', 10, 's' ], - [ '+', 11, 't' ] ], - ) - -There are five hunks here. The first hunk says that the C at -position 0 of the first sequence should be deleted (C<->). The second -hunk says that the C at position 2 of the second sequence should -be inserted (C<+>). The third hunk says that the C at position 4 -of the first sequence should be removed and replaced with the C -from position 4 of the second sequence. And so on. - -C may be passed an optional third parameter; this is a CODE -reference to a key generation function. See L. - -Additional parameters, if any, will be passed to the key generation -routine. - -=head2 C - - @sdiffs = sdiff( \@seq1, \@seq2 ); - $sdiffs_ref = sdiff( \@seq1, \@seq2 ); - -C computes all necessary components to show two sequences -and their minimized differences side by side, just like the -Unix-utility I does: - - same same - before | after - old < - - - > new - -It returns a list of array refs, each pointing to an array of -display instructions. In scalar context it returns a reference -to such a list. If there are no differences, the list will have one -entry per item, each indicating that the item was unchanged. - -Display instructions consist of three elements: A modifier indicator -(C<+>: Element added, C<->: Element removed, C: Element unmodified, -C: Element changed) and the value of the old and new elements, to -be displayed side-by-side. - -An C of the following two sequences: - - a b c e h j l m n p - b c d e f j k l m r s t - -results in - - ( [ '-', 'a', '' ], - [ 'u', 'b', 'b' ], - [ 'u', 'c', 'c' ], - [ '+', '', 'd' ], - [ 'u', 'e', 'e' ], - [ 'c', 'h', 'f' ], - [ 'u', 'j', 'j' ], - [ '+', '', 'k' ], - [ 'u', 'l', 'l' ], - [ 'u', 'm', 'm' ], - [ 'c', 'n', 'r' ], - [ 'c', 'p', 's' ], - [ '+', '', 't' ], - ) - -C may be passed an optional third parameter; this is a CODE -reference to a key generation function. See L. - -Additional parameters, if any, will be passed to the key generation -routine. - -=head2 C - -C is much like C except it returns a much more -compact description consisting of just one flat list of indices. An -example helps explain the format: - - my @a = qw( a b c e h j l m n p ); - my @b = qw( b c d e f j k l m r s t ); - @cdiff = compact_diff( \@a, \@b ); - # Returns: - # @a @b @a @b - # start start values values - ( 0, 0, # = - 0, 0, # a ! - 1, 0, # b c = b c - 3, 2, # ! d - 3, 3, # e = e - 4, 4, # f ! h - 5, 5, # j = j - 6, 6, # ! k - 6, 7, # l m = l m - 8, 9, # n p ! r s t - 10, 12, # - ); - -The 0th, 2nd, 4th, etc. entries are all indices into @seq1 (@a in the -above example) indicating where a hunk begins. The 1st, 3rd, 5th, etc. -entries are all indices into @seq2 (@b in the above example) indicating -where the same hunk begins. - -So each pair of indices (except the last pair) describes where a hunk -begins (in each sequence). Since each hunk must end at the item just -before the item that starts the next hunk, the next pair of indices can -be used to determine where the hunk ends. - -So, the first 4 entries (0..3) describe the first hunk. Entries 0 and 1 -describe where the first hunk begins (and so are always both 0). -Entries 2 and 3 describe where the next hunk begins, so subtracting 1 -from each tells us where the first hunk ends. That is, the first hunk -contains items C<$diff[0]> through C<$diff[2] - 1> of the first sequence -and contains items C<$diff[1]> through C<$diff[3] - 1> of the second -sequence. - -In other words, the first hunk consists of the following two lists of items: - - # 1st pair 2nd pair - # of indices of indices - @list1 = @a[ $cdiff[0] .. $cdiff[2]-1 ]; - @list2 = @b[ $cdiff[1] .. $cdiff[3]-1 ]; - # Hunk start Hunk end - -Note that the hunks will always alternate between those that are part of -the LCS (those that contain unchanged items) and those that contain -changes. This means that all we need to be told is whether the first -hunk is a 'same' or 'diff' hunk and we can determine which of the other -hunks contain 'same' items or 'diff' items. - -By convention, we always make the first hunk contain unchanged items. -So the 1st, 3rd, 5th, etc. hunks (all odd-numbered hunks if you start -counting from 1) all contain unchanged items. And the 2nd, 4th, 6th, -etc. hunks (all even-numbered hunks if you start counting from 1) all -contain changed items. - -Since @a and @b don't begin with the same value, the first hunk in our -example is empty (otherwise we'd violate the above convention). Note -that the first 4 index values in our example are all zero. Plug these -values into our previous code block and we get: - - @hunk1a = @a[ 0 .. 0-1 ]; - @hunk1b = @b[ 0 .. 0-1 ]; - -And C<0..-1> returns the empty list. - -Move down one pair of indices (2..5) and we get the offset ranges for -the second hunk, which contains changed items. - -Since C<@diff[2..5]> contains (0,0,1,0) in our example, the second hunk -consists of these two lists of items: - - @hunk2a = @a[ $cdiff[2] .. $cdiff[4]-1 ]; - @hunk2b = @b[ $cdiff[3] .. $cdiff[5]-1 ]; - # or - @hunk2a = @a[ 0 .. 1-1 ]; - @hunk2b = @b[ 0 .. 0-1 ]; - # or - @hunk2a = @a[ 0 .. 0 ]; - @hunk2b = @b[ 0 .. -1 ]; - # or - @hunk2a = ( 'a' ); - @hunk2b = ( ); - -That is, we would delete item 0 ('a') from @a. - -Since C<@diff[4..7]> contains (1,0,3,2) in our example, the third hunk -consists of these two lists of items: - - @hunk3a = @a[ $cdiff[4] .. $cdiff[6]-1 ]; - @hunk3a = @b[ $cdiff[5] .. $cdiff[7]-1 ]; - # or - @hunk3a = @a[ 1 .. 3-1 ]; - @hunk3a = @b[ 0 .. 2-1 ]; - # or - @hunk3a = @a[ 1 .. 2 ]; - @hunk3a = @b[ 0 .. 1 ]; - # or - @hunk3a = qw( b c ); - @hunk3a = qw( b c ); - -Note that this third hunk contains unchanged items as our convention demands. - -You can continue this process until you reach the last two indices, -which will always be the number of items in each sequence. This is -required so that subtracting one from each will give you the indices to -the last items in each sequence. - -=head2 C - -C used to be the most general facility provided by -this module (the new OO interface is more powerful and much easier to -use). - -Imagine that there are two arrows. Arrow A points to an element of -sequence A, and arrow B points to an element of the sequence B. -Initially, the arrows point to the first elements of the respective -sequences. C will advance the arrows through the -sequences one element at a time, calling an appropriate user-specified -callback function before each advance. It willadvance the arrows in -such a way that if there are equal elements C<$A[$i]> and C<$B[$j]> -which are equal and which are part of the LCS, there will be some moment -during the execution of C when arrow A is pointing -to C<$A[$i]> and arrow B is pointing to C<$B[$j]>. When this happens, -C will call the C callback function and then -it will advance both arrows. - -Otherwise, one of the arrows is pointing to an element of its sequence -that is not part of the LCS. C will advance that -arrow and will call the C or the C callback, -depending on which arrow it advanced. If both arrows point to elements -that are not part of the LCS, then C will advance -one of them and call the appropriate callback, but it is not specified -which it will call. - -The arguments to C are the two sequences to -traverse, and a hash which specifies the callback functions, like this: - - traverse_sequences( - \@seq1, \@seq2, - { MATCH => $callback_1, - DISCARD_A => $callback_2, - DISCARD_B => $callback_3, - } - ); - -Callbacks for MATCH, DISCARD_A, and DISCARD_B are invoked with at least -the indices of the two arrows as their arguments. They are not expected -to return any values. If a callback is omitted from the table, it is -not called. - -Callbacks for A_FINISHED and B_FINISHED are invoked with at least the -corresponding index in A or B. - -If arrow A reaches the end of its sequence, before arrow B does, -C will call the C callback when it -advances arrow B, if there is such a function; if not it will call -C instead. Similarly if arrow B finishes first. -C returns when both arrows are at the ends of their -respective sequences. It returns true on success and false on failure. -At present there is no way to fail. - -C may be passed an optional fourth parameter; this -is a CODE reference to a key generation function. See L. - -Additional parameters, if any, will be passed to the key generation function. - -If you want to pass additional parameters to your callbacks, but don't -need a custom key generation function, you can get the default by -passing undef: - - traverse_sequences( - \@seq1, \@seq2, - { MATCH => $callback_1, - DISCARD_A => $callback_2, - DISCARD_B => $callback_3, - }, - undef, # default key-gen - $myArgument1, - $myArgument2, - $myArgument3, - ); - -C does not have a useful return value; you are -expected to plug in the appropriate behavior with the callback -functions. - -=head2 C - -C is an alternative to C. It -uses a different algorithm to iterate through the entries in the -computed LCS. Instead of sticking to one side and showing element changes -as insertions and deletions only, it will jump back and forth between -the two sequences and report I occurring as deletions on one -side followed immediatly by an insertion on the other side. - -In addition to the C, C, and C callbacks -supported by C, C supports -a C callback indicating that one element got C by another: - - traverse_balanced( - \@seq1, \@seq2, - { MATCH => $callback_1, - DISCARD_A => $callback_2, - DISCARD_B => $callback_3, - CHANGE => $callback_4, - } - ); - -If no C callback is specified, C -will map C events to C and C actions, -therefore resulting in a similar behaviour as C -with different order of events. - -C might be a bit slower than C, -noticable only while processing huge amounts of data. - -The C function of this module -is implemented as call to C. - -C does not have a useful return value; you are expected to -plug in the appropriate behavior with the callback functions. - -=head1 KEY GENERATION FUNCTIONS - -Most of the functions accept an optional extra parameter. This is a -CODE reference to a key generating (hashing) function that should return -a string that uniquely identifies a given element. It should be the -case that if two elements are to be considered equal, their keys should -be the same (and the other way around). If no key generation function -is provided, the key will be the element as a string. - -By default, comparisons will use "eq" and elements will be turned into keys -using the default stringizing operator '""'. - -Where this is important is when you're comparing something other than -strings. If it is the case that you have multiple different objects -that should be considered to be equal, you should supply a key -generation function. Otherwise, you have to make sure that your arrays -contain unique references. - -For instance, consider this example: - - package Person; - - sub new - { - my $package = shift; - return bless { name => '', ssn => '', @_ }, $package; - } - - sub clone - { - my $old = shift; - my $new = bless { %$old }, ref($old); - } - - sub hash - { - return shift()->{'ssn'}; - } - - my $person1 = Person->new( name => 'Joe', ssn => '123-45-6789' ); - my $person2 = Person->new( name => 'Mary', ssn => '123-47-0000' ); - my $person3 = Person->new( name => 'Pete', ssn => '999-45-2222' ); - my $person4 = Person->new( name => 'Peggy', ssn => '123-45-9999' ); - my $person5 = Person->new( name => 'Frank', ssn => '000-45-9999' ); - -If you did this: - - my $array1 = [ $person1, $person2, $person4 ]; - my $array2 = [ $person1, $person3, $person4, $person5 ]; - Algorithm::Diff::diff( $array1, $array2 ); - -everything would work out OK (each of the objects would be converted -into a string like "Person=HASH(0x82425b0)" for comparison). - -But if you did this: - - my $array1 = [ $person1, $person2, $person4 ]; - my $array2 = [ $person1, $person3, $person4->clone(), $person5 ]; - Algorithm::Diff::diff( $array1, $array2 ); - -$person4 and $person4->clone() (which have the same name and SSN) -would be seen as different objects. If you wanted them to be considered -equivalent, you would have to pass in a key generation function: - - my $array1 = [ $person1, $person2, $person4 ]; - my $array2 = [ $person1, $person3, $person4->clone(), $person5 ]; - Algorithm::Diff::diff( $array1, $array2, \&Person::hash ); - -This would use the 'ssn' field in each Person as a comparison key, and -so would consider $person4 and $person4->clone() as equal. - -You may also pass additional parameters to the key generation function -if you wish. - -=head1 ERROR CHECKING - -If you pass these routines a non-reference and they expect a reference, -they will die with a message. - -=head1 AUTHOR - -This version released by Tye McQueen (http://perlmonks.org/?node=tye). - -=head1 LICENSE - -Parts Copyright (c) 2000-2004 Ned Konz. All rights reserved. -Parts by Tye McQueen. - -This program is free software; you can redistribute it and/or modify it -under the same terms as Perl. - -=head1 MAILING LIST - -Mark-Jason still maintains a mailing list. To join a low-volume mailing -list for announcements related to diff and Algorithm::Diff, send an -empty mail message to mjd-perl-diff-request@plover.com. - -=head1 CREDITS - -Versions through 0.59 (and much of this documentation) were written by: - -Mark-Jason Dominus, mjd-perl-diff@plover.com - -This version borrows some documentation and routine names from -Mark-Jason's, but Diff.pm's code was completely replaced. - -This code was adapted from the Smalltalk code of Mario Wolczko -, which is available at -ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st - -C and C were written by Mike Schilli -. - -The algorithm is that described in -I, -CACM, vol.20, no.5, pp.350-353, May 1977, with a few -minor improvements to improve the speed. - -Much work was done by Ned Konz (perl@bike-nomad.com). - -The OO interface and some other changes are by Tye McQueen. - -=cut diff --git a/tools/cmake/scripts/IPC/Run.pm b/tools/cmake/scripts/IPC/Run.pm deleted file mode 100644 index 079c69faf1..0000000000 --- a/tools/cmake/scripts/IPC/Run.pm +++ /dev/null @@ -1,4418 +0,0 @@ -package IPC::Run; -use bytes; - -=pod - -=head1 NAME - -IPC::Run - system() and background procs w/ piping, redirs, ptys (Unix, Win32) - -=head1 SYNOPSIS - - ## First,a command to run: - my @cat = qw( cat ); - - ## Using run() instead of system(): - use IPC::Run qw( run timeout ); - - run \@cmd, \$in, \$out, \$err, timeout( 10 ) or die "cat: $?" - - # Can do I/O to sub refs and filenames, too: - run \@cmd, '<', "in.txt", \&out, \&err or die "cat: $?" - run \@cat, '<', "in.txt", '>>', "out.txt", '2>>', "err.txt"; - - - # Redirecting using psuedo-terminals instad of pipes. - run \@cat, 'pty>', \$out_and_err; - - ## Scripting subprocesses (like Expect): - - use IPC::Run qw( start pump finish timeout ); - - # Incrementally read from / write to scalars. - # $in is drained as it is fed to cat's stdin, - # $out accumulates cat's stdout - # $err accumulates cat's stderr - # $h is for "harness". - my $h = start \@cat, \$in, \$out, \$err, timeout( 10 ); - - $in .= "some input\n"; - pump $h until $out =~ /input\n/g; - - $in .= "some more input\n"; - pump $h until $out =~ /\G.*more input\n/; - - $in .= "some final input\n"; - finish $h or die "cat returned $?"; - - warn $err if $err; - print $out; ## All of cat's output - - # Piping between children - run \@cat, '|', \@gzip; - - # Multiple children simultaneously (run() blocks until all - # children exit, use start() for background execution): - run \@foo1, '&', \@foo2; - - # Calling \&set_up_child in the child before it executes the - # command (only works on systems with true fork() & exec()) - # exceptions thrown in set_up_child() will be propagated back - # to the parent and thrown from run(). - run \@cat, \$in, \$out, - init => \&set_up_child; - - # Read from / write to file handles you open and close - open IN, 'out.txt' or die $!; - print OUT "preamble\n"; - run \@cat, \*IN, \*OUT or die "cat returned $?"; - print OUT "postamble\n"; - close IN; - close OUT; - - # Create pipes for you to read / write (like IPC::Open2 & 3). - $h = start - \@cat, - 'pipe', \*OUT, - '2>pipe', \*ERR - or die "cat returned $?"; - print IN "some input\n"; - close IN; - print , ; - finish $h; - - # Mixing input and output modes - run \@cat, 'in.txt', \&catch_some_out, \*ERR_LOG ); - - # Other redirection constructs - run \@cat, '>&', \$out_and_err; - run \@cat, '2>&1'; - run \@cat, '0<&3'; - run \@cat, '<&-'; - run \@cat, '3<', \$in3; - run \@cat, '4>', \$out4; - # etc. - - # Passing options: - run \@cat, 'in.txt', debug => 1; - - # Call this system's shell, returns TRUE on 0 exit code - # THIS IS THE OPPOSITE SENSE OF system()'s RETURN VALUE - run "cat a b c" or die "cat returned $?"; - - # Launch a sub process directly, no shell. Can't do redirection - # with this form, it's here to behave like system() with an - # inverted result. - $r = run "cat a b c"; - - # Read from a file in to a scalar - run io( "filename", 'r', \$recv ); - run io( \*HANDLE, 'r', \$recv ); - -=head1 DESCRIPTION - -IPC::Run allows you to run and interact with child processes using files, pipes, -and pseudo-ttys. Both system()-style and scripted usages are supported and -may be mixed. Likewise, functional and OO API styles are both supported and -may be mixed. - -Various redirection operators reminiscent of those seen on common Unix and DOS -command lines are provided. - -Before digging in to the details a few LIMITATIONS are important enough -to be mentioned right up front: - -=over - -=item Win32 Support - -Win32 support is working but B, but does pass all relevant tests -on NT 4.0. See L. - -=item pty Support - -If you need pty support, IPC::Run should work well enough most of the -time, but IO::Pty is being improved, and IPC::Run will be improved to -use IO::Pty's new features when it is release. - -The basic problem is that the pty needs to initialize itself before the -parent writes to the master pty, or the data written gets lost. So -IPC::Run does a sleep(1) in the parent after forking to (hopefully) give -the child a chance to run. This is a kludge that works well on non -heavily loaded systems :(. - -ptys are not supported yet under Win32, but will be emulated... - -=item Debugging Tip - -You may use the environment variable C to see what's going on -under the hood: - - $ IPCRUNDEBUG=basic myscript # prints minimal debugging - $ IPCRUNDEBUG=data myscript # prints all data reads/writes - $ IPCRUNDEBUG=details myscript # prints lots of low-level details - $ IPCRUNDEBUG=gory myscript # (Win32 only) prints data moving through - # the helper processes. - -=back - -We now return you to your regularly scheduled documentation. - -=head2 Harnesses - -Child processes and I/O handles are gathered in to a harness, then -started and run until the processing is finished or aborted. - -=head2 run() vs. start(); pump(); finish(); - -There are two modes you can run harnesses in: run() functions as an -enhanced system(), and start()/pump()/finish() allow for background -processes and scripted interactions with them. - -When using run(), all data to be sent to the harness is set up in -advance (though one can feed subprocesses input from subroutine refs to -get around this limitation). The harness is run and all output is -collected from it, then any child processes are waited for: - - run \@cmd, \< and C<$err> in our examples. - -Regular expressions can be used to wait for appropriate output in -several ways. The C example in the previous section demonstrates -how to pump() until some string appears in the output. Here's an -example that uses C to fetch files from a remote server: - - $h = harness \@smbclient, \$in, \$out; - - $in = "cd /src\n"; - $h->pump until $out =~ /^smb.*> \Z/m; - die "error cding to /src:\n$out" if $out =~ "ERR"; - $out = ''; - - $in = "mget *\n"; - $h->pump until $out =~ /^smb.*> \Z/m; - die "error retrieving files:\n$out" if $out =~ "ERR"; - - $in = "quit\n"; - $h->finish; - -Notice that we carefully clear $out after the first command/response -cycle? That's because IPC::Run does not delete $out when we continue, -and we don't want to trip over the old output in the second -command/response cycle. - -Say you want to accumulate all the output in $out and analyze it -afterwards. Perl offers incremental regular expression matching using -the C and pattern matching idiom and the C<\G> assertion. -IPC::Run is careful not to disturb the current C value for -scalars it appends data to, so we could modify the above so as not to -destroy $out by adding a couple of C modifiers. The C keeps us -from tripping over the previous prompt and the C keeps us from -resetting the prior match position if the expected prompt doesn't -materialize immediately: - - $h = harness \@smbclient, \$in, \$out; - - $in = "cd /src\n"; - $h->pump until $out =~ /^smb.*> \Z/mgc; - die "error cding to /src:\n$out" if $out =~ "ERR"; - - $in = "mget *\n"; - $h->pump until $out =~ /^smb.*> \Z/mgc; - die "error retrieving files:\n$out" if $out =~ "ERR"; - - $in = "quit\n"; - $h->finish; - - analyze( $out ); - -When using this technique, you may want to preallocate $out to have -plenty of memory or you may find that the act of growing $out each time -new input arrives causes an C slowdown as $out grows. -Say we expect no more than 10,000 characters of input at the most. To -preallocate memory to $out, do something like: - - my $out = "x" x 10_000; - $out = ""; - -C will allocate at least 10,000 characters' worth of space, then -mark the $out as having 0 length without freeing all that yummy RAM. - -=head2 Timeouts and Timers - -More than likely, you don't want your subprocesses to run forever, and -sometimes it's nice to know that they're going a little slowly. -Timeouts throw exceptions after a some time has elapsed, timers merely -cause pump() to return after some time has elapsed. Neither is -reset/restarted automatically. - -Timeout objects are created by calling timeout( $interval ) and passing -the result to run(), start() or harness(). The timeout period starts -ticking just after all the child processes have been fork()ed or -spawn()ed, and are polled for expiration in run(), pump() and finish(). -If/when they expire, an exception is thrown. This is typically useful -to keep a subprocess from taking too long. - -If a timeout occurs in run(), all child processes will be terminated and -all file/pipe/ptty descriptors opened by run() will be closed. File -descriptors opened by the parent process and passed in to run() are not -closed in this event. - -If a timeout occurs in pump(), pump_nb(), or finish(), it's up to you to -decide whether to kill_kill() all the children or to implement some more -graceful fallback. No I/O will be closed in pump(), pump_nb() or -finish() by such an exception (though I/O is often closed down in those -routines during the natural course of events). - -Often an exception is too harsh. timer( $interval ) creates timer -objects that merely prevent pump() from blocking forever. This can be -useful for detecting stalled I/O or printing a soothing message or "." -to pacify an anxious user. - -Timeouts and timers can both be restarted at any time using the timer's -start() method (this is not the start() that launches subprocesses). To -restart a timer, you need to keep a reference to the timer: - - ## Start with a nice long timeout to let smbclient connect. If - ## pump or finish take too long, an exception will be thrown. - - my $h; - eval { - $h = harness \@smbclient, \$in, \$out, \$err, ( my $t = timeout 30 ); - sleep 11; # No effect: timer not running yet - - start $h; - $in = "cd /src\n"; - pump $h until ! length $in; - - $in = "ls\n"; - ## Now use a short timeout, since this should be faster - $t->start( 5 ); - pump $h until ! length $in; - - $t->start( 10 ); ## Give smbclient a little while to shut down. - $h->finish; - }; - if ( $@ ) { - my $x = $@; ## Preserve $@ in case another exception occurs - $h->kill_kill; ## kill it gently, then brutally if need be, or just - ## brutally on Win32. - die $x; - } - -Timeouts and timers are I checked once the subprocesses are shut -down; they will not expire in the interval between the last valid -process and when IPC::Run scoops up the processes' result codes, for -instance. - -=head2 Spawning synchronization, child exception propagation - -start() pauses the parent until the child executes the command or CODE -reference and propagates any exceptions thrown (including exec() -failure) back to the parent. This has several pleasant effects: any -exceptions thrown in the child, including exec() failure, come flying -out of start() or run() as though they had occurred in the parent. - -This includes exceptions your code thrown from init subs. In this -example: - - eval { - run \@cmd, init => sub { die "blast it! foiled again!" }; - }; - print $@; - -the exception "blast it! foiled again" will be thrown from the child -process (preventing the exec()) and printed by the parent. - -In situations like - - run \@cmd1, "|", \@cmd2, "|", \@cmd3; - -@cmd1 will be initted and exec()ed before @cmd2, and @cmd2 before @cmd3. -This can save time and prevent oddball errors emitted by later commands -when earlier commands fail to execute. Note that IPC::Run doesn't start -any commands unless it can find the executables referenced by all -commands. These executables must pass both the C<-f> and C<-x> tests -described in L. - -Another nice effect is that init() subs can take their time doing things -and there will be no problems caused by a parent continuing to execute -before a child's init() routine is complete. Say the init() routine -needs to open a socket or a temp file that the parent wants to connect -to; without this synchronization, the parent will need to implement a -retry loop to wait for the child to run, since often, the parent gets a -lot of things done before the child's first timeslice is allocated. - -This is also quite necessary for pseudo-tty initialization, which needs -to take place before the parent writes to the child via pty. Writes -that occur before the pty is set up can get lost. - -A final, minor, nicety is that debugging output from the child will be -emitted before the parent continues on, making for much clearer debugging -output in complex situations. - -The only drawback I can conceive of is that the parent can't continue to -operate while the child is being initted. If this ever becomes a -problem in the field, we can implement an option to avoid this behavior, -but I don't expect it to. - -B: executing CODE references isn't supported on Win32, see -L for details. - -=head2 Syntax - -run(), start(), and harness() can all take a harness specification -as input. A harness specification is either a single string to be passed -to the systems' shell: - - run "echo 'hi there'"; - -or a list of commands, io operations, and/or timers/timeouts to execute. -Consecutive commands must be separated by a pipe operator '|' or an '&'. -External commands are passed in as array references, and, on systems -supporting fork(), Perl code may be passed in as subs: - - run \@cmd; - run \@cmd1, '|', \@cmd2; - run \@cmd1, '&', \@cmd2; - run \&sub1; - run \&sub1, '|', \&sub2; - run \&sub1, '&', \&sub2; - -'|' pipes the stdout of \@cmd1 the stdin of \@cmd2, just like a -shell pipe. '&' does not. Child processes to the right of a '&' -will have their stdin closed unless it's redirected-to. - -L objects may be passed in as well, whether or not -child processes are also specified: - - run io( "infile", ">", \$in ), io( "outfile", "<", \$in ); - -as can L objects: - - run \@cmd, io( "outfile", "<", \$in ), timeout( 10 ); - -Commands may be followed by scalar, sub, or i/o handle references for -redirecting -child process input & output: - - run \@cmd, \undef, \$out; - run \@cmd, \$in, \$out; - run \@cmd1, \&in, '|', \@cmd2, \*OUT; - run \@cmd1, \*IN, '|', \@cmd2, \&out; - -This is known as succinct redirection syntax, since run(), start() -and harness(), figure out which file descriptor to redirect and how. -File descriptor 0 is presumed to be an input for -the child process, all others are outputs. The assumed file -descriptor always starts at 0, unless the command is being piped to, -in which case it starts at 1. - -To be explicit about your redirects, or if you need to do more complex -things, there's also a redirection operator syntax: - - run \@cmd, '<', \undef, '>', \$out; - run \@cmd, '<', \undef, '>&', \$out_and_err; - run( - \@cmd1, - '<', \$in, - '|', \@cmd2, - \$out - ); - -Operator syntax is required if you need to do something other than simple -redirection to/from scalars or subs, like duping or closing file descriptors -or redirecting to/from a named file. The operators are covered in detail -below. - -After each \@cmd (or \&foo), parsing begins in succinct mode and toggles to -operator syntax mode when an operator (ie plain scalar, not a ref) is seen. -Once in -operator syntax mode, parsing only reverts to succinct mode when a '|' or -'&' is seen. - -In succinct mode, each parameter after the \@cmd specifies what to -do with the next highest file descriptor. These File descriptor start -with 0 (stdin) unless stdin is being piped to (C<'|', \@cmd>), in which -case they start with 1 (stdout). Currently, being on the left of -a pipe (C<\@cmd, \$out, \$err, '|'>) does I cause stdout to be -skipped, though this may change since it's not as DWIMerly as it -could be. Only stdin is assumed to be an -input in succinct mode, all others are assumed to be outputs. - -If no piping or redirection is specified for a child, it will inherit -the parent's open file handles as dictated by your system's -close-on-exec behavior and the $^F flag, except that processes after a -'&' will not inherit the parent's stdin. Also note that $^F does not -affect file descriptors obtained via POSIX, since it only applies to -full-fledged Perl file handles. Such processes will have their stdin -closed unless it has been redirected-to. - -If you want to close a child processes stdin, you may do any of: - - run \@cmd, \undef; - run \@cmd, \""; - run \@cmd, '<&-'; - run \@cmd, '0<&-'; - -Redirection is done by placing redirection specifications immediately -after a command or child subroutine: - - run \@cmd1, \$in, '|', \@cmd2, \$out; - run \@cmd1, '<', \$in, '|', \@cmd2, '>', \$out; - -If you omit the redirection operators, descriptors are counted -starting at 0. Descriptor 0 is assumed to be input, all others -are outputs. A leading '|' consumes descriptor 0, so this -works as expected. - - run \@cmd1, \$in, '|', \@cmd2, \$out; - -The parameter following a redirection operator can be a scalar ref, -a subroutine ref, a file name, an open filehandle, or a closed -filehandle. - -If it's a scalar ref, the child reads input from or sends output to -that variable: - - $in = "Hello World.\n"; - run \@cat, \$in, \$out; - print $out; - -Scalars used in incremental (start()/pump()/finish()) applications are treated -as queues: input is removed from input scalers, resulting in them dwindling -to '', and output is appended to output scalars. This is not true of -harnesses run() in batch mode. - -It's usually wise to append new input to be sent to the child to the input -queue, and you'll often want to zap output queues to '' before pumping. - - $h = start \@cat, \$in; - $in = "line 1\n"; - pump $h; - $in .= "line 2\n"; - pump $h; - $in .= "line 3\n"; - finish $h; - -The final call to finish() must be there: it allows the child process(es) -to run to completion and waits for their exit values. - -=head1 OBSTINATE CHILDREN - -Interactive applications are usually optimized for human use. This -can help or hinder trying to interact with them through modules like -IPC::Run. Frequently, programs alter their behavior when they detect -that stdin, stdout, or stderr are not connected to a tty, assuming that -they are being run in batch mode. Whether this helps or hurts depends -on which optimizations change. And there's often no way of telling -what a program does in these areas other than trial and error and, -occasionally, reading the source. This includes different versions -and implementations of the same program. - -All hope is not lost, however. Most programs behave in reasonably -tractable manners, once you figure out what it's trying to do. - -Here are some of the issues you might need to be aware of. - -=over - -=item * - -fflush()ing stdout and stderr - -This lets the user see stdout and stderr immediately. Many programs -undo this optimization if stdout is not a tty, making them harder to -manage by things like IPC::Run. - -Many programs decline to fflush stdout or stderr if they do not -detect a tty there. Some ftp commands do this, for instance. - -If this happens to you, look for a way to force interactive behavior, -like a command line switch or command. If you can't, you will -need to use a pseudo terminal ('pty>'). - -=item * - -false prompts - -Interactive programs generally do not guarantee that output from user -commands won't contain a prompt string. For example, your shell prompt -might be a '$', and a file named '$' might be the only file in a directory -listing. - -This can make it hard to guarantee that your output parser won't be fooled -into early termination of results. - -To help work around this, you can see if the program can alter it's -prompt, and use something you feel is never going to occur in actual -practice. - -You should also look for your prompt to be the only thing on a line: - - pump $h until $out =~ /^\s?\z/m; - -(use C<(?!\n)\Z> in place of C<\z> on older perls). - -You can also take the approach that IPC::ChildSafe takes and emit a -command with known output after each 'real' command you issue, then -look for this known output. See new_appender() and new_chunker() for -filters that can help with this task. - -If it's not convenient or possibly to alter a prompt or use a known -command/response pair, you might need to autodetect the prompt in case -the local version of the child program is different then the one -you tested with, or if the user has control over the look & feel of -the prompt. - -=item * - -Refusing to accept input unless stdin is a tty. - -Some programs, for security reasons, will only accept certain types -of input from a tty. su, notable, will not prompt for a password unless -it's connected to a tty. - -If this is your situation, use a pseudo terminal ('pty>'). - -=item * - -Not prompting unless connected to a tty. - -Some programs don't prompt unless stdin or stdout is a tty. See if you can -turn prompting back on. If not, see if you can come up with a command that -you can issue after every real command and look for it's output, as -IPC::ChildSafe does. There are two filters included with IPC::Run that -can help with doing this: appender and chunker (see new_appender() and -new_chunker()). - -=item * - -Different output format when not connected to a tty. - -Some commands alter their formats to ease machine parsability when they -aren't connected to a pipe. This is actually good, but can be surprising. - -=back - -=head1 PSEUDO TERMINALS - -On systems providing pseudo terminals under /dev, IPC::Run can use IO::Pty -(available on CPAN) to provide a terminal environment to subprocesses. -This is necessary when the subprocess really wants to think it's connected -to a real terminal. - -=head2 CAVEATS - -Psuedo-terminals are not pipes, though they are similar. Here are some -differences to watch out for. - -=over - -=item Echoing - -Sending to stdin will cause an echo on stdout, which occurs before each -line is passed to the child program. There is currently no way to -disable this, although the child process can and should disable it for -things like passwords. - -=item Shutdown - -IPC::Run cannot close a pty until all output has been collected. This -means that it is not possible to send an EOF to stdin by half-closing -the pty, as we can when using a pipe to stdin. - -This means that you need to send the child process an exit command or -signal, or run() / finish() will time out. Be careful not to expect a -prompt after sending the exit command. - -=item Command line editing - -Some subprocesses, notable shells that depend on the user's prompt -settings, will reissue the prompt plus the command line input so far -once for each character. - -=item '>pty>' means '&>pty>', not '1>pty>' - -The pseudo terminal redirects both stdout and stderr unless you specify -a file descriptor. If you want to grab stderr separately, do this: - - start \@cmd, 'pty>', \$out, '2>', \$err; - -=item stdin, stdout, and stderr not inherited - -Child processes harnessed to a pseudo terminal have their stdin, stdout, -and stderr completely closed before any redirection operators take -effect. This casts of the bonds of the controlling terminal. This is -not done when using pipes. - -Right now, this affects all children in a harness that has a pty in use, -even if that pty would not affect a particular child. That's a bug and -will be fixed. Until it is, it's best not to mix-and-match children. - -=back - -=head2 Redirection Operators - - Operator SHNP Description - ======== ==== =========== - <, N< SHN Redirects input to a child's fd N (0 assumed) - - >, N> SHN Redirects output from a child's fd N (1 assumed) - >>, N>> SHN Like '>', but appends to scalars or named files - >&, &> SHN Redirects stdout & stderr from a child process - - pty, N>pty S Like '>', but uses a pseudo-tty instead of a pipe - - N<&M Dups input fd N to input fd M - M>&N Dups output fd N to input fd M - N<&- Closes fd N - - pipe, N>pipe P Pipe opens H for caller to read, write, close. - -'N' and 'M' are placeholders for integer file descriptor numbers. The -terms 'input' and 'output' are from the child process's perspective. - -The SHNP field indicates what parameters an operator can take: - - S: \$scalar or \&function references. Filters may be used with - these operators (and only these). - H: \*HANDLE or IO::Handle for caller to open, and close - N: "file name". - P: \*HANDLE opened by IPC::Run as the parent end of a pipe, but read - and written to and closed by the caller (like IPC::Open3). - -=over - -=item Redirecting input: [n]<, [n] -below for more information. - -The : The handle returned is actually a socket handle, so you can -use select() on it. - -=item Redirecting output: [n]>, [n]>>, [n]>&[m], [n]>pipe - -You can redirect any output the child emits -to a scalar variable, subroutine, file handle, or file name. You -can have &run truncate or append to named files or scalars. If -you are redirecting stdin as well, or if the command is on the -receiving end of a pipeline ('|'), you can omit the redirection -operator: - - @ls = ( 'ls' ); - run \@ls, \undef, \$out - or die "ls returned $?"; - - run \@ls, \undef, \&out; ## Calls &out each time some output - ## is received from the child's - ## when undef is returned. - - run \@ls, \undef, '2>ls.err'; - run \@ls, '2>', 'ls.err'; - -The two parameter form guarantees that the filename -will not be interpreted as a redirection operator: - - run \@ls, '>', "&more"; - run \@ls, '2>', ">foo\n"; - -You can pass file handles you've opened for writing: - - open( *OUT, ">out.txt" ); - open( *ERR, ">err.txt" ); - run \@cat, \*OUT, \*ERR; - -Passing a scalar reference and a code reference requires a little -more work, but allows you to capture all of the output in a scalar -or each piece of output by a callback: - -These two do the same things: - - run( [ 'ls' ], '2>', sub { $err_out .= $_[0] } ); - -does the same basic thing as: - - run( [ 'ls' ], '2>', \$err_out ); - -The subroutine will be called each time some data is read from the child. - -The >pipe operator is different in concept than the other '>' operators, -although it's syntax is similar: - - $h = start \@cat, $in, '>pipe', \*OUT, '2>pipe', \*ERR; - $in = "hello world\n"; - finish $h; - print ; - print ; - close OUT; - close ERR; - -causes two pipe to be created, with one end attached to cat's stdout -and stderr, respectively, and the other left open on OUT and ERR, so -that the script can manually -read(), select(), etc. on them. This is like -the behavior of IPC::Open2 and IPC::Open3. - -B: The handle returned is actually a socket handle, so you can -use select() on it. - -=item Duplicating output descriptors: >&m, n>&m - -This duplicates output descriptor number n (default is 1 if n is omitted) -from descriptor number m. - -=item Duplicating input descriptors: <&m, n<&m - -This duplicates input descriptor number n (default is 0 if n is omitted) -from descriptor number m - -=item Closing descriptors: <&-, 3<&- - -This closes descriptor number n (default is 0 if n is omitted). The -following commands are equivalent: - - run \@cmd, \undef; - run \@cmd, '<&-'; - run \@cmd, ', >&, &>pipe, >pipe& - -The following pairs of commands are equivalent: - - run \@cmd, '>&', \$out; run \@cmd, '>', \$out, '2>&1'; - run \@cmd, '>&', 'out.txt'; run \@cmd, '>', 'out.txt', '2>&1'; - -etc. - -File descriptor numbers are not permitted to the left or the right of -these operators, and the '&' may occur on either end of the operator. - -The '&>pipe' and '>pipe&' variants behave like the '>pipe' operator, except -that both stdout and stderr write to the created pipe. - -=item Redirection Filters - -Both input redirections and output redirections that use scalars or -subs as endpoints may have an arbitrary number of filter subs placed -between them and the child process. This is useful if you want to -receive output in chunks, or if you want to massage each chunk of -data sent to the child. To use this feature, you must use operator -syntax: - - run( - \@cmd - '<', \&in_filter_2, \&in_filter_1, $in, - '>', \&out_filter_1, \&in_filter_2, $out, - ); - -This capability is not provided for IO handles or named files. - -Two filters are provided by IPC::Run: appender and chunker. Because -these may take an argument, you need to use the constructor functions -new_appender() and new_chunker() rather than using \& syntax: - - run( - \@cmd - '<', new_appender( "\n" ), $in, - '>', new_chunker, $out, - ); - -=back - -=head2 Just doing I/O - -If you just want to do I/O to a handle or file you open yourself, you -may specify a filehandle or filename instead of a command in the harness -specification: - - run io( "filename", '>', \$recv ); - - $h = start io( $io, '>', \$recv ); - - $h = harness \@cmd, '&', io( "file", '<', \$send ); - -=head2 Options - -Options are passed in as name/value pairs: - - run \@cat, \$in, debug => 1; - -If you pass the debug option, you may want to pass it in first, so you -can see what parsing is going on: - - run debug => 1, \@cat, \$in; - -=over - -=item debug - -Enables debugging output in parent and child. Debugging info is emitted -to the STDERR that was present when IPC::Run was first Ced (it's -Ced out of the way so that it can be redirected in children without -having debugging output emitted on it). - -=back - -=head1 RETURN VALUES - -harness() and start() return a reference to an IPC::Run harness. This is -blessed in to the IPC::Run package, so you may make later calls to -functions as members if you like: - - $h = harness( ... ); - $h->start; - $h->pump; - $h->finish; - - $h = start( .... ); - $h->pump; - ... - -Of course, using method call syntax lets you deal with any IPC::Run -subclasses that might crop up, but don't hold your breath waiting for -any. - -run() and finish() return TRUE when all subcommands exit with a 0 result -code. B. - -All routines raise exceptions (via die()) when error conditions are -recognized. A non-zero command result is not treated as an error -condition, since some commands are tests whose results are reported -in their exit codes. - -=head1 ROUTINES - -=over - -=cut - -use strict; -use Exporter (); -use vars qw{$VERSION @ISA @FILTER_IMP @FILTERS @API @EXPORT_OK %EXPORT_TAGS}; -BEGIN { - $VERSION = '0.94'; - @ISA = qw{ Exporter }; - - ## We use @EXPORT for the end user's convenience: there's only one function - ## exported, it's homonymous with the module, it's an unusual name, and - ## it can be suppressed by "use IPC::Run ();". - @FILTER_IMP = qw( input_avail get_more_input ); - @FILTERS = qw( - new_appender - new_chunker - new_string_source - new_string_sink - ); - @API = qw( - run - harness start pump pumpable finish - signal kill_kill reap_nb - io timer timeout - close_terminal - binary - ); - @EXPORT_OK = ( @API, @FILTER_IMP, @FILTERS, qw( Win32_MODE ) ); - %EXPORT_TAGS = ( - 'filter_imp' => \@FILTER_IMP, - 'all' => \@EXPORT_OK, - 'filters' => \@FILTERS, - 'api' => \@API, - ); - -} - -use strict; -use IPC::Run::Debug; -use Exporter; -use Fcntl; -use POSIX (); -BEGIN { if ($] < 5.008) { require Symbol; } } -use Carp; -use File::Spec (); -use IO::Handle; -require IPC::Run::IO; -require IPC::Run::Timer; -use UNIVERSAL (); - -use constant Win32_MODE => $^O =~ /os2|Win32/i; - -BEGIN { - if ( Win32_MODE ) { - eval "use IPC::Run::Win32Helper; 1;" - or ( $@ && die ) or die "$!"; - } - else { - eval "use File::Basename; 1;" or die $!; - } -} - -sub input_avail(); -sub get_more_input(); - -############################################################################### - -## -## Error constants, not too locale-dependant -use vars qw( $_EIO $_EAGAIN ); -use Errno qw( EIO EAGAIN ); -BEGIN { - local $!; - $! = EIO; $_EIO = qr/^$!/; - $! = EAGAIN; $_EAGAIN = qr/^$!/; -} - -## -## State machine states, set in $self->{STATE} -## -## These must be in ascending order numerically -## -sub _newed() {0} -sub _harnessed(){1} -sub _finished() {2} ## _finished behave almost exactly like _harnessed -sub _started() {3} - -## -## Which fds have been opened in the parent. This may have extra fds, since -## we aren't all that rigorous about closing these off, but that's ok. This -## is used on Unixish OSs to close all fds in the child that aren't needed -## by that particular child. -my %fds; - -## There's a bit of hackery going on here. -## -## We want to have any code anywhere be able to emit -## debugging statements without knowing what harness the code is -## being called in/from, since we'd need to pass a harness around to -## everything. -## -## Thus, $cur_self was born. - -use vars qw( $cur_self ); - -sub _debug_fd { - return fileno STDERR unless defined $cur_self; - - if ( _debugging && ! defined $cur_self->{DEBUG_FD} ) { - my $fd = select STDERR; $| = 1; select $fd; - $cur_self->{DEBUG_FD} = POSIX::dup fileno STDERR; - _debug( "debugging fd is $cur_self->{DEBUG_FD}\n" ) - if _debugging_details; - } - - return fileno STDERR unless defined $cur_self->{DEBUG_FD}; - - return $cur_self->{DEBUG_FD} -} - -sub DESTROY { - ## We absolutely do not want to do anything else here. We are likely - ## to be in a child process and we don't want to do things like kill_kill - ## ourself or cause other destruction. - my IPC::Run $self = shift; - POSIX::close $self->{DEBUG_FD} if defined $self->{DEBUG_FD}; - $self->{DEBUG_FD} = undef; -} - -## -## Support routines (NOT METHODS) -## -my %cmd_cache; - -sub _search_path { - my ( $cmd_name ) = @_; - if ( File::Spec->file_name_is_absolute( $cmd_name ) && -x $cmd_name) { - _debug "'", $cmd_name, "' is absolute" - if _debugging_details; - return $cmd_name; - } - - my $dirsep = - ( Win32_MODE - ? '[/\\\\]' - : $^O =~ /MacOS/ - ? ':' - : $^O =~ /VMS/ - ? '[\[\]]' - : '/' - ); - - if ( Win32_MODE - && ( $cmd_name =~ /$dirsep/ ) -# && ( $cmd_name !~ /\..+$/ ) ## Only run if cmd_name has no extension? - && ( $cmd_name !~ m!\.[^\\/\.]+$! ) - ) { - - _debug "no extension(.exe), checking ENV{PATHEXT}" if _debugging; - for ( split /;/, $ENV{PATHEXT} || ".COM;.BAT;.EXE" ) { - my $name = "$cmd_name$_"; - $cmd_name = $name, last if -f $name && -x _; - } - _debug "cmd_name is now '$cmd_name'" if _debugging; - } - - if ( $cmd_name =~ /($dirsep)/ ) { - _debug "'$cmd_name' contains '$1'" if _debugging; - croak "file not found: $cmd_name" unless -e $cmd_name; - croak "not a file: $cmd_name" unless -f $cmd_name; - croak "permission denied: $cmd_name" unless -x $cmd_name; - return $cmd_name; - } - - if ( exists $cmd_cache{$cmd_name} ) { - _debug "'$cmd_name' found in cache: '$cmd_cache{$cmd_name}'" - if _debugging; - return $cmd_cache{$cmd_name} if -x $cmd_cache{$cmd_name}; - _debug "'$cmd_cache{$cmd_name}' no longer executable, searching..." - if _debugging; - delete $cmd_cache{$cmd_name}; - } - - my @searched_in; - - ## This next bit is Unix/Win32 specific, unfortunately. - ## There's been some conversation about extending File::Spec to provide - ## a universal interface to PATH, but I haven't seen it yet. - my $re = Win32_MODE ? qr/;/ : qr/:/; - -LOOP: - for ( split( $re, $ENV{PATH} || '', -1 ) ) { - $_ = "." unless length $_; - push @searched_in, $_; - - my $prospect = File::Spec->catfile( $_, $cmd_name ); - my @prospects; - - @prospects = - ( Win32_MODE && ! ( -f $prospect && -x _ ) ) - ? map "$prospect$_", split /;/, $ENV{PATHEXT} || ".COM;.BAT;.EXE" - : ( $prospect ); - - for my $found ( @prospects ) { - if ( -f $found && -x _ ) { - $cmd_cache{$cmd_name} = $found; - last LOOP; - } - } - } - - if ( exists $cmd_cache{$cmd_name} ) { - _debug "'", $cmd_name, "' added to cache: '", $cmd_cache{$cmd_name}, "'" - if _debugging_details; - return $cmd_cache{$cmd_name}; - } - - croak "Command '$cmd_name' not found in " . join( ", ", @searched_in ); -} - - -sub _empty($) { ! ( defined $_[0] && length $_[0] ) } - -## 'safe' versions of otherwise fun things to do. See also IPC::Run::Win32Helper. -sub _close { - confess 'undef' unless defined $_[0]; - my $fd = $_[0] =~ /^\d+$/ ? $_[0] : fileno $_[0]; - my $r = POSIX::close $fd; - $r = $r ? '' : " ERROR $!"; - delete $fds{$fd}; - _debug "close( $fd ) = " . ( $r || 0 ) if _debugging_details; -} - -sub _dup { - confess 'undef' unless defined $_[0]; - my $r = POSIX::dup( $_[0] ); - croak "$!: dup( $_[0] )" unless defined $r; - $r = 0 if $r eq '0 but true'; - _debug "dup( $_[0] ) = $r" if _debugging_details; - $fds{$r} = 1; - return $r; -} - - -sub _dup2_rudely { - confess 'undef' unless defined $_[0] && defined $_[1]; - my $r = POSIX::dup2( $_[0], $_[1] ); - croak "$!: dup2( $_[0], $_[1] )" unless defined $r; - $r = 0 if $r eq '0 but true'; - _debug "dup2( $_[0], $_[1] ) = $r" if _debugging_details; - $fds{$r} = 1; - return $r; -} - -sub _exec { - confess 'undef passed' if grep !defined, @_; -# exec @_ or croak "$!: exec( " . join( ', ', @_ ) . " )"; - _debug 'exec()ing ', join " ", map "'$_'", @_ if _debugging_details; - -# { -## Commented out since we don't call this on Win32. -# # This works around the bug where 5.6.1 complains -# # "Can't exec ...: No error" after an exec on NT, where -# # exec() is simulated and actually returns in Perl's C -# # code, though Perl's &exec does not... -# no warnings "exec"; -# -# # Just in case the no warnings workaround -# # stops being a workaround, we don't want -# # old values of $! causing spurious strerr() -# # messages to appear in the "Can't exec" message -# undef $!; - exec { $_[0] } @_; -# } -# croak "$!: exec( " . join( ', ', map "'$_'", @_ ) . " )"; - ## Fall through so $! can be reported to parent. -} - - -sub _sysopen { - confess 'undef' unless defined $_[0] && defined $_[1]; -_debug sprintf( "O_RDONLY=0x%02x ", O_RDONLY ), -sprintf( "O_WRONLY=0x%02x ", O_WRONLY ), -sprintf( "O_RDWR=0x%02x ", O_RDWR ), -sprintf( "O_TRUNC=0x%02x ", O_TRUNC), -sprintf( "O_CREAT=0x%02x ", O_CREAT), -sprintf( "O_APPEND=0x%02x ", O_APPEND), -if _debugging_details; - my $r = POSIX::open( $_[0], $_[1], 0644 ); - croak "$!: open( $_[0], ", sprintf( "0x%03x", $_[1] ), " )" unless defined $r; - _debug "open( $_[0], ", sprintf( "0x%03x", $_[1] ), " ) = $r" - if _debugging_data; - $fds{$r} = 1; - return $r; -} - -sub _pipe { - ## Normal, blocking write for pipes that we read and the child writes, - ## since most children expect writes to stdout to block rather than - ## do a partial write. - my ( $r, $w ) = POSIX::pipe; - croak "$!: pipe()" unless defined $r; - _debug "pipe() = ( $r, $w ) " if _debugging_details; - $fds{$r} = $fds{$w} = 1; - return ( $r, $w ); -} - -sub _pipe_nb { - ## For pipes that we write, unblock the write side, so we can fill a buffer - ## and continue to select(). - ## Contributed by Borislav Deianov , with minor - ## bugfix on fcntl result by me. - local ( *R, *W ); - my $f = pipe( R, W ); - croak "$!: pipe()" unless defined $f; - my ( $r, $w ) = ( fileno R, fileno W ); - _debug "pipe_nb pipe() = ( $r, $w )" if _debugging_details; - unless ( Win32_MODE ) { - ## POSIX::fcntl doesn't take fd numbers, so gotta use Perl's and - ## then _dup the originals (which get closed on leaving this block) - my $fres = fcntl( W, &F_SETFL, O_WRONLY | O_NONBLOCK ); - croak "$!: fcntl( $w, F_SETFL, O_NONBLOCK )" unless $fres; - _debug "fcntl( $w, F_SETFL, O_NONBLOCK )" if _debugging_details; - } - ( $r, $w ) = ( _dup( $r ), _dup( $w ) ); - _debug "pipe_nb() = ( $r, $w )" if _debugging_details; - return ( $r, $w ); -} - -sub _pty { - require IO::Pty; - my $pty = IO::Pty->new(); - croak "$!: pty ()" unless $pty; - $pty->autoflush(); - $pty->blocking( 0 ) or croak "$!: pty->blocking ( 0 )"; - _debug "pty() = ( ", $pty->fileno, ", ", $pty->slave->fileno, " )" - if _debugging_details; - $fds{$pty->fileno} = $fds{$pty->slave->fileno} = 1; - return $pty; -} - - -sub _read { - confess 'undef' unless defined $_[0]; - my $s = ''; - my $r = POSIX::read( $_[0], $s, 10_000 ); - croak "$!: read( $_[0] )" if not($r) and $! != POSIX::EINTR; - $r ||= 0; - _debug "read( $_[0] ) = $r chars '$s'" if _debugging_data; - return $s; -} - - -## A METHOD, not a function. -sub _spawn { - my IPC::Run $self = shift; - my ( $kid ) = @_; - - _debug "opening sync pipe ", $kid->{PID} if _debugging_details; - my $sync_reader_fd; - ( $sync_reader_fd, $self->{SYNC_WRITER_FD} ) = _pipe; - $kid->{PID} = fork(); - croak "$! during fork" unless defined $kid->{PID}; - - unless ( $kid->{PID} ) { - ## _do_kid_and_exit closes sync_reader_fd since it closes all unwanted and - ## unloved fds. - $self->_do_kid_and_exit( $kid ); - } - _debug "fork() = ", $kid->{PID} if _debugging_details; - - ## Wait for kid to get to it's exec() and see if it fails. - _close $self->{SYNC_WRITER_FD}; - my $sync_pulse = _read $sync_reader_fd; - _close $sync_reader_fd; - - if ( ! defined $sync_pulse || length $sync_pulse ) { - if ( waitpid( $kid->{PID}, 0 ) >= 0 ) { - $kid->{RESULT} = $?; - } - else { - $kid->{RESULT} = -1; - } - $sync_pulse = - "error reading synchronization pipe for $kid->{NUM}, pid $kid->{PID}" - unless length $sync_pulse; - croak $sync_pulse; - } - return $kid->{PID}; - -## Wait for pty to get set up. This is a hack until we get synchronous -## selects. -if ( keys %{$self->{PTYS}} && $IO::Pty::VERSION < 0.9 ) { -_debug "sleeping to give pty a chance to init, will fix when newer IO::Pty arrives."; -sleep 1; -} -} - - -sub _write { - confess 'undef' unless defined $_[0] && defined $_[1]; - my $r = POSIX::write( $_[0], $_[1], length $_[1] ); - croak "$!: write( $_[0], '$_[1]' )" unless $r; - _debug "write( $_[0], '$_[1]' ) = $r" if _debugging_data; - return $r; -} - -=pod - -=over - -=item run - -Run takes a harness or harness specification and runs it, pumping -all input to the child(ren), closing the input pipes when no more -input is available, collecting all output that arrives, until the -pipes delivering output are closed, then waiting for the children to -exit and reaping their result codes. - -You may think of C as being like - - start( ... )->finish(); - -, though there is one subtle difference: run() does not -set \$input_scalars to '' like finish() does. If an exception is thrown -from run(), all children will be killed off "gently", and then "annihilated" -if they do not go gently (in to that dark night. sorry). - -If any exceptions are thrown, this does a L before propagating -them. - -=cut - -use vars qw( $in_run ); ## No, not Enron;) - -sub run { - local $in_run = 1; ## Allow run()-only optimizations. - my IPC::Run $self = start( @_ ); - my $r = eval { - $self->{clear_ins} = 0; - $self->finish; - }; - if ( $@ ) { - my $x = $@; - $self->kill_kill; - die $x; - } - return $r; -} - -=pod - -=item signal - - ## To send it a specific signal by name ("USR1"): - signal $h, "USR1"; - $h->signal ( "USR1" ); - -If $signal is provided and defined, sends a signal to all child processes. Try -not to send numeric signals, use C<"KILL"> instead of C<9>, for instance. -Numeric signals aren't portable. - -Throws an exception if $signal is undef. - -This will I clean up the harness, C it if you kill it. - -Normally TERM kills a process gracefully (this is what the command line utility -C does by default), INT is sent by one of the keys C<^C>, C or -CDelE>, and C is used to kill a process and make it coredump. - -The C signal is often used to get a process to "restart", rereading -config files, and C and C for really application-specific things. - -Often, running C (that's a lower case "L") on the command line will -list the signals present on your operating system. - -B: The signal subsystem is not at all portable. We *may* offer -to simulate C and C on some operating systems, submit code -to me if you want this. - -B: Up to and including perl v5.6.1, doing almost anything in a -signal handler could be dangerous. The most safe code avoids all -mallocs and system calls, usually by preallocating a flag before -entering the signal handler, altering the flag's value in the -handler, and responding to the changed value in the main system: - - my $got_usr1 = 0; - sub usr1_handler { ++$got_signal } - - $SIG{USR1} = \&usr1_handler; - while () { sleep 1; print "GOT IT" while $got_usr1--; } - -Even this approach is perilous if ++ and -- aren't atomic on your system -(I've never heard of this on any modern CPU large enough to run perl). - -=cut - -sub signal { - my IPC::Run $self = shift; - - local $cur_self = $self; - - $self->_kill_kill_kill_pussycat_kill unless @_; - - Carp::cluck "Ignoring extra parameters passed to kill()" if @_ > 1; - - my ( $signal ) = @_; - croak "Undefined signal passed to signal" unless defined $signal; - for ( grep $_->{PID} && ! defined $_->{RESULT}, @{$self->{KIDS}} ) { - _debug "sending $signal to $_->{PID}" - if _debugging; - kill $signal, $_->{PID} - or _debugging && _debug "$! sending $signal to $_->{PID}"; - } - - return; -} - -=pod - -=item kill_kill - - ## To kill off a process: - $h->kill_kill; - kill_kill $h; - - ## To specify the grace period other than 30 seconds: - kill_kill $h, grace => 5; - - ## To send QUIT instead of KILL if a process refuses to die: - kill_kill $h, coup_d_grace => "QUIT"; - -Sends a C, waits for all children to exit for up to 30 seconds, then -sends a C to any that survived the C. - -Will wait for up to 30 more seconds for the OS to successfully C the -processes. - -The 30 seconds may be overridden by setting the C option, this -overrides both timers. - -The harness is then cleaned up. - -The doubled name indicates that this function may kill again and avoids -colliding with the core Perl C function. - -Returns a 1 if the C was sufficient, or a 0 if C was -required. Throws an exception if C did not permit the children -to be reaped. - -B: The grace period is actually up to 1 second longer than that -given. This is because the granularity of C