Algorithm-Diff
view release on metacpan or search on metacpan
#
# Variables:
# before_diff - how much longer file 2 is than file 1 due to all hunks
# until but NOT including this one
# after_diff - difference due to all hunks including this one
my ($class, $piece, $context_items) = @_;
my $block = new Block ($piece); # this modifies $FLD!
my $before_diff = $File_Length_Difference; # BEFORE this hunk
my $after_diff = $before_diff + $block->{"length_diff"};
$File_Length_Difference += $block->{"length_diff"};
# @remove_array and @insert_array hold the items to insert and remove
# Save the start & beginning of each array. If the array doesn't exist
# though (e.g., we're only adding items in this block), then figure
# out the line number based on the line number of the other file and
# the current difference in file lengths
my @remove_array = $block->remove;
my @insert_array = $block->insert;
my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
$a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
$a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
$b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
$b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
$start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
$end1 = $a2 == -1 ? $b2 - $after_diff : $a2;
# Calculate item number range.
my $range1 = $hunk->unified_range(1);
my $range2 = $hunk->unified_range(2);
print "@@ -$range1 +$range2 @@\n";
# Outlist starts containing the hunk of file 1.
# Removing an item just means putting a '-' in front of it.
# Inserting an item requires getting it from file2 and splicing it in.
# We splice in $num_added items. Remove blocks use $num_added because
# splicing changed the length of outlist.
# We remove $num_removed items. Insert blocks use $num_removed because
# their item numbers---corresponding to positions in file *2*--- don't take
# removed items into account.
my $low = $hunk->{"start1"};
my $hi = $hunk->{"end1"};
my ($num_added, $num_removed) = (0,0);
my @outlist = @$fileref1[$low..$hi];
map {s/^/ /} @outlist; # assume it's just context
foreach my $block (@{$hunk->{"blocks"}}) {
return $range;
}
sub unified_range {
# Generate a range of item numbers to print for unified diff
# Print number where block starts, followed by number of lines in the block
# (don't print number of lines if it's 1)
my ($hunk, $flag) = @_;
my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
$start++; $end++; # index from 1, not zero
my $length = $end - $start + 1;
my $first = $length < 2 ? $end : $start; # strange, but correct...
my $range = $length== 1 ? $first : "$first,$length";
return $range;
}
} # end Package Hunk
# Package Block. A block is an operation removing, adding, or changing
# a group of items. Basically, this is just a list of changes, where each
# change adds or deletes a single item.
# (Change could be a separate class, but it didn't seem worth it)
{
package Block;
sub new {
# Input is a chunk from &Algorithm::LCS::diff
# Fields in a block:
# length_diff - how much longer file 2 is than file 1 due to this block
# Each change has:
# sign - '+' for insert, '-' for remove
# item_no - number of the item in the file (e.g., line number)
# We don't bother storing the text of the item
#
my ($class,$chunk) = @_;
my @changes = ();
# This just turns each change into a hash.
foreach my $item (@$chunk) {
my ($sign, $item_no, $text) = @$item;
my $hashref = {"sign" => $sign, "item_no" => $item_no};
push @changes, $hashref;
}
my $block = { "changes" => \@changes };
bless $block, $class;
$block->{"length_diff"} = $block->insert - $block->remove;
return $block;
}
# LOW LEVEL FUNCTIONS
sub op {
# what kind of block is this?
my $block = shift;
my $insert = $block->insert;
my $remove = $block->remove;
#
# Variables:
# before_diff - how much longer file 2 is than file 1 due to all hunks
# until but NOT including this one
# after_diff - difference due to all hunks including this one
my ($class, $piece, $context_items) = @_;
my $block = new Block ($piece); # this modifies $FLD!
my $before_diff = $File_Length_Difference; # BEFORE this hunk
my $after_diff = $before_diff + $block->{"length_diff"};
$File_Length_Difference += $block->{"length_diff"};
# @remove_array and @insert_array hold the items to insert and remove
# Save the start & beginning of each array. If the array doesn't exist
# though (e.g., we're only adding items in this block), then figure
# out the line number based on the line number of the other file and
# the current difference in file lenghts
my @remove_array = $block->remove;
my @insert_array = $block->insert;
my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
$a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
# Calculate item number range.
my $range1 = $hunk->unified_range(1);
my $range2 = $hunk->unified_range(2);
print "@@ -$range1 +$range2 @@\n";
# Outlist starts containing the hunk of file 1.
# Removing an item just means putting a '-' in front of it.
# Inserting an item requires getting it from file2 and splicing it in.
# We splice in $num_added items. Remove blocks use $num_added because
# splicing changed the length of outlist.
# We remove $num_removed items. Insert blocks use $num_removed because
# their item numbers---corresponding to positions in file *2*--- don't take
# removed items into account.
my $low = $hunk->{"start1"};
my $hi = $hunk->{"end1"};
my ($num_added, $num_removed) = (0,0);
my @outlist = @$fileref1[$low..$hi];
map {s/^/ /} @outlist; # assume it's just context
foreach my $block (@{$hunk->{"blocks"}}) {
return $range;
}
sub unified_range {
# Generate a range of item numbers to print for unified diff
# Print number where block starts, followed by number of lines in the block
# (don't print number of lines if it's 1)
my ($hunk, $flag) = @_;
my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
$start++; $end++; # index from 1, not zero
my $length = $end - $start + 1;
my $first = $length < 2 ? $end : $start; # strange, but correct...
my $range = $length== 1 ? $first : "$first,$length";
return $range;
}
} # end Package Hunk
########
# Package Block. A block is an operation removing, adding, or changing
# a group of items. Basically, this is just a list of changes, where each
# change adds or deletes a single item.
# (Change could be a separate class, but it didn't seem worth it)
{
package Block;
sub new {
# Input is a chunk from &Algorithm::LCS::diff
# Fields in a block:
# length_diff - how much longer file 2 is than file 1 due to this block
# Each change has:
# sign - '+' for insert, '-' for remove
# item_no - number of the item in the file (e.g., line number)
# We don't bother storing the text of the item
#
my ($class,$chunk) = @_;
my @changes = ();
# This just turns each change into a hash.
foreach my $item (@$chunk) {
my ($sign, $item_no, $text) = @$item;
my $hashref = {"sign" => $sign, "item_no" => $item_no};
push @changes, $hashref;
}
my $block = { "changes" => \@changes };
bless $block, $class;
$block->{"length_diff"} = $block->insert - $block->remove;
return $block;
}
# LOW LEVEL FUNCTIONS
sub op {
# what kind of block is this?
my $block = shift;
my $insert = $block->insert;
my $remove = $block->remove;
lib/Algorithm/Diff.pm view on Meta::CPAN
use integer; # see below in _replaceNextLargerWith() for mod to make
# if you don't use this
use vars qw( $VERSION @EXPORT_OK );
$VERSION = 1.19_03;
# ^ ^^ ^^-- Incremented at will
# | \+----- Incremented for non-trivial changes to features
# \-------- Incremented for fundamental changes
require Exporter;
*import = \&Exporter::import;
@EXPORT_OK = qw(
prepare LCS LCSidx LCS_length
diff sdiff compact_diff
traverse_sequences traverse_balanced
);
# McIlroy-Hunt diff algorithm
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
# by Ned Konz, perl@bike-nomad.com
# Updates by Tye McQueen, http://perlmonks.org/?node=tye
# Create a hash that maps each element of $aCollection to the set of
lib/Algorithm/Diff.pm view on Meta::CPAN
for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
{
if ( defined( $matchVector->[$i] ) )
{
push ( @retval, $a->[$i] );
}
}
return wantarray ? @retval : \@retval;
}
sub LCS_length
{
my $a = shift; # array ref
my $b = shift; # array ref or hash ref
return _longestCommonSubsequence( $a, $b, 1, @_ );
}
sub LCSidx
{
my $a= shift @_;
my $b= shift @_;
lib/Algorithm/Diff.pm view on Meta::CPAN
for my $arg ( @_ ) {
for my $word ( split ' ', $arg ) {
my $meth;
if( $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
|| not $meth= $getName{ lc $2 }
) {
Die( $Root, ", Get: Invalid request ($word)" );
}
my( $base, $name, $seq )= ( $1, $2, $3 );
push @value, scalar(
4 == length($name)
? $meth->( $me )
: $meth->( $me, $seq, $base )
);
}
}
if( wantarray ) {
return @value;
} elsif( 1 == @value ) {
return $value[0];
}
lib/Algorithm/Diff.pm view on Meta::CPAN
}
print "< $_" for $diff->Items(1);
print $sep;
print "> $_" for $diff->Items(2);
}
# Alternate interfaces:
use Algorithm::Diff qw(
LCS LCS_length LCSidx
diff sdiff compact_diff
traverse_sequences traverse_balanced );
@lcs = LCS( \@seq1, \@seq2 );
$lcsref = LCS( \@seq1, \@seq2 );
$count = LCS_length( \@seq1, \@seq2 );
( $seq1idxref, $seq2idxref ) = LCSidx( \@seq1, \@seq2 );
# Complicated interfaces:
@diffs = diff( \@seq1, \@seq2 );
@sdiffs = sdiff( \@seq1, \@seq2 );
lib/Algorithm/Diff.pm view on Meta::CPAN
=head1 USAGE
(See also the README file and several example
scripts include with this module.)
This module now provides an object-oriented interface that uses less
memory and is easier to use than most of the previous procedural
interfaces. It also still provides several exportable functions. We'll
deal with these in ascending order of difficulty: C<LCS>,
C<LCS_length>, C<LCSidx>, OO interface, C<prepare>, C<diff>, C<sdiff>,
C<traverse_sequences>, and C<traverse_balanced>.
=head2 C<LCS>
Given references to two lists of items, LCS returns an array containing
their longest common subsequence. In scalar context, it returns a
reference to such a list.
@lcs = LCS( \@seq1, \@seq2 );
$lcsref = LCS( \@seq1, \@seq2 );
lib/Algorithm/Diff.pm view on Meta::CPAN
C<LCS> may be passed an optional third parameter; this is a CODE
reference to a key generation function. See L</KEY GENERATION
FUNCTIONS>.
@lcs = LCS( \@seq1, \@seq2, \&keyGen, @args );
$lcsref = LCS( \@seq1, \@seq2, \&keyGen, @args );
Additional parameters, if any, will be passed to the key generation
routine.
=head2 C<LCS_length>
This is just like C<LCS> except it only returns the length of the
longest common subsequence. This provides a performance gain of about
9% compared to C<LCS>.
=head2 C<LCSidx>
Like C<LCS> except it returns references to two arrays. The first array
contains the indices into @seq1 where the LCS items are located. The
second array contains the indices into @seq2 where the LCS items are located.
Therefore, the following three lists will contain the same values:
lib/Algorithm/Diff.pm view on Meta::CPAN
Using C<Get> in a scalar context when you've passed in more than one
name is a fatal error (C<die> is called).
=back
=head2 C<prepare>
Given a reference to a list of items, C<prepare> returns a reference
to a hash which can be used when comparing this sequence to other
sequences with C<LCS> or C<LCS_length>.
$prep = prepare( \@seq1 );
for $i ( 0 .. 10_000 )
{
@lcs = LCS( $prep, $seq[$i] );
# do something useful with @lcs
}
C<prepare> may be passed an optional third parameter; this is a CODE
reference to a key generation function. See L</KEY GENERATION
[ '-', 9, 'p' ],
[ '+', 10, 's' ],
[ '+', 11, 't' ],
]
];
# Result of LCS must be as long as @a
my @result = Algorithm::Diff::_longestCommonSubsequence( \@a, \@b );
ok( scalar(grep { defined } @result),
scalar(@correctResult),
"length of _longestCommonSubsequence" );
# result has b[] line#s keyed by a[] line#
# print "result =", join(" ", map { defined($_) ? $_ : 'undef' } @result), "\n";
my @aresult = map { defined( $result[$_] ) ? $a[$_] : () } 0 .. $#result;
my @bresult =
map { defined( $result[$_] ) ? $b[ $result[$_] ] : () } 0 .. $#result;
ok( "@aresult", $correctResult, "A results" );
ok( "@bresult", $correctResult, "B results" );
( run in 0.371 second using v1.01-cache-2.11-cpan-65fba6d93b7 )