Algorithm-Diff

 view release on metacpan or  search on metacpan

cdiff.pl  view on Meta::CPAN

#
# Variables:
# before_diff - how much longer file 2 is than file 1 due to all hunks
#               until but NOT including this one
# after_diff  - difference due to all hunks including this one
    my ($class, $piece, $context_items) = @_;

    my $block = new Block ($piece); # this modifies $FLD!

    my $before_diff = $File_Length_Difference; # BEFORE this hunk
    my $after_diff = $before_diff + $block->{"length_diff"};
    $File_Length_Difference += $block->{"length_diff"};

    # @remove_array and @insert_array hold the items to insert and remove
    # Save the start & beginning of each array. If the array doesn't exist
    # though (e.g., we're only adding items in this block), then figure
    # out the line number based on the line number of the other file and
    # the current difference in file lengths
    my @remove_array = $block->remove;
    my @insert_array = $block->insert;
    my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
    $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
    $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
    $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
    $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;

    $start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
    $end1   = $a2 == -1 ? $b2 - $after_diff  : $a2;

cdiff.pl  view on Meta::CPAN


    # Calculate item number range.
    my $range1 = $hunk->unified_range(1);
    my $range2 = $hunk->unified_range(2);
    print "@@ -$range1 +$range2 @@\n";

    # Outlist starts containing the hunk of file 1.
    # Removing an item just means putting a '-' in front of it.
    # Inserting an item requires getting it from file2 and splicing it in.
    #    We splice in $num_added items. Remove blocks use $num_added because
    # splicing changed the length of outlist.
    #    We remove $num_removed items. Insert blocks use $num_removed because
    # their item numbers---corresponding to positions in file *2*--- don't take
    # removed items into account.
    my $low = $hunk->{"start1"};
    my $hi = $hunk->{"end1"};
    my ($num_added, $num_removed) = (0,0);
    my @outlist = @$fileref1[$low..$hi];
    map {s/^/ /} @outlist; # assume it's just context

    foreach my $block (@{$hunk->{"blocks"}}) {

cdiff.pl  view on Meta::CPAN

    return $range;
}

sub unified_range {
# Generate a range of item numbers to print for unified diff
# Print number where block starts, followed by number of lines in the block
# (don't print number of lines if it's 1)
    my ($hunk, $flag) = @_;
    my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
    $start++; $end++;  # index from 1, not zero
    my $length = $end - $start + 1;
    my $first = $length < 2 ? $end : $start; # strange, but correct...
    my $range = $length== 1 ? $first : "$first,$length";
    return $range;
}
} # end Package Hunk

# Package Block. A block is an operation removing, adding, or changing
# a group of items. Basically, this is just a list of changes, where each
# change adds or deletes a single item.
# (Change could be a separate class, but it didn't seem worth it)
{
package Block;
sub new {
# Input is a chunk from &Algorithm::LCS::diff
# Fields in a block:
# length_diff - how much longer file 2 is than file 1 due to this block
# Each change has:
# sign        - '+' for insert, '-' for remove
# item_no     - number of the item in the file (e.g., line number)
# We don't bother storing the text of the item
#
    my ($class,$chunk) = @_;
    my @changes = ();

# This just turns each change into a hash.
    foreach my $item (@$chunk) {
	my ($sign, $item_no, $text) = @$item;
	my $hashref = {"sign" => $sign, "item_no" => $item_no};
	push @changes, $hashref;
    }

    my $block = { "changes" => \@changes };
    bless $block, $class;

    $block->{"length_diff"} = $block->insert - $block->remove;
    return $block;
}


# LOW LEVEL FUNCTIONS
sub op {
# what kind of block is this?
    my $block = shift;
    my $insert = $block->insert;
    my $remove = $block->remove;

diffnew.pl  view on Meta::CPAN

#
# Variables:
# before_diff - how much longer file 2 is than file 1 due to all hunks
#               until but NOT including this one
# after_diff  - difference due to all hunks including this one
    my ($class, $piece, $context_items) = @_;

    my $block = new Block ($piece); # this modifies $FLD!

    my $before_diff = $File_Length_Difference; # BEFORE this hunk
    my $after_diff = $before_diff + $block->{"length_diff"};
    $File_Length_Difference += $block->{"length_diff"};

    # @remove_array and @insert_array hold the items to insert and remove
    # Save the start & beginning of each array. If the array doesn't exist
    # though (e.g., we're only adding items in this block), then figure
    # out the line number based on the line number of the other file and
    # the current difference in file lenghts
    my @remove_array = $block->remove;
    my @insert_array = $block->insert;
    my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
    $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;

diffnew.pl  view on Meta::CPAN


    # Calculate item number range.
    my $range1 = $hunk->unified_range(1);
    my $range2 = $hunk->unified_range(2);
    print "@@ -$range1 +$range2 @@\n";

    # Outlist starts containing the hunk of file 1.
    # Removing an item just means putting a '-' in front of it.
    # Inserting an item requires getting it from file2 and splicing it in.
    #    We splice in $num_added items. Remove blocks use $num_added because
    # splicing changed the length of outlist.
    #    We remove $num_removed items. Insert blocks use $num_removed because
    # their item numbers---corresponding to positions in file *2*--- don't take
    # removed items into account.
    my $low = $hunk->{"start1"};
    my $hi = $hunk->{"end1"};
    my ($num_added, $num_removed) = (0,0);
    my @outlist = @$fileref1[$low..$hi];
    map {s/^/ /} @outlist; # assume it's just context

    foreach my $block (@{$hunk->{"blocks"}}) {

diffnew.pl  view on Meta::CPAN

    return $range;
}

sub unified_range {
# Generate a range of item numbers to print for unified diff
# Print number where block starts, followed by number of lines in the block
# (don't print number of lines if it's 1)
    my ($hunk, $flag) = @_;
    my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
    $start++; $end++;  # index from 1, not zero
    my $length = $end - $start + 1;
    my $first = $length < 2 ? $end : $start; # strange, but correct...
    my $range = $length== 1 ? $first : "$first,$length";
    return $range;
}
} # end Package Hunk

########
# Package Block. A block is an operation removing, adding, or changing
# a group of items. Basically, this is just a list of changes, where each
# change adds or deletes a single item.
# (Change could be a separate class, but it didn't seem worth it)
{
package Block;
sub new {
# Input is a chunk from &Algorithm::LCS::diff
# Fields in a block:
# length_diff - how much longer file 2 is than file 1 due to this block
# Each change has:
# sign        - '+' for insert, '-' for remove
# item_no     - number of the item in the file (e.g., line number)
# We don't bother storing the text of the item
#
    my ($class,$chunk) = @_;
    my @changes = ();

# This just turns each change into a hash.
    foreach my $item (@$chunk) {
	my ($sign, $item_no, $text) = @$item;
	my $hashref = {"sign" => $sign, "item_no" => $item_no};
	push @changes, $hashref;
    }

    my $block = { "changes" => \@changes };
    bless $block, $class;

    $block->{"length_diff"} = $block->insert - $block->remove;
    return $block;
}


# LOW LEVEL FUNCTIONS
sub op {
# what kind of block is this?
    my $block = shift;
    my $insert = $block->insert;
    my $remove = $block->remove;

lib/Algorithm/Diff.pm  view on Meta::CPAN

use integer;    # see below in _replaceNextLargerWith() for mod to make
                # if you don't use this
use vars qw( $VERSION @EXPORT_OK );
$VERSION = 1.19_03;
#          ^ ^^ ^^-- Incremented at will
#          | \+----- Incremented for non-trivial changes to features
#          \-------- Incremented for fundamental changes
require Exporter;
*import    = \&Exporter::import;
@EXPORT_OK = qw(
    prepare LCS LCSidx LCS_length
    diff sdiff compact_diff
    traverse_sequences traverse_balanced
);

# McIlroy-Hunt diff algorithm
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
# by Ned Konz, perl@bike-nomad.com
# Updates by Tye McQueen, http://perlmonks.org/?node=tye

# Create a hash that maps each element of $aCollection to the set of

lib/Algorithm/Diff.pm  view on Meta::CPAN

    for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
    {
        if ( defined( $matchVector->[$i] ) )
        {
            push ( @retval, $a->[$i] );
        }
    }
    return wantarray ? @retval : \@retval;
}

sub LCS_length
{
    my $a = shift;                          # array ref
    my $b = shift;                          # array ref or hash ref
    return _longestCommonSubsequence( $a, $b, 1, @_ );
}

sub LCSidx
{
    my $a= shift @_;
    my $b= shift @_;

lib/Algorithm/Diff.pm  view on Meta::CPAN

    for my $arg (  @_  ) {
        for my $word (  split ' ', $arg  ) {
            my $meth;
            if(     $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
                ||  not  $meth= $getName{ lc $2 }
            ) {
                Die( $Root, ", Get: Invalid request ($word)" );
            }
            my( $base, $name, $seq )= ( $1, $2, $3 );
            push @value, scalar(
                4 == length($name)
                    ? $meth->( $me )
                    : $meth->( $me, $seq, $base )
            );
        }
    }
    if(  wantarray  ) {
        return @value;
    } elsif(  1 == @value  ) {
        return $value[0];
    }

lib/Algorithm/Diff.pm  view on Meta::CPAN

        }
        print "< $_"   for  $diff->Items(1);
        print $sep;
        print "> $_"   for  $diff->Items(2);
    }


    # Alternate interfaces:

    use Algorithm::Diff qw(
        LCS LCS_length LCSidx
        diff sdiff compact_diff
        traverse_sequences traverse_balanced );

    @lcs    = LCS( \@seq1, \@seq2 );
    $lcsref = LCS( \@seq1, \@seq2 );
    $count  = LCS_length( \@seq1, \@seq2 );

    ( $seq1idxref, $seq2idxref ) = LCSidx( \@seq1, \@seq2 );


    # Complicated interfaces:

    @diffs  = diff( \@seq1, \@seq2 );

    @sdiffs = sdiff( \@seq1, \@seq2 );

lib/Algorithm/Diff.pm  view on Meta::CPAN


=head1 USAGE

(See also the README file and several example
scripts include with this module.)

This module now provides an object-oriented interface that uses less
memory and is easier to use than most of the previous procedural
interfaces.  It also still provides several exportable functions.  We'll
deal with these in ascending order of difficulty:  C<LCS>,
C<LCS_length>, C<LCSidx>, OO interface, C<prepare>, C<diff>, C<sdiff>,
C<traverse_sequences>, and C<traverse_balanced>.

=head2 C<LCS>

Given references to two lists of items, LCS returns an array containing
their longest common subsequence.  In scalar context, it returns a
reference to such a list.

    @lcs    = LCS( \@seq1, \@seq2 );
    $lcsref = LCS( \@seq1, \@seq2 );

lib/Algorithm/Diff.pm  view on Meta::CPAN

C<LCS> may be passed an optional third parameter; this is a CODE
reference to a key generation function.  See L</KEY GENERATION
FUNCTIONS>.

    @lcs    = LCS( \@seq1, \@seq2, \&keyGen, @args );
    $lcsref = LCS( \@seq1, \@seq2, \&keyGen, @args );

Additional parameters, if any, will be passed to the key generation
routine.

=head2 C<LCS_length>

This is just like C<LCS> except it only returns the length of the
longest common subsequence.  This provides a performance gain of about
9% compared to C<LCS>.

=head2 C<LCSidx>

Like C<LCS> except it returns references to two arrays.  The first array
contains the indices into @seq1 where the LCS items are located.  The
second array contains the indices into @seq2 where the LCS items are located.

Therefore, the following three lists will contain the same values:

lib/Algorithm/Diff.pm  view on Meta::CPAN


Using C<Get> in a scalar context when you've passed in more than one
name is a fatal error (C<die> is called).

=back

=head2 C<prepare>

Given a reference to a list of items, C<prepare> returns a reference
to a hash which can be used when comparing this sequence to other
sequences with C<LCS> or C<LCS_length>.

    $prep = prepare( \@seq1 );
    for $i ( 0 .. 10_000 )
    {
        @lcs = LCS( $prep, $seq[$i] );
        # do something useful with @lcs
    }

C<prepare> may be passed an optional third parameter; this is a CODE
reference to a key generation function.  See L</KEY GENERATION

t/base.t  view on Meta::CPAN

		[ '-', 9,  'p' ],
		[ '+', 10, 's' ],
		[ '+', 11, 't' ],
	]
];

# Result of LCS must be as long as @a
my @result = Algorithm::Diff::_longestCommonSubsequence( \@a, \@b );
ok( scalar(grep { defined } @result),
	scalar(@correctResult),
	"length of _longestCommonSubsequence" );

# result has b[] line#s keyed by a[] line#
# print "result =", join(" ", map { defined($_) ? $_ : 'undef' } @result), "\n";

my @aresult = map { defined( $result[$_] ) ? $a[$_] : () } 0 .. $#result;
my @bresult =
  map { defined( $result[$_] ) ? $b[ $result[$_] ] : () } 0 .. $#result;

ok( "@aresult", $correctResult, "A results" );
ok( "@bresult", $correctResult, "B results" );



( run in 0.371 second using v1.01-cache-2.11-cpan-65fba6d93b7 )