start results from the CPAN

start
Algorithm-Diff-Apply
view release on metacpan or search on metacpan
lib/Algorithm/Diff/Apply.pm view on Meta::CPAN
                {
                        my ($op, $pos, $data) = @$change;
                        if ($op eq "-")
                        {
                                splice(@ary, $pos+$delta, 1);
                                --$delta;
			}
                        elsif ($op eq "+")
                        {
                                splice(@ary, $pos, 0, $data);
                                ++$delta;
                        }
                        else
                        {
                                die "unknown operation: \"$op\"\n";
                        }
                }
        }
        return wantarray ? @ary : \@ary;
}


# Apply one or more labelled diff sequences to a target array.
# Somewhat more complex; needs prepasses and consideration of
# conflicts.

sub apply_diffs
{
	# Collect args:
	my @ary = @{shift(@_)};
	my %opt;
	%opt = %{shift(@_)} if ref($_[0]) && (ref($_[0]) eq 'HASH');
	my %diffset;
	while (my $tag = shift)
	{
		ref($tag) and croak("Tagnames must be scalar");
		my $diff = shift;
		ref($diff) eq 'ARRAY'
			or croak("Diff sequences must be references of "
				 . "type \"ARRAY\"");
		$diffset{$tag} = __homogenise_diff($diff, %opt);
	}

	# Trivial case
	if (scalar keys %diffset < 1)
	{
		return wantarray ? @ary : \@ary;
	}

	my @alts = __optimise_conflicts(diffset => \%diffset,
	                                opts => \%opt);
	__apply_alternatives(target => \@ary,
	                     alts => \@alts,
			     opts => \%opt);
	return wantarray ? @ary : \@ary;
}


# Converts all the hunks in an Algorithm::Diff-style diff to a
# normalised form in which all hunks are a) still internally
# contiguous, and b) have start indices which refer to items in the
# original array, before any diffs are applied. Normally, hunks
# consisting of only inserts don't meet criterion b).
#
# Allso attaches hash data if the hashing function is defined.

sub __homogenise_diff
{
	my ($orig_diff, %opt) = @_;
	my @hdiff = ();
	my $delta = 0;   # difference between orig and resultant
	foreach my $orig_hunk (@$orig_diff)
	{
		my ($first_op, $start) = @{$orig_hunk->[0]} [0, 1];
		$start -= $delta  if $first_op eq '+';
		my $hhunk = {
			start => $start,
			changes => [],
		};
		foreach my $change (@$orig_hunk)
		{
			my ($op, $data);
			($op, undef, $data) = @$change;
			$delta += (($op eq '+') ? 1 : -1);
			my $hash = (exists($opt{key_generator})
				    ? $opt{key_generator}->($data)
				    : undef);
			push @{$hhunk->{changes}}, [$op, $data, $hash];
		}
		push @hdiff, $hhunk;
	}
	return \@hdiff;
}


# Calls the specified optimisation callbacks, returning a list of discrete
# alternative blocks in a format that __apply_alternatives() can handle.

sub __optimise_conflicts
{
	my %args = @_;
	my %diffset = %{$args{diffset} || confess "\"diffset\" not specified"};
	my %opt = %{$args{opts} || confess "\"opts\" not specified"};

	my @optim;
	if ($opt{optimisers} or $opt{optimizers})
	{
		push @optim, @{$opt{optimisers} || []};
		push @optim, @{$opt{optimizers} || []};
	}
	else
	{
		@optim = &DEFAULT_OPTIMISERS;
	}
	my @alts;
	while (my ($u_min, $u_max, %u_alt)
	       = __shift_next_alternatives(\%diffset))
	{
		# Non-conflict case:
		if (scalar(keys(%u_alt)) <= 1)
		{
			push(@alts, [$u_min, $u_max, %u_alt]);
			next;
		}

		# Conflict case: pass each optimiser over it once.
		foreach my $o (@optim)
		{
			%u_alt = $o->("conflict_block" => \%u_alt);
			%u_alt = __diffset_discard_empties(%u_alt);
		}
		#__dump_diffset(%u_alt);
		
		# An optimiser could turn one block of conflicts into
		# two or more, so re-detect any remaining conflicts
		# within the block.

		while (my ($o_min, $o_max, %o_alt)
		       = __shift_next_alternatives(\%u_alt))
		{
			push(@alts, [$o_min, $o_max, %o_alt]);
		}
	}
	return @alts;
}


# Extracts the array ($min, $max, %alts) from %$diffset where $min and
# $max describe the range of lines affected by all the diff hunks in
# %alts, and %alts is a diffset containing at least one alternative.
# Returns an empty array if there are no diff hunks left.

sub __shift_next_alternatives
{
	my $diffset = shift;
	my $id = __next_hunk_id($diffset);
	defined($id) or return ();
	my ($cflict_max, $cflict_min);
	my %cflict;
	my $hunk = shift @{$diffset->{$id}};
	$cflict{$id} = [ $hunk ];

	# Seed range with $hunk:
	my @ch = @{$hunk->{changes}};
	my $span = grep { $_->[0] eq '-' } @ch;
	$cflict_min = $hunk->{start};
	$cflict_max = $cflict_min + $span;

	# Detect conflicting hunks, and add those in too.
	my %ignore;
	while (my $tmp_id = __next_hunk_id($diffset, %ignore))
	{
		my $tmp_hunk = $diffset->{$tmp_id}->[0];
		@ch = @{$tmp_hunk->{changes}};
		my $tmp_span = grep { $_->[0] eq '-' } @ch;
		my $tmp_max = $tmp_hunk->{start} + $tmp_span;
		if ($tmp_hunk->{start} <= $cflict_max)
		{
			exists $cflict{$tmp_id} or $cflict{$tmp_id} = [];
			shift @{$diffset->{$tmp_id}};
			push @{$cflict{$tmp_id}}, $tmp_hunk;
			$cflict_max = $tmp_max if $tmp_max > $cflict_max;
		}
		else
		{
			$ignore{$tmp_id} = 1;
		}
	}

	return ($cflict_min, $cflict_max, %cflict);
}


# Returns the ID of the hunk in %$diffset whose ->{start} is lowest,
# or undef. %ignore{SOMEID} can be set to a true value to cause a
# given sequence to be skipped over.

sub __next_hunk_id
{
	my ($diffset, %ignore) = @_;
	my ($lo_id, $lo_start);
	foreach my $id (keys %$diffset)
	{
		next if $ignore{$id};
		my $diff = $diffset->{$id};
		next if $#$diff < 0;
		my $start = $diff->[0]->{start};
		if ((! defined($lo_start))
		    || $start < $lo_start)
		{
			$lo_id = $id;
			$lo_start = $start;
		}
	}
	return $lo_id;
}


sub __diffset_discard_empties
{
	my %dset = @_;
	return map {
		($#{$dset{$_}} < 0) ? () : ($_ => $dset{$_});
	} keys %dset;
}


sub __apply_alternatives
{
	my %args = @_;
	my %opt = %{$args{opts} || confess "\"opts\" not specified"};
	my $ary = $args{target} || confess "\"target\" not specified";
	my @alts = @{$args{alts} || confess "\"alts\" not specified"};
	my $resolver = $opt{resolver} || \&mark_conflicts;

	my $delta = 0;
	while (my $alt = shift @alts)
	{
		my ($min, $max, %alts) = @$alt;
		my @orig = @{$ary}[$min + $delta .. $max + $delta - 1];
		my @replacement;

		my %alt_txts;
		foreach my $id (sort keys %alts)
		{
			my @tmp = @orig;
			my $tmp_delta = -$min;
			foreach my $hunk (@{ $alts{$id} })
			{
				__apply_hunk(\@tmp, \$tmp_delta, $hunk);
			}
			$alt_txts{$id} = \@tmp;
		}
		
		if (scalar keys %alt_txts == 1)
		{
			my ($r) = values %alt_txts;
			@replacement = @$r;
		}
		else
		{
			@replacement = $resolver->(src_range_end => $max,
						   src_range_start => $min,
						   src_range => \@orig,
						   alt_txts => \%alt_txts,
						   invoc_opts => \%opt);
		}		
		splice(@$ary, $min + $delta, $#orig+1, @replacement);
		$delta += ($#replacement - $#orig);
	}
}


# Applies a hunk to an array, and calculates the lines lost or gained
# by doing so.

sub __apply_hunk
{
        my ($ary, $rdelta, $hunk) = @_;
	my $pos = $hunk->{start} + $$rdelta;
        foreach my $change (@{$hunk->{changes}})
        {
                if ($change->[0] eq '+')
                {
                        splice(@$ary, $pos, 0, $change->[1]);
                        ++$$rdelta;
                        ++$pos;
                }
                else
                {
                        splice(@$ary, $pos, 1);
                        --$$rdelta;
                }
        }
}


# The default conflict resolution subroutine. Returns all alternative
# texts with conflict markers inserted around them.

sub mark_conflicts (%)
{
	my %opt = @_;
	defined $opt{alt_txts} or confess("alt_txts not defined\n");
	my %alt = %{$opt{alt_txts}};
	my @ret;
	foreach my $id (sort keys %alt)
	{
		push @ret, ">>>>>> $id\n";
		push @ret, @{$alt{$id}};
	}
	push @ret, "<<<<<<\n";
	return @ret;
}


sub optimise_remove_duplicates (%)
{
	my %opt = @_;
	my $block = $opt{conflict_block};
	defined $block or confess("conflict_block not defined\n");
	my @tags = reverse sort keys(%$block);
	my %ret = map {$_ => []} @tags;
    REFTAG:
	while (my $tag = shift @tags)
	{
	    REFHUNK:
		for my $hunk (@{$block->{$tag}})
		{
			for my $t (@tags)
			{
				for my $h (@{$block->{$t}})
				{
					__hunks_identical($hunk, $h)
						and next REFHUNK;
				}
			}
			push @{$ret{$tag}}, $hunk;
		}
	}
	return %ret;
}


sub __hunks_identical
{
	my ($h1, $h2) = @_;
	$h1->{start} == $h2->{start} or return 0;
	$#{$h1->{changes}} == $#{$h2->{changes}} or return 0;
	foreach my $i (0 .. $#{$h1->{changes}})
	{
		my ($op1, $data1, $hash1) = @{ $h1->{changes}->[$i] };
		my ($op2, $data2, $hash2) = @{ $h2->{changes}->[$i] };
		$op1 eq $op2 or return 0;
		if (defined($hash1) && defined($hash2))
		{
			$hash1 eq $hash2 or return 0;
		}
		else
		{
			$data1 eq $data2 or return 0;
		}
	}
	return 1;
}


sub __dump_diffset
{
	my %dset = @_;
	print STDERR "-- begin diffset --\n";
	for my $tag (sort keys %dset)
	{
		print STDERR "-- begin seq tag=\"$tag\" --\n";
		my @diff = @{$dset{$tag}};
		for my $diff (@diff)
		{
			print STDERR "\n\@".$diff->{start}."\n";
			for my $e (@{$diff->{changes}})
			{
				my ($op, $data) = @$e;
				$data = quotemeta($data);
				$data =~ s{^(.{0,75})(.*)}{
					$1 . ($2 eq "" ? "" : "...");
				}se;
				print STDERR "$op $data\n";
			}
		}
		print STDERR "\n-- end seq tag=\"$tag\" --\n";
	}	
	print STDERR "-- end diffset --\n";
}


# *Terminology*
#
# A "diffset" is a hash of IDs whose values are arrays holding
# sequences of diffs generated from different sources. There may be
# conflicts within a diffset.
# 
# An "alternatives" diffset is a minimal diffset which contains no
# more than one conflict. I can't think of a better name for it, as
# there's a special case where it only consists of a single key
# pointing at a single hunk.

1;
( run in 0.676 second using v1.01-cache-2.11-cpan-f6376fbd888 )