XML-Diff
view release on metacpan or search on metacpan
$self->{$type}->{root} = $self->{$type}->{doc}->documentElement();
}
#$self->{$type}->{doc}->indexElements();
return 1;
}
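=pod

=head2 _buildTree

Internal method, called as
C<< $self->_buildTree( $node, $lookup, $old, $position ) >>.
C<$position> defaults to 0 when it is omitted or false.

Derives two strings for C<$node>: a I<signature> and a I<thumbprint>.
For an element node the signature starts as the node name and the method
calls itself on every child node, appending each child's thumbprint and
signature to its own; the sorted attribute names are then appended to the
thumbprint. Text, CDATA and comment nodes use a fixed tag (C<TEXT>,
C<CDATA>, C<COMMENT>) as the signature and that tag followed by the node's
text content as the thumbprint. Processing instructions use C<PI> for both.
Any other node type is not processed.

The signature and the thumbprint are each reduced to a base64 MD5 digest.
The node is then pushed onto C<< $lookup->{hash}->{$hash}->{$position} >>,
where C<$hash> is the signature digest, and
C<< $lookup->{hash}->{$hash}->{max} >> is raised to C<$position> when that
is larger.

The recursive call is used in list context as
C<($thumbprint, $signature)>, so the method is expected to return that
pair.

=cut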
# _________________________________________________________
sub _buildTree {
my $self = shift;
my $node = shift;
my $lookup = shift;
my $old = shift;
my $position = shift || 0;
my $signature;
my $thumbprint;
my $weight;
# currently we only look at Element and Text nodes (Attribute nodes
# we handle as a known sub-element of Element nodes)
#next unless( $node->nodeType == 3 || $node->nodeType == 1 );
# need to consider full, content and structure matches for better diffs
# but that's for the future.. right now we just do structure
my $nodeType = $node->nodeType;
if( $node->nodeType == 1 ) {
#$self->_debug( "- element node -" );
# XML_ELEMENT_NODE= 1,
# XML_ATTRIBUTE_NODE= 2,
$signature = $node->nodeName();
$thumbprint = $signature;
my $p;
foreach my $child ( $node->childNodes() ) {
my($thumbprint2,$signature2) = $self->_buildTree( $child, $lookup, $old, $p );
$thumbprint .= $thumbprint2;
$signature .= $signature2;
$p++;
}
foreach my $attr ( sort {$a->nodeName cmp $b->nodeName } $node->attributes() ) {
$weight += length($attr->nodeName);
$thumbprint .= $attr->nodeName();
}
} elsif( $nodeType == 3 ) {
#$self->_debug( "- text node -" );
# XML_TEXT_NODE= 3,
# text node hashes are their text value
$signature = 'TEXT';
$thumbprint = $signature.$node->textContent();
$weight = length($thumbprint);
} elsif( $nodeType == 4 ) {
#$self->_debug( "- cdata section -" );
# XML_CDATA_SECTION_NODE= 4,
# cdata section
$signature = 'CDATA';
$thumbprint = $signature.$node->textContent();
$weight = length($thumbprint);
} elsif( $nodeType == 7 ) {
#$self->_debug( "- processing instruction -" );
# XML_PI_NODE= 7,
# processing instruction
$signature = 'PI';
$thumbprint = $signature;
$weight = 5;
} elsif( $nodeType == 8 ) {
#$self->_debug( "- comment node -" );
# XML_COMMENT_NODE= 8,
# comment node
$signature = 'COMMENT';
$thumbprint = $signature.$node->textContent();
$weight = length($thumbprint);
} else {
#$self->_debug( "- UNHANDLED NODE TYPE -" );
# unhandled
# XML_ENTITY_REF_NODE= 5,
# XML_ENTITY_NODE= 6,
# XML_DOCUMENT_NODE= 9,
# XML_DOCUMENT_TYPE_NODE= 10,
# XML_DOCUMENT_FRAG_NODE= 11,
# XML_NOTATION_NODE= 12,
# XML_HTML_DOCUMENT_NODE= 13,
# XML_DTD_NODE= 14,
# XML_ELEMENT_DECL= 15,
# XML_ATTRIBUTE_DECL= 16,
# XML_ENTITY_DECL= 17,
# XML_NAMESPACE_DECL= 18,
# XML_XINCLUDE_START= 19,
# XML_XINCLUDE_END= 20
next;
}
my $md5 = Digest::MD5->new();
$md5->add($signature);
my $hash = $md5->b64digest();
my $node_id = $$node;
my $md5_2 = Digest::MD5->new();
$md5_2->add($thumbprint);
$thumbprint = $md5_2->b64digest();
#$self->_debug( "$node_id\t$weight\t$hash" );
my $id;
push(@{$lookup->{hash}->{$hash}->{$position}},$node);
if( !$lookup->{hash}->{$hash}->{max} || $lookup->{hash}->{$hash}->{max} <= $position ) {
$lookup->{hash}->{$hash}->{max} = $position;
}
#if( !$self->{_HARD_MATCH} && @{$lookup->{hash}->{$hash}} > 100 ) {
# $self->{_HARD_MATCH} = 1;
( run in 2.150 seconds using v1.01-cache-2.11-cpan-e1769b4cff6 )