XML-Diff

 view release on metacpan or  search on metacpan

Diff.pm  view on Meta::CPAN

    $self->{$type}->{root} = $self->{$type}->{doc}->documentElement();
  }

  #$self->{$type}->{doc}->indexElements();

  return 1;

}

=pod

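=head2 _buildTree

Internal helper. Takes a node, the C<$lookup> hash being populated, an
C<$old> argument and an optional position (defaulting to 0). For element
nodes it recurses into every child node. For each handled node it builds two
strings: a I<signature> (the node name for elements, or a fixed tag such as
C<TEXT>, C<CDATA>, C<PI> or C<COMMENT>) and a I<thumbprint> (the signature
plus text content and, for elements, the sorted attribute names). The MD5
digest of the signature is used as a key under C<< $lookup->{hash} >>, where
the node is filed by position and the highest position seen is tracked in
C<max>. The recursive call expects a C<(thumbprint, signature)> pair back.

=cut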
# _________________________________________________________
sub _buildTree {
  my $self     = shift;
  my $node     = shift;
  my $lookup   = shift;
  my $old      = shift;
  my $position = shift || 0;
  my $signature;
  my $thumbprint;
  my $weight;

  # currently we look at Element, Text, CDATA, Processing Instruction and
  # Comment nodes (Attribute nodes are handled as part of their owning
  # Element node); every other node type goes to the unhandled branch below
  #next unless( $node->nodeType == 3 || $node->nodeType == 1 );

  # need to consider full, content and structure matches for better diffs
  # but that's for the future.. right now we just do structure
  my $nodeType = $node->nodeType;
  if( $node->nodeType == 1 ) {
    #$self->_debug( "- element node -" );
    #     XML_ELEMENT_NODE=           1,
    #     XML_ATTRIBUTE_NODE=         2,
    $signature = $node->nodeName();
    $thumbprint = $signature;
    my $p;
    foreach my $child ( $node->childNodes() ) {
      my($thumbprint2,$signature2) = $self->_buildTree( $child, $lookup, $old, $p );
      $thumbprint .= $thumbprint2;
      $signature  .= $signature2;
      $p++;
    }

    foreach my $attr ( sort {$a->nodeName cmp $b->nodeName } $node->attributes() ) {
      $weight += length($attr->nodeName);
      $thumbprint .= $attr->nodeName();
    }

  } elsif( $nodeType == 3 ) {
    #$self->_debug( "- text node -" );
    #     XML_TEXT_NODE=              3,
    # text node hashes are their text value
    $signature = 'TEXT';
    $thumbprint = $signature.$node->textContent();
    $weight    = length($thumbprint);

  } elsif( $nodeType == 4 ) {
    #$self->_debug( "- cdata section -" );
    #     XML_CDATA_SECTION_NODE=     4,
    # cdata section
    $signature  = 'CDATA';
    $thumbprint = $signature.$node->textContent();
    $weight     = length($thumbprint);

  } elsif( $nodeType == 7 ) {
    #$self->_debug( "- processing instruction -" );
    #     XML_PI_NODE=                7,
    # processing instruction
    $signature = 'PI';
    $thumbprint = $signature;
    $weight    = 5;

  } elsif( $nodeType == 8 ) {
    #$self->_debug( "- comment node -" );
    #     XML_COMMENT_NODE=           8,
    # comment node
    $signature = 'COMMENT';
    $thumbprint = $signature.$node->textContent();
    $weight     = length($thumbprint);


  } else {
    #$self->_debug( "- UNHANDLED NODE TYPE -" );

    # unhandled
    #     XML_ENTITY_REF_NODE=        5,
    #     XML_ENTITY_NODE=            6,
    #     XML_DOCUMENT_NODE=          9,
    #     XML_DOCUMENT_TYPE_NODE=     10,
    #     XML_DOCUMENT_FRAG_NODE=     11,
    #     XML_NOTATION_NODE=          12,
    #     XML_HTML_DOCUMENT_NODE=     13,
    #     XML_DTD_NODE=               14,
    #     XML_ELEMENT_DECL=           15,
    #     XML_ATTRIBUTE_DECL=         16,
    #     XML_ENTITY_DECL=            17,
    #     XML_NAMESPACE_DECL=         18,
    #     XML_XINCLUDE_START=         19,
    #     XML_XINCLUDE_END=           20
    next;
  }

  my $md5 = Digest::MD5->new();
  $md5->add($signature);
  my $hash  = $md5->b64digest();
  my $node_id = $$node;

  my $md5_2 = Digest::MD5->new();
  $md5_2->add($thumbprint);
  $thumbprint = $md5_2->b64digest();
  #$self->_debug( "$node_id\t$weight\t$hash" );

  my $id;
  push(@{$lookup->{hash}->{$hash}->{$position}},$node);
  if( !$lookup->{hash}->{$hash}->{max} || $lookup->{hash}->{$hash}->{max} <= $position ) {
    $lookup->{hash}->{$hash}->{max} = $position;
  }

  #if( !$self->{_HARD_MATCH} && @{$lookup->{hash}->{$hash}} > 100 ) {
  #  $self->{_HARD_MATCH} = 1;



( run in 2.150 seconds using v1.01-cache-2.11-cpan-e1769b4cff6 )