Data-Edit-Xml

 view release on metacpan or  search on metacpan

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  if (@_)
   {my $x = bless {input=>@_};                                                  # Create L<XML> editor with a string or file
    $x->parser = $x;                                                            # Parser root node
    return $x->parse;                                                           # Parse
   }
  my $x = bless {};                                                             # Create empty L<XML> editor
  $x->parser = $x;                                                              # Parser root node
  $x                                                                            # Parser
 }

sub cdata()                                                                     # The name of the tag to be used to represent text - this tag must not also be used as a command tag otherwise the parser will L<confess>.
 {'CDATA'
 }

sub reduceParseErroMessage($)                                                   #P Reduce the parse failure message to the bare essentials.
 {my ($e) = @_;                                                                 # Error message
  if ($e =~ m((not well-formed.*?byte\s*\d+))is) {return $1}
  $e                                                                            # Return full  message of it cannot be further reduced
 }

sub parse($)                                                                    # Parse input L<XML> specified via: L<inputFile|/inputFile>, L<input|/input> or L<inputString|/inputString>.

lib/Data/Edit/Xml.pm  view on Meta::CPAN

   }

  $parser                                                                       # Parse details
 }

sub tree($$)                                                                    #P Build a tree representation of the parsed L<XML> which can be easily traversed to look for things.
 {my ($parent, $parse) = @_;                                                    # The parent node, the remaining parse
  while(@$parse)
   {my $tag  = shift @$parse;                                                   # Tag for node
    my $node = bless {parser=>$parent->parser};                                 # New node
    if ($tag eq cdata)
     {confess cdata.' tag encountered';                                         # We use this tag for text and so it cannot be used as a user tag in the document
     }
    elsif ($tag eq '0')                                                         # Text
     {my $s = shift @$parse;
      if ($s !~ /\A\s*\Z/)                                                      # Ignore entirely blank strings
       {$s = replaceSpecialChars($s);                                           # Restore special characters in the text
        $node->tag  = cdata;                                                    # Save text. ASSUMPTION: CDATA is not used as a tag anywhere.
        $node->text = $s;
        push @{$parent->content}, $node;                                        # Save on parents content list
       }
     }
    else                                                                        # Node
     {my $children   = shift @$parse;
      my $attributes = shift @$children;
      $node->tag = $tag;                                                        # Save tag
      $_ = replaceSpecialChars($_) for values %$attributes;                     # Restore in text with L<XML> special characters
      $node->attributes = $attributes;                                          # Save attributes

lib/Data/Edit/Xml.pm  view on Meta::CPAN

   }
  join '', @s                                                                   # New source string
 }

#D2 Node by Node                                                                # Construct a parse tree node by node.

sub newText($$)                                                                 # Create a new text node.
 {my (undef, $text) = @_;                                                       # Any reference to this package, content of new text node
  my $node = bless {};                                                          # New node
  $node->parser = $node;                                                        # Root node of this parse
  $node->tag    = cdata;                                                        # Text node
  $node->text   = $text;                                                        # Content of node
  $node                                                                         # Return new non text node
 }

sub newTag($$%)                                                                 # Create a new non text node.
 {my (undef, $command, %attributes) = @_;                                       # Any reference to this package, the tag for the node, attributes as a hash.
  my $node = bless {};                                                          # New node
  $node->parser = $node;                                                        # Root node of this parse
  $node->tag    = $command;                                                     # Tag for node
  $node->attributes = \%attributes;                                             # Attributes for node

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  for my $n($node->contents)                                                    # Index content
   {push @{$node->indexes->{$n->tag}}, $n;                                      # Indices to sub nodes
   }
 }

sub indexNode($)                                                                #P Merge multiple text segments and set parent and parser after changes to a node
 {my ($node) = @_;                                                              # Node to index.
  return unless keys @{$node->{content}};
  my @contents = @{$node->content};                                             # Contents of the node

#  eval {grep {$_->{tag} eq cdata} @contents};
#  $@ and confess "$@\n";

  if ((grep {$_->{tag} eq cdata} @contents) > 1)                                # Make parsing easier for the user by concatenating successive text nodes - NB: this statement has been optimized
   {my (@c, @t);                                                                # New content, pending intermediate texts list
    for(@contents)                                                              # Each node under the current node
     {if ($_->{tag} eq cdata)                                                   # Text node. NB: optimized
       {push @t, $_;                                                            # Add the text node to pending intermediate texts list
       }
      elsif (@t == 1)                                                           # Non text element encountered with one pending intermediate text
       {push @c, @t, $_;                                                        # Save the text node and the latest non text node
        @t = ();                                                                # Empty pending intermediate texts list
       }
      elsif (@t  > 1)                                                           # Non text element encountered with two or more pending intermediate texts
       {my $t = shift @t;                                                       # Reuse the first text node
        $t->text .= join '', map {$_->text} @t;                                 # Concatenate the remaining text nodes
        $_->disconnectLeafNode for @t;                                          # Disconnect the remain text nodes as they are no longer needed

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  inputFile=>undef,                                                             # Source file of the L<parse|/parse> if this is the L<parser|/parse> root node. Use this parameter to explicitly set the file to be L<parsed|/parse>.
  input=>undef,                                                                 # Source of the L<parse|/parse> if this is the L<parser|/parse> root node. Use this parameter to specify some input either as a string or as a file name for the L<parser|...
  inputString=>undef,                                                           # Source string of the L<parse|/parse> if this is the L<parser|/parse> root node. Use this parameter to explicitly set the string to be L<parsed|/parse>.
  lineNumbers=>undef,                                                           # If true then save the line number.column number at which tag starts and ends on the xtrf attribute of each node.
  numbering=>undef,                                                             # Last number used to number a node in this L<parse|/parse> tree.
  number=>undef,                                                                # Number of the specified B<$node>, see L<findByNumber|/findByNumber>.
  parent=>undef,                                                                # Parent node of the specified B<$node> or B<undef> if the L<parser|/parse> root node. See also L</Traversal> and L</Navigation>. Consider as read only.
  parser=>undef,                                                                # L<Parser|/parse> details: the root node of a tree is the L<parser|/parse> node for that tree. Consider as read only.
  representationLast=>undef,                                                    # The last representation set for this node by one of: L<setRepresentationAsTagsAndText|/setRepresentationAsTagsAndText>.
  tag=>undef,                                                                   # Tag name for the specified B<$node>, see also L</Traversal> and L</Navigation>. Consider as read only.
  text=>undef,                                                                  # Text of the specified B<$node> but only if it is a text node otherwise B<undef>, i.e. the tag is cdata() <=> L</isText> is true.
 );

#D2 Parse tree                                                                  # Construct a L<parse|/parse> tree from another L<parse|/parse> tree.

sub renew($@)                                                                   #C Returns a renewed copy of the L<parse|/parse> tree by first printing it and then re-parsing it, optionally checking that the starting node is in a specified context: u...
 {my ($node, @context) = @_;                                                    # Node to renew from, optional context
  return undef if @context and !$node->at(@context);                            # Not in specified context
  my $x = new($node->string);                                                   # Reconstruct parse tree from node
  $x->inputFile = $node->root->inputFile;                                       # Convey the input file name if present so that relative references can be resolved in the new parse tree
  $x                                                                            # Return new parse tree

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  $r =~ s(>\n( *[.,;:\)] *)) (>$1\n)gsr                                         # Overall result moves some punctuation through one new line to be closer to its tag
 }

sub prettyStringCDATA($;$)                                                      #U Return a readable string representing a node of a L<parse|/parse> tree and all the nodes below it with the text fields wrapped with <CDATA>...</CDATA>.
 {my ($node, $depth) = @_;                                                      # Start node, optional depth.
  $depth //= 0;                                                                 # Start depth if none supplied

  if ($node->isText)                                                            # Text node
   {my $n = $node->next;
    my $s = !defined($n) || $n->isText ? '' : "\n";                             # Add a new line after contiguous blocks of text to offset next node
    return '<'.cdata.'>'.$node->text.'</'.cdata.'>'.$s;
   }

  my $t = $node->tag;                                                           # Not text so it has a tag
  my $content = $node->content;                                                 # Sub nodes
  my $space   = "  "x($depth//0);
  return $space.'<'.$t.$node->printAttributes.'/>'."\n" if !@$content;          # No sub nodes

  my $s = $space.'<'.$t.$node->printAttributes.'>'.                             # Has sub nodes
    ($node->first->isText ? '' : "\n");                                         # Continue text on the same line, otherwise place nodes on following lines
  $s .= $_->prettyStringCDATA($depth+2) for @$content;                          # Recurse to get the sub content

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  my $p = decodeJson($json);                                                    # Json represented as Perl
  my $x = jsonToXml2($p);                                                       # Parse tree - enough to print
  my $s = string($x);                                                           # Parse tree as string
  new($s);                                                                      # Recreate full parse tree from string
 }

#D2 Dense                                                                       # Print the L<parse|/parse> tree densely for reuse by computers rather than humans.

sub string($)                                                                   #U Return a dense string representing a node of a L<parse|/parse> tree and all the nodes below it. Or use L<-s|/opString> B<$node>.
 {my ($node) = @_;                                                              # Start node.
  return $node->{text} if $node->{tag} eq cdata;                                # Text node
  my $content = $node->content;                                                 # Sub nodes
  my $attr    = keys %{$node->{attributes}};                                    # Number of attributes

  return  '<'.$node->{tag}.                       '/>'
    if !@$content and !keys %{$node->{attributes}};                             # No sub nodes or attributes

  return  '<'.$node->{tag}.$node->printAttributes.'/>'
    if !@$content;                                                              # No sub nodes

  join '', '<', $node->{tag}, $node->printAttributes, '>',                      # Has sub nodes
   (map {$_->{tag} eq cdata ? $_->{text} : $_->string} @$content),              # Recurse to get the sub content
           '</', $node->{tag}, '>'
 }

sub stringAsMd5Sum($)                                                           #U Return the L<md5> of the dense L<string|/string> representing a node of a L<parse|/parse> tree minus its L<id> and all the nodes below it. Or use L<-g|/opString> B<$no...
 {my ($node) = @_;                                                              # Node.
  $node->id = undef if my $i = $node->id;                                       # Save id
  my $md5 = stringMd5Sum($node->string);                                        # Md5 sum of string content minus id. The various printing methods will of course all produce different md5 sums but the md5 sum is big enough to accommodate the variatio...
  $node->id = $i if $i;                                                         # Restore id if present
  $md5                                                                          # Return md5 sum
 }

lib/Data/Edit/Xml.pm  view on Meta::CPAN

sub c($$)                                                                       #U Return an array of all the nodes with the specified tag below the specified B<$node>.
 {my ($node, $tag) = @_;                                                        # Node, tag.
  reindexNode($node);                                                           # Create index for this node
  my $c = $node->indexes->{$tag};                                               # Index for specified tags
  $c ? @$c : ()                                                                 # Contents as an array
 }

sub cText($)                                                                    #U Return an array of all the text nodes immediately below the specified B<$node>.
 {my ($node) = @_;                                                              # Node.
  reindexNode($node);                                                           # Create index for this node
  $node->c(cdata);                                                              # Index for text data
 }

sub findById($$)                                                                #U Find a node in the parse tree under the specified B<$node> with the specified B<$id>.
 {my ($node, $id) = @_;                                                         # Parse tree, id desired.
  my $i;                                                                        # Node found
  eval {$node->by(sub                                                           # Look for an instance of such a node
   {if ($_->idX eq $id) {$i = $_; die}                                          # Found the node - die to stop the search from going further
   })};
  $i                                                                            # Node found if any
 }

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  return $node->text if $node->isText;                                          # Return reference to text if on a text node
  $dummy                                                                        # Nor on a text node
 }

sub isText($@)                                                                  #UCY Return the specified B<$node> if the specified B<$node> is a text node, optionally in the specified context, else return B<undef>.
 {my ($node, @context) = @_;                                                    # Node to test, optional context
  if (@context)                                                                 # Optionally check context
   {my $p = $node->parent;                                                      # Parent
    return undef if !$p or !$p->at(@context);                                   # Parent must match context
   }
  $node->tag eq cdata ? $node : undef
 }

sub isFirstText($@)                                                             #UCY Return the specified B<$node> if the specified B<$node> is a text node, the first node under its parent and that the parent is optionally in the specified context, e...
 {my ($node, @context) = @_;                                                    # Node to test, optional context for parent
  return undef unless $node->isText(@context) and $node->isFirst;               # Check that this node is a text node, that it is first, and, optionally check context of parent
  $node                                                                         # Return the node as it passes all tests
 }

sub isLastText($@)                                                              #UCY Return the specified B<$node> if the specified B<$node> is a text node, the last node under its parent and that the parent is optionally in the specified context, el...
 {my ($node, @context) = @_;                                                    # Node to test, optional context for parent

lib/Data/Edit/Xml.pm  view on Meta::CPAN

    </d>
  </a>
  END


This is a static method and so should either be imported or invoked as:

  Data::Edit::Xml::new


=head3 cdata()

The name of the tag to be used to represent text - this tag must not also be used as a command tag otherwise the parser will L<confess|http://perldoc.perl.org/Carp.html#SYNOPSIS/>.


B<Example:>



    ok Data::Edit::Xml::cdata eq q(CDATA);                                          # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲



=head3 parse($parser)

Parse input L<Xml|https://en.wikipedia.org/wiki/XML> specified via: L<inputFile|/inputFile>, L<input|/input> or L<inputString|/inputString>.

     Parameter  Description
  1  $parser    Parser created by L</new>

lib/Data/Edit/Xml.pm  view on Meta::CPAN

=head4 style

Attribute B<style> for a node as an L<lvalue method|http://perldoc.perl.org/perlsub.html#Lvalue-subroutines> B<sub>.       Use B<styleX()> to return B<q()> rather than B<undef>.

=head4 tag

Tag name for the specified B<$node>, see also L</Traversal> and L</Navigation>. Consider as read only.

=head4 text

Text of the specified B<$node> but only if it is a text node otherwise B<undef>, i.e. the tag is cdata() <=> L</isText> is true.

=head4 type

Attribute B<type> for a node as an L<lvalue method|http://perldoc.perl.org/perlsub.html#Lvalue-subroutines> B<sub>.        Use B<typeX()> to return B<q()> rather than B<undef>.

=head4 xtrc

Attribute B<xtrc> for a node as an L<lvalue method|http://perldoc.perl.org/perlsub.html#Lvalue-subroutines> B<sub>.        Use B<classX()> to return B<q()> rather than B<undef>.

=head4 xtrf

lib/Data/Edit/Xml.pm  view on Meta::CPAN

56 L<byReverseX|/byReverseX> - Reverse post-order traversal of a L<parse|/parse> tree or sub tree below the specified B<$node> calling the specified B<sub> within L<eval|http://perldoc.perl.org/functions/eval.html>B<{}> at each node and returning the...

57 L<byX|/byX> - Post-order traversal of a L<parse|/parse> tree calling the specified B<sub> at each node as long as this sub does not L<die|http://perldoc.perl.org/functions/die.html>.

58 L<byX2|/byX2> - Post-order traversal of a L<parse|/parse> tree or sub tree calling the specified B<sub> within L<eval|http://perldoc.perl.org/functions/eval.html>B<{}> at each node and returning the specified starting node.

59 L<byX22|/byX22> - Post-order traversal of a L<parse|/parse> tree or sub tree calling the specified B<sub> within L<eval|http://perldoc.perl.org/functions/eval.html>B<{}> at each node and returning the specified starting node.

60 L<c|/c> - Return an array of all the nodes with the specified tag below the specified B<$node>.

61 L<cdata|/cdata> - The name of the tag to be used to represent text - this tag must not also be used as a command tag otherwise the parser will L<confess|http://perldoc.perl.org/Carp.html#SYNOPSIS/>.

62 L<change|/change> - Change the name of the specified B<$node>, optionally  confirming that the B<$node> is in a specified context and return the B<$node>.

63 L<changeAttr|/changeAttr> - Rename attribute B<$old> to B<$new> in the specified B<$node> with optional context B<@context> unless attribute B<$new> is already set and return the B<$node>.

64 L<changeAttributeValue|/changeAttributeValue> - Apply a sub to the value of an attribute of the specified B<$node>.

65 L<changeAttrValue|/changeAttrValue> - Rename attribute B<$old> to B<$new> with new value B<$newValue> on the specified B<$node> in the optional B<@context> unless attribute B<$new> is already set or the value of the B<$old> attribute is not B<$old...

66 L<changeKids|/changeKids> - Change the names of all the immediate children of the specified B<$node>, if they match the optional B<@context>, to the specified B<$tag> and return the B<$node>.

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  ok  $c->upn_2__number == 6;
  ok  $c->upWhile_b__number == 4;
  ok  $c->upWhile_a_b__number == 4;
  ok  $c->upWhile_b_c__number == 2;

  ok  $c->upUntil__number == 8;
  ok  $c->upUntil_b_c__number == 4;
 }

if (1) {
  ok Data::Edit::Xml::cdata eq q(CDATA);                                        #Tcdata
  ok Data::Edit::Xml::replaceSpecialChars(q(<">)) eq q(&lt;&quot;&gt;);         #TreplaceSpecialChars
  ok Data::Edit::Xml::undoSpecialChars(q(&lt;&quot;&gt;)) eq q(<">);            #TundoSpecialChars
 }

if (1) {                                                                        # Break in and out
  my $A = Data::Edit::Xml::new("<a><b><d/><c/><c/><e/><c/><c/><d/></b></a>");   #TbreakOut
  ok -p $A eq <<END;
<a>
  <b>
    <d/>



( run in 0.530 second using v1.01-cache-2.11-cpan-454fe037f31 )