Data-Edit-Xml

 view release on metacpan or  search on metacpan

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  my $space   = "  "x($depth//0);
  return $space.'<'.$t.$node->printAttributes.'/>'."\n" if !@$content;          # No sub nodes

  my $s = $space.'<'.$t.$node->printAttributes.'>'.                             # Has sub nodes
    ($node->first->isText ? '' : "\n");                                         # Continue text on the same line, otherwise place nodes on following lines
  $s .= $_->prettyString($depth+1) for @$content;                               # Recurse to get the sub content
  $s .= $node->last->isText ? ((grep{!$_->isText} @$content)                    # Continue text on the same line, otherwise place nodes on following lines
                            ? "\n$space": "") : $space;
  my $r = $s .  '</'.$t.'>'."\n";                                               # Closing tag
  return $r if $depth;                                                          # Return from sub tree
  $r =~ s(>\n( *[.,;:\)] *)) (>$1\n)gsr                                         # Overall result moves some punctuation through one new line to be closer to its tag
     =~ s(\n\s*\n) (\n)gsr                                                      # Remove blank lines
 }

sub prettyStringHtml2($$)                                                       #P Return a string of html representing a node of a L<parse|/parse> tree and all the nodes below it. Or use L<-p|/opString> $node
 {my ($node, $depth) = @_;                                                      # Start node, optional depth.
  $depth //= 0;                                                                 # Start depth if none supplied

  if ($node->isText)                                                            # Text block
   {my $t =                                                                     # Wrap text in span on one line
     qq(<span class="xmlText">)
     .nws($node->text)
     .qq(</span>);
    return $t;                                                                  # Text already has a new line and so no additional separator required
   }

  my $t       = qq(<span class="xmlTag">).$node->tag.q(</span>);                # Not text so it has a tag
  my $content = $node->content;                                                 # Sub nodes

  my $space   = qq(<span class="xmlLineStartTag">)                              # Space before text
   .("&nbsp;"x(4*($depth//0)))
   .qq(</span>);

  return $space                                                                 # No sub nodes
   .q(<span class="xmlLt">&lt;</span>)
   .$t
   .$node->printAttributesHtml
   .q(<span class="xmlSlashGt">/&gt;</span>)
   ."\n" if !@$content;

  my $s = $space                                                                # Has sub nodes
   .q(<span class="xmlLt">&lt;</span>)
   .$t
   .$node->printAttributesHtml
   .q(<span class="xmlGt">&gt;</span>)
   .($node->first->isText ? '' : "\n");                                         # Continue text on the same line, otherwise place nodes on following lines

  $s .= $_->prettyStringHtml2($depth+1) for @$content;                          # Recurse to get the sub content

  $s .= $node->last->isText ? ((grep{!$_->isText} @$content)                    # Continue text on the same line, otherwise place nodes on following lines
                            ? "\n$space": "") : $space;

  my $r = $s                                                                    # Closing tag
   .q(<span class="xmlLtSlash">&lt;/</span>)
   .$t
   .q(<span class="xmlGt">&gt;</span>)
   ."\n";

  return $r if $depth;                                                          # Return from sub tree

  my $h = join "\n", map {qq(<div class="xmlLine">$_</div>)} split m/\n/, $r;   # Wrap div around each line
  qq($h\n)
 }

sub prettyStringHtml($@)                                                        # Return a string of L<html> representing a node of a L<parse|/parse> tree and all the nodes below it if the node is in the specified context.
 {my ($node, @context) = @_;                                                    # Node, optional context
  return undef if @context and !$node->at(@context);                            # Check optional context
  prettyStringHtml2($node, 0);                                                  # Print as html
 }

sub prettyStringDitaHeaders($)                                                  #U Return a readable string representing the L<parse|/parse> tree below the specified B<$node> with appropriate headers. Or use L<-x|/opString> $node
 {my ($node) = @_;                                                              # Start node
# cluck "Please use: ditaPrettyPrintWithHeaders...redirecting";
  $node->ditaPrettyPrintWithHeaders
 }

sub prettyStringNumbered($;$)                                                   #U Return a readable string representing a node of a L<parse|/parse> tree and all the nodes below it with a L<number|/number> attached to each tag. The node numbers can t...
 {my ($node, $depth) = @_;                                                      # Start node, optional depth.
  $depth //= 0;                                                                 # Start depth if none supplied

  my $N = $node->number;                                                        # Node number if present

  if ($node->isText)                                                            # Text node
   {my $n = $node->next;
    my $s = !defined($n) || $n->isText ? '' : "\n";                             # Add a new line after contiguous blocks of text to offset next node
    return ($N ? "($N)" : '').$node->text.$s;                                   # Number text
   }

  my $t = $node->tag;                                                           # Number tag in a way which allows us to skip between start and end tags in L<Geany|http://www.geany.org> using the ctrl+up and ctrl+down arrows
  my $i = $N && !defined($node->id) ? " id=\"$N\""  : '';                       # Use id to hold tag
  my $content = $node->content;                                                 # Sub nodes
  my $space   = "  "x($depth//0);
  return $space.'<'.$t.$i.$node->printAttributes.'/>'."\n" if !@$content;       # No sub nodes

  my $s = $space.'<'.$t.$i.$node->printAttributes.'>'.                          # Has sub nodes
    ($node->first->isText ? '' : "\n");                                         # Continue text on the same line, otherwise place nodes on following lines
  $s .= $_->prettyStringNumbered($depth+1) for @$content;                       # Recurse to get the sub content
  $s .= $node->last->isText ? ((grep{!$_->isText} @$content)                    # Continue text on the same line, otherwise place nodes on following lines
                            ? "\n$space": "") : $space;
  my $r = $s .  '</'.$t.'>'."\n";                                               # Closing tag
  return $r if $depth;                                                          # Return from sub tree
  $r =~ s(>\n( *[.,;:\)] *)) (>$1\n)gsr                                         # Overall result moves some punctuation through one new line to be closer to its tag
 }

sub prettyStringCDATA($;$)                                                      #U Return a readable string representing a node of a L<parse|/parse> tree and all the nodes below it with the text fields wrapped with <CDATA>...</CDATA>.
 {my ($node, $depth) = @_;                                                      # Start node, optional depth.
  $depth //= 0;                                                                 # Start depth if none supplied

  if ($node->isText)                                                            # Text node
   {my $n = $node->next;
    my $s = !defined($n) || $n->isText ? '' : "\n";                             # Add a new line after contiguous blocks of text to offset next node
    return '<'.cdata.'>'.$node->text.'</'.cdata.'>'.$s;
   }

  my $t = $node->tag;                                                           # Not text so it has a tag
  my $content = $node->content;                                                 # Sub nodes
  my $space   = "  "x($depth//0);
  return $space.'<'.$t.$node->printAttributes.'/>'."\n" if !@$content;          # No sub nodes

  my $s = $space.'<'.$t.$node->printAttributes.'>'.                             # Has sub nodes
    ($node->first->isText ? '' : "\n");                                         # Continue text on the same line, otherwise place nodes on following lines

lib/Data/Edit/Xml.pm  view on Meta::CPAN

  $node
 }

sub throughX($$$@)                                                              # Identical to L<through|/through> except the B<$before, $after> subs are called in an L<eval> block to prevent L<die> terminating the traversal of the full tree.
 {my ($node, $before, $after, @context) = @_;                                   # Starting node, sub to call when we meet a node, sub to call we leave a node, accumulated context.
  &$before(local $_ = $node, @context);                                         # Process specified node first with before()
  $_->throughX($before, $after, $node, @context) for $node->contents;           # Recurse to process sub nodes in deeper context
  &$after(local $_ = $node, @context);                                          # Process specified node last with after()
  $node
 }

#D2 Range                                                                       # Ranges of nodes

sub from($@)                                                                    # Return a list consisting of the specified node and its following siblings optionally including only those nodes that match one of the tags in the specified list.
 {my ($start, @match) = @_;                                                     # Start node, optional list of tags to match
  my $p = $start->parent;                                                       # Parent node
  confess "No parent" unless $p;                                                # Not possible on a root node
  my @c = $p->contents;                                                         # Content
  shift @c while @c and $c[ 0] != $start;                                       # Position on start node
  if (@match)                                                                   # Select matching nodes if requested
   {my %m = map {$_=>1} @match;
    return grep {$m{$_->tag}} @c;
   }
  @c                                                                            # Elements in the specified range
 }

sub to($@)                                                                      # Return a list of the sibling nodes preceding the specified node optionally including only those nodes that match one of the tags in the specified list.
 {my ($end, @match) = @_;                                                       # End node, optional list of tags to match
  my $q = $end->parent;                                                         # Parent node
  confess "No parent" unless $q;                                                # Not possible on a root node
  my @c = $q->contents;                                                         # Content
  pop @c while @c and $c[-1] != $end;                                           # Position on end
  if (@match)                                                                   # Select matching nodes if requested
   {my %m = map {$_=>1} @match;
    return grep {$m{$_->tag}} @c;
   }
  @c                                                                            # Elements in the specified range
 }

sub fromTo($$@)                                                                 # Return a list of the nodes between the specified start and end nodes optionally including only those nodes that match one of the tags in the specified list.
 {my ($start, $end, @match) = @_;                                               # Start node, end node, optional list of tags to match
  my $p = $start->parent;                                                       # Parent node
  confess "No parent" unless $p;                                                # Not possible on a root node
  my $q = $end->parent;                                                         # Parent node
  confess "No parent" unless $q;                                                # Not possible on a root node
  confess "Not siblings" unless $p == $q;                                       # Not possible unless the two nodes are siblings under the same parent
  my @c = $p->contents;                                                         # Content
  shift @c while @c and $c[ 0] != $start;                                       # Position on start node
  pop   @c while @c and $c[-1] != $end;                                         # Position on end
  if (@match)                                                                   # Select matching nodes if requested
   {my %m = map {$_=>1} @match;
    return grep {$m{$_->tag}} @c;
   }
  @c                                                                            # Elements in the specified range
 }

#D1 Location                                                                    # Locate the line numbers and columns of a specified node and write that information as a L<Oxygen Message|/https://www.oxygenxml.com/doc/versions/20.1/ug-author/topics/l...

sub parseLineLocation($)                                                        #PS Parse a line location
 {my ($loc) = @_;                                                               # Location
  my ($l, $c, $L, $C) = split m/[.:]/, $loc;                                    # Position of node in source

  for my $n($l, $c, $L)                                                         # Check that some-one else is not using xtrf for some other reason
   {return () unless $n and $n =~ m(\A\d+\Z)s;
   }
  return () if $L and $L !~ m(\A\d+\Z)s;

  unless(defined $C)                                                            # Same line
   {$C = $L;
    $L = 0;
   }
  $L += $l;                                                                     # Final line

 ($l, $c, $L, $C)                                                               # Return parsed line location
 }

sub lineLocation($)                                                             #U Return the line number.column location of this tag in its source file or string if the source was parsed with the L<line number|/lineNumber> option on.
 {my ($node) = @_;                                                              # Node
  my $loc  = $node->attr(q(xtrf));                                              # Location of node in source
  return q() unless $loc;                                                       # No location specified
  my @c = my ($l, $c, $L, $C) = parseLineLocation $loc;                         # Position of node in source
  for(@c)
   {return q() unless defined $_;
   }
  if ($l eq $L)                                                                 # All on one line
   {return qq(on line $l from $c to $C)                                         # Single line
   }
  qq(from line $l at $c to line $L at $C)                                       # Spans two or more lines
 }

sub location($;$)                                                               #U Return the line number.column plus file location of this tag in its source file or string if the source was parsed with the L<line number|/lineNumber> option on.
 {my ($node, $file) = @_;                                                       # Node, optionally the location of the source.
  my $lmsg   = $node->lineLocation();                                           # Line location
     $lmsg   = q( ).$lmsg if $lmsg;
  my $parser = $node->parser;                                                   # Parser associated with this node
  my $fmsg   = sub                                                              # Description of the containing file
   {return qq( in file: ).$file if $file;                                       # In the specified file
    return qq( in file: ).$parser->inputFile if $parser->inputFile;             # Position of node in source
    q()                                                                         # Unknown location
   }->();
  $lmsg.$fmsg                                                                   # Return location
 }

sub closestLocation($)                                                          #U Return the nearest node with line number.column information
 {my ($node) = @_;                                                              # Node
  return $node if $node->attr(q(xtrf));                                         # Node has a location so return it.
  my $best;                                                                     # Best location so far
  if ($node->parser->lineNumbers)                                               # Search through tree for closest node if line numbering is in effect
   {my $before = 1;                                                             # Best node before (after) the specified node
    for my $p($node->parser->downList)                                          # Preorder traversal
     {if ($before or !$best)                                                    # Before or at the node
       {$best = $p if $p->attr(q(xtrf));
       }
      $before = 0 if $p == $node and $before;                                   # Pass the node
     }
   }
  $best                                                                         # Return best node if known
 }

sub approxLocation($;$)                                                         #U Return the line number.column location of the node nearest to this node in the source file if the source was parsed with the L<line number|/lineNumber> option on.
 {my ($node, $file) = @_;                                                       # Node, optionally the location of the source.

lib/Data/Edit/Xml.pm  view on Meta::CPAN


sub checkParentage($)                                                           #UP Check the parent pointers are correct in a L<parse|/parse> tree.
 {my ($x) = @_;                                                                 # Parse tree.
  $x->by(sub
   {my ($o) = @_;
   for($o->contents)
     {my $p = $_->parent;
      $p == $o or confess "No parent: ". $_->tag;
      $p and $p == $o or confess "Wrong parent: ".$o->tag. ", ". $_->tag;
     }
   });
 }

sub checkParser($)                                                              #UP Check that every node has a L<parse|/parse>r.
 {my ($x) = @_;                                                                 # Parse tree.
  $x->by(sub
   {$_->parser or confess "No parser for ". $_->tag;
    $_->parser == $x or confess "Wrong parser for ". $_->tag;
   })
 }

sub goFish($@)                                                                  #U A debug version of L<go|/go> that returns additional information explaining any failure to reach the node identified by the L<path|/path>.\mReturns ([B<reachable tag>....
 {my ($node, @path) = @_;                                                       # Node, search specification.
  my $p = $node;                                                                # Current node
  my @p;                                                                        # Elements of the path successfully processed
  while(@path)                                                                  # Position specification
   {my $i = shift @path;                                                        # Index name
    return ([@p], $i, [sort keys %{$p->indexes}]) unless $p;                    # There is no node of the named type under this node
    reindexNode($p);                                                            # Create index for this node
    my $q = $p->indexes->{$i};                                                  # Index
    return ([@p], $i, [sort keys %{$p->indexes}]) unless defined $q;            # Complain if no such index
    push @p, $i;
    if (@path)                                                                  # Position within index
     {if ($path[0] =~ /\A([-+]?\d+)\Z/)                                         # Numeric position in index from start
       {my $n = shift @path;                                                    # Next path item
        my $N = scalar(@$q);                                                    # Dimension of index
        return ([@p], $n, [0..$N]) unless defined($p = $q->[$n]);               # Complain if no such index
        push @p, $n;                                                            # Save successfully processed index
       }
      elsif (@path == 1 and $path[0] =~ /\A\*\Z/)                               # Final index wanted
       {return [@p];
       }
      else {$p = $q->[0]}                                                       # Step into first sub node by default
     }
    else {$p = $q->[0]}                                                         # Step into first sub node by default on last step
   }
  [@p]                                                                          # Success!
 }

sub nn($)                                                                       #P Replace new lines in a string with N to make testing easier.
 {my ($s) = @_;                                                                 # String.
  $s =~ s/\n/N/gsr
 }

#D1 Validation                                                                  # Validate Xml

sub checkAllPaths($)                                                            #S Create a representation of all the paths permitted in a block of L<xml>. The syntax of each line is a word representing an L<xml> tag followed by one of: tag B<1 * + ?...
 {my ($valid) = @_;                                                             # Path descriptions
  my %valid;                                                                    # Perl representation of validating string
  my @stack;                                                                    # Tag stack
  my @lines = split m/\n/, $valid;                                              # Split into lines

  for my $i(keys @lines)                                                        # Each line
   {my $line    = $lines[$i] =~ s(\s*#.*\Z) ()r;                                # Remove trailing comments
    next unless $line =~ m(\S);                                                 # Ignore blank lines

    my sub error(@)                                                             # Write an error message
     {confess join ' ', @_, 'on line:', $i + 1, "\n";
     };

    my $tag     = $line =~ s(\A\s*) ()r;                                        # Check indentation carefully as it shows the desired structure
    my $indent  = length($line) - length($tag);
    $indent % 2 and error "Indentation is not even";
    my $indent2 = $indent / 2;
    $i == 0 and $indent != 0 and error "Indent is not zero";
    @stack+1 < $indent2 and error "Too much indentation";

    while(@stack)                                                               # Reduce the stack to the current level
     {if (@stack > $indent2)
       {pop @stack;
        next;
       }
      last;
     }

    my ($tagName, @words) = split m/\s+/, $tag;                                 # Save tag on the tag stack
    push @stack, $tagName;

    my $count = sub                                                             # The count indicator optionally follows the tag
     {return 1 unless @words;                                                   # The default is just one and it is required
      my $c = shift @words;
      return $c if $c  =~ m(\A[-1+*?]\Z)i;                                      # Valid operators
      1
     }->();

    if (@stack > 1)                                                             # Element description
     {$valid{join ' ', @stack[1..$#stack]} =
        [$count, my $comment = join ' ', @words];
     }
   }

  my ($root) = @stack;                                                          # The root tag

  for   my $a(sort keys %valid)                                                 # Create get methods
   {for my $b(sort keys %valid)
     {my $c = $valid{$b}[0];                                                    # Count field
      my @b = split m/\s+/, $b;
      my $m = pop @b;
      if ($a eq join " ", @b or !@b)                                            # Has children
       {my @m = ($root, @b);
        if (!isSubInPackage((join '::', @m), $m))
         {my $d = $c eq q(*) || $c eq q(+) ? q([]) : q/q()/;                    # Default return value

          my $s = join ' ', qq(sub), (join '::', @m, $m), qq({\$_[0]{$m} // $d});
          eval $s;
          if ($@)
           {confess join '', "Unable to create method: ",$a, q(::).qq($m\n$@\n);
           }
         }
        if (@b and $valid{$a}[0] eq q(-))                                       # Check that tags marked as boolean leaves do not have any children
         {confess <<END;
Path: '$a' has been marked with '-' making it a boolean leaf yet it has
a child: '$b'  '$a'
END
         }
       }
     }
   }

  \%valid
 } # checkAllPaths

sub xmlToPerl($$)                                                               #S Parse some L<xml>, validate using a description recognized by  L<checkAllPaths> and return the corresponding L<Perl> structure.  Valid fields in the returned structure...
 {my ($xml, $valid) = @_;                                                       # Xml represented as a string, Xml validating represented as a string

  my $xmlTree   = Data::Edit::Xml::new($xml);                                   # Xml parse tree
  my $validator = checkAllPaths($valid);                                        # Create a validating string to check all the paths in the xml description of the system
  my $perl;                                                                     # Perl representation of the Xml

  $xmlTree->by(sub                                                              # Traverse xml to build Perl data structure
   {my ($o) = @_;
    return if $o == $xmlTree;

    my @path = reverse @_;                                                      # Node path from root downwards
    shift @path;                                                                # Remove root tag as it is boiler plate
    pop @path if $o->isText;                                                    # Remove CDATA
    my $p = join ' ', map {-t $_} @path;                                        # Tag path

    if (my $valid = $$validator{$p}[0])                                         # Details of this path
     {my @keys;                                                                 # Path as text
      for my $path(@path)                                                       # Describe path
       {my $t = -t $path;
        if ($path != $xmlTree)
         {my $i = $path->index;
          push @keys, qq({$t}[$i]);
         }
       }
      if ($o->isText)                                                           # Text field
       {my $s = join "", q($$perl), @keys, q( = ), dump trim $o->text;          # Load Perl data structure
        eval $s;
        $@ and confess "$@\n";
       }
      else                                                                      # Check that a tag has content unless it is a boolean leaf
       {if ($o->isEmpty)                                                        # Tag has no content
         {if ($valid eq q(-))
           {my $s = join "", q($$perl), @keys, q( = 1);                            # Show leaf tag present
            eval $s;
            $@ and confess "$@\n";
           }
          else
           {confess join " ", "Tag has no content on path(zero based):",
              $o->pathString, "\n";
           }
         }
        else                                                                    # Tag has  content
         {if ($valid eq q(-))
           {confess join " ", "Leaf boolean has content on path(zero based):",
            $o->pathString, "\n";
           }
         }
       }

      if (1)                                                                    # Bless parents so we can write $a->b rather than $a->{b} and get an error if we choose an invalid field.
       {pop @keys; pop @path;
        my @p = map{$_->tag} @path;
        my $k = join '',                  @keys;
        my $p = join '::', $xmlTree->tag, @p;
        my $s = @keys ? qq(bless \$\$perl$k, "$p") : qq(bless \$perl$k, "$p");
        eval $s;
        $@ and confess "$@\n";
       }
     }
    elsif ($p)
     {confess join ' ',
       "No description in validator for path:", $o->pathString, "\n";
     }
    else
     {my $j = -p $o;
      confess "Junk: $j\n";
     }
   });

  for my $v(sort keys %$validator)                                              # Validate presence of required elements by checking the application of each rule which requires at least one sub element
   {my ($count) = $$validator{$v}->@*;                                          # Count specification from this validation specification
    next unless $count =~ m(\A[1+]\Z)i;                                         # We are only interested in required elements
    my @path    = split m/\s+/, $v;                                             # Path to this rule
    my $parent  = join ' ', reverse $xmlTree->tag, @path[0..@path-2];           # Path to parent of this rule

    $xmlTree->by(sub                                                            # Traverse xml to build Perl data structure
     {my ($o) = @_;
      if ($parent eq $o->context)                                               # Point in the xml parse tree that matches the parent rule
       {my $child = $path[-1];
        my @c = $o->c($child);
        if (@c < 1)
         {say STDERR -p $o;
          confess join " ", "$child required under", $o->context, "\n";
         }
       }
     });
   }

  my $sublimate; $sublimate = sub                                               # Replace arrays with direct references where possible
   {my ($data, @path) = @_;                                                     # Data point, path to data point

    for   my $k(sort keys %$data)
     {push @path, $k;
      for my $d($$data{$k}->@*)                                                 # Sublimate lower trees
       {$sublimate->($d, @path) if ref $d;
        if (my $valid = $$validator{join ' ',  @path})
         {if ($$valid[0] =~ m(\A[-1?]\Z)i and $$data{$k}->@* <= 1)
           {$$data{$k} = $$data{$k}[0];
           }
         }
       }
      pop @path;
     }
   };

  &$sublimate($perl);                                                           # Replace arrays with direct references where possible

  $perl
 } # xmlToPerl

#D1 Documentation                                                               # Update documentation describing this module

sub extractDocumentationFlags($$)                                               #P Generate documentation for a method with a user flag.
 {my ($flags, $method) = @_;                                                    # Flags, method name.
  my $b = "${method}NonBlank";                                                  # Not blank method name - used for a small number of navigation methods
  my $x = "${method}NonBlankX";                                                 # Not blank, die on B<undef> method name
  my $m = $method;                                                              # Second action method
     $m =~ s/\Afirst/next/gs;
     $m =~ s/\Alast/prev/gs;
  my @doc; my @code;

  if ($flags =~ m/C/is)                                                         # Context flag for a method that returns a single node or B<undef> if in the wrong context
   {push @doc, <<'END' if $flags =~ m/C/s;
Use the optional B<@context> parameter to test the context of the specified
B<$node> as understood by method L<at|/at>. If the context is supplied and
B<$node> is not in this context then this method returns B<undef> immediately.
END
    push @doc, <<'END' if $flags =~ m/c/s;
Use the required B<$tag> parameter to specify the expected tag on the specified
B<$node> using a single match expression as understood by method L<at|/at>. Use
the optional B<@context> parameter to test the context as understood by method
L<at|/at> of the parent node of the specified B<$node>. If either test fails
this method returns B<undef> immediately.



( run in 1.257 second using v1.01-cache-2.11-cpan-71847e10f99 )