CAM-XML

 view release on metacpan or  search on metacpan

lib/CAM/XML.pm  view on Meta::CPAN

package CAM::XML;

use 5.006;
use strict;
use warnings;
use CAM::XML::Text;
use English qw(-no_match_vars);
use Carp;

our $VERSION = '1.14';

=for stopwords XHTML XPath pre-formatted attr1 attr2

=head1 NAME 

CAM::XML - Encapsulation of a simple XML data structure

=head1 LICENSE

Copyright 2006 Clotho Advanced Media, Inc., <cpan@clotho.com>

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 SYNOPSIS

  my $pollTag = CAM::XML->new('poll');
  
  foreach my $q (@questions) {
    my $questionTag = CAM::XML->new('question');
    
    $questionTag->add(-text => $q->{text});
    my $choicesTag = CAM::XML->new('choices');
    
    foreach my $c (@{$q->{choices}}) {
      my $choiceTag = CAM::XML->new('choice');
      $choiceTag->setAttributes('value', $c->{value});
      $choiceTag->add(-text => $c->{text});
      $choicesTag->add($choiceTag);
    }
    $questionTag->add($choicesTag);
    $pollTag->add($questionTag);
  }
  print CAM::XML->header();
  print $pollTag->toString();

=head1 DESCRIPTION

This module reads and writes XML into a simple object model.  It is
optimized for ease of creating code that interacts with XML.

This module is not as powerful or as standards-compliant as say
XML::LibXML, XML::SAX, XML::DOM, etc, but it's darn easy to use.  I
recommend it to people who want to just read/write a quick but valid
XML file and don't want to bother with the bigger modules.

In our experience, this module is actually easier to use than
XML::Simple because the latter makes some assumptions about XML
structure that prevents it from handling all XML files well.  YMMV.

However, one exception to the simplicity claimed above is our
implementation of a subset of XPath.  That's not very simple.  Sorry.

=head1 CLASS METHODS

=over

=item $pkg->parse($xmlstring)

=item $pkg->parse(-string => $xmlstring)

=item $pkg->parse(-filename => $xmlfilename)

lib/CAM/XML.pm  view on Meta::CPAN


=item $self->getInnerText()

For the given node, descend through all of its children and
concatenate all the text values that are found.  If none, this method
returns an empty string (not undef).

=cut

sub getInnerText
{
   my $self = shift;

   my $text  = q{};
   my @stack = ([@{ $self->{children} }]);
   while (@stack > 0)
   {
      my $list  = $stack[-1];
      my $child = shift @{$list};
      if ($child)
      {
         if ($child->isa(__PACKAGE__))
         {
            push @stack, [@{ $child->{children} }];
         }
         else # CAM::XML::Text
         {
            $text .= $child->{text};
         }
      }
      else
      {
         pop @stack;
      }
   }
   return $text;
}

=item $self->getNodes(-tag => $tagname)

=item $self->getNodes(-attr => $attrname, -value => $attrvalue)

=item $self->getNodes(-path => $path)

Return an array of CAM::XML objects representing nodes that match the
requested properties.

A path is a syntactic path into the XML doc something like an XPath

  '/' divides nodes
  '//' means any number of nodes
  '/[n]' means the nth child of a node (1-based)
  '<tag>[n]' means the nth instance of this node
  '/[-n]' means the nth child of a node, counting backward
  '/[last()]' means the last child of a node (same as [-1])
  '/[@attr="value"]' means a node with this attribute value
  '/text()' means all of the text data inside a node
            (note this returns just one node, not all the nodes)

For example, C</html/body//table/tr[1]/td/a[@target="_blank"]>
searches an XHTML body for all tables, and returns all anchor nodes in
the first row which pop new windows.

Please note that while this syntax resembles XPath, it is FAR from a
complete (or even correct) implementation.  It's useful for basic
delving into an XML document, however.

=cut

sub getNodes
{
   my $self     = shift;
   my %criteria = (@_);

   if ($criteria{-path})
   {
      # This is a very different beast.  Handle it separately.
      return $self->_get_path_nodes($criteria{-path}, [$self]);
   }

   my @list  = ();
   my @stack = ([$self]);
   while (@stack > 0)
   {
      my $list = $stack[-1];
      my $obj  = shift @{$list};
      if ($obj)
      {
         if ($obj->isa(__PACKAGE__))
         {
            push @stack, [@{ $obj->{children} }];
            if (($criteria{-tag} && $criteria{-tag} eq $obj->{name}) ||
                ($criteria{-attr} && exists $obj->{attributes}->{$criteria{-attr}} &&
                 $obj->{attributes}->{$criteria{-attr}} eq $criteria{-value}))
            {
               push @list, $obj;
            }
         }
      }
      else
      {
         pop @stack;
      }
   }
   return @list;
}

# Internal use only

sub _get_path_nodes
{
   my $self = shift;
   my $path = shift;
   my $kids = shift || $self->{children};

   my @list = !$path                                   ? $self
            : $path =~ m,\A /?text\(\)          \z,xms ? $self->_get_path_nodes_text()
            : $path =~ m,\A /?\[(\d+)\](.*)     \z,xms ? $self->_get_path_nodes_easyindex($kids, $1, $2)
            : $path =~ m,\A /?\[([^\]]+)\](.*)  \z,xms ? $self->_get_path_nodes_index($kids, $1, $2)
            : $path =~ m,\A //+                 \z,xms ? $self->_get_path_nodes_all($kids, $path)
            : $path =~ m,   (/?)(/?)([^/]+)(.*) \z,xms ? $self->_get_path_nodes_match($kids, $path, $1, $2, $3, $4)



( run in 0.686 second using v1.01-cache-2.11-cpan-39bf76dae61 )