XML-TreeBuilder

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

   Add logic to keep entities in attributes when NoExpand is set. RT #88973

Jul 17 2013  Jeff Fearn <Jeff.Fearn@gmail.com>
   Release 4.3

   Hard code VERSION in TreeBuilder.pm for PAUSE. RT #86964

Jul 15 2013  Jeff Fearn <Jeff.Fearn@gmail.com>
   Release 4.2

   Added store_cdata to handle CDATA.
   More test coverage.

Mar 13 2011 Jeff Fearn <Jeff.Fearn@gmail.com>
   Release 4.1

   Doc typo fixes. (Ansgar Burchardt) RT #66404
   Use same version in all packages for CPAN indexing. RT #66111
   Moved perlcritic tests to xt/author
   Fix Authors in all PM files

lib/XML/Element.pm  view on Meta::CPAN

$VERSION = '5.4';
@ISA     = ('HTML::Element');

# Init:
# Build the lookup table handed out by _empty_element_map(): one entry for
# every key of %HTML::Tagset::emptyElement that starts with '~' and whose
# value there is true.
# Iterate over the keys only -- a bare %hash in a foreach list flattens to
# keys AND values, so the values would be needlessly tested as if they were
# tag names (and would warn if any of them were undef).
my %emptyElement = ();
foreach my $e ( keys %HTML::Tagset::emptyElement ) {
    $emptyElement{$e} = 1
        if substr( $e, 0, 1 ) eq '~' and $HTML::Tagset::emptyElement{$e};
}

# Flag shared by the tag/serialisation routines in this file: set while
# output is inside a '~cdata' element so that text is not XML-escaped there.
my $in_cdata = 0;

# Shared empty array ref, used as a stand-in when a node has no '_content'.
my $nillio   = [];

#--------------------------------------------------------------------------
#Some basic overrides:

# Hand back a reference to the file-level %emptyElement lookup table.
sub _empty_element_map {
    return \%emptyElement;
}

# Install aliases in this package's symbol table:
#   _fold_case -> HTML::Element::_fold_case_NOT
#                 (presumably the variant that leaves tag case untouched --
#                 confirm against HTML::Element)
#   starttag   -> this package's starttag_XML
#   endtag     -> this package's endtag_XML
*_fold_case      = \&HTML::Element::_fold_case_NOT;
*starttag        = \&starttag_XML;
*endtag          = \&endtag_XML;

lib/XML/Element.pm  view on Meta::CPAN

            # Does this ever get used?  And is this right?
            $name = join( ' ', @{ $self->{'text'} } );
        }
        else {
            $name = $self->{'text'};
        }
        $name =~ s/--/-&#45;/g;    # can't have double --'s in XML comments
        return "<!-- $name -->";
    }

    if ( $name eq '~cdata' ) {
        $in_cdata = 1;
        return "<![CDATA[";
    }

    my $tag = "<$name";
    my $val;
    for ( sort keys %$self ) {     # predictable ordering
        next if !length $_ or m/^_/s or $_ eq '/';

        # Hm -- what to do if val is undef?
        # I suppose that shouldn't ever happen.

lib/XML/Element.pm  view on Meta::CPAN

    @_ == 3 ? "$tag />" : "$tag>";
}

## Adapted from HTML::Element so that CDATA sections can be closed.
# Returns the closing markup for this element:
#   - for a '~cdata' element, the literal "]]>" (and the file-level
#     $in_cdata flag is cleared);
#   - for anything else, the ordinary "</tag>" end tag built from '_tag'.
sub endtag_XML {
    my ($element) = @_;

    # (Open question kept from the original: should there be a further
    # parameter to signal emptiness?)

    my $tag_name = $element->{'_tag'};

    # Ordinary element: plain end tag.
    return "</$tag_name>" unless $tag_name eq '~cdata';

    # Leaving a CDATA section.
    $in_cdata = 0;
    return "]]>";
}

## copied from HTML::Element to support CDATA
sub as_XML {

    my ($self) = @_;

lib/XML/Element.pm  view on Meta::CPAN

                }
                else {    # on the way out
                    unless ( $empty_element_map->{$tag}
                        and !@{ $node->{'_content'} || $nillio } )
                    {
                        push( @xml, $node->endtag_XML() );
                    }     # otherwise it will have been an <... /> tag.
                }
            }
            else {        # it's just text
                _xml_escape($node) unless ($in_cdata);
                push( @xml, $node );
            }
            1;            # keep traversing
        }
    );

    join( '', @xml, "\n" );
}

#--------------------------------------------------------------------------

lib/XML/TreeBuilder.pm  view on Meta::CPAN

    if ( %{$arg} ) {
        croak "unknown args: " . join( ", ", keys %{$arg} );
    }

    my $self = XML::Element->new('NIL');
    bless $self, $class;    # and rebless
    $self->{_element_class}      = 'XML::Element';
    $self->{_store_comments}     = 0;
    $self->{_store_pis}          = 0;
    $self->{_store_declarations} = 0;
    $self->{_store_cdata}        = 0;

    # have to let HTML::Element know there are encoded entities
    $XML::Element::encoded_content = $NoExpand if ($NoExpand);

    my @stack;

 # Compare the simplicity of this to the sheer nastiness of HTML::TreeBuilder!

    $self->{_xml_parser} = XML::Parser->new(
        Handlers => {

lib/XML/TreeBuilder.pm  view on Meta::CPAN

                        'text' => join( ' ', ( 'ENTITY', @_ ) ),
                        type   => 'ENTITY',
                        name   => $_[0],
                        value  => $_[1],
                    )
                );
                return;
            },

            # CdataStart: does nothing unless CDATA storing is enabled.
            # Otherwise creates a '~cdata' element, pushes it onto @stack and
            # appends it to the content of the element that was previously on
            # top of the stack.
            CdataStart => sub {
                return unless $self->{_store_cdata};
                shift;

                # NOTE(review): after the shift above, $_[1] is the third
                # argument the handler was invoked with -- confirm the parser
                # really passes one, otherwise 'text' is undef here.
                push @stack,
                    $self->{_element_class}->new( '~cdata', 'text' => $_[1] );
                $stack[-2]->push_content( $stack[-1] );
                return;
            },

            # CdataEnd: does nothing unless CDATA storing is enabled.
            # Otherwise removes the top entry of @stack (presumably the
            # '~cdata' element pushed by the CdataStart handler).
            CdataEnd => sub {
                return unless $self->{_store_cdata};
                pop @stack;
                return;
            },

            ExternEnt => sub {
                return if ($NoExpand);
                my $xp = shift;
                my ( $base, $sysid, $pubid ) = @_;
                my $file = "$sysid";

lib/XML/TreeBuilder.pm  view on Meta::CPAN

{
    my ( $self, $elem, $val ) = @_;
    my $old = $self->{$elem};
    $self->{$elem} = $val if defined $val;
    return $old;
}

# Accessor for the '_store_comments' setting; forwards any extra arguments
# to _elem() and returns its result.
sub store_comments {
    my ( $self, @args ) = @_;
    return $self->_elem( '_store_comments', @args );
}
# Accessor for the '_store_declarations' setting; forwards any extra
# arguments to _elem() and returns its result.
sub store_declarations {
    my ( $self, @args ) = @_;
    return $self->_elem( '_store_declarations', @args );
}
# Accessor for the '_store_pis' setting; forwards any extra arguments to
# _elem() and returns its result.
sub store_pis {
    my ( $self, @args ) = @_;
    return $self->_elem( '_store_pis', @args );
}
# Accessor for the '_store_cdata' setting; forwards any extra arguments to
# _elem() and returns its result.
sub store_cdata {
    my ( $self, @args ) = @_;
    return $self->_elem( '_store_cdata', @args );
}

#==========================================================================

# Pass all remaining arguments to the parse() method of the parser object
# kept in $self->{_xml_parser}, returning whatever that call returns.
sub parse {
    my $self   = shift;
    my $parser = $self->{_xml_parser};
    return $parser->parse(@_);
}

# Alias: forwards every argument to parsefile() and returns its result.
sub parse_file {
    my $self = shift;
    return $self->parsefile(@_);
}

sub parsefile {

lib/XML/TreeBuilder.pm  view on Meta::CPAN

This determines whether TreeBuilder will normally store markup
declarations found while parsing content into C<$root>.  Currently,
this is off by default.

=item $root->store_pis(value)

This determines whether TreeBuilder will normally store processing
instructions found while parsing content into C<$root>.
Currently, this is off (false) by default.

=item $root->store_cdata(value)

This determines whether TreeBuilder will normally store CDATA
sections found while parsing content into C<$root>. Adds a ~cdata node.

Currently, this is off (false) by default.

=back

=head1 SEE ALSO

L<XML::Parser>, L<XML::Element>, L<HTML::TreeBuilder>, L<HTML::DOMbo>.

And for alternate XML document interfaces, L<XML::DOM> and L<XML::Twig>.

t/10main.t  view on Meta::CPAN

        [ 'foo', { 'Id' => 'me', 'xml:foo' => 'lal' }, 'Hello World' ],
        ['lor'],
        [ '~comment', { 'text' => ' foo ' } ],
        [ '~comment', { 'text' => ' glarg ' } ],
    ]
);

ok( $x->same_as($y), "same as" );

# Build a tree with NoExpand set and CDATA storing switched on, parse
# t/parse_test.xml, and check that as_XML() reproduces the expected text
# below: entity references are kept, and the CDATA section is emitted
# unescaped between its <![CDATA[ ... ]]> markers.
my $z = XML::TreeBuilder->new( { NoExpand => 1, ErrorContext => 2 } );
$z->store_cdata(1);
$z->parsefile("t/parse_test.xml");
is(
    $z->as_XML(),
    q{<p id="&id;">Here &amp;foo; There
<![CDATA[
&foo;
]]>
&foo;
</p>
},
    'Decoded ampersand and cdata'
);

# Called for its effect on the tree only; the return value is not checked.
$z->delete_ignorable_whitespace();

my $za = XML::TreeBuilder->new( { NoExpand => 1, ErrorContext => 2 } );
$za->store_declarations(1);
$za->store_pis(1);
$za->store_declarations(1);
$za->parse(
    qq{<?xml version='1.0' encoding='utf-8' ?>
<!DOCTYPE para PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [



( run in 0.250 second using v1.01-cache-2.11-cpan-ec4f86ec37b )