XML-AutoWriter
view release on metacpan or search on metacpan
lib/XML/AutoWriter.pm view on Meta::CPAN
croak "Unknown tag '$root'" unless exists $elts->{$root} ;
croak "Unknown tag '$dest'"
unless $dest eq '#PCDATA' || exists $elts->{$dest} ;
require XML::Doctype::ElementDecl;
my XML::Doctype::ElementDecl $root_elt = $elts->{$root} ;
# print STDERR "searching for $root ... $dest\n" ;
return []
if $root_elt->is_any
|| ( $dest eq '#PCDATA' && $root_elt->can_contain_pcdata ) ;
my $paths = $root_elt->{PATHS} ;
unless ( $paths ) {
## Init the cache
$paths = $root_elt->{PATHS} = {
map {( $_ => [] )} $root_elt->child_names
} ;
$root_elt->{TODO} = [ $root_elt->child_names ] ;
}
lib/XML/AutoWriter.pm view on Meta::CPAN
my $todo = $root_elt->{TODO} ;
while ( @$todo ) {
# print STDERR "todo: ", join( ' ', @$todo ), "\n" ;
my $gkid = shift @$todo ;
# print STDERR "doing $gkid\n" ;
push @$todo, $elts->{$gkid}->child_names ;
my $gkid_path = $paths->{$gkid} ;
if ( $elts->{$gkid}->can_contain_pcdata() ) {
$paths->{'#PCDATA'} = [ @$gkid_path, $gkid ]
unless exists $paths->{'#PCDATA'} ;
# print STDERR "checking (pcdata) ",
# join( '', map "<$_>", @{$paths->{'#PCDATA'}} ), "\n" ;
if ( $dest eq '#PCDATA' ) {
# print STDERR "Yahoo!\n" ;
return $paths->{'#PCDATA'} ;
}
}
for my $ggkid ( $elts->{$gkid}->child_names ) {
next if exists $paths->{$ggkid} ;
lib/XML/Doctype/ElementDecl.pm view on Meta::CPAN
=item is_mixed
=cut
sub is_mixed {
my XML::Doctype::ElementDecl $self = shift ;
return $self->{CONTENT} && $self->{CONTENT} =~ /#PCDATA/ ;
}
sub can_contain_pcdata {
my XML::Doctype::ElementDecl $self = shift ;
return $self->{CONTENT}
&& (
$self->{CONTENT} eq 'ANY'
|| return $self->{CONTENT} =~ /#PCDATA/
) ;
}
=item name
lib/XML/ValidWriter.pm view on Meta::CPAN
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{([&<"'])}{$escapees{$1}}eg ;
return $text ;
}
sub _esc_cdata_ends {
## This could be very memory hungry, but alas...
my $text = join( '', @_ ) ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{\]\]>}{]]]]><![CDATA[>}g ;
lib/XML/ValidWriter.pm view on Meta::CPAN
if ! defined $self->{EMITTED_ROOT} ;
my $stack = $self->{STACK} ;
croak "Can't emit characters outside of the root element"
unless @$stack ;
my XML::VWElement $end_elt = $stack->[-1];
my $open_elt = $self->getDoctype->element_decl( $end_elt->{NAME} ) ;
croak "Element '$open_elt->{NAME}' can't contain #PCDATA"
unless ! $open_elt || $open_elt->can_contain_pcdata ;
croak "Undefined value passed to characters() in <$open_elt->{NAME}>"
if grep ! defined $_, @_ ;
my $length ;
my $decide_cdata = $self->{STRAGGLERS} eq '>' ;
my $in_cdata_mode ;
if ( $decide_cdata ) {
my $escs = 0 ;
my $cdata_ends = 0 ;
my $cdata_escs = 0 ;
my $pos ;
## I assume that splitting CDATA ends between chunks is very
## rare. If an app does that a lot, then this could guess 'wrong'
## and use CDATA escapes in a situation where they result in more
## bytes out than <& escaping would.
for ( @_ ) {
$escs += tr/<&// ;
$pos = 0 ;
++$cdata_ends while ( $pos = index $_, ']]>', $pos + 3 ) >= 0 ;
$cdata_escs += tr/\x00-\x08\x0b\x0c\x0e-\x1f// ;
$length += length $_ ;
}
## Each < or & is 4 or 5 chars.
## Each ]]]]><![CDATA[< is 15.
## Each ]]>&#xN;<![CDATA[ is 17 or 18.
## We ## add 12 since <![CDATA[]]> is 12 chars.
$in_cdata_mode = 4.5*$escs > 15*$cdata_ends + 17.75*$cdata_escs + 12 ;
}
else {
$in_cdata_mode = $self->{STRAGGLERS} eq ']]>' ;
$length += length $_ for @_ ;
}
return unless $length ;
## I chose to stay in or out of CDATA mode for an element
## in order to keep document structure relatively simple...to keep human
## readers from getting confused between escaping modes.
## This may lead to degeneracy if it's an (SG|X)ML document being emitted in
## an element, so this may change.
if ( $in_cdata_mode ) {
if ( $self->{STRAGGLERS} eq ']]>' ) {
## Don't emit ']]><![CDATA[' between consecutive CDATA character
## chunks.
$self->{STRAGGLERS} = '' ;
}
else {
$self->{STRAGGLERS} .= '<![CDATA['
}
if ( ref $to eq 'SCALAR' ) {
$$to = join( '',
$$to,
$self->{STRAGGLERS},
_esc_cdata_ends( $self->{CDATA_END_PART}, @_ )
) ;
$self->{CDATA_END_PART} =
$$to =~ s/(\]\]?)(?!\n)\Z//
? $1
: '' ;
}
else {
no strict 'refs' ;
my $chunk = _esc_cdata_ends( $self->{CDATA_END_PART}, @_ ) ;
$self->{CDATA_END_PART} =
$chunk =~ s/(\]\]?)(?!\n)\Z//
? $1
: '' ;
print $to $self->{STRAGGLERS}, $chunk
or croak "$! writing chars in <$open_elt->{NAME}>" ;
}
lib/XML/ValidWriter.pm view on Meta::CPAN
sub rawCharacters {
my XML::ValidWriter $self = &_self ;
my $to= $self->{OUTPUT} || select ;
return unless grep length $_, @_ ;
if ( ref $to eq 'SCALAR' ) {
$$to .= join(
'',
_esc_cdata_ends( $self->{CDATA_END_PART} ),
$self->{STRAGGLERS},
@_
) ;
$self->{AT_BOL} = substr( $$to, -1, 1 ) eq "\n" ;
}
else {
no strict 'refs' ;
for ( my $i = $#_ ; $i >= 0 ; --$i ) {
next unless length $_[$i] ;
$self->{AT_BOL} = substr( $_[$i], -1, 1 ) eq "\n" ;
last ;
}
print $to
_esc_cdata_ends( $self->{CDATA_END_PART} ),
$self->{STRAGGLERS},
@_ or croak $!;
}
$self->{CDATA_END_PART} = '' ;
$self->{STRAGGLERS} = '' ;
}
=item reset
<!ELEMENT b EMPTY >
TOHERE
my $out_name = "$t/out" ;
my $buf ;
my $xml_decl = qq{<?xml version="1.0"?>} ;
sub test_cdata_esc {
## See if contiguously emitted CDATA end sequences are escaped properly
package Foo ;
$buf = '' ;
defaultWriter()->reset ;
select_xml( \$buf ) ;
## The extra ()'s are necessary because we didn't import at compile time.
xmlDecl() ;
start_a() ;
## Kick us in to CDATA mode
characters( "<<<<<" ) ;
eval { test_char_data_esc( $char ) } ;
## Older dists of perl don't know about qr// passed to ok():
if ( $@ && $@ =~ /invalid char/i ) {
ok( 1 ) ;
}
else {
ok( $@, "invalid char", sprintf( "0x%02x", $ord ) )
}
},
sub {
eval { test_cdata_esc( $char ) } ;
## Older dists of perl don't know about qr// passed to ok():
if ( $@ && $@ =~ /invalid char/i ) {
ok( 1 ) ;
}
else {
ok( $@, "invalid char", sprintf( "0x%02x", $ord ) )
}
},
)
} ( 0..0x08, 0x0b, 0x0c, 0x0e..0x1f )
),
##
## CDATA escape mode tests
##
sub { ok( test_cdata_esc( "]]>" ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "]]>" ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "]]", ">" ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "]", "]>" ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "\t" ), "\t", "\\t, 0x09, ^I, TAB" ) },
sub { ok( test_cdata_esc( "\n" ), "\n", "\\n, 0x0A, ^J, NL" ) },
sub { ok( test_cdata_esc( "\r" ), "\r", "\\r, 0x0D, ^M, CR" ) },
sub {
package Foo ;
$buf = '' ;
defaultWriter()->reset ;
select_xml( \$buf ) ;
## The extra ()'s are necessary because we didn't import at compile time.
xmlDecl() ;
start_a() ;
## Kick us in to CDATA mode, but with a closing ']'
( run in 0.578 second using v1.01-cache-2.11-cpan-454fe037f31 )