XML-AutoWriter
view release on metacpan or search on metacpan
lib/XML/ValidWriter.pm view on Meta::CPAN
croak "Function '$sym' not exported by '$pkg' or " . __PACKAGE__ ;
}
}
my %escapees ;
$escapees{'&'} = '&' ;
$escapees{'<'} = '<' ;
$escapees{'>'} = '>' ;
$escapees{']>'} = ']>' ;
$escapees{']]>'} = ']]>' ;
$escapees{'"'} = '"' ;
$escapees{"'"} = ''' ;
# Takes a list, returns a list: don't use in scalar context.
sub _esc {
croak "_esc used in scalar context" unless wantarray ;
my $text ;
return map {
$text = $_ ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Illegal character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{([&<]|^>|^\]>|\]\]>)}{$escapees{$1}}eg ;
$text ;
} @_ ;
}
sub _esc1 {
my $text = shift ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{([&<]|^>|^\]>|\]\]>)}{$escapees{$1}}eg ;
return $text ;
}
sub _attr_esc1 {
my $text = shift ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{([&<"'])}{$escapees{$1}}eg ;
return $text ;
}
sub _esc_cdata_ends {
## This could be very memory hungry, but alas...
my $text = join( '', @_ ) ;
if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
croak sprintf(
"Invalid character 0x%02d (^%s) sent",
ord $1,
chr( ord( "A" ) + ord( $1 ) - 1 )
)
}
$text =~ s{\]\]>}{]]]]><![CDATA[>}g ;
return $text ;
}
=item characters
characters( "escaped text", "& more" ) ;
$writer->characters( "escaped text", "& more" ) ;
Emits character data. Character data will be escaped before output, by either
transforming 'E<lt>' and '&' to < and &, or by enclosing in a
'C<E<lt>![CDATA[...]]E<gt>>' bracket, depending on which will be more
human-readable, according to the module.
=cut
sub characters {
my XML::ValidWriter $self = &_self ;
my $to = $self->{OUTPUT} || select ;
croak "Can't emit characters before the root element"
if ! defined $self->{EMITTED_ROOT} ;
my $stack = $self->{STACK} ;
croak "Can't emit characters outside of the root element"
unless @$stack ;
my XML::VWElement $end_elt = $stack->[-1];
my $open_elt = $self->getDoctype->element_decl( $end_elt->{NAME} ) ;
croak "Element '$open_elt->{NAME}' can't contain #PCDATA"
unless ! $open_elt || $open_elt->can_contain_pcdata ;
croak "Undefined value passed to characters() in <$open_elt->{NAME}>"
if grep ! defined $_, @_ ;
my $length ;
my $decide_cdata = $self->{STRAGGLERS} eq '>' ;
my $in_cdata_mode ;
if ( $decide_cdata ) {
my $escs = 0 ;
my $cdata_ends = 0 ;
my $cdata_escs = 0 ;
my $pos ;
## I assume that splitting CDATA ends between chunks is very
## rare. If an app does that a lot, then this could guess 'wrong'
## and use CDATA escapes in a situation where they result in more
## bytes out than <& escaping would.
for ( @_ ) {
$escs += tr/<&// ;
$pos = 0 ;
++$cdata_ends while ( $pos = index $_, ']]>', $pos + 3 ) >= 0 ;
$cdata_escs += tr/\x00-\x08\x0b\x0c\x0e-\x1f// ;
$length += length $_ ;
}
## Each < or & is 4 or 5 chars.
## Each ]]]]><![CDATA[< is 15.
## Each ]]>&#xN;<![CDATA[ is 17 or 18.
## We ## add 12 since <![CDATA[]]> is 12 chars.
$in_cdata_mode = 4.5*$escs > 15*$cdata_ends + 17.75*$cdata_escs + 12 ;
}
else {
$in_cdata_mode = $self->{STRAGGLERS} eq ']]>' ;
$length += length $_ for @_ ;
}
return unless $length ;
## I chose to stay in or out of CDATA mode for an element
## in order to keep document structure relatively simple...to keep human
## readers from getting confused between escaping modes.
## This may lead to degeneracy if it's an (SG|X)ML document being emitted in
## an element, so this may change.
if ( $in_cdata_mode ) {
if ( $self->{STRAGGLERS} eq ']]>' ) {
## Don't emit ']]><![CDATA[' between consecutive CDATA character
## chunks.
$self->{STRAGGLERS} = '' ;
}
else {
$self->{STRAGGLERS} .= '<![CDATA['
}
if ( ref $to eq 'SCALAR' ) {
$$to = join( '',
$$to,
$self->{STRAGGLERS},
_esc_cdata_ends( $self->{CDATA_END_PART}, @_ )
) ;
$self->{CDATA_END_PART} =
$$to =~ s/(\]\]?)(?!\n)\Z//
? $1
: '' ;
}
else {
no strict 'refs' ;
my $chunk = _esc_cdata_ends( $self->{CDATA_END_PART}, @_ ) ;
$self->{CDATA_END_PART} =
$chunk =~ s/(\]\]?)(?!\n)\Z//
? $1
: '' ;
print $to $self->{STRAGGLERS}, $chunk
or croak "$! writing chars in <$open_elt->{NAME}>" ;
}
$self->{STRAGGLERS} = ']]>' ;
}
else {
if ( ref $to eq 'SCALAR' ) {
$$to .= $self->{STRAGGLERS} ;
$$to .= _esc1( join( '', @_ ) ) ;
}
else {
no strict 'refs' ;
print $to $self->{STRAGGLERS}, _esc( @_ )
or croak "$! writing chars in <$open_elt->{NAME}>" ;
}
$self->{STRAGGLERS} = '' ;
# $self->{CDATA_END_PART} = '' ;
}
$stack->[-1]->add_content( '#PCDATA' )
if @{$stack} ;
$self->{WAS_END_TAG} = 0 ;
return ;
}
=item dataElement
$writer->dataElement( $tag ) ;
$writer->dataElement( $tag, $content ) ;
$writer->dataElement( $tag, $content, attr1 => $val1, ... ) ;
dataElement( $tag ) ;
dataElement( $tag, $content ) ;
dataElement( $tag, $content, attr1 => $val1, ... ) ;
Does the equivalent to
## Split the optional args in to attributes and elements arrays.
$writer->startTag( $tag, @attributes ) ;
$writer->characters( $content ) ;
$writer->endTag( $tag ) ;
This function is exportable as dataElement(), and is also exported
for each element 'foo' found in the DTD as foo().
=cut
sub dataElement {
my XML::ValidWriter $self = shift ;
my ( $tag ) = shift ;
lib/XML/ValidWriter.pm view on Meta::CPAN
sub getDataMode {
my XML::ValidWriter $self = shift ;
return $self->{DATA_MODE} ;
}
=item getDoctype
$dtd = getDoctype ;
$dtd = $writer->getDoctype ;
This is used to get the writer's XML::Doctype object.
=cut
sub getDoctype {
my XML::ValidWriter $self = &_self ;
return $self->{DOCTYPE} ;
}
=item getOutput
$fh = getOutput ;
$fh = $writer->getOutput ;
Gets the filehandle an XML::ValidWriter sends output to.
=cut
sub getOutput {
my XML::ValidWriter $self = &_self ;
return $self->{OUTPUT} ;
}
=item rawCharacters
rawCharacters( "<unescaped text>", "& more text" ) ;
$writer->rawCharacters( "<unescaped text>", "& more text" ) ;
This allows you to emit raw text without any escape processing. The text
is not examined for tags, so you can invalidate your document and even
corrupt it's well-formedness.
=cut
## This is called everywhere to emit raw characters *except* characters(),
## which must go direct because it uses STRAGGLERS and CDATA_END_PART
## differently.
sub rawCharacters {
my XML::ValidWriter $self = &_self ;
my $to= $self->{OUTPUT} || select ;
return unless grep length $_, @_ ;
if ( ref $to eq 'SCALAR' ) {
$$to .= join(
'',
_esc_cdata_ends( $self->{CDATA_END_PART} ),
$self->{STRAGGLERS},
@_
) ;
$self->{AT_BOL} = substr( $$to, -1, 1 ) eq "\n" ;
}
else {
no strict 'refs' ;
for ( my $i = $#_ ; $i >= 0 ; --$i ) {
next unless length $_[$i] ;
$self->{AT_BOL} = substr( $_[$i], -1, 1 ) eq "\n" ;
last ;
}
print $to
_esc_cdata_ends( $self->{CDATA_END_PART} ),
$self->{STRAGGLERS},
@_ or croak $!;
}
$self->{CDATA_END_PART} = '' ;
$self->{STRAGGLERS} = '' ;
}
=item reset
$writer->reset ; # Not a function!
Resets a writer to be initialized, but not have emitted anything.
This is useful if you need to abort output, but want to reuse the
XML::ValidWriter.
=cut
sub reset {
my XML::ValidWriter $self = shift ;
$self->{STACK} = [] ;
# If we should warn, clear the flag that says we checked it & vice versa
$self->{CHECKED_XML_DECL} = ! $self->{SHOULD_WARN} ;
## I'd use assignement to a slice here, but older perls...
$self->{IS_STANDALONE} = 0 ;
$self->{EMITTED_DOCTYPE} = undef ;
$self->{EMITTED_ROOT} = undef ;
$self->{EMITTED_XML} = undef ;
$self->{AT_BOL} = 1 ;
$self->{WAS_END_TAG} = 1 ;
$self->{STRAGGLERS} = '' ;
$self->{CDATA_END_PART} = '' ;
if ( defined $self->{FILE_NAME} ) {
if ( defined $self->{OUTPUT} ) {
close $self->{OUTPUT} or croak "$! closing '$self->{FILE_NAME}'." ;
}
else {
require Symbol ;
$self->{OUTPUT} = Symbol::gensym() ;
}
eval "use Fcntl ; 1" or croak $@ ;
open(
$self->{OUTPUT},
">$self->{FILE_NAME}",
)
or croak "$!: $self->{FILE_NAME}" ;
}
return ;
}
=item setDataMode
( run in 0.684 second using v1.01-cache-2.11-cpan-39bf76dae61 )