XML-AutoWriter

 view release on metacpan or  search on metacpan

lib/XML/AutoWriter.pm  view on Meta::CPAN

   croak "Unknown tag '$root'" unless exists $elts->{$root} ;
   croak "Unknown tag '$dest'"
      unless $dest eq '#PCDATA' || exists $elts->{$dest} ;

   require XML::Doctype::ElementDecl;
   my XML::Doctype::ElementDecl $root_elt = $elts->{$root} ;
   # print STDERR "searching for $root ... $dest\n" ;

   return []
      if $root_elt->is_any
         || ( $dest eq '#PCDATA' && $root_elt->can_contain_pcdata ) ;

   my $paths = $root_elt->{PATHS} ;
   unless ( $paths ) {
      ## Init the cache
      $paths = $root_elt->{PATHS} = {
         map {( $_ => [] )} $root_elt->child_names
      } ;
      $root_elt->{TODO} = [ $root_elt->child_names ] ;
   }

lib/XML/AutoWriter.pm  view on Meta::CPAN

   my $todo = $root_elt->{TODO} ;
   while ( @$todo ) {
      # print STDERR "todo: ", join( ' ', @$todo ), "\n" ;

      my $gkid = shift @$todo ;
      # print STDERR "doing $gkid\n" ;
      push @$todo, $elts->{$gkid}->child_names ;

      my $gkid_path = $paths->{$gkid} ;

      if ( $elts->{$gkid}->can_contain_pcdata() ) {
	 $paths->{'#PCDATA'} = [ @$gkid_path, $gkid ]
	    unless exists $paths->{'#PCDATA'} ;
	 # print STDERR "checking (pcdata) ",
	 # join( '', map "<$_>", @{$paths->{'#PCDATA'}} ), "\n" ;
	 if ( $dest eq '#PCDATA' ) {
	    # print STDERR "Yahoo!\n" ;
	    return $paths->{'#PCDATA'} ;
	 }
      }

      for my $ggkid ( $elts->{$gkid}->child_names ) {
	 next if exists $paths->{$ggkid} ;

lib/XML/Doctype/ElementDecl.pm  view on Meta::CPAN

=item is_mixed

=cut

sub is_mixed {
   my XML::Doctype::ElementDecl $self = shift ;

   return $self->{CONTENT} && $self->{CONTENT} =~ /#PCDATA/ ;
}

sub can_contain_pcdata {
   my XML::Doctype::ElementDecl $self = shift ;

   return $self->{CONTENT}
      && (
	 $self->{CONTENT} eq 'ANY'
	 || return $self->{CONTENT} =~ /#PCDATA/
      ) ;
}

=item name

lib/XML/ValidWriter.pm  view on Meta::CPAN

         "Invalid character 0x%02d (^%s) sent",
         ord $1,
	 chr( ord( "A" ) + ord( $1 ) - 1 )
      )
   }
   $text =~ s{([&<"'])}{$escapees{$1}}eg ;
   return $text ;
}


sub _esc_cdata_ends {
   ## This could be very memory hungry, but alas...
   my $text = join( '', @_ ) ;
   if ( $text =~ /([\x00-\x08\x0B\x0C\x0E-\x1F])/ ) {
      croak sprintf(
         "Invalid character 0x%02d (^%s) sent",
         ord $1,
	 chr( ord( "A" ) + ord( $1 ) - 1 )
      )
   }
   $text =~ s{\]\]>}{]]]]><![CDATA[>}g ;

lib/XML/ValidWriter.pm  view on Meta::CPAN

      if ! defined $self->{EMITTED_ROOT} ;

   my $stack = $self->{STACK} ;
   croak "Can't emit characters outside of the root element"
      unless @$stack ;

   my XML::VWElement $end_elt = $stack->[-1];
   my $open_elt = $self->getDoctype->element_decl( $end_elt->{NAME} ) ;

   croak "Element '$open_elt->{NAME}' can't contain #PCDATA"
      unless ! $open_elt || $open_elt->can_contain_pcdata ;

   croak "Undefined value passed to characters() in <$open_elt->{NAME}>"
      if grep ! defined $_, @_ ;

   my $length ;
   my $decide_cdata = $self->{STRAGGLERS} eq '>' ;
   my $in_cdata_mode ;

   if ( $decide_cdata ) {
      my $escs = 0 ;
      my $cdata_ends = 0 ;
      my $cdata_escs = 0 ;
      my $pos ;

      ## I assume that splitting CDATA ends between chunks is very
      ## rare.  If an app does that a lot, then this could guess 'wrong'
      ## and use CDATA escapes in a situation where they result in more
      ## bytes out than <& escaping would.
      for ( @_ ) {
	 $escs += tr/<&// ;
	 $pos = 0 ;
	 ++$cdata_ends while ( $pos = index $_, ']]>', $pos + 3 ) >= 0 ;
	 $cdata_escs += tr/\x00-\x08\x0b\x0c\x0e-\x1f// ;
	 $length += length $_ ;
      }
      ## Each &lt; or &amp; is 4 or 5 chars.
      ## Each ]]]]><![CDATA[< is 15.
      ## Each ]]>&#xN;<![CDATA[ is 17 or 18.
      ## We ## add 12 since <![CDATA[]]> is 12 chars.
      $in_cdata_mode = 4.5*$escs > 15*$cdata_ends + 17.75*$cdata_escs + 12 ;
   }
   else {
      $in_cdata_mode = $self->{STRAGGLERS} eq ']]>' ;
      $length += length $_ for @_ ;
   }

   return unless $length ;

   ## I chose to stay in or out of CDATA mode for an element
   ## in order to keep document structure relatively simple...to keep human
   ## readers from getting confused between escaping modes.
   ## This may lead to degeneracy if it's an (SG|X)ML document being emitted in
   ## an element, so this may change.
   if ( $in_cdata_mode ) {
      if ( $self->{STRAGGLERS} eq ']]>' ) {
	 ## Don't emit ']]><![CDATA[' between consecutive CDATA character
	 ## chunks.
         $self->{STRAGGLERS} = '' ;
      }
      else {
	 $self->{STRAGGLERS} .= '<![CDATA['
      }
      if ( ref $to eq 'SCALAR' ) {
	 $$to = join( '',
	    $$to,
	    $self->{STRAGGLERS},
	    _esc_cdata_ends( $self->{CDATA_END_PART}, @_ )
	 ) ;

	 $self->{CDATA_END_PART} = 
	    $$to =~ s/(\]\]?)(?!\n)\Z//
	       ? $1
	       : '' ;

      }
      else {
	 no strict 'refs' ;

	 my $chunk = _esc_cdata_ends( $self->{CDATA_END_PART}, @_ ) ;
	 $self->{CDATA_END_PART} = 
	    $chunk =~ s/(\]\]?)(?!\n)\Z//
	       ? $1
	       : '' ;

	 print $to $self->{STRAGGLERS}, $chunk
	    or croak "$! writing chars in <$open_elt->{NAME}>" ;

      }

lib/XML/ValidWriter.pm  view on Meta::CPAN

sub rawCharacters {
   my XML::ValidWriter $self = &_self ;

   my $to= $self->{OUTPUT} || select ;

   return unless grep length $_, @_ ;

   if ( ref $to eq 'SCALAR' ) {
      $$to .= join(
         '',
         _esc_cdata_ends( $self->{CDATA_END_PART} ),
	 $self->{STRAGGLERS},
	 @_
      ) ;
      $self->{AT_BOL} = substr( $$to, -1, 1 ) eq "\n" ;
   }
   else {
      no strict 'refs' ;

      for ( my $i = $#_ ; $i >= 0 ; --$i ) {
         next unless length $_[$i] ;
	 $self->{AT_BOL} = substr( $_[$i], -1, 1 ) eq "\n" ;
	 last ;
      }

      print $to
         _esc_cdata_ends( $self->{CDATA_END_PART} ),
         $self->{STRAGGLERS},
	 @_ or croak $!;
   }
   $self->{CDATA_END_PART} = '' ;
   $self->{STRAGGLERS} = '' ;
}


=item reset

t/escape.t  view on Meta::CPAN

<!ELEMENT b EMPTY >

TOHERE

my $out_name    = "$t/out"  ;

my $buf ;

my $xml_decl = qq{<?xml version="1.0"?>} ;

sub test_cdata_esc {
   ## See if contiguously emitted CDATA end sequences are escaped properly
   package Foo ;
   $buf = '' ;
   defaultWriter()->reset ;
   select_xml( \$buf ) ;
   ## The extra ()'s are necessary because we didn't import at compile time.
   xmlDecl() ;
   start_a()  ;
   ## Kick us in to CDATA mode
   characters( "<<<<<" ) ;

t/escape.t  view on Meta::CPAN

	    eval { test_char_data_esc( $char ) } ;
	    ## Older dists of perl don't know about qr// passed to ok():
	    if ( $@ && $@ =~ /invalid char/i ) {
	       ok( 1 ) ;
	    }
	    else {
	       ok( $@, "invalid char", sprintf( "0x%02x", $ord ) )
	    }
	 },
	 sub {
	    eval { test_cdata_esc( $char ) } ;
	    ## Older dists of perl don't know about qr// passed to ok():
	    if ( $@ && $@ =~ /invalid char/i ) {
	       ok( 1 ) ;
	    }
	    else {
	       ok( $@, "invalid char", sprintf( "0x%02x", $ord ) )
	    }
	 },
      )
   } ( 0..0x08, 0x0b, 0x0c, 0x0e..0x1f )
),

##
## CDATA escape mode tests
##
sub { ok( test_cdata_esc( "]]>"     ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "]]>"     ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "]]", ">" ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "]", "]>" ), "]]]]><![CDATA[>" ) },
sub { ok( test_cdata_esc( "\t"      ), "\t", "\\t, 0x09, ^I, TAB" ) },
sub { ok( test_cdata_esc( "\n"      ), "\n", "\\n, 0x0A, ^J, NL"  ) },
sub { ok( test_cdata_esc( "\r"      ), "\r", "\\r, 0x0D, ^M, CR"  ) },

sub {
   package Foo ;
   $buf = '' ;
   defaultWriter()->reset ;
   select_xml( \$buf ) ;
   ## The extra ()'s are necessary because we didn't import at compile time.
   xmlDecl() ;
   start_a()  ;
   ## Kick us in to CDATA mode, but with a closing ']'



( run in 0.578 second using v1.01-cache-2.11-cpan-454fe037f31 )