XML-Comma

 view release on metacpan or  search on metacpan

lib/XML/Comma/Element.pm  view on Meta::CPAN

@ISA = ( 'XML::Comma::AbstractElement' );

use strict;

use XML::Comma::Util qw( dbg trim arrayref_remove );

##
# object fields
#
# _content             : string holding element content
# _cdata               : mark this element as needing to be wrapped in
#                        a CDATA container. currently, any element that
#                        has a CDATA start tag anywhere immediately inside it
#                        will be marked this way

# no need for an init sub here (parent's does the work of initting the
# def and tag fields, this is just a reminder)
#
# sub _init {
# $self->{_content} = undef; }
# }

lib/XML/Comma/Element.pm  view on Meta::CPAN

sub finish_initial_read {
  $_[0]->{_content} = trim ( $_[0]->{_content} );
  $_[0]->SUPER::finish_initial_read();
}
#
##

##
# called by parser and part of public api
#
# mark this element as needing a cdata wrapper
sub cdata_wrap {
  $_[0]->assert_not_read_only();
  $_[0]->{_cdata} = 1;
}
#
##


########
#
# Content Manipulation
#
########

lib/XML/Comma/Element.pm  view on Meta::CPAN

#
sub validate {
  my $self = shift();
  $self->validate_content ( $self->get(unescape=>0) );
}

# all callees (validate_content_hooks) should die with a message
# string if they encounter an error
sub validate_content {
  my ( $self, $text ) = @_;
  if ( $_[0]->{_cdata} ) {
    $text = "<![CDATA[$text]]>";
  }
  # check for un-parseable content by trying to parse and catching
  # errors. then ask the def to call any of its validate_hooks
  eval {
    if ( defined $text ) {
      XML::Comma->parser()->parse ( block => "<_>$text</_>" );
    }
    $self->def()->validate ( $self, $text );
  }; if ( $@ ) {

lib/XML/Comma/Element.pm  view on Meta::CPAN

}

sub to_string {
  my $self = shift();
  my $content = $self->get_without_default();
  # don't output if empty
  return ''  unless defined $content and
                            $content ne '';
  my $str;
  $str = '<' . $self->tag() . $self->attr_string() . '>';
  $str .= '<![CDATA['  if  $self->{_cdata};
  $str .= $content;
  $str .= ']]>'  if $self->{_cdata};
  $str .= '</'. $self->tag() . '>';
  $str .= "\n";
  return $str;
}


##
# auto_dispatch -- called by AUTOLOAD, and anyone else who wants to
# mimic the shortcut syntax
#

lib/XML/Comma/Parsing/PurePerl.pm  view on Meta::CPAN

    $self->{_el_stack} = undef;
    XML::Comma::Log->err 
        ( 'PARSE_ERR', $@, undef,
          "(in '$context' at " . $self->pos_line_and_column() . ")\n" );
  }
  $self->{_el_stack} = undef;
}


sub raw_append {}
sub cdata_wrap {}

####
# document parsing
####

sub handle_document {
  my ( $self, $read_args ) = @_;
  my $doc;
  eval {
    # prolog and outermost envelope

lib/XML/Comma/Parsing/PurePerl.pm  view on Meta::CPAN

          $el->finish_initial_read ( $self );
          pop @{$self->{_el_stack}};
        } else {
          $el->raw_append ( $string );
        }
        return; # ok
      } else {
        die "mismatched tag: '$tag', '$special'\n";
      }
    } elsif ( $type == $CDATA ) {
      # cdata -- extract and append
      if ( $nested ) {
        die "cdata content '$string' found for nested element '$tag'\n";
      } else {
        $el->cdata_wrap();
        $el->raw_append ( $special );
      }
    } elsif ( $type == $DOCTYPE ) {
      # doctype -- throw an error
      die "doctype after prolog\n";
    } elsif ( $type == $DONE ) {
      unless ( $self->{_in_include} ) {
        # finished prematurely
        die "reached end of document unexpectedly\n";
      }

lib/XML/Comma/Parsing/PurePerl.pm  view on Meta::CPAN

}

sub bang_instruction {
  my $self = shift();
  my $next = $self->get_chars(2);
  if ( $next eq '--' ) {
    return $self->comment()
  } elsif ( $next eq 'DO' and $self->get_chars(5) eq 'CTYPE' ) {
    return $self->doctype();
  } elsif ( $next eq '[C' and $self->get_chars(5) eq 'DATA[' ) {
    return $self->cdata();
  } else {
    die "unrecognized tag, '<!$next'";
  }
}

sub doctype {
  my $self = shift();
  my $c;
  while ( defined ($c = $self->get_c()) ) {
    if ( $c eq '>' ) {

lib/XML/Comma/Parsing/PurePerl.pm  view on Meta::CPAN

      } else {
        die "string '--' not allowed inside comments\n";
      }
    }
  }
  # if we get here, we've exited the while loop by overrunning the
  # end of our string
  die "reached end of document while inside a comment\n";
}

sub cdata {
  my $self = shift();
  my $c;
  while ( defined ($c = $self->get_c(1)) ) {
    if ( $c eq ']' ) {
      my $point = $self->{_wpos};
      if ( $self->get_c(1) eq ']' and $self->get_c(1) eq '>' ) {
        my $token_string = substr $self->{_string}, $self->{_pos},
          $self->{_wpos} - $self->{_pos};
        my $contents_string = substr $token_string, 9, length($token_string)-12;
        $self->{_pos} = $self->{_wpos};

lib/XML/Comma/Parsing/SAXEventParser.pm  view on Meta::CPAN

use XML::Comma::Util qw( dbg );
use Carp;

# _parser
# _current_element
# _element_stack
# _local_tag_stack
# _last_action
# _top_level_class
# _doc
# _inside_cdata
# _from_file

sub new {
  my ( $class, %arg ) = @_; my $self = {}; bless ( $self, $class );
  $self->{_top_level_class} = $arg{top_level_class} || 'XML::Comma::Doc';
  $self->{_element_stack} = [];
  $self->{_local_tag_stack} = [];
  $self->{_inside_cdata} = 0;
  $self->{_last_action} = '';

  my $doc;

  $self->{_parser} = XML::Parser::PerlSAX->new ( Handler=>$self );

  if ( $arg{block} ) {
    $doc = $self->{_parser}->parse ( $arg{block} );
  } elsif ( $arg{filename} ) {
    $self->{_from_file} = $arg{filename};

lib/XML/Comma/Parsing/SAXEventParser.pm  view on Meta::CPAN

}

sub end_document {
  return $_[0]->{_doc};
}

sub characters {
  my ( $self, $chars ) = @_;
#  dbg "curr", $self->{_current_element} || '';
#  dbg "chars", $chars->{Data};
  if ( ! $self->{_inside_cdata} ) {
    $chars->{Data} =~ s/\&/\&amp\;/g ;
    $chars->{Data} =~ s/\</\&lt\;/g  ;
    $chars->{Data} =~ s/\>/\&gt\;/g  ;
  }
  $self->{_current_element}->raw_append 
    ( Unicode::String::utf8($chars->{Data})->latin1() );
#  $self->{_current_element}->raw_append ( $chars->{Data} );
  $self->{_last_action} = 'characters';
}

sub start_cdata {
  $_[0]->{_inside_cdata} = 1;
  $_[0]->{_current_element}->start_cdata();
}

sub end_cdata {
  $_[0]->{_inside_cdata} = 0;
}


sub tag_append_string {
  my ( $self, $el, $close ) = @_;
  if ( $close ) {
    return '</' . $el->{Name} . '>';
  } else {
    my $el_string = '<' . $el->{Name};
    foreach my $att ( keys %{$el->{Attributes}} ) {

lib/XML/Comma/Parsing/SimpleC.pm  view on Meta::CPAN

    my $context = join '/', map { $_->tag() } $self->down_tree_branch();
    undef @{$self->_el_stack()};
    XML::Comma::Log->err 
        ( 'PARSE_ERR', $@, undef, 
          "(in '$context' at " . $self->pos_line_and_column() . ")\n" );
  }
}


sub raw_append {}
sub cdata_wrap {}

####
# document parsing
####

sub handle_document {
  my ( $self, $read_args ) = @_;
  my $doc;
  my $file = $self->from_file();
  eval {

lib/XML/Comma/Parsing/SimpleC.pm  view on Meta::CPAN

          $el->finish_initial_read ( $self );
          pop @{$self->_el_stack()};
        } else {
          $el->raw_append ( $string );
        }
        return; # ok
      } else {
        die "mismatched tag: '$tag', '$special'\n";
      }
    } elsif ( $type == $CDATA ) {
      # cdata -- extract and append
      if ( $nested ) {
        die "cdata content '$string' found for nested element '$tag'\n";
      } else {
        $el->cdata_wrap();
        $el->raw_append ( $special );
      }
    } elsif ( $type == $DOCTYPE ) {
      # doctype -- throw an error
      die "doctype after prolog\n";
    } elsif ( $type == $DONE ) {
      unless ( $self->_in_include() ) {
        # finished prematurely
        die "reached end of document unexpectedly\n";
      }

lib/XML/Comma/Parsing/SimpleC.pm  view on Meta::CPAN

void eat_whitespace ( SV* self );
void next_token ( SV* self );
void done_return ( void );
void b_token ( Cobj* cobj );
void open_tag ( Cobj* cobj );
void close_tag ( Cobj* cobj );
void processing_instruction ( Cobj* cobj );
void bang_instruction ( Cobj* cobj );
void doctype ( Cobj* cobj );
void comment ( Cobj* cobj );
void cdata ( Cobj* cobj );
void text ( Cobj* cobj );
char t_get_c ( Cobj* cobj );

//----------------------------------------

SV* _c_new ( char* class, char* string, char* from_file, char* doc_class, int in_include ) {
  Cobj*   cobj = malloc ( sizeof(Cobj) );
  SV*     obj_ref = newSViv(0);
  SV*     obj = newSVrv ( obj_ref, class );

lib/XML/Comma/Parsing/SimpleC.pm  view on Meta::CPAN

    return comment(cobj);
  } else if ( (c == 'D') && (d == 'O') &&
              (*(cobj->wpos++) == 'C') && (*(cobj->wpos++) == 'T') &&
              (*(cobj->wpos++) == 'Y') && (*(cobj->wpos++) == 'P') &&
              (*(cobj->wpos++) == 'E') ) {
    return doctype(cobj);
  } else if ( (c == '[') && (d == 'C') &&
              (*(cobj->wpos++) == 'D') && (*(cobj->wpos++) == 'A') &&
              (*(cobj->wpos++) == 'T') && (*(cobj->wpos++) == 'A') &&
              (*(cobj->wpos++) == '[') ) {
    return cdata(cobj);
  } else {
    croak ( "bad <! tag\n" );
  }
}

void doctype ( Cobj* cobj ) {
  char c;
  Inline_Stack_Vars;
  while ( (c = t_get_c(cobj)) != '\0' ) {
    if ( c == '>' ) {

lib/XML/Comma/Parsing/SimpleC.pm  view on Meta::CPAN

    Inline_Stack_Push(sv_2mortal(newSViv(3))); //COMMENT
    Inline_Stack_Push(sv_2mortal(newSVpvn(cobj->pos,cobj->wpos-cobj->pos)));
    Inline_Stack_Done;
    cobj->pos = cobj->wpos;
    Inline_Stack_Return ( 2 );
  } else {
    croak ( "string '--' not allowed inside comments\n" );
  }
}

void cdata ( Cobj* cobj ) {
  char c;
  Inline_Stack_Vars;
  cobj->wpos = strstr ( cobj->pos, "]]>" );

  if ( cobj->wpos == NULL ) {
    croak ( "reached end of document while inside <![CDATA...\n" );
  }

  cobj->wpos += 3;
  Inline_Stack_Reset;

lib/XML/Comma/docs/guide.html  view on Meta::CPAN

$doc-&gt;element('bio')-&gt;get ( unescape=&gt;1 );
</pre>

<p> Our other option, as mentioned above, is to "wrap" the bio
element's content in an XML CDATA section. The CDATA envelope forces
an XML parser to treat the characters inside it as plain text. Comma
allows an element to be flagged as CDATA-fied, meaning that on output
the entire contents will be wrapped in a CDATA section. Comma treats
this CDATA facility as high-impact and coarse-grained. As a result the
declaration is a one-way street: once a CDATA element, always a CDATA
element. The <b>cdata_wrap()</b> method flips the switch, so to
speak. </p>

<pre>
# configure the bio element so that it always CDATA-wraps its content
$doc-&gt;element('bio')-&gt;cdata_wrap();
# now we can set() with impunity
$doc-&gt;set ( $messy_html );
</pre>

<p> The <b>to_string()</b> method on the CDATA-set element will
produce output that looks something like this: </p>

<pre>
&lt;bio&gt;&lt;![CDATA[Kwin is a programmer who likes &lt;a href="http://use.perl.org"&gt;Perl&lt;/a&gt;
and &lt;a href="http://www.motorola.com/mcu"&gt;6812&lt;/a&gt;

lib/XML/Comma/docs/guide.html  view on Meta::CPAN

    <li>$string = $el-&gt;get_location()</li>
  </ul></li>

  <li>simple elements<ul>
    <li>$string = $el-&gt;get( [unescape=&gt;], [%args] )</li>
    <li>$string = $el-&gt;get_without_default()</li>
    <li>$string = $el-&gt;set ( $string, [escape=&gt;], [%args] )</li>
    <li>$string = $el-&gt;append ( $more_string )</li>
    <li>$string = $el-&gt;validate()</li>
    <li>$string = $el-&gt;validate_content ( $string )</li>
    <li>1 = $el-&gt;cdata_wrap();</li>
  </ul></li>

  <li>nested elements<ul>
    <li>@els/[] = $el-&gt;elements ( [@tags] )</li>
    <li>$el = $el-&gt;element ( $tag )</li>
    <li>$el = $el-&gt;add_element ( $tag )</li>
    <li>$el = $el-&gt;delete_element ( $tag )</li>
    <li>@strings/[] = $el-&gt;elements_group_get ( $tag )</li>
    <li>@strings/[] = $el-&gt;elements_group_add ( $tag, @strings )</li>
    <li>@els/[] = $el-&gt;elements_group_delete ( $tag, @strings ) </li>

t/parser.t  view on Meta::CPAN

use XML::Comma::Util qw( dbg );

my $doc_block = <<END;
<?xml version="1.0"?>
<!-- dummy comment -->
<_test_parser>
  <sing attr1="foo" attr2="bar"><a href="/foo">some link text</a></sing>
</_test_parser>
END

my $doc_cdata_block = <<END;
<?xml version="1.0"?>
<!-- dummy comment -->
<_test_parser>
  <sing><![CDATA[ a cdata string ]]></sing>
</_test_parser>
END

###########

use Test::More 'no_plan';

##
# a bunch of simple parser tests
##

t/parser.t  view on Meta::CPAN

  XML::Comma->parser()->parse ( block=>'<a' );
};
ok( $@ );

# unclosed comment
eval {
  XML::Comma->parser()->parse ( block=>'<a><!-- foo </a>' );
};
ok( $@ );

# unclosed cdata
eval {
  XML::Comma->parser()->parse ( block=>'<a><![CDATA[ foo </a>' );
};
ok( $@ );

# unclosed processing instruction
eval {
  XML::Comma->parser()->parse ( block=>'<a><? ... </a>' );
};
ok( $@ );

t/parser.t  view on Meta::CPAN

eval {
  XML::Comma->parser()->parse ( block=>'<a><!-- illegal -- oops --></a>' );
};
ok( $@ );

eval {
  XML::Comma->parser()->parse ( block=>'<a><!-  and other things' );
};
ok( $@ );

# cdata
eval {
  XML::Comma->parser()->parse (block=>'<a><![CDATA[ hmmm & > < <foo> ]]></a>');
};
ok ( ! $@ );

# tricky cdata ending
eval {
  XML::Comma->parser()->parse (block=>'<a><![CDATA[ hmmm & > < <foo> ]   ]]]></a>');
};
ok ( ! $@ );

# trailing junk after root element
eval {
  XML::Comma->parser()->parse ( block=>'<a><b>foo</b></a> more' );
};
ok ( $@ );

t/parser.t  view on Meta::CPAN

ok($def);

## create a doc, so we can test what we get in elements
my $doc = XML::Comma::Doc->new ( block=>$doc_block );
ok($doc);
ok($doc->sing() eq '<a href="/foo">some link text</a>');
# and attributes
ok($doc->element('sing')->get_attr('attr1') eq 'foo');
ok($doc->element('sing')->get_attr('attr2') eq 'bar');

my $doc_cd = XML::Comma::Doc->new ( block=>$doc_cdata_block );
ok($doc_cd->sing() eq 'a cdata string');

$doc->element ( 'included_element_one' )->set ( 'foo bar' );
ok("didn't die - ok");
ok($doc->element ( 'included_element_one' )->get() eq 'foo bar');

$doc->element ( 'included_element_two' )->set ( 'b' );
ok("didn't die - ok");
ok($doc->element ( 'included_element_two' )->get() eq 'b');

ok(join ( ',', sort $doc->element ( 'included_element_two' )->enum_options() ) eq 'a,b,c');



( run in 0.744 second using v1.01-cache-2.11-cpan-454fe037f31 )