libxml-perl
view release on metacpan or search on metacpan
lib/XML/Parser/PerlSAX.pm view on Meta::CPAN
return bless $self, $type;
}
sub parse {
my $self = shift;
die "XML::Parser::PerlSAX: parser instance ($self) already parsing\n"
if (defined $self->{ParseOptions});
# If there's one arg and it has no ref, it's a string
my $args;
if (scalar (@_) == 1 && !ref($_[0])) {
$args = { Source => { String => shift } };
} else {
$args = (scalar (@_) == 1) ? shift : { @_ };
}
my $parse_options = { %$self, %$args };
$self->{ParseOptions} = $parse_options;
# ensure that we have at least one source
if (!defined $parse_options->{Source}
|| !(defined $parse_options->{Source}{String}
|| defined $parse_options->{Source}{ByteStream}
|| defined $parse_options->{Source}{SystemId})) {
die "XML::Parser::PerlSAX: no source defined for parse\n";
}
# assign default Handler to any undefined handlers
if (defined $parse_options->{Handler}) {
$parse_options->{DocumentHandler} = $parse_options->{Handler}
if (!defined $parse_options->{DocumentHandler});
$parse_options->{DTDHandler} = $parse_options->{Handler}
if (!defined $parse_options->{DTDHandler});
$parse_options->{EntityResolver} = $parse_options->{Handler}
if (!defined $parse_options->{EntityResolver});
}
my @handlers;
if (defined $parse_options->{DocumentHandler}) {
# cache DocumentHandler in self for callbacks
$self->{DocumentHandler} = $parse_options->{DocumentHandler};
my $doc_h = $parse_options->{DocumentHandler};
push (@handlers, Init => sub { $self->_handle_init(@_) } )
if (UNIVERSAL::can($doc_h, 'start_document'));
push (@handlers, Final => sub { $self->_handle_final(@_) } )
if (UNIVERSAL::can($doc_h, 'end_document'));
push (@handlers, Start => sub { $self->_handle_start(@_) } )
if (UNIVERSAL::can($doc_h, 'start_element'));
push (@handlers, End => sub { $self->_handle_end(@_) } )
if (UNIVERSAL::can($doc_h, 'end_element'));
push (@handlers, Char => sub { $self->_handle_char(@_) } )
if (UNIVERSAL::can($doc_h, 'characters'));
push (@handlers, Proc => sub { $self->_handle_proc(@_) } )
if (UNIVERSAL::can($doc_h, 'processing_instruction'));
push (@handlers, Comment => sub { $self->_handle_comment(@_) } )
if (UNIVERSAL::can($doc_h, 'comment'));
push (@handlers, CdataStart => sub { $self->_handle_cdatastart(@_) } )
if (UNIVERSAL::can($doc_h, 'start_cdata'));
push (@handlers, CdataEnd => sub { $self->_handle_cdataend(@_) } )
if (UNIVERSAL::can($doc_h, 'end_cdata'));
if (UNIVERSAL::can($doc_h, 'entity_reference')) {
push (@handlers, Default => sub { $self->_handle_default(@_) } );
$self->{UseEntRefs} = 1;
}
}
if (defined $parse_options->{DTDHandler}) {
# cache DTDHandler in self for callbacks
$self->{DTDHandler} = $parse_options->{DTDHandler};
my $dtd_h = $parse_options->{DTDHandler};
push (@handlers, Notation => sub { $self->_handle_notation(@_) } )
if (UNIVERSAL::can($dtd_h, 'notation_decl'));
push (@handlers, Unparsed => sub { $self->_handle_unparsed(@_) } )
if (UNIVERSAL::can($dtd_h, 'unparsed_entity_decl'));
push (@handlers, Entity => sub { $self->_handle_entity(@_) } )
if ($self->{UseEntRefs}
|| UNIVERSAL::can($dtd_h, 'entity_decl'));
push (@handlers, Element => sub { $self->_handle_element(@_) } )
if (UNIVERSAL::can($dtd_h, 'element_decl'));
push (@handlers, Attlist => sub { $self->_handle_attlist(@_) } )
if (UNIVERSAL::can($dtd_h, 'attlist_decl'));
push (@handlers, Doctype => sub { $self->_handle_doctype(@_) } )
if (UNIVERSAL::can($dtd_h, 'doctype_decl'));
push (@handlers, XMLDecl => sub { $self->_handle_xmldecl(@_) } )
if (UNIVERSAL::can($dtd_h, 'xml_decl'));
}
if (defined $parse_options->{EntityResolver}) {
# cache EntityResolver in self for callbacks
$self->{EntityResolver} = $parse_options->{EntityResolver};
my $er = $parse_options->{EntityResolver};
push (@handlers, ExternEnt => sub { $self->_handle_extern_ent(@_) } )
if (UNIVERSAL::can($er, 'resolve_entity'));
}
my @xml_parser_options;
if ($self->{UseEntRefs}) {
@xml_parser_options = ( NoExpand => 1,
Handlers => { @handlers } );
} else {
@xml_parser_options = ( Handlers => { @handlers } );
}
push (@xml_parser_options,
ProtocolEncoding => $self->{ParseOptions}{Source}{Encoding})
if (defined $self->{ParseOptions}{Source}{Encoding});
my $parser = new XML::Parser(@xml_parser_options);
my $result;
if (defined $self->{ParseOptions}{Source}{ByteStream}) {
$result = $parser->parse($self->{ParseOptions}{Source}{ByteStream});
} elsif (defined $self->{ParseOptions}{Source}{String}) {
$result = $parser->parse($self->{ParseOptions}{Source}{String});
} elsif (defined $self->{ParseOptions}{Source}{SystemId}) {
lib/XML/Parser/PerlSAX.pm view on Meta::CPAN
my $self = shift;
my $expat = shift;
my $element = shift;
my @properties;
if ($self->{ParseOptions}{UseAttributeOrder}) {
# Capture order and defined() status for attributes
my $ii;
my $order = [];
for ($ii = 0; $ii < $#_; $ii += 2) {
push @$order, $_[$ii];
}
push @properties, 'AttributeOrder', $order;
# Divide by two because XML::Parser counts both attribute name
# and value within it's index
push @properties, 'Defaulted', ($expat->specified_attr() / 2);
}
$self->{DocumentHandler}->start_element( { Name => $element,
Attributes => { @_ },
@properties } );
}
# XML::Parser `End' callback.
#
# Arguments: the PerlSAX parser object, the Expat object (unused) and
# the element type name.  The name is forwarded to the cached
# DocumentHandler's end_element() in a hash with the single key `Name'.
# Returns whatever end_element() returns.
sub _handle_end {
    my ($self, $expat, $name) = @_;

    my $event = { Name => $name };
    return $self->{DocumentHandler}->end_element($event);
}
# XML::Parser `Char' callback.
#
# Arguments: the PerlSAX parser object, the Expat object (unused) and
# the character data string.  The string is handed to the cached
# DocumentHandler's characters() in a hash with the single key `Data'.
# Returns whatever characters() returns.
sub _handle_char {
    my ($self, $expat, $text) = @_;

    my $event = { Data => $text };
    return $self->{DocumentHandler}->characters($event);
}
# XML::Parser `Proc' callback.
#
# Arguments: the PerlSAX parser object, the Expat object (unused), the
# processing instruction target and its data.  Both values are passed
# to the cached DocumentHandler's processing_instruction() under the
# keys `Target' and `Data'; `Data' is included even when undefined.
# Returns whatever processing_instruction() returns.
sub _handle_proc {
    my ($self, $expat, $target, $data) = @_;

    my $event = {
        Target => $target,
        Data   => $data,
    };
    return $self->{DocumentHandler}->processing_instruction($event);
}
# XML::Parser `Comment' callback.
#
# Arguments: the PerlSAX parser object, the Expat object (unused) and
# the comment text.  The text is handed to the cached DocumentHandler's
# comment() in a hash with the single key `Data'.
# Returns whatever comment() returns.
sub _handle_comment {
    my ($self, $expat, $text) = @_;

    my $event = { Data => $text };
    return $self->{DocumentHandler}->comment($event);
}
# XML::Parser `CdataStart' callback.
#
# Arguments: the PerlSAX parser object and the Expat object (unused).
# Calls the cached DocumentHandler's start_cdata() with an empty hash,
# since the event carries no properties.
# Returns whatever start_cdata() returns.
sub _handle_cdatastart {
    my ($self) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->start_cdata({});
}
# XML::Parser `CdataEnd' callback.
#
# Arguments: the PerlSAX parser object and the Expat object (unused).
# Calls the cached DocumentHandler's end_cdata() with an empty hash,
# since the event carries no properties.
# Returns whatever end_cdata() returns.
sub _handle_cdataend {
    my ($self) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->end_cdata({});
}
# XML::Parser `Default' callback.
#
# The Default handler is given every piece of the document that no
# other registered handler consumed, so most of what arrives here is
# of no interest.  Only a string consisting of exactly one `&NAME;'
# entity reference is acted upon; everything else is silently ignored.
#
# For a reference, the record stored under that name in
# $self->{EntRefs} is passed to the DocumentHandler's
# entity_reference(); when no record is stored, a hash holding just
# the `Name' is passed instead.
# NOTE(review): $self->{EntRefs} is not populated in this sub --
# presumably the entity declaration handler fills it; confirm there.
sub _handle_default {
    my ($self, $expat, $text) = @_;

    if ($text =~ /^&($name_re);$/) {
        # Copy the capture before doing anything else with it.
        my $ent_name = $1;

        my $reference = $self->{EntRefs}{$ent_name};
        $reference = { Name => $ent_name }
            unless defined $reference;

        $self->{DocumentHandler}->entity_reference($reference);
    }
}
###
### DTDHandler methods
###
# XML::Parser `Notation' callback.
#
# Arguments: the PerlSAX parser object, the Expat object (unused), the
# notation name, the base, the system identifier and the public
# identifier.  Builds the property hash for the cached DTDHandler's
# notation_decl(): `Name' is always present, while `Base', `SystemId'
# and `PublicId' are included only when the parser supplied a defined
# value.  Returns whatever notation_decl() returns.
sub _handle_notation {
    my ($self, $expat, $name, $base, $sysid, $pubid) = @_;

    my %decl = (Name => $name);
    $decl{Base}     = $base  if defined $base;
    $decl{SystemId} = $sysid if defined $sysid;
    $decl{PublicId} = $pubid if defined $pubid;

    return $self->{DTDHandler}->notation_decl(\%decl);
}
# XML::Parser `Unparsed' callback.
#
# Arguments: the PerlSAX parser object, the Expat object (unused), the
# entity name, the base, the system identifier, the public identifier
# and the notation name.  Builds the property hash for the cached
# DTDHandler's unparsed_entity_decl():
#
#   Name      always present
#   SystemId  always present (even when undefined)
#   Base      only when defined
#   PublicId  only when defined
#   Notation  only when defined
#
# XML::Parser passes the notation name as the last argument of its
# Unparsed handler; it is forwarded as `Notation' so that the handler
# can tell which notation the entity was declared with instead of
# having that information silently discarded.
# Returns whatever unparsed_entity_decl() returns.
sub _handle_unparsed {
    my ($self, $expat, $entity, $base, $sysid, $pubid, $notation) = @_;

    my %decl = (Name => $entity, SystemId => $sysid);
    $decl{Base}     = $base     if defined $base;
    $decl{PublicId} = $pubid    if defined $pubid;
    $decl{Notation} = $notation if defined $notation;

    return $self->{DTDHandler}->unparsed_entity_decl(\%decl);
}
lib/XML/Parser/PerlSAX.pm view on Meta::CPAN
No properties defined.
=item end_document
Receive notification of the end of a document.
No properties defined.
=item start_element
Receive notification of the beginning of an element.
Name The element type name.
Attributes A hash containing the attributes attached to the
element, if any.
The `C<Attributes>' hash contains only string values.
If the `C<UseAttributeOrder>' parser option is true, the following
properties are also passed to `C<start_element>':
AttributeOrder An array of attribute names in the order they were
specified, followed by the defaulted attribute
names.
Defaulted The index number of the first defaulted attribute in
`AttributeOrder'. If this index is equal to the
length of `AttributeOrder', there were no defaulted
values.
Note to C<XML::Parser> users: `C<Defaulted>' will be half the value of
C<XML::Parser::Expat>'s `C<specified_attr()>' function because only
attribute names are provided, not their values.
=item end_element
Receive notification of the end of an element.
Name The element type name.
=item characters
Receive notification of character data.
Data The characters from the XML document.
=item processing_instruction
Receive notification of a processing instruction.
Target The processing instruction target.
Data The processing instruction data, if any.
=item comment
Receive notification of a comment.
Data The comment data, if any.
=item start_cdata
Receive notification of the start of a CDATA section.
No properties defined.
=item end_cdata
Receive notification of the end of a CDATA section.
No properties defined.
=item entity_reference
Receive notification of an internal entity reference. If this handler
is defined, internal entities will be neither expanded nor passed to
the `C<characters()>' handler. If this handler is not defined,
internal entities will be expanded if possible and passed to the
`C<characters()>' handler.
Name The entity reference name
Value The entity reference value
=back
=head2 DTDHandler methods
=over 4
=item notation_decl
Receive notification of a notation declaration event.
Name The notation name.
PublicId The notation's public identifier, if any.
SystemId The notation's system identifier, if any.
Base The base for resolving a relative URI, if any.
=item unparsed_entity_decl
Receive notification of an unparsed entity declaration event.
Name The unparsed entity's name.
SystemId The entity's system identifier.
PublicId The entity's public identifier, if any.
Base The base for resolving a relative URI, if any.
=item entity_decl
Receive notification of an entity declaration event.
Name The entity name.
Value The entity value, if any.
PublicId The entity's public identifier, if any.
SystemId The entity's system identifier, if any.
Notation The notation declared for this entity, if any.
For internal entities, the `C<Value>' parameter will contain the value
and the `C<PublicId>', `C<SystemId>', and `C<Notation>' will be
undefined. For external entities, the `C<Value>' parameter will be
undefined, the `C<SystemId>' parameter will have the system id, the
`C<PublicId>' parameter will have the public id if it was provided (it
will be undefined otherwise), the `C<Notation>' parameter will contain
the notation name for unparsed entities. If this is a parameter entity
declaration, then a '%' will be prefixed to the entity name.
Note that `C<entity_decl()>' and `C<unparsed_entity_decl()>' overlap.
( run in 1.477 second using v1.01-cache-2.11-cpan-140bd7fdf52 )