libxml-perl

 view release on metacpan or  search on metacpan

lib/XML/Parser/PerlSAX.pm  view on Meta::CPAN


    return bless $self, $type;
}

sub parse {
    my $self = shift;

    die "XML::Parser::PerlSAX: parser instance ($self) already parsing\n"
	if (defined $self->{ParseOptions});

    # If there's one arg and it has no ref, it's a string
    my $args;
    if (scalar (@_) == 1 && !ref($_[0])) {
	$args = { Source => { String => shift } };
    } else {
	$args = (scalar (@_) == 1) ? shift : { @_ };
    }

    my $parse_options = { %$self, %$args };
    $self->{ParseOptions} = $parse_options;

    # ensure that we have at least one source
    if (!defined $parse_options->{Source}
	|| !(defined $parse_options->{Source}{String}
	     || defined $parse_options->{Source}{ByteStream}
	     || defined $parse_options->{Source}{SystemId})) {
	die "XML::Parser::PerlSAX: no source defined for parse\n";
    }

    # assign default Handler to any undefined handlers
    if (defined $parse_options->{Handler}) {
	$parse_options->{DocumentHandler} = $parse_options->{Handler}
	    if (!defined $parse_options->{DocumentHandler});
	$parse_options->{DTDHandler} = $parse_options->{Handler}
	    if (!defined $parse_options->{DTDHandler});
	$parse_options->{EntityResolver} = $parse_options->{Handler}
	    if (!defined $parse_options->{EntityResolver});
    }

    my @handlers;
    if (defined $parse_options->{DocumentHandler}) {
	# cache DocumentHandler in self for callbacks
	$self->{DocumentHandler} = $parse_options->{DocumentHandler};

	my $doc_h = $parse_options->{DocumentHandler};

	push (@handlers, Init => sub { $self->_handle_init(@_) } )
	    if (UNIVERSAL::can($doc_h, 'start_document'));
	push (@handlers, Final => sub { $self->_handle_final(@_) } )
	    if (UNIVERSAL::can($doc_h, 'end_document'));
	push (@handlers, Start => sub { $self->_handle_start(@_) } )
	    if (UNIVERSAL::can($doc_h, 'start_element'));
	push (@handlers, End => sub { $self->_handle_end(@_) } )
	    if (UNIVERSAL::can($doc_h, 'end_element'));
	push (@handlers, Char => sub { $self->_handle_char(@_) } )
	    if (UNIVERSAL::can($doc_h, 'characters'));
	push (@handlers, Proc => sub { $self->_handle_proc(@_) } )
	    if (UNIVERSAL::can($doc_h, 'processing_instruction'));
	push (@handlers, Comment => sub { $self->_handle_comment(@_) } )
	    if (UNIVERSAL::can($doc_h, 'comment'));
	push (@handlers, CdataStart => sub { $self->_handle_cdatastart(@_) } )
	    if (UNIVERSAL::can($doc_h, 'start_cdata'));
	push (@handlers, CdataEnd => sub { $self->_handle_cdataend(@_) } )
	    if (UNIVERSAL::can($doc_h, 'end_cdata'));
	if (UNIVERSAL::can($doc_h, 'entity_reference')) {
	    push (@handlers, Default => sub { $self->_handle_default(@_) } );
	    $self->{UseEntRefs} = 1;
	}
    }

    if (defined $parse_options->{DTDHandler}) {
	# cache DTDHandler in self for callbacks
	$self->{DTDHandler} = $parse_options->{DTDHandler};

	my $dtd_h = $parse_options->{DTDHandler};

	push (@handlers, Notation => sub { $self->_handle_notation(@_) } )
	    if (UNIVERSAL::can($dtd_h, 'notation_decl'));
	push (@handlers, Unparsed => sub { $self->_handle_unparsed(@_) } )
	    if (UNIVERSAL::can($dtd_h, 'unparsed_entity_decl'));
	push (@handlers, Entity => sub { $self->_handle_entity(@_) } )
	    if ($self->{UseEntRefs}
		|| UNIVERSAL::can($dtd_h, 'entity_decl'));
	push (@handlers, Element => sub { $self->_handle_element(@_) } )
	    if (UNIVERSAL::can($dtd_h, 'element_decl'));
	push (@handlers, Attlist => sub { $self->_handle_attlist(@_) } )
	    if (UNIVERSAL::can($dtd_h, 'attlist_decl'));
	push (@handlers, Doctype => sub { $self->_handle_doctype(@_) } )
	    if (UNIVERSAL::can($dtd_h, 'doctype_decl'));
	push (@handlers, XMLDecl => sub { $self->_handle_xmldecl(@_) } )
	    if (UNIVERSAL::can($dtd_h, 'xml_decl'));
    }

    
    if (defined $parse_options->{EntityResolver}) {
	# cache EntityResolver in self for callbacks
	$self->{EntityResolver} = $parse_options->{EntityResolver};

	my $er = $parse_options->{EntityResolver};

	push (@handlers, ExternEnt => sub { $self->_handle_extern_ent(@_) } )
	    if (UNIVERSAL::can($er, 'resolve_entity'));
    }

    my @xml_parser_options;
    if ($self->{UseEntRefs}) {
	@xml_parser_options = ( NoExpand => 1,
				Handlers => { @handlers } );
    } else {
	@xml_parser_options = ( Handlers => { @handlers } );
    }

    push (@xml_parser_options,
	  ProtocolEncoding => $self->{ParseOptions}{Source}{Encoding})
	if (defined $self->{ParseOptions}{Source}{Encoding});

    my $parser = new XML::Parser(@xml_parser_options);
    my $result;

    if (defined $self->{ParseOptions}{Source}{ByteStream}) {
	$result = $parser->parse($self->{ParseOptions}{Source}{ByteStream});
    } elsif (defined $self->{ParseOptions}{Source}{String}) {
	$result = $parser->parse($self->{ParseOptions}{Source}{String});
    } elsif (defined $self->{ParseOptions}{Source}{SystemId}) {

lib/XML/Parser/PerlSAX.pm  view on Meta::CPAN

    my $self = shift;
    my $expat = shift;
    my $element = shift;

    my @properties;
    if ($self->{ParseOptions}{UseAttributeOrder}) {
	# Capture order and defined() status for attributes
	my $ii;

	my $order = [];
	for ($ii = 0; $ii < $#_; $ii += 2) {
	    push @$order, $_[$ii];
	}

	push @properties, 'AttributeOrder', $order;

	# Divide by two because XML::Parser counts both attribute name
	# and value within it's index
	push @properties, 'Defaulted', ($expat->specified_attr() / 2);
    }

    $self->{DocumentHandler}->start_element( { Name => $element,
					       Attributes => { @_ },
					       @properties } );
}

# Expat `End' callback.  Forwards the closing element name to the cached
# DocumentHandler's end_element() method as a { Name => ... } hash.
# The expat object is received but not used.
sub _handle_end {
    my ($self, $expat, $element) = @_;

    my %event = (Name => $element);
    $self->{DocumentHandler}->end_element(\%event);
}

# Expat `Char' callback.  Hands the received text to the cached
# DocumentHandler's characters() method as a { Data => ... } hash.
# The expat object is received but not used.
sub _handle_char {
    my ($self, $expat, $string) = @_;

    my %event = (Data => $string);
    $self->{DocumentHandler}->characters(\%event);
}

# Expat `Proc' callback.  Passes a processing instruction to the cached
# DocumentHandler's processing_instruction() method as a hash with
# `Target' and `Data' keys.  The expat object is received but not used.
sub _handle_proc {
    my ($self, $expat, $target, $data) = @_;

    my %event = (
	Target => $target,
	Data   => $data,
    );
    $self->{DocumentHandler}->processing_instruction(\%event);
}

# Expat `Comment' callback.  Delivers the comment text to the cached
# DocumentHandler's comment() method as a { Data => ... } hash.
# The expat object is received but not used.
sub _handle_comment {
    my ($self, $expat, $data) = @_;

    my %event = (Data => $data);
    $self->{DocumentHandler}->comment(\%event);
}

# Expat `CdataStart' callback.  Signals the start of a CDATA section by
# calling the cached DocumentHandler's start_cdata() method with an
# empty property hash.  The expat object argument is ignored.
sub _handle_cdatastart {
    my ($self) = @_;

    my $handler = $self->{DocumentHandler};
    $handler->start_cdata({});
}

# Expat `CdataEnd' callback.  Signals the end of a CDATA section by
# calling the cached DocumentHandler's end_cdata() method with an
# empty property hash.  The expat object argument is ignored.
sub _handle_cdataend {
    my ($self) = @_;

    my $handler = $self->{DocumentHandler};
    $handler->end_cdata({});
}

# Expat `Default' callback.  Everything that no other handler claims is
# routed here, so most of what arrives is ignored.  The only input acted
# upon is a string that is exactly one `&NAME;' entity reference.
#
# For such a reference the entry stored under that name in
# $self->{EntRefs} is passed to the DocumentHandler's entity_reference()
# method; when no entry is stored, a minimal { Name => NAME } hash is
# passed instead.
sub _handle_default {
    my ($self, $expat, $string) = @_;

    if ($string =~ /^&($name_re);$/) {
	# copy the capture right away so later code cannot clobber it
	my $ent_name = $1;
	my $known = $self->{EntRefs}{$ent_name};

	$self->{DocumentHandler}->entity_reference(
	    defined $known ? $known : { Name => $ent_name });
    }
}

###
### DTDHandler methods
###

# Expat `Notation' callback.  Reports a notation declaration to the
# cached DTDHandler's notation_decl() method.
#
# Arguments after $self arrive as: expat object (unused), notation
# name, base, system id, public id.  `Name' is always present in the
# hash passed on; `Base', `SystemId' and `PublicId' are included only
# when the corresponding argument is defined.
sub _handle_notation {
    my ($self, $expat, $notation, $base, $sysid, $pubid) = @_;

    my %decl = (Name => $notation);
    $decl{Base}     = $base  if defined $base;
    $decl{SystemId} = $sysid if defined $sysid;
    $decl{PublicId} = $pubid if defined $pubid;

    $self->{DTDHandler}->notation_decl(\%decl);
}

# Expat `Unparsed' callback.  Reports an unparsed entity declaration to
# the cached DTDHandler's unparsed_entity_decl() method.
#
# Arguments after $self arrive as: expat object (unused), entity name,
# base, system id, public id.  `Name' and `SystemId' are always present
# in the hash passed on (SystemId even when undefined); `Base' and
# `PublicId' are included only when the corresponding argument is
# defined.
sub _handle_unparsed {
    my ($self, $expat, $entity, $base, $sysid, $pubid) = @_;

    my %decl = (
	Name     => $entity,
	SystemId => $sysid,
    );
    $decl{Base}     = $base  if defined $base;
    $decl{PublicId} = $pubid if defined $pubid;

    $self->{DTDHandler}->unparsed_entity_decl(\%decl);
}

lib/XML/Parser/PerlSAX.pm  view on Meta::CPAN


No properties defined.

=item end_document

Receive notification of the end of a document.

No properties defined.

=item start_element

Receive notification of the beginning of an element.

 Name             The element type name.
 Attributes       A hash containing the attributes attached to the
                  element, if any.

The `C<Attributes>' hash contains only string values.

If the `C<UseAttributeOrder>' parser option is true, the following
properties are also passed to `C<start_element>':

 AttributeOrder   An array of attribute names in the order they were
                  specified, followed by the defaulted attribute
                  names.
 Defaulted        The index number of the first defaulted attribute in
                  `AttributeOrder'.  If this index is equal to the
                  length of `AttributeOrder', there were no defaulted
                  values.

Note to C<XML::Parser> users:  `C<Defaulted>' will be half the value of
C<XML::Parser::Expat>'s `C<specified_attr()>' function because only
attribute names are provided, not their values.


=item end_element

Receive notification of the end of an element.

 Name             The element type name.

=item characters

Receive notification of character data.

 Data             The characters from the XML document.

=item processing_instruction

Receive notification of a processing instruction. 

 Target           The processing instruction target. 
 Data             The processing instruction data, if any.

=item comment

Receive notification of a comment.

 Data             The comment data, if any.

=item start_cdata

Receive notification of the start of a CDATA section.

No properties defined.

=item end_cdata

Receive notification of the end of a CDATA section.

No properties defined.

=item entity_reference

Receive notification of an internal entity reference.  If this handler
is defined, internal entities will not be expanded and not passed to
the `C<characters()>' handler.  If this handler is not defined,
internal entities will be expanded if possible and passed to the
`C<characters()>' handler.

 Name             The entity reference name
 Value            The entity reference value

=back

=head2 DTDHandler methods

=over 4

=item notation_decl

Receive notification of a notation declaration event.

 Name             The notation name.
 PublicId         The notation's public identifier, if any.
 SystemId         The notation's system identifier, if any.
 Base             The base for resolving a relative URI, if any.

=item unparsed_entity_decl

Receive notification of an unparsed entity declaration event.

 Name             The unparsed entity's name.
 SystemId         The entity's system identifier.
 PublicId         The entity's public identifier, if any.
 Base             The base for resolving a relative URI, if any.

=item entity_decl

Receive notification of an entity declaration event.

 Name             The entity name.
 Value            The entity value, if any.
 PublicId         The entity's public identifier, if any.
 SystemId         The entity's system identifier, if any.
 Notation         The notation declared for this entity, if any.

For internal entities, the `C<Value>' parameter will contain the value
and the `C<PublicId>', `C<SystemId>', and `C<Notation>' will be
undefined.  For external entities, the `C<Value>' parameter will be
undefined, the `C<SystemId>' parameter will have the system id, the
`C<PublicId>' parameter will have the public id if it was provided (it
will be undefined otherwise), the `C<Notation>' parameter will contain
the notation name for unparsed entities.  If this is a parameter entity
declaration, then a '%' will be prefixed to the entity name.

Note that `C<entity_decl()>' and `C<unparsed_entity_decl()>' overlap.



( run in 1.477 second using v1.01-cache-2.11-cpan-140bd7fdf52 )